Created
November 8, 2022 14:04
-
-
Save kordless/66ef4f3d3b61d177fb3cb4e289d8d4fb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import openai | |
import numpy as np | |
from openai.embeddings_utils import get_embedding | |
openai.api_key = "TOKEN" | |
def gpt3_embedding(content, engine='text-similarity-ada-001'):
    """Request an embedding for ``content`` via ``openai.Embedding.create``.

    Characters that cannot be encoded as ASCII are silently dropped from
    ``content`` before the request is made.

    Args:
        content: Text to embed.
        engine: Value forwarded as the ``engine`` argument of the request.

    Returns:
        The ``'embedding'`` entry of the first item in the response's
        ``'data'`` list.
    """
    ascii_bytes = content.encode(encoding='ASCII', errors='ignore')
    ascii_text = ascii_bytes.decode()
    reply = openai.Embedding.create(input=ascii_text, engine=engine)
    first_item = reply['data'][0]
    return first_item['embedding']  # this is a normal list
# Snippets of Hacker News front-page text (titles, domains, points, comment
# counts); each entry is embedded and ranked against the search phrases.
fulltexts = [
    "Y Hacker News new | past | comments | ask | show | jobs | submit 1. Why No Roman Industrial Revolution? ",
    "(acoup.blog) 163 points by Tomte 2 hours ago | hide | 154 comments 2. Americans Will Soon Be Able to Renew Passports Online (cntraveler.com) 32 points by avonmach 48 minutes ago | hide | 8 comments 3. ",
    "A Things I Won't Work With: Azidoazide Azides, More or Less (science.org) 30 points by ColinWright 46 minutes ago | hide | 6 comments 4. ",
    "A Poll: Contractors, what is your hourly rate 187 points by sixhobbits 3 hours ago | hide | 188 comments 5. ",
    "A Hooking Go from Rust - Hitchhiker's Guide to the Go-Laxy (metalbear.co) 40 points by carride 1 hour ago | hide | 10 comments 6. ",
    "A Reducing methane is the fastest strategy available to reduce warming (state.gov) 31 points by nipponese 1 hour ago | hide | 21 comments 7. ",
    "Amid Chip Shortages, Companies Bet on RISC-V (allaboutcircuits.com) 74 points by tomclancy 3 hours ago | hide | 39 comments 8. ",
    "9. 10. A The Toxic Culture of Rejection in Computer Science (sigbed.org) 146 points by headalgorithm 5 hours ago | hide | 90 comments GitHub-Next (githubnext.com) 84 points by rahulpandita 3 hours ago | hide | 52 comments Oberon (2009) (ignorethecode.net) 169 points by Iproven 6 hours ago | hide | 94 comments 11. ",
    "A Historian Discovers a Prized Galileo Manuscript Was Forged (smithsonianmag.com) 39 points by Hooke 3 hours ago | hide | 8 comments 12. ",
    "Tell HN: Spammed by a Hacker News Enthusiast 214 points by ColinWright 5 hours ago | hide | 99 comments 13. ",
    "AMIT scientists discover neurons that light up whenever we see images of food (news.mit.edu) 23 points by mntn 2 hours ago | hide | 10 comments 14. ",
    "Why Xen Wasn't Hit by RETBleed on Intel CPUs (xcp-ng.org) 81 points by plam503711 6 hours ago | hide | 31 comments 15. ",
    "AI Love My PinePhone (jakob.space) 91 points by todsacerdoti 5 hours ago | hide | 74 comments 16. A Next wooden chair could arrive flat, then dry into a 3D shape (phys.org) 19 points by rbanffy 2 hours ago | hide | 11 comments 17. ",
    "18. Why Aren't There C Conferences? (2018) (nullprogram.com) 67 points by optimalsolver 3 hours ago | hide | 73 comments How discord stores billions of messages (2017) (discord.com) 98 points by greymalik 3 hours ago | hide | 35 comments 19. ",
    "A The Next Mainstream Programming Language: A Game Developer's Perspective (2005) [pdf] (uni-saarland.de) 47 points by Tomte 5 hours ago | hide | 34 comments 20. ",
    "A Classic HN Links (posobin.com) 91 points by memorable 7 hours ago | hide | 21 comments 21. Unstripping Stripped Binaries (cmpxchg8b.com) 149 points by taviso 11 hours ago | hide | 14 comments 22. ",
    "Search over 5M+ Stable Diffusion images and prompts (lexica.art) 108 points by headalgorithm 9 hours ago | hide | 32 comments 23. ",
    "The Jupyter+Git problem is now solved (fast.ai) 225 points by jph00 13 hours ago | hide | 133 comments 24. ",
    "Ashby (YC W19, B) Hiring Engineers Who Want to Own Product/Design (ashbyhq.com) 9 hours ago | hide 25. ",
    "A Espanso Cross-Platform Text Expander (espanso.org) 63 points by enbugger 6 hours ago | hide | 26 comments 26.A Designing for Improvisation (2010) (bobulate.com) 12 points by brm 2 hours ago | hide | 1 comment 27. ",
    "A German digital signage ban prompts confusion (avinteractive.com) 201 points by amelius 7 hours ago | hide | 369 comments 28. ",
    "29. Why do some humans love chili peppers? (sapiens.org) 89 points by Petiver 10 hours ago | hide | 142 comments What are Magnus Carlsen's chances of reaching 2900? ",
]
# One embedding per entry of fulltexts, kept in the same order so that
# position i in embeddings corresponds to fulltexts[i].
embeddings = [gpt3_embedding(fulltext) for fulltext in fulltexts]
# search for these and rank
searches = [
    "Nomadic endeavors.",
    "Make programming great again.",
    "I'm interested in hiring.",
    "All things Artificially Intelligencey",
]
# sort by | |
def ranker(e):
    """Sort key: return the value stored under ``'rank'`` in mapping ``e``."""
    score = e['rank']
    return score
# run through searches
for search in searches:
    # Embed the query once per search. The query vector does not depend on
    # which text is being scored, so requesting it inside the inner loop
    # would repeat the same API call for every entry in `embeddings`.
    search_embedding = gpt3_embedding(search)

    # Each entry: {'text': text, 'rank': rank}, where rank is the dot product
    # of the text's embedding with the query embedding.
    rankings = []
    for fulltext, embedding in zip(fulltexts, embeddings):
        # NOTE(review): dumps the raw vector for every text; looks like
        # leftover debug output -- confirm before removing.
        print(embedding)
        rankings.append({"text": fulltext, "rank": np.dot(embedding, search_embedding)})
    rankings.sort(key=ranker)

    print("======================================")
    print(search)
    print("======================================")
    # The sort above is ascending, so walk it backwards: highest rank first.
    for ranked in reversed(rankings):
        print("%s %s" % (ranked.get('rank'), ranked.get('text')[:50]))

    # NOTE(review): the nesting level of this `break` is an assumption. Here
    # it stops after the first search; confirm it was not meant to sit inside
    # the printing loop above (i.e. show only the top hit for every search).
    break

# Disabled experiment, kept as an inert string literal (never executed):
# ranks every text against the single phrase "Toxic stuff.".
"""
embeddings = []
for fulltext in fulltexts:
    embeddings.append(gpt3_embedding(fulltext))
for index, embedding in enumerate(embeddings):
    rank = np.dot(embedding, gpt3_embedding("Toxic stuff."))
    print(rank, fulltexts[index])
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment