|
import aiohttp, asyncio, yarl, bs4, re, json |
|
|
|
# Matches a relative age such as "3 weeks", "1.5 hrs" or "2mo". The trailing
# " ago" is removed before matching, so the pattern must not require it.
_AGE_RE = re.compile(
    r"^(?P<num>\d+(?:\.\d*)?|\.\d+)\s*"
    r"(?P<unit>d(?:ay)?|h(?:ou)?r?|mo(?:nth)?|m(?:in(?:ute)?)?"
    r"|s(?:ec(?:ond)?)?|w(?:(?:ee)?k)?|y(?:(?:ea)?r)?)s?$"
)

# (unit prefix, seconds per unit). Order matters: "mo" must be tested before
# "m" so that months are not mistaken for minutes. A month is approximated
# as 30 days and a year as 365 days.
_AGE_UNIT_SECONDS = (
    ("s", 1.0),
    ("mo", 60.0 * 60.0 * 24.0 * 30.0),
    ("m", 60.0),
    ("h", 60.0 * 60.0),
    ("d", 60.0 * 60.0 * 24.0),
    ("w", 60.0 * 60.0 * 24.0 * 7.0),
    ("y", 60.0 * 60.0 * 24.0 * 365.0),
)


def _parse_age(text):
    """Convert a relative age such as ``"3 weeks ago"`` into seconds.

    The input is lower-cased and stripped, and a trailing ``" ago"`` is
    removed. If the remainder is ``<number><unit>`` (optionally separated by
    whitespace and optionally plural), the age is returned as a float number
    of seconds. Otherwise the normalized remainder is returned unchanged as
    a string (e.g. ``"yesterday"``).
    """
    age = str(text).lower().strip()
    if age.endswith(" ago"):
        age = age[: -len(" ago")].rstrip()

    match = _AGE_RE.match(age)
    if match is None:
        return age

    unit = match.group("unit")
    for prefix, seconds in _AGE_UNIT_SECONDS:
        if unit.startswith(prefix):
            return float(match.group("num")) * seconds
    return age


def _coerce_stat_value(raw):
    """Turn a scraped stat string into a float, an age in seconds, or itself.

    Strings made only of numeric characters, ``-`` and ``.`` become floats
    when ``float()`` accepts them. Strings ending in ``" ago"`` go through
    ``_parse_age``. Everything else (including empty or malformed numeric
    strings) is returned as the original string.
    """
    if raw and all(ch.isnumeric() or ch in "-." for ch in raw):
        try:
            return float(raw)
        except ValueError:
            # e.g. "1.2.3", "-" or a non-ASCII numeric character.
            return raw
    if raw.endswith(" ago"):
        return _parse_age(raw)
    return raw


def _parse_model_card(card):
    """Extract name, description and stats from one model card element.

    ``card`` is expected to be a BeautifulSoup element containing an ``<h2>``
    (model name) and ``<p>`` elements: the first ``<p>`` is the description,
    the remaining ones hold stats. Each stat is a label/value pair joined by
    a non-breaking space, with pairs separated by three newlines
    (assumption about the page markup -- verify if the site layout changes).

    Returns a dict of the parsed stats plus ``"name"`` and ``"desc"`` keys;
    ``"name"``/``"desc"`` win over a stat with the same key.
    """
    heading_lines = card.find("h2").text.strip().splitlines()
    name = heading_lines[0] if heading_lines else ""

    paragraphs = card.find_all("p")
    desc = paragraphs[0].text.strip() if paragraphs else ""

    info = {}
    for para in paragraphs[1:]:
        for chunk in para.text.strip().split("\n\n\n"):
            if not chunk.strip():
                continue
            words = [
                word.strip().lower().replace(",", "")
                for word in chunk.split("\xa0")
            ]
            if len(words) != 2:
                # Not a label/value pair; skip instead of failing the scrape.
                continue
            # Stable sort: the word without digits becomes the key, the word
            # containing digits becomes the value ("7.3m pulls" and
            # "updated 3 weeks ago" both end up as label -> value).
            words.sort(key=lambda word: any(ch.isnumeric() for ch in word))
            key, value = words
            info[key] = _coerce_stat_value(value)

    return {**info, "name": name, "desc": desc}


async def get_ollama_library(**query):
    """Fetch https://ollama.com/library and scrape its model listing.

    Args:
        **query: Optional URL query parameters appended to the library URL.
            Values must be of a type accepted by ``yarl.URL.update_query``.

    Returns:
        A ``(modelinfos, doc)`` tuple: ``modelinfos`` is a list with one dict
        per ``<h2>`` found on the page (see ``_parse_model_card``), and
        ``doc`` is the parsed ``bs4.BeautifulSoup`` document.

    Raises:
        aiohttp.ClientError: On connection problems, or (as
            ``aiohttp.ClientResponseError``) when the server answers with an
            HTTP error status.
    """
    url = yarl.URL("https://ollama.com/library")
    if query:
        # yarl URLs are immutable: update_query() returns a new URL.
        url = url.update_query(query)

    async with aiohttp.ClientSession() as sess:
        async with sess.get(url) as res:
            # Do not scrape an error page as if it were the listing.
            res.raise_for_status()
            html = await res.text()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    doc = bs4.BeautifulSoup(html, "html.parser")

    modelinfos = []
    for h2elem in doc.find_all("h2"):
        # The card is taken to be the heading's grandparent element.
        card = h2elem.parent.parent
        if card is None:
            continue
        minfo = _parse_model_card(card)
        # Echo each parsed entry to stdout.
        print(repr(minfo))
        modelinfos.append(minfo)

    return modelinfos, doc
|
|
|
def get_models(**query):
    """Blocking wrapper around ``get_ollama_library``.

    Runs the coroutine to completion with ``asyncio.run`` and returns its
    result unchanged (a ``(modelinfos, doc)`` tuple).

    Args:
        **query: Optional query parameters forwarded verbatim to
            ``get_ollama_library``. Calling with no arguments behaves as
            before.

    Raises:
        RuntimeError: If called from a thread that already has a running
            asyncio event loop (``asyncio.run`` cannot be nested); await
            ``get_ollama_library`` directly in that case.
    """
    return asyncio.run(get_ollama_library(**query))
|
|