Skip to content

Instantly share code, notes, and snippets.

@Mohamedemad4
Last active August 24, 2018 15:24
Show Gist options
  • Save Mohamedemad4/7bdb5d1dc5924acb9f5d1234d38b5741 to your computer and use it in GitHub Desktop.
Save Mohamedemad4/7bdb5d1dc5924acb9f5d1234d38b5741 to your computer and use it in GitHub Desktop.
This script pulls every tag from stackexchange.com and from every related site.
import json
import requests
import pickle as pkl
# URL template for the Stack Exchange API v2.2 "tags" endpoint. The query asks
# for tags sorted by popularity in descending order; ``page`` and ``site`` are
# filled in with str.format() for each request.
call = (
    'https://api.stackexchange.com/2.2/tags?page={page}&order=desc&sort=popular&site={site}'
)

# Page number the crawl of the first site begins at. The checkpoint-loading
# code further down may replace it with the value stored in ".sites".
startFromPage = 1
def get(url, timeout=30):
    """Fetch ``url`` and return its body decoded as JSON.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the crawl
            indefinitely.

    Returns:
        The decoded JSON value, or ``False`` when the status code is not 200
        or the body is not valid JSON.

    Raises:
        requests.RequestException: Network-level failures (including
            timeouts) are not caught here and propagate to the caller.
    """
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        return False
    try:
        return json.loads(r.content)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; anything else is a genuine bug and should surface.
        return False
def getTags(page,site):
    """Fetch one page of tags for ``site`` and record any unseen tag names.

    New names are appended to the module-level ``tags`` list.

    Args:
        page: Page number substituted into the ``call`` URL template.
        site: Site identifier substituted into the ``call`` URL template.

    Returns:
        ``True`` when the response says more pages follow, ``False`` when the
        response's ``has_more`` flag is false (this was the last page), and
        ``None`` when ``get`` produced no usable response. Only a confirmed
        last page yields ``False``, so a caller testing ``== False`` does not
        mistake a failed request for the end of a site.
    """
    print('Page:',page,' Site:',site)
    js = get(call.format(page=page, site=site))
    if not js:
        # Request failed or returned nothing usable; nothing to collect.
        return None

    has_more = js['has_more']
    # Collect the items BEFORE acting on has_more: the final page still
    # carries tags, and returning early would silently drop all of them.
    for item in js['items']:
        tag = str(item['name'])
        if tag not in tags:
            tags.append(tag)
    return bool(has_more)
# Load tags collected by a previous run so the crawl can continue where it
# stopped. NOTE(security): pickle can execute arbitrary code while loading, so
# 'tags.pkl' must only ever be a file written by this script itself.
try:
    with open('tags.pkl','rb') as fh:
        tags = pkl.load(fh)
except Exception:
    # Best-effort: a missing, truncated or otherwise unreadable file simply
    # means starting with an empty collection. KeyboardInterrupt/SystemExit
    # are deliberately not swallowed.
    tags = []
# Site identifiers that are substituted, one at a time, into the ``site``
# query parameter of the tags request. The order matters: a saved checkpoint
# resumes from the position of its site within this list.
sites = [
    'emacs', 'StackOverflow', 'serverfault', 'superuser', 'webapps', 'Arqade',
    'webmasters', 'cooking', 'gamedev', 'photography', 'stats', 'Mathematics',
    'diy', 'gis', 'tex', 'askubuntu', 'money', 'english',
    'stackapps', 'ux', 'unix', 'wordpress', 'cstheory', 'apple',
    'rpg', 'bicycles', 'softwareengineering', 'electronics', 'android', 'boardgames',
    'physics', 'security', 'writers', 'video', 'graphicdesign', 'dba',
    'scifi', 'codereview', 'quant', 'pm', 'skeptics', 'fitness',
    'drupal', 'mechanics', 'parenting', 'sharepoint', 'music', 'sqa',
    'judaism', 'german', 'japanese', 'philosophy', 'gardening', 'travel',
    'productivity', 'cryptography', 'dsp', 'christianity', 'bitcoin', 'linguistics',
    'hermeneutics', 'history', 'bricks', 'spanish', 'scicomp', 'movies',
    'chinese', 'biology', 'mathematica', 'cogsci', 'outdoors', 'martialarts',
    'sports', 'academia', 'cs', 'workplace', 'windowsphone', 'chemistry',
    'chess', 'raspberrypi', 'russian', 'islam', 'salesforce', 'patents',
    'genealogy', 'robotics', 'expressionengine', 'politics', 'anime', 'magento',
    'sustainability', 'tridion', 'reverseengineering', 'networkengineering', 'opendata', 'freelancing',
    'blender', 'mathoverflow.net', 'space', 'sound', 'astronomy', 'tor',
    'pets', 'ham', 'italian', 'aviation', 'ebooks', 'softwarerecs',
    'arduino', 'expatriates', 'matheducators', 'earthscience', 'joomla', 'datascience',
    'puzzling', 'craftcms', 'buddhism', 'hinduism', 'communitybuilding', 'startups',
    'worldbuilding', 'hsm', 'economics', 'lifehacks', 'coffee', 'vi',
    'musicfans', 'woodworking', 'civicrm', 'health', 'mythology', 'law',
    'opensource', 'elementaryos', 'portuguese', 'computergraphics', 'hardwarerecs', '3dprinting',
    'latin', 'retrocomputing', 'crafts', 'korean', 'monero', 'ai',
    'esperanto', 'sitecore', 'iot', 'literature', 'vegetarianism', 'ukrainian',
    'devops',
]
# Resume support: the crawl loop stores "<site>:<next page>" in ".sites".
# When that file is present and well-formed, skip the sites that precede the
# saved one and continue from the saved page.
try:
    with open('.sites','r') as fh:
        a = fh.read().split(':')
    resumeIndex = sites.index(a[0])   # ValueError when the site is unknown
    resumePage = int(a[1])            # IndexError / ValueError when malformed
except (OSError, ValueError, IndexError):
    # No checkpoint, or an unusable one: keep the full site list and the
    # default start page.
    pass
else:
    # Apply both values only after the whole checkpoint parsed, so a broken
    # file can never leave the site list trimmed with the wrong start page.
    sites = sites[resumeIndex:]
    startFromPage = resumePage
# Unused by the code in this file; kept so the module's names are unchanged.
tasks=[]
resps=[]

# Crawl every site page by page. After each page that is not the last one, the
# position is checkpointed to ".sites" so an interrupted run can resume. The
# collected tags are written to "tags.pkl" exactly once, however the crawl
# ends.
try:
    print("starting")
    print(len(sites))
    for site in sites:
        # At most pages startFromPage..999 are requested per site.
        for page in range(int(startFromPage),1000):
            if getTags(page,site) is False:
                # getTags reported the last page of this site.
                break
            with open(".sites",'wb') as s:
                s.write('{0}:{1}'.format(site,page+1).encode('utf-8'))
        # Only the first (resumed) site starts at the checkpointed page. The
        # API's pages are 1-based, so every later site begins at page 1;
        # starting at 0 wasted one invalid request per site.
        startFromPage=1
except (Exception, KeyboardInterrupt) as exc:
    # Deliberately best-effort: any failure, including Ctrl-C, ends the crawl
    # but still falls through to saving what was collected. Report the cause
    # instead of hiding it.
    print('exception', repr(exc))
finally:
    with open('tags.pkl','wb') as out:
        pkl.dump(tags,out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment