Skip to content

Instantly share code, notes, and snippets.

@Sait2000
Created September 10, 2014 11:51
Show Gist options
  • Save Sait2000/520d1a68b53789ae59c5 to your computer and use it in GitHub Desktop.
Save Sait2000/520d1a68b53789ae59c5 to your computer and use it in GitHub Desktop.
Naver Open English User Info Extractor
import json
from collections import OrderedDict
import requests
# Endpoints that the script fetches page by page.  Judging by their paths
# they are the "invite" and "create" entry lists of the Naver English
# dictionary -- NOTE(review): inferred from the URLs only, not verified.
urls = [
    'http://dict-channelgw.naver.com/endict/ko/enko/user/invite/entry/list.dict',
    'http://dict-channelgw.naver.com/endict/ko/enko/user/create/entry/list.dict',
]
def walk(it):
    """Yield every value nested inside *it*, then *it* itself.

    Only ``dict`` values and ``list`` elements are descended into; any other
    object (including strings and tuples) is treated as a leaf.  Traversal is
    depth first and a container is yielded after all of its contents, so the
    last item produced is always *it*.

    Args:
        it: Any object, typically a decoded JSON document.

    Yields:
        Each nested value, followed by the container that holds it.
    """
    if isinstance(it, dict):
        for child in it.values():
            yield from walk(child)
    elif isinstance(it, list):
        for child in it:
            yield from walk(child)
    # Leaves and containers alike are reported, containers after their contents.
    yield it
def extract_info(it):
    """Return the user fields of *it* as a tuple of ``(key, value)`` pairs.

    Args:
        it: Any object.  Only a ``dict`` containing all of the keys ``id``,
            ``realName``, ``nickname`` and ``email`` is treated as a record.

    Returns:
        A tuple of ``(key, value)`` pairs in the fixed order id, realName,
        nickname, email -- or ``None`` when *it* is not a dict or lacks any
        of those keys.  Other keys of *it* are ignored.
    """
    if not isinstance(it, dict):
        return None
    keys = ("id", "realName", "nickname", "email")
    # A record must carry every field; a partial match is not a user record.
    if any(k not in it for k in keys):
        return None
    return tuple((k, it[k]) for k in keys)
# Unique user records gathered from every page of every endpoint.  Each record
# is the hashable tuple of (key, value) pairs built by extract_info, so the set
# drops records that appear in more than one response.
infos = set()
for url in urls:
    sort_type = ''
    m_page = 1
    # Placeholder so the first request is made; replaced by the page count
    # reported in each response.
    m_totalPage = 1
    while m_page <= m_totalPage:
        r = requests.get(
            url,
            params={'sort': 'desc', 'page_size': 20, 'page': m_page, 'sort_type': sort_type},
            # Without a timeout requests waits indefinitely on a stalled server.
            timeout=30,
        )
        # Surface HTTP errors directly instead of as a JSON/KeyError further down.
        r.raise_for_status()
        rj = r.json()
        m_totalPage = rj['data']['m_totalPage']
        # Records may sit at any depth of the payload, so scan all of it.
        for item in walk(rj):
            info = extract_info(item)
            if info is not None:
                infos.add(info)
        m_page += 1

# Each record is a tuple of pairs; OrderedDict turns it back into a mapping
# that keeps the id/realName/nickname/email key order in the JSON output.
with open('infos.json', 'w', encoding='utf_8') as fd:
    json.dump(list(map(OrderedDict, infos)), fd, ensure_ascii=False, indent=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment