-
-
Save zwpaper/8034376 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
import urwid | |
import requests | |
import re | |
import time | |
from bs4 import BeautifulSoup, NavigableString | |
import lxml.html | |
# Forum credentials handed to Forum() when the terminal UI starts.
# They are blank in the source and must be filled in before running.
USER = ''
PASS = ''

# Key names (as delivered to the urwid unhandled-input callback) that drive
# the UI state machine in DTerm.onKeyDown.
KEY_ENTER = ['enter', 'right']       # open the focused thread
KEY_BACK = ['backspace', 'left']     # leave a thread, back to the list
KEY_REFRESH = ['r', 'f5']            # reload the thread list
class AttributeDict(dict):
    """Dictionary whose items can also be read and written as attributes.

    ``d.key`` returns ``d['key']`` and ``d.key = value`` stores
    ``d['key'] = value``.
    """

    def __getattr__(self, name):
        """Return the item stored under *name*.

        Python only calls this after normal attribute lookup has failed.
        A missing key is reported as AttributeError rather than KeyError so
        that ``hasattr()`` and ``getattr(obj, name, default)`` work.
        """
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    # Attribute assignment writes straight into the mapping.
    __setattr__ = dict.__setitem__
class Forum:
    """Scraping client for the forum rooted at ``HP_URL``.

    It logs in with a username/password, lists the threads of one board
    (``DISPLAY_URL``) and reads the posts of a thread page by page.  Pages
    are fetched with ``requests`` and parsed with BeautifulSoup / lxml.
    """

    HP_URL = 'http://www.hi-pda.com/forum/'
    LOGIN_URL = HP_URL + 'logging.php?action=login'
    LOGIN_SUBMIT_URL = HP_URL + 'logging.php?action=login&loginsubmit=yes'
    DISPLAY_URL = HP_URL + 'forumdisplay.php?fid=2'
    THREAD_URL = HP_URL + 'viewthread.php?tid=%d&page=%d'

    # Patterns used to pull numeric ids / dates out of element ids, hrefs
    # and text.  Escapes are written so that no literal relies on Python's
    # tolerance for unknown backslash sequences.
    THREAD_RE = re.compile("normalthread_([0-9]+)")
    USER_RE = re.compile(r"space.php\?uid=([0-9]+)")
    POST_RE = re.compile("post_([0-9]+)")
    POST_DATE_RE = re.compile(u'发表于 ([\\w\\W]+)')
    FORMHASH_RE = re.compile(
        r'<input\s*type="hidden"\s*name="formhash"\s*value="([\w\W]+?)"\s*\/>')
    PID_RE = re.compile("pid=([0-9]+)")

    THREAD_DATE_FORMAT = '%Y-%m-%d'
    REPLY_DATE_FORMAT = '%Y-%m-%d %H:%M'

    def __init__(self, username, password):
        """Remember the credentials; no network traffic happens here."""
        self.username = username
        self.password = password
        # Set by a successful login().  Initialised so that display() and
        # viewthread() do not fail with AttributeError when called first.
        self.cookies = None

    def login(self):
        """Log in and keep the session cookies.

        Fetches the login page, extracts the hidden ``formhash`` field and
        submits it together with the credentials.

        Returns:
            True when the response no longer refers to the login URL (the
            cookies are then stored on ``self.cookies``); False otherwise,
            including when the login page carries no ``formhash`` field.
        """
        r = requests.get(Forum.LOGIN_URL)
        found = Forum.FORMHASH_RE.search(r.text)
        if found is None:
            # Without the form token the submission cannot be built.
            return False
        params = {
            "formhash": found.group(1),
            "loginfield": "username",
            "loginsubmit": True,
            "username": self.username,
            "password": self.password,
            "questionid": "0",
            "answer": "",
            "referer": "index.php",
        }
        # NOTE(review): ``params=`` places the credentials in the URL query
        # string.  ``data=`` would send them in the POST body instead;
        # confirm the server accepts that before switching.
        r = requests.post(Forum.LOGIN_SUBMIT_URL, params=params)
        if 'logging.php?action=login' in r.text:
            return False
        self.cookies = r.cookies
        return True

    def display(self):
        """Fetch the board page and return its threads.

        Returns:
            A list of AttributeDict, one per ``tbody`` whose id matches
            ``THREAD_RE``, with keys ``title``, ``id``, ``date``, ``reply``,
            ``review``, ``lastpost`` (``author``, ``date``) and ``author``
            (``id``, ``name``).  Dates are ``time.struct_time`` values.
        """
        r = requests.get(Forum.DISPLAY_URL, cookies=self.cookies)
        soup = BeautifulSoup(r.content, "lxml", from_encoding="gb18030")
        threads = []
        for thread in soup.find_all("tbody", id=Forum.THREAD_RE):
            title = thread.find("th", class_="subject")
            author = thread.find("td", class_="author")
            nums = thread.find("td", class_="nums")
            lastpost = thread.find("td", class_="lastpost")
            threads.append(AttributeDict({
                'title': title.a.text,
                'id': int(Forum.THREAD_RE.match(thread['id']).group(1)),
                'date': time.strptime(author.em.text, Forum.THREAD_DATE_FORMAT),
                'reply': int(nums.strong.text),
                'review': int(nums.em.text),
                'lastpost': AttributeDict({
                    'author': lastpost.cite.a.text,
                    'date': time.strptime(lastpost.em.a.text, Forum.REPLY_DATE_FORMAT),
                }),
                'author': AttributeDict({
                    'id': int(Forum.USER_RE.match(author.cite.a['href']).group(1)),
                    'name': author.cite.a.text,
                }),
            }))
        return threads

    def viewthread(self, tid, page=1):
        """Fetch one page of a thread.

        Args:
            tid: numeric thread id.
            page: 1-based page number.

        Returns:
            ``(hasNext, posts)``: ``hasNext`` is True when the page contains
            an ``a.next`` link; ``posts`` is a list of AttributeDict with
            keys ``author`` (``name``, ``id``), ``content``, ``id``,
            ``date`` (formatted ``%H:%M``) and ``reply`` (post id this post
            answers, or None).
        """
        r = requests.get(Forum.THREAD_URL % (tid, page), cookies=self.cookies)
        soup = BeautifulSoup(r.content, "lxml", from_encoding="gb18030")
        html = lxml.html.fromstring(r.content)
        hasNext = soup.find("a", class_="next") is not None
        posts = []
        for post in soup.find_all("div", id=Forum.POST_RE):
            author = post.find("td", class_="postauthor")
            date = post.find("div", class_="authorinfo").em.text
            date = time.strptime(Forum.POST_DATE_RE.match(date).group(1),
                                 Forum.REPLY_DATE_FORMAT)
            msg = html.xpath("//div[@id='%s']//td[@class='t_msgfont']" % post['id'])
            if len(msg) == 0:
                # No message cell for this post: show a placeholder.  reply
                # is reset here so it can neither be undefined nor leak over
                # from the previous post.
                message = "作者被禁止或删除 内容自动屏蔽"
                reply = None
            else:
                message, reply = Forum._parse_message(msg[0])
            posts.append(AttributeDict({
                'author': AttributeDict({
                    'name': author.a.text,
                    'id': int(Forum.USER_RE.match(author.a['href']).group(1)),
                }),
                'content': message,
                'id': int(Forum.POST_RE.match(post['id']).group(1)),
                'date': time.strftime("%H:%M", date),
                'reply': reply,
            }))
        return hasNext, posts

    @staticmethod
    def _linked_pid(links):
        """Return the ``pid=`` number in the first link's href, or None."""
        if len(links) == 0:
            return None
        found = Forum.PID_RE.search(links[0].attrib['href'])
        if found is None:
            return None
        return int(found.group(1))

    @staticmethod
    def _parse_message(msg):
        """Flatten a message cell into ``(text, reply_pid)``.

        Walks the direct children of *msg* (an lxml element), collecting
        text and tails, and records the post id referenced by a quote block
        or a "reply" marker.
        """
        message = msg.text or ""
        reply = None
        for line in msg:
            tag = line.tag
            if tag == 'i' and 'pstatus' in line.attrib.values():
                pass  # "last edited by ..." banner
            elif tag == 'div' and 'quote' in line.attrib.values():
                # quoted post
                pid = Forum._linked_pid(
                    line.xpath(".//blockquote/font[@size='2']/a"))
                if pid is not None:
                    reply = pid
            elif tag == 'img':
                pass  # image
            elif tag == 'span' and 'id' in line.attrib:
                continue  # download; its tail is skipped as well
            elif tag == 'div' and 't_attach' in line.attrib.values():
                # download box; matched on attribute values like the other
                # class checks (the name-based test could never match)
                pass
            elif tag == 'strong' and line.text and u'回复' in line.text:
                # "reply to" marker
                pid = Forum._linked_pid(line.xpath(".//a[@target='_blank']"))
                if pid is not None:
                    reply = pid
            elif tag == 'script' or tag == 'embed':
                pass  # flash
            elif tag == 'a':
                # link
                message += line.text_content() + ' '
            elif tag == 'br':
                if line.text:
                    message += line.text
            elif not line.tail:
                # NOTE(review): the source's indentation was lost; this
                # branch is read as the catch-all of the tag chain (not as
                # part of the 'br' case) — confirm against the original.
                message += line.text_content() + "\n"
            if line.tail:
                message += line.tail
        return message, reply
class ThreadListWalker(urwid.ListWalker):
    """List walker presenting one row per thread: author | title.

    Rows alternate between the "even" and "odd" palette entries and use
    "focus" while focused.
    """

    def __init__(self, threads):
        """Build the row widgets for *threads* (objects with ``author.name``
        and ``title``)."""
        self.focus = 0
        self.threads = []
        for index, thread in enumerate(threads):
            style = "even" if index % 2 == 0 else "odd"
            row = urwid.Columns([
                ('weight', 1, urwid.Padding(urwid.Text(thread.author.name, wrap="clip"), align='right', width='pack')),
                (1, urwid.SelectableIcon('|')),
                ('weight', 9, urwid.Text(thread.title, wrap="clip"))
            ], 1)
            self.threads.append(urwid.AttrMap(row, style, 'focus'))

    def set_focus(self, focus):
        """Move the focus to position *focus* and notify listeners."""
        self.focus = focus
        # The list-walker contract asks for a "modified" signal whenever
        # the focus changes so that the ListBox redraws.
        self._modified()

    def get_focus(self):
        """Return ``(widget, position)`` of the focused row, or
        ``(None, None)`` when there are no rows."""
        if not self.threads:
            return None, None
        return self.threads[self.focus], self.focus

    def get_next(self, position):
        """Return the row after *position*, or ``(None, None)`` at the end."""
        if position < len(self.threads) - 1:
            focus = position + 1
            # NOTE(review): kept from the original; sets a private flag on
            # the row widget as a side effect — purpose not evident here.
            self.threads[focus]._selectable = True
            return self.threads[focus], focus
        return None, None

    def get_prev(self, position):
        """Return the row before *position*, or ``(None, None)`` at the top."""
        if position > 0:
            focus = position - 1
            return self.threads[focus], focus
        return None, None
class PostListWalker(urwid.ListWalker):
    """List walker over the posts of one thread, loading pages on demand.

    The first page is fetched on construction; further pages are fetched
    through ``onNextPage`` when the list is walked past the last loaded row.
    """

    def __init__(self, tid, onNextPage):
        """
        Args:
            tid: thread id passed through to *onNextPage*.
            onNextPage: callable ``(tid, page) -> (hasNext, posts)``.
        """
        self.page = 1
        self.count = 0
        self.focus = 0
        self.posts = []
        self.tid = tid
        self.onNextPage = onNextPage
        self._load_next_page()

    def _load_next_page(self):
        """Fetch page ``self.page``, append its posts and advance the page."""
        self.hasNext, posts = self.onNextPage(self.tid, self.page)
        self.page += 1
        self.append(posts)

    def append(self, posts):
        """Append one row widget per post: number | author | text | time."""
        for post in posts:
            style = "even" if self.count % 2 == 0 else "odd"
            self.count += 1
            row = urwid.Columns([
                (3, urwid.Padding(urwid.Text(u"%3d" % self.count), align='right', width='pack')),
                (1, urwid.Text('|')),
                ('weight', 1, urwid.Padding(urwid.Text(post.author.name, wrap="clip"), align='right', width='pack')),
                (1, urwid.Text('|')),
                ('weight', 9, urwid.Text(post.content.strip())),
                (5, urwid.Padding(urwid.Text(post.date, wrap="clip"), align='left', width='pack'))
            ], 1)
            self.posts.append(urwid.AttrMap(row, style))

    def set_focus(self, focus):
        """Move the focus to position *focus* and notify listeners."""
        self.focus = focus
        # The list-walker contract asks for a "modified" signal whenever
        # the focus changes so that the ListBox redraws.
        self._modified()

    def get_focus(self):
        """Return ``(widget, position)`` of the focused row, or
        ``(None, None)`` when no post has been loaded."""
        if not self.posts:
            return None, None
        return self.posts[self.focus], self.focus

    def get_next(self, position):
        """Return the row after *position*, fetching the next page if needed.

        Returns ``(None, None)`` when there is no further row — including
        when a freshly fetched page turned out to contain no posts.
        """
        focus = position + 1
        if focus >= len(self.posts) and self.hasNext:
            self._load_next_page()
        if focus < len(self.posts):
            return self.posts[focus], focus
        return None, None

    def get_prev(self, position):
        """Return the row before *position*, or ``(None, None)`` at the top."""
        if position > 0:
            focus = position - 1
            return self.posts[focus], focus
        return None, None
class DTerm:
    """Terminal UI: a splash screen, the thread list and a thread view.

    ``self.status`` drives key handling:
      "login" - not logged in (initial state, and after a failed login)
      "home"  - thread list shown after a refresh
      "post"  - a thread is being viewed
      "back"  - thread list shown again after leaving a thread

    Constructing the object builds the widgets and runs the urwid main
    loop, so the constructor only returns when the loop ends.
    """

    def __init__(self):
        self.status = "login"
        self.palette = [
            ('header', 'white', 'dark gray'),
            ('footer', 'white', 'dark gray'),
            ('focus', 'white', 'black'),
            ('odd', 'black', 'white'),
            ('even', 'black', 'light gray'),
        ]
        # Splash: big centred text, shown while loading.
        self.splash = urwid.BigText(u"Discovery", urwid.font.Thin6x6Font())
        self.splash = urwid.Padding(self.splash, 'center', width='clip')
        self.splash = urwid.Filler(self.splash, 'middle')
        self.home = urwid.Frame(self.splash)
        # Body to restore when leaving a thread; replaced by refresh().
        self.body = self.splash
        self.threads = []
        self.forum = Forum(USER, PASS)
        # Header of the thread list (author | title).
        self.header = urwid.AttrMap(urwid.Columns([
            ('weight', 1, urwid.Padding(urwid.Text(u"作者"), align='right', width='pack')),
            (1, urwid.Text('|')),
            ('weight', 9, urwid.Text(u"标题")),
        ], 1), 'header')
        self.loop = urwid.MainLoop(self.home, self.palette, unhandled_input=self.onKeyDown, handle_mouse=False)
        # Log in as soon as the loop starts.
        self.loop.set_alarm_in(0, self.onStart, self.home)
        self.loop.run()

    def refresh(self):
        """Reload the thread list, showing the splash while fetching."""
        self.home.footer = urwid.AttrMap(urwid.Text(u"正在刷新..."), 'footer')
        self.home.body = self.splash
        self.loop.draw_screen()
        self.threads = self.forum.display()
        self.home.body = urwid.ListBox(ThreadListWalker(self.threads))
        self.home.footer = urwid.AttrMap(urwid.Text(u"刷新完成"), 'footer')
        self.loop.draw_screen()
        self.body = self.home.body

    def viewthread(self, thread):
        """Show the posts of *thread* (needs ``id`` and ``title``)."""
        postlist = PostListWalker(thread.id, self.onNextPage)
        self.home.header = urwid.AttrMap(urwid.Columns([
            (3, urwid.Padding(urwid.Text(u"#"), align='right', width='pack')),
            (1, urwid.Text('|')),
            ('weight', 1, urwid.Padding(urwid.Text(u"作者"), align='right', width='pack')),
            (1, urwid.Text('|')),
            ('weight', 9, urwid.Text(thread.title, wrap="clip")),
            (5, urwid.Padding(urwid.Text(u"时间"), align='left', width='pack')),
        ], 1), 'header')
        self.home.body = urwid.ListBox(postlist)
        self.loop.draw_screen()

    def onNextPage(self, tid, page):
        """Fetch one page of thread *tid*, updating the footer around it.

        Returns the ``(hasNext, posts)`` pair from ``Forum.viewthread``.
        """
        self.home.footer = urwid.AttrMap(urwid.Text(u"正在载入..."), 'footer')
        self.loop.draw_screen()
        hasNext, posts = self.forum.viewthread(tid, page)
        self.home.footer = urwid.AttrMap(urwid.Text(u"载入完成"), 'footer')
        self.loop.draw_screen()
        return hasNext, posts

    def onStart(self, loop, home):
        """Alarm callback: log in and, on success, load the thread list."""
        self.home.footer = urwid.AttrMap(urwid.Text(u"正在登陆..."), 'footer')
        self.loop.draw_screen()
        if self.forum.login():
            self.home.header = self.header
            self.refresh()
            self.home.footer = urwid.AttrMap(urwid.Text(u"登陆成功"), 'footer')
            self.status = "home"
        else:
            # Stay in "login": there is no thread list yet, so treating the
            # next key press as list navigation would crash.
            self.home.footer = urwid.AttrMap(urwid.Text(u"登陆失败"), 'footer')

    def onKeyDown(self, key):
        """Handle keys not consumed by the widgets."""
        if self.status == "home" or self.status == "back":
            if key in KEY_ENTER:
                widget, index = self.home.body.get_focus()
                if index is None:
                    # Empty list: nothing to open.
                    return
                self.status = "post"
                self.viewthread(self.threads[index])
            elif key in KEY_REFRESH:
                self.status = "home"
                self.refresh()
        elif self.status == "post":
            if key in KEY_BACK:
                # Restore the list header and the list body saved by refresh().
                self.status = "back"
                self.home.header = self.header
                self.home.body = self.body
# Start the application: DTerm's constructor builds the UI and runs the
# urwid main loop, so this statement blocks until the loop ends.
term = DTerm()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment