Created
December 26, 2021 07:53
-
-
Save fy0/1635173e74f04431b648f92af9911e35 to your computer and use it in GitHub Desktop.
幻塔抽卡概率统计 v1.0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Motivation: https://nga.178.com/read.php?tid=30008586
# Dependencies: pip install requests lxml cssselect
import re | |
import requests | |
from lxml import etree | |
# Browser-like HTTP headers sent with every request to the forum.
headers = {
    "authority": "nga.178.com",
    "pragma": "no-cache",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/88.0.4324.192 Safari/537.36 OPR/74.0.3911.218"
    ),
    "accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.9"
    ),
    "sec-fetch-site": "same-origin",
    "sec-fetch-mode": "navigate",
    "sec-fetch-user": "?1",
    "sec-fetch-dest": "document",
    "referer": "https://nga.178.com/read.php?tid=30008586&_ff=836",
    "accept-language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    "cookie": "",  # fill in your own cookies here
}
# Processing of a single record
def result_solve(x):
    """Convert an iterable of numeric strings into a list of ints.

    Returns ``None`` when *x* is falsy (``None`` or empty), which lets the
    caller supply a fallback with ``or``.
    """
    if not x:
        return None
    return [int(part) for part in x]
# Patterns recognising "<pulls> draws, <golds> SSR" reports, in priority
# order: the first pattern that matches anywhere in the text wins.
# - Both number groups are greedy, so multi-digit counts are captured whole
#   (a lazy ``\d+?`` before an optional suffix would stop after one digit).
# - ``[抽发]`` lists just the two unit characters; a ``|`` inside a character
#   class would be matched literally.
# - Colons and commas are accepted in ASCII and full-width (\uff1a, \uff0c)
#   form.
_DRAW_PATTERNS = tuple(re.compile(p) for p in (
    r'抽卡[:\uff1a]?(\d+)[抽发]\s?(\d+)金',
    r'金[核河][:\uff1a]?\s?(\d+)[抽发]\s?(\d+)金?',
    r'金[:\uff1a](\d+)\s(\d+)',
    r'金[核河](\d+)[抽发][,\uff0c](\d+)金?',
    r'金\s[:\uff1a]\s(\d+)\s(\d+)',
    r'(\d+)[抽发]\s?(\d+)(?:金|ssr)',
    r'金核[:\uff1a]?(\d+)\s?(\d+)金?',
))


def match_pattern1(txt):
    """Find the first "pull count / SSR count" report in *txt*.

    Args:
        txt: The post text to scan.

    Returns:
        A tuple ``(pulls, ssr)`` of digit strings taken from the first
        pattern in ``_DRAW_PATTERNS`` that matches, or ``None`` when no
        pattern matches.
    """
    for pattern in _DRAW_PATTERNS:
        found = pattern.search(txt)
        if found:
            return found.groups()
    return None
# Patterns recognising "<pages> pages of black cores, <golds> SSR" reports,
# in priority order: the first pattern that matches anywhere in the text wins.
# - Both number groups are greedy, so multi-digit counts are captured whole
#   (a lazy ``\d+?`` before an optional suffix would stop after one digit).
# - Colons and commas are accepted in ASCII and full-width (\uff1a, \uff0c)
#   form.
_BLACK_PATTERNS = tuple(re.compile(p) for p in (
    r'黑核[:\uff1a]?(\d+)页\s?(\d+)金?',
    r'黑[:\uff1a]?(\d+)\s(\d+)',
    r'黑核[:\uff1a]?(\d+)页出了\s?(\d+)金?',
    r'黑核[:\uff1a]?\s?(\d+)页\s?(\d+)金?',
    r'黑\s[:\uff1a]\s(\d+)\s(\d+)',
    r'黑[:\uff1a]?\s?(\d+)页(\d+)',
    r'黑核(\d+)页[,\uff0c](\d+)金?',
    r'黑核[:\uff1a]?刚到(\d+)页\s?(\d+)金?',
))


def match_pattern2(txt):
    """Find the first "black-core pages / SSR count" report in *txt*.

    Args:
        txt: The post text to scan.

    Returns:
        A tuple ``(pages, ssr)`` of digit strings taken from the first
        pattern in ``_BLACK_PATTERNS`` that matches, or ``None`` when no
        pattern matches.
    """
    for pattern in _BLACK_PATTERNS:
        found = pattern.search(txt)
        if found:
            return found.groups()
    return None
def solve_item(item):
    """Turn one post-content element into a statistics record.

    Args:
        item: An element exposing ``itertext``, ``get``, ``getparent`` and
            ``xpath`` (an lxml element in practice). Its ``id`` attribute is
            expected to be ``postcontent`` followed by the floor number.

    Returns:
        A dict with the keys ``floor``, ``draw_ssr``, ``black_ssr``,
        ``attach`` and ``is_data``. The two ``*_ssr`` entries are
        two-element int lists, ``[0, 0]`` when nothing was recognised.
    """
    post_text = '\n'.join(item.itertext())

    # Everything after the "postcontent" prefix of the id is the floor number.
    prefix_len = len('postcontent')
    floor = int(item.get('id')[prefix_len:])

    nothing = [0, 0]
    draw_ssr = result_solve(match_pattern1(post_text)) or [0, 0]
    black_ssr = result_solve(match_pattern2(post_text)) or [0, 0]

    # An attachment span two levels up marks a post that carries a picture.
    container = item.getparent().getparent()
    attach = bool(container.xpath("span[contains(@id, 'postattach')]"))

    # The post counts as data as soon as either pair was recognised.
    is_data = draw_ssr != nothing or black_ssr != nothing
    if is_data:
        print(f'{floor}楼: {draw_ssr[0]}抽{draw_ssr[1]}金,{black_ssr[0]}页黑核{black_ssr[1]}金,图 {"有" if attach else "无"}')

    return {
        'floor': floor,
        'draw_ssr': draw_ssr,
        'black_ssr': black_ssr,
        'attach': attach,
        'is_data': is_data,
    }
# Fetch one page of the thread
def fetch_page(page, timeout=10):
    """Download one page of the thread and parse every post on it.

    Args:
        page: Page number, sent as the ``page`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            script indefinitely.

    Returns:
        A list with one ``solve_item`` record per ``span.postcontent``
        element found on the page.
    """
    params = (
        ('tid', '30008586'),
        ('_ff', '836'),
        ('page', page),
    )
    resp = requests.get(
        'https://nga.178.com/read.php',
        headers=headers,
        params=params,
        timeout=timeout,
    )
    # A separate name keeps the parsed tree from shadowing the page number.
    tree = etree.HTML(resp.text)
    return [solve_item(post) for post in tree.cssselect("span.postcontent")]
# main: scrape pages 1 through 7 and pool the records of every post
all_data = [
    record
    for page_no in range(1, 8)
    for record in fetch_page(page_no)
]
# Export to a CSV file that Excel can open
import csv
import time

# The file name carries the local time at which the export ran.
timestamp = time.strftime('%Y-%m-%d %H_%M_%S', time.localtime())
csv_name = '幻塔概率统计-%s.csv' % timestamp

# utf_8_sig writes a UTF-8 BOM so that Excel does not garble the text.
with open(csv_name, 'w', newline='', encoding='utf_8_sig') as f:
    writer = csv.writer(f)
    writer.writerow(['楼层', '抽卡(金/红)', '抽卡SSR数', '黑核抽数', '黑核SSR数', '带图', '存在数据'])
    # One row per post, columns in the same order as the header above.
    writer.writerows(
        [
            rec['floor'],
            rec['draw_ssr'][0],
            rec['draw_ssr'][1],
            rec['black_ssr'][0],
            rec['black_ssr'][1],
            rec['attach'],
            rec['is_data'],
        ]
        for rec in all_data
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment