Last active
November 12, 2023 23:43
-
-
Save farzadhallaji/bbe5ac054e9f2e552d282233fc8b6aab to your computer and use it in GitHub Desktop.
Convert a list of words to an HTML file to memorize them
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""vocab.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1GNxF9QyJc8hL3KgVKB8t4GPgH8sGNTci
"""

# ! pip install google-translate-for-goldendict
class Token:
    """Compute the ``tk`` request token used by Google Translate's web API.

    The token is derived from a two-part seed (``tkk``, formatted as
    ``"<int>.<int>"``) and the UTF-8 bytes of the text being translated.

    Algorithm references:
        https://www.52pojie.cn/thread-707169-1-1.html
        https://www.jianshu.com/p/af74f0719267
    """

    # Seed used when the caller passes an empty ``tkk``.  Other values that
    # were recorded alongside it in the original source:
    #   422392.71207223
    #   406644.3293161072
    #   431767.4042228602
    DEFAULT_TKK = "440498.1287591069"

    def __init__(self, tkk):
        """Store the seed; an empty string selects ``DEFAULT_TKK`` lazily."""
        self.tkk = tkk

    def calculate_token(self, text):
        """Return the token for *text* as ``"<a>.<a ^ first_seed>"``.

        Args:
            text: The query, as ``str`` (encoded to UTF-8 here) or as
                already-encoded ``bytes``/``bytearray``.

        Returns:
            The token string expected in the ``tk`` URL parameter.
        """
        if self.tkk == "":
            # Kept as an attribute write so callers inspecting ``tkk`` after
            # the call still see the seed that was actually used.
            self.tkk = self.DEFAULT_TKK
        first_part, second_part = self.tkk.split(".")
        first_seed = int(first_part)
        second_seed = int(second_part)

        if isinstance(text, (bytes, bytearray)):
            data = bytes(text)
        else:
            data = text.encode('UTF-8')

        a = first_seed
        for value in data:
            a += value
            a = self._work_token(a, "+-a^+6")
        a = self._work_token(a, "+-3^+b+-f")
        a ^= second_seed
        if a < 0:
            # Reinterpret a negative value as an unsigned 32-bit integer.
            a = (a & 2147483647) + 2147483648
        # Integer modulus: avoids the int -> float -> int round trip.
        a %= 1000000
        return str(a) + "." + str(a ^ first_seed)

    @staticmethod
    def _rshift(val, n):
        """Unsigned 32-bit right shift of *val* by *n* bits."""
        return val >> n if val >= 0 else (val + 0x100000000) >> n

    def _work_token(self, a, seed):
        """Apply the mixing steps encoded in *seed* to *a*.

        *seed* is read in groups of three characters:
        ``[combine][direction][amount]`` where

        * ``amount`` is a digit, or a letter ``>= "a"`` meaning
          ``ord(letter) - 87`` (so ``"a"`` is 10, ``"f"`` is 15);
        * ``direction`` ``"+"`` shifts right (unsigned), anything else
          shifts left;
        * ``combine`` ``"+"`` adds and masks to 32 bits, anything else XORs.
        """
        for i in range(0, len(seed) - 2, 3):
            char = seed[i + 2]
            amount = ord(char[0]) - 87 if char >= "a" else int(char)
            shifted = self._rshift(a, amount) if seed[i + 1] == "+" else a << amount
            a = (a + shifted) & 4294967295 if seed[i] == "+" else a ^ shifted
        return a
import argparse
import asyncio
import re
import sys
import urllib.parse
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from html import escape

import requests
class GoogleTranslate(object):
    """Query Google Translate's ``translate_a/single`` endpoint and format the reply.

    An instance is configured from an argparse-style ``args`` object and
    accumulates the formatted output in ``self.result`` while the individual
    ``get_*`` helpers walk the JSON response.
    """

    # Inline stylesheet for the custom <pos>/<example>/<gray> tags that
    # ``result_to_html`` wraps around parts of the plain-text result.
    _CSS_TEXT = (
        '<style type="text/css">\n'
        'p {white-space: pre-wrap;}\n'
        'pos {color: #afb7f3;}\n'
        'example {color: #008080;}\n'
        'gray {color: #606060;}\n'
        '</style>'
    )

    def __init__(self, args):
        """Copy settings from *args*.

        Reads ``host``, ``proxy``, ``synonyms``, ``definitions``,
        ``examples``, ``type`` and ``alternative`` from *args*.  Also
        reconfigures ``sys.stdout`` to the chosen output encoding when the
        stream supports it.
        """
        self.http_host = args.host
        self.http_proxy = args.proxy
        self.synonyms_en = args.synonyms
        self.definitions_en = args.definitions
        self.examples_en = args.examples
        # HTML output is always UTF-8; text output follows the terminal.
        # ``sys.stdout.encoding`` can be None for replaced streams, so fall
        # back to UTF-8 instead of failing later in ``str.encode``.
        if args.type == 'html':
            self.result_code = 'utf-8'
        else:
            self.result_code = sys.stdout.encoding or 'utf-8'
        if hasattr(sys.stdout, 'reconfigure'):
            sys.stdout.reconfigure(encoding=self.result_code)
        self.alternative_language = args.alternative
        self.result_type = args.type
        self.target_language = ''
        self.query_string = ''
        self.result = ''

    def get_url(self, tl, qry, tk):
        """Build the request URL for target language *tl*, URL-quoted query *qry* and token *tk*."""
        url = f'https://{self.http_host}/translate_a/single?client=gtx&sl=auto&tl={tl}&hl=en&dt=at&dt=bd&dt=ex&' \
              f'dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=sos&dt=ss&dt=t&ssel=0&tsel=0&kc=1&tk={tk}&q={qry}'
        return url

    def get_synonym(self, resp):
        """Append the per-part-of-speech translation list found in ``resp[1]``, if any."""
        if resp[1]:
            self.result += '\n=========\n'
            self.result += f'Translations of {self.query_string}\n'
            for x in resp[1]:
                # x[0][0] is the first character of the group's label.
                self.result += f'# {x[0][0]}.\n'
                for y in x[2]:
                    self.result += f'{y[0]}: {", ".join(y[1])}\n'

    def get_result(self, resp):
        """Append the translated text: the non-empty first items of ``resp[0]``, then a newline."""
        self.result += ''.join(x[0] for x in resp[0] if x[0])
        self.result += '\n'

    def get_definitions(self, resp):
        """Append the definitions section built from ``resp[12]``."""
        self.result += '\n=========\n'
        self.result += f'0_0: Definitions of {self.query_string}\n'
        for x in resp[12]:
            self.result += f'# {x[0] if x[0] else ""}.\n'
            for y in x[1]:
                self.result += f' - {y[0]}\n'
                # The third item (presumably a usage example) is optional.
                self.result += f' * {y[2]}\n' if len(y) >= 3 else ''

    def get_examples(self, resp):
        """Append the examples section built from ``resp[13][0]``."""
        self.result += '\n=========\n'
        self.result += f'0_0: Examples of {self.query_string}\n'
        for x in resp[13][0]:
            self.result += f' * {x[0]}\n'

    def get_synonyms_en(self, resp):
        """Append the synonyms section built from ``resp[11]``."""
        self.result += '\n=========\n'
        self.result += f'0_0: Synonyms of {self.query_string}\n'
        for x in resp[11]:
            self.result += f'# {x[0]}.\n'
            for y in x[1]:
                self.result += ', '.join(y[0]) + '\n'

    def get_resp(self, url):
        """GET *url* and return the decoded JSON body.

        A proxy is used only when ``self.http_proxy`` is non-blank.  The
        request times out after 5 seconds.  Exceptions from ``requests``
        propagate to the caller.
        """
        proxy = (self.http_proxy or '').strip()
        proxies = {'http': f'http://{proxy}', 'https': f'http://{proxy}'} if proxy else None
        base_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0'}
        # The context manager closes the session's connection pool, which
        # was previously left open for every request.
        with requests.Session() as session:
            session.headers = base_headers
            return session.get(url, proxies=proxies, timeout=5).json()

    def result_to_html(self):
        """Rewrite ``self.result`` in place as a small standalone HTML document."""
        self.result = re.sub(r'(?m)^(#.*)', r'<pos><b>\1</b></pos>', self.result)
        self.result = re.sub(r'(?m)^([*].*)', r'<example>\1</example>', self.result)
        self.result = re.sub(r'(?m)^(0_0:.*?of)(.*)', r'<gray>\1</gray>\2', self.result)
        # NOTE(review): the header lines written by get_translation start
        # with "Translate ...", without a "^_^: " prefix, so this pattern
        # matches nothing in output produced by this class.  Left unchanged
        # until it is clear which side is intended.
        match = re.compile(rf"(?m)^({re.escape('^_^')}: Translate)(.*)(To)(.*)")
        self.result = match.sub(r'<gray>\1</gray>\2<gray>\3</gray>\4', self.result)
        self.result = f'<html>\n<head>\n{self._CSS_TEXT}\n</head>\n<body>\n<p>{self.result}</p>\n</body>\n</html>'

    async def get_translation(self, target_language, query_string, tkk=''):
        """Translate *query_string* and return the formatted result.

        Two requests are issued concurrently in the default executor: one for
        *target_language* and one for the configured alternative language.
        If the detected source language (``resp[2]``) already equals the
        target, the alternative-language translation is shown as well.

        Args:
            target_language: Language code to translate into.
            query_string: Text to translate (at most 5000 characters).
            tkk: Seed forwarded to ``Token``; empty selects its default.

        Returns:
            The formatted result (HTML or plain text, per ``result_type``),
            or a short error message string.  Errors are reported in the
            return value rather than raised.
        """
        self.result = ''
        self.target_language = target_language
        self.query_string = query_string
        # Reject oversized input before doing any token work.
        if len(self.query_string) > 5000:
            return '(╯‵□′)╯︵┻━┻: Maximum characters exceeded...'
        tk = Token(tkk).calculate_token(self.query_string)
        parse_query = urllib.parse.quote_plus(self.query_string)
        url = self.get_url(self.target_language, parse_query, tk)
        url_alt = self.get_url(self.alternative_language, parse_query, tk)
        try:
            loop = asyncio.get_running_loop()
            # get_resp is blocking, so both calls run in worker threads.
            resp = loop.run_in_executor(None, partial(self.get_resp, url))
            resp_alt = loop.run_in_executor(None, partial(self.get_resp, url_alt))
            [resp, resp_alt] = await asyncio.gather(resp, resp_alt)
            if resp[2] == self.target_language:
                self.result += f'Translate {resp[2]} To {self.alternative_language}\n'
                self.get_result(resp)
                self.result += '---------\n'
                self.get_result(resp_alt)
                self.get_synonym(resp_alt)
            else:
                self.result += f'Translate {resp[2]} To {self.target_language}\n{self.query_string}\n---------\n'
                self.get_result(resp)
                self.get_synonym(resp)
            # Optional sections are present only in longer responses.
            if self.synonyms_en and len(resp) >= 12 and resp[11]:
                self.get_synonyms_en(resp)
            if self.definitions_en and len(resp) >= 13 and resp[12]:
                self.get_definitions(resp)
            if self.examples_en and len(resp) >= 14 and resp[13]:
                self.get_examples(resp)
            if self.result_type == 'html':
                self.result_to_html()
            else:
                self.result = self.result.replace('<b>', '').replace('</b>', '')
            # Drop characters the output encoding cannot represent.
            return self.result.encode(self.result_code, 'ignore').decode(self.result_code)
        except requests.exceptions.ReadTimeout:
            return '╰(‵□′)╯: ReadTimeout...'
        except requests.exceptions.ProxyError:
            return '(╯‵□′)╯︵┻━┻: ProxyError...'
        except Exception as e:
            # Deliberate best-effort: any other failure becomes the result text.
            return f'Errrrrrrrrror: {e}'
def get_args(argv=None):
    """Parse the translator's command-line options.

    Args:
        argv: Optional list of argument strings.  When ``None`` (the
            default) ``sys.argv[1:]`` is parsed, as before.  Passing an
            explicit list lets the parser be used where ``sys.argv`` belongs
            to the host process (for example inside a notebook kernel).

    Returns:
        An ``argparse.Namespace`` with ``target``, ``query``, ``host``,
        ``proxy``, ``alternative``, ``type``, ``tkk``, ``synonyms``,
        ``definitions`` and ``examples``.
    """
    default = '(default: %(default)s)'
    parser = argparse.ArgumentParser()
    # Both positionals are required; argparse ignores ``default`` for a
    # positional without ``nargs='?'``.
    parser.add_argument('target', type=str, default='en', help=f'target language, eg: zh-CN, {default}')
    parser.add_argument('query', type=str, default='', help='query string')
    parser.add_argument('-s', dest='host', type=str, default='translate.googleapis.com', help=f'host name {default}')
    parser.add_argument('-p', dest='proxy', type=str, default='', help='proxy server (eg: 127.0.0.1:1080)')
    parser.add_argument('-a', dest='alternative', type=str, default='en', help=f'alternative language {default}')
    parser.add_argument('-r', dest='type', type=str, default='html', help=f'result type {default}')
    parser.add_argument('-k', dest='tkk', type=str, default='', help='tkk')
    parser.add_argument('-m', dest='synonyms', action='store_true', help='show synonyms')
    parser.add_argument('-d', dest='definitions', action='store_true', help='show definitions')
    parser.add_argument('-e', dest='examples', action='store_true', help='show examples')
    return parser.parse_args(argv)
from collections import namedtuple

# Defined once at module level so every create_args() result shares one type.
# Previously a new namedtuple class was built on each call.
Args = namedtuple('Args', ['target', 'query', 'host', 'proxy', 'alternative', 'type', 'tkk', 'synonyms', 'definitions', 'examples'])


def create_args(target='en', query='', host='translate.googleapis.com', proxy='', alternative='en', type='html', tkk='', synonyms=False, definitions=False, examples=False):
    """Build an ``Args`` record mirroring what ``get_args`` would parse.

    This lets the translator be driven programmatically instead of from the
    command line.  The parameter named ``type`` shadows the builtin inside
    this function; it is kept because it is part of the keyword interface.

    Returns:
        An ``Args`` namedtuple holding the given values in declaration order.
    """
    return Args(target, query, host, proxy, alternative, type, tkk, synonyms, definitions, examples)
async def main_async(args=None):
    """Translate ``args.query`` into ``args.target`` and return the formatted text.

    When *args* is omitted (or falsy) the settings are read from the command
    line via ``get_args()``.
    """
    if not args:
        args = get_args()
    translator = GoogleTranslate(args)
    return await translator.get_translation(args.target, args.query, tkk=args.tkk)
# import os | |
# os.listdir() | |
def _run_coroutine(coro):
    """Run *coro* to completion and return its result from synchronous code.

    With no event loop running (plain ``python script.py``) this is simply
    ``asyncio.run``.  When a loop is already running in this thread (as in a
    notebook) ``asyncio.run`` would raise, so the coroutine is executed on a
    private loop in a worker thread instead.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return asyncio.run(coro)
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


async def translate_words(words, target='fa'):
    """Translate each word in order and return ``{word: formatted_result}``.

    Requests are issued one word at a time, matching the original loop.
    """
    translated = {}
    for word in words:
        translated[word] = await main_async(create_args(query=word, target=target))
    return translated


# Load the vocabulary: one word per line, de-duplicated and sorted.  Blank
# lines are skipped so they do not become an empty "word", and surrounding
# whitespace (including a stray '\r') is removed.
with open('./4000-1-2', encoding='utf-8') as f:
    words = sorted({line.strip() for line in f if line.strip()})

# A top-level ``await`` only works inside a notebook cell; driving the
# coroutine explicitly keeps this valid as a regular Python script too.
word_htmls = _run_coroutine(translate_words(words))
# Page template for the vocabulary trainer.  Words start in #belowLine; the
# "mark" button moves a word across the separator, and the layout is persisted
# in localStorage under a key derived from the page's path.  The HTML comment
# inside #belowLine marks the place where the word containers belong.
combined_html = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
  body {
    background-color: #1e1e1e;
    color: #cfcfcf;
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 0;
    font-size: 16px; /* Default font size for desktop */
  }
  .word-container {
    border: 1px solid #a33;
    background-color: #2a2a2a;
    margin: 15px;
    padding: 5px;
    cursor: pointer;
    position: relative;
  }
  .word-title {
    flex-grow: 1;
    cursor: pointer;
  }
  .mark-read-button {
    background-color: #3a3a3a;
    color: white;
    border: 1px solid #a33;
    border-radius: 5px;
    position: absolute;
    top: 50%;
    right: 10px;
    transform: translateY(-50%);
    padding: 5px 10px;
    cursor: pointer;
  }
  .content {
    display: none;
    color: #f3f3f3;
    clear: both;
    padding-top: 10px;
  }
  .separator {
    width: 100%;
    border-top: 2px solid #a33;
    margin: 20px 0;
  }
  .toggle {
    cursor: pointer;
    color: #a33;
  }
  .fab {
    padding: 10px 15px;
    color: white;
    border: none;
    border-radius: 5px;
    cursor: pointer;
    position: fixed;
    bottom: 20px;
    z-index: 1000;
  }
  #collapseAllBtn {
    background-color: #a33;
    right: 20px;
  }
  #scrollToSeparatorBtn {
    background-color: #3a3a3a;
    right: 140px;
  }
  #resetBtn {
    background-color: #4a4a4a;
    left: 20px;
  }
  /* Responsive font size for smaller screens */
  @media screen and (max-width: 600px) {
    body {
      font-size: 18px; /* Slightly larger font size for mobile */
    }
    .word-container {
      margin: 10px 5px;
      padding: 3px;
    }
    /* Additional styles for smaller screens */
  }
</style>
</head>
<body>
<div id="aboveLine">
<!-- Words above the line will go here -->
</div>
<div class="separator"></div>
<div id="belowLine">
<!-- Words below the line will go here initially -->
</div>
<button id="collapseAllBtn" class="fab" onclick="collapseAll()">Collapse All</button>
<button id="scrollToSeparatorBtn" class="fab" onclick="scrollToSeparator()">Scroll to Separator</button>
<button id="resetBtn" class="fab" onclick="resetLayout()">Reset Changes</button>
<script>
  function togglePosition(id) {
    var element = document.getElementById(id);
    // Remember the neighbour before the element is moved away from it.
    var nextElementToExpand = element.nextElementSibling;
    var aboveLine = document.getElementById('aboveLine');
    var belowLine = document.getElementById('belowLine');
    var movingToAbove = belowLine.contains(element); // Check if the element is in belowLine
    // Insert the element in the target parent while maintaining order
    var targetParent = movingToAbove ? aboveLine : belowLine;
    var children = Array.from(targetParent.children);
    var index = children.findIndex(child => parseInt(child.id.replace('word', '')) > parseInt(id.replace('word', '')));
    if (index === -1) {
      targetParent.appendChild(element);
    } else {
      targetParent.insertBefore(element, children[index]);
    }
    // Collapse all contents in both aboveLine and belowLine
    document.querySelectorAll('.word-container .content').forEach(function(content) {
      content.style.display = 'none';
    });
    // Expand the word that followed the moved element in its previous container
    if (nextElementToExpand) {
      var contentToExpand = nextElementToExpand.querySelector('.content');
      if (contentToExpand) {
        contentToExpand.style.display = 'block';
      }
      scrollToElement(nextElementToExpand);
    }
    saveState();
  }
  function toggleContent(id) {
    var allContents = document.querySelectorAll('.word-container .content');
    var targetElement = null;
    allContents.forEach(function(content) {
      if (content.parentNode.id !== id) {
        content.style.display = 'none'; // Collapse all other contents
      } else {
        targetElement = content.parentNode; // Target element to scroll to
      }
    });
    var content = document.getElementById(id).querySelector('.content');
    if (content.style.display === 'none') {
      content.style.display = 'block'; // Expand the content
      scrollToElement(targetElement); // Scroll to the expanded element
    } else {
      content.style.display = 'none';
    }
  }
  function scrollToElement(element) {
    if (element) {
      element.scrollIntoView({
        behavior: 'smooth',
        block: 'start'
      });
    }
  }
  function getLocalStorageKey() {
    // Use window.location.pathname to get the file name or a unique part of the URL
    // This assumes your HTML files have different names or paths
    return 'wordState-' + window.location.pathname;
  }
  function saveState() {
    var state = [];
    document.querySelectorAll('.word-container').forEach(function(container) {
      var isAbove = document.getElementById('aboveLine').contains(container);
      var isCollapsed = container.querySelector('.content').style.display === 'none';
      state.push({id: container.id, isAbove: isAbove, isCollapsed: isCollapsed});
    });
    localStorage.setItem(getLocalStorageKey(), JSON.stringify(state));
  }
  function loadState() {
    var state = JSON.parse(localStorage.getItem(getLocalStorageKey()));
    if (!state) {
      // No saved state, save the current state as the initial state
      saveInitialState();
      state = JSON.parse(localStorage.getItem(getLocalStorageKey())); // Reload the state after saving the initial state
    }
    // Load the saved state
    applyState(state);
  }
  function saveInitialState() {
    var initialState = [];
    document.querySelectorAll('.word-container').forEach(function(container) {
      initialState.push({
        id: container.id,
        isAbove: false, // Assuming all words are initially below
        isCollapsed: true // Assuming all contents are initially collapsed
      });
    });
    localStorage.setItem(getLocalStorageKey(), JSON.stringify(initialState));
  }
  function applyState(state) {
    var aboveLine = document.getElementById('aboveLine');
    var belowLine = document.getElementById('belowLine');
    state.forEach(function(item) {
      var element = document.getElementById(item.id);
      if (element) {
        if (item.isAbove) {
          aboveLine.appendChild(element);
        } else {
          belowLine.appendChild(element);
        }
        var contentDisplay = item.isCollapsed ? 'none' : 'block';
        element.querySelector('.content').style.display = contentDisplay;
      }
    });
  }
  window.onload = function() {
    loadState();
  }
  function collapseAll() {
    document.querySelectorAll('.word-container .content').forEach(function(content) {
      content.style.display = 'none';
    });
  }
  function scrollToSeparator() {
    document.querySelector('.separator').scrollIntoView({
      behavior: 'smooth'
    });
  }
  function resetLayout() {
    // Clear saved state in localStorage; the reload below rebuilds the
    // initial layout from the markup and saves it again via loadState().
    localStorage.removeItem(getLocalStorageKey());
    // Wait a brief moment before reloading the page
    setTimeout(function() {
      window.location.reload();
    }, 100); // Wait 100 milliseconds
  }
</script>
</body>
</html>
'''
# Render one collapsible container per word.  ``html`` is the already
# formatted translation markup and is inserted as-is; the word itself is
# plain text and is escaped.
_WORDS_MARKER = '<!-- Words below the line will go here initially -->'
word_blocks = []
for index, (word, html) in enumerate(word_htmls.items(), start=1):
    word_blocks.append(f'''
<div id="word{index}" class="word-container">
<div class="word-title" onclick="toggleContent('word{index}')"><span class="toggle">></span> {escape(word)}</div>
<button class="mark-read-button" onclick="togglePosition('word{index}')">mark</button>
<div class="content" onclick="toggleContent('word{index}')">{html}</div>
</div>
''')
words_markup = ''.join(word_blocks)

# The template already closes #belowLine, <body> and <html>.  Appending the
# containers (plus another set of closing tags) after it produced a document
# with content following </html>.  Put the containers inside #belowLine, at
# the marker comment, instead.
if _WORDS_MARKER in combined_html:
    combined_html = combined_html.replace(_WORDS_MARKER, words_markup, 1)
else:
    # Marker missing (template was edited): fall back to the previous
    # behaviour of appending the words and the closing tags.
    combined_html += words_markup + '\n</div>\n</body>\n</html>'

# Write to a file
with open("combined_words.html", "w", encoding="utf-8") as file:
    file.write(combined_html)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
pip install google-translate-for-goldendict