Last active
February 12, 2019 12:03
-
-
Save yoavst/923a4e50638c9af2b802eea7f1161f21 to your computer and use it in GitHub Desktop.
Download tests from TAU tests storage
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Download tests from the tests bank, run with --help for parameters
Dependencies:
pip install pyquery
pip install requests
Usage: python3 Downloader.py -username USERNAME -id USER_ID -password PASS_IN_BASE64 -faculty FACULTY -department DEPARTMENT -filters FILTERS_FILE.txt
GUI usage: python3 Downloader.py gui
Use it only for downloading tests for self-usage. Do not use it for piracy.
Do follow the site's terms of service: https://tinyurl.com/ya3qjbbn
TAU-Downloader Copyright (C) 2018 Yoav Sternberg
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import json | |
import string | |
import os | |
import requests | |
import base64 | |
import sys | |
import argparse | |
import logging | |
from pyquery import PyQuery as pq | |
from collections import namedtuple | |
import urllib3 | |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | |
# Common HTTP headers used for requests to the tests store: a desktop
# Firefox User-Agent string and an Accept header that prefers JSON.
BASE_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0",
    "Accept": "application/json, text/javascript, */*; q=0.01",
}
# Default value of the -dir command line option.
DEFAULT_DOWNLOAD_DIR = "Downloads"
LIST_OF_IDS = """ | |
Options for faculty: | |
<option value="all">בחר</option> | |
<option value="3294">ביהס פורטר ללימודי סביבה</option> | |
<option value="2874">רפואה ומקצועות הבריאות</option> | |
<option value="32">מדעי החברה</option> | |
<option value="229" >מדעים מדויקים</option> | |
<option value="62">ביהס לעבודה סוציאלית</option> | |
<option value="63">היחידה ללימודי שפות</option> | |
<option value="60">משפטים</option> | |
<option value="61">ביהס לחינוך</option> | |
<option value="52">אמנויות</option> | |
<option value="53">מדעי החיים</option> | |
<option value="55">מדעי הרוח</option> | |
<option value="44">ניהול</option> | |
<option value="42">הנדסה</option> | |
Options for department for Exact Science: | |
<option value="all">בחר</option> | |
<option value="2539">גיאופיזיקה ומדעים פלנטריים</option> | |
<option value="3307">מתמטיקה וסטטיסטיקה</option> | |
<option value="3303">פיזיקה</option> | |
<option value="289">מדעי המחשב</option> | |
<option value="231">כימיה</option> | |
<option value="230">פיזיקה ואסטרונומיה</option> | |
""" | |
def flatten(listOfLists):
    """Concatenate the items of every inner iterable into one flat list.

    :param listOfLists: iterable of iterables (a generator is accepted).
    :return: a new list with all inner items, in the order encountered.
    """
    return [item for inner in listOfLists for item in inner]
def parse_command_line_args():
    """Parse the command line of the downloader.

    When ``-list`` or ``--list`` is the only argument, the known faculty and
    department ids are printed and the process exits with status 0, without
    demanding the otherwise required login/search arguments.

    :return: the ``argparse.Namespace`` holding the parsed options.
    """
    # Handled before argparse runs: the required arguments declared below
    # would otherwise turn a bare "list" request into a usage error.
    if len(sys.argv) == 2 and sys.argv[1] in ('-list', '--list'):
        try:
            print(LIST_OF_IDS)
        except UnicodeEncodeError:
            # The list contains Hebrew text the console may fail to encode.
            print("Bad encoding, see https://pastebin.com/raw/sGGkZnMe for list")
        sys.exit(0)

    parser = argparse.ArgumentParser()
    # A plain flag, registered so it shows up in --help; without an action it
    # used to demand a value that nothing ever read.
    parser.add_argument("--list", action="store_true", help="Show list of ids for faculties and departments")
    # NOTE: the "gui" mode is dispatched by main() before this parser runs,
    # so it is deliberately not declared here.
    parser.add_argument("--verbose", "-v", action="store_true", help="Increase log level for the program")
    parser.add_argument("-dir", help="Directory for storing downloaded files", default=DEFAULT_DOWNLOAD_DIR)

    requiredNamed = parser.add_argument_group('required named arguments')
    requiredNamed.add_argument("-username", help="TAU username for login", type=str, required=True)
    requiredNamed.add_argument("-id", help="TAU user identity number for login", type=str, required=True)
    requiredNamed.add_argument("-password", help="TAU password in BASE64 format for login", type=str, required=True)
    requiredNamed.add_argument("-faculty", help="Faculty id for search", type=int, required=True)
    requiredNamed.add_argument("-department", help="Department id for search", type=int, required=True)
    requiredNamed.add_argument("-filters", help="path for file with filters each in different line", type=str,
                               required=True)
    return parser.parse_args()
def get_filters(args):
    """Read the course-name filters from the file named by ``args.filters``.

    Each non-blank line of the file is one filter. Carriage returns and line
    feeds are removed; other characters are kept as written.

    :param args: object exposing a ``filters`` attribute with the file path.
    :return: list of filter strings, in file order, without blank entries.
    """
    # 'utf-8-sig' reads plain UTF-8 too, but also drops a leading BOM that
    # would otherwise become part of the first filter and stop it matching.
    with open(args.filters, 'r', encoding='utf-8-sig') as f:
        cleaned = (line.replace('\r', '').replace('\n', '') for line in f)
        # A blank line would yield the filter "", which is a substring of
        # every course name and would silently disable filtering.
        return [text for text in cleaned if text.strip()]
def connect(username, id_number, password):
    """Log in to the tests store and return the authenticated session.

    :param username: TAU username.
    :param id_number: TAU identity number.
    :param password: password exactly as it is sent to the site (the callers
        in this file pass it Base64-encoded).
    :return: the ``requests.Session`` used for the login when the response
        reports ``isLogedIn`` as truthy, otherwise ``None``.
    """
    # dict.update() returns None, so the merged headers have to be built
    # before the call; passing its result sent the login without any of the
    # custom headers.
    headers = dict(BASE_HEADERS)
    headers["Content-Type"] = "application/x-www-form-urlencoded"

    session = requests.Session()
    response = session.post("https://store.student.co.il/ajax/student-login",
                            data={
                                "student_id": id_number,
                                "student_username": username,
                                "student_password": password
                            }, headers=headers,
                            verify=False)
    logging.info("Login request: %s", response)
    data = response.json()
    logging.info("Login request content: %s", data)
    # 'isLogedIn' (sic) is the key the site uses in its JSON answer.
    if data['isLogedIn']:
        return session
    return None
def get_build_id(session):
    """Fetch the tests search page and return the build id of its form.

    :param session: session object used to issue the GET request.
    :return: the ``value`` attribute of the ``form_build_id`` input found
        inside the ``#ajax_enabled_select_tests`` element.
    """
    response = session.get('https://store.student.co.il/en?dest_form=tests_popup_wrapper',
                           headers=dict(BASE_HEADERS))
    logging.info("Build ID request: " + str(response))
    page = pq(response.text)
    search_form = page('#ajax_enabled_select_tests')
    build_id_input = search_form('input[name=form_build_id]')
    return build_id_input.attr('value')
def _request_course(session, build_id, faculty, department):
    """Post the tests search form and return the decoded JSON answer.

    :param session: session object used to issue the POST request.
    :param build_id: value sent as the ``form_build_id`` field.
    :param faculty: faculty id; a falsy value is sent as ``"all"``.
    :param department: department id; a falsy value is sent as ``"all"``.
    :return: whatever the response's ``json()`` method yields.
    """
    form_fields = {
        "faculty": faculty or "all",
        "department": department or "all",
        "course": "all",
        "text_type": "tests",
        "form_build_id": build_id,
        "form_id": "customization_text_popup_search_form_tests",
        "_triggering_element_name": "department"
    }
    response = session.post('https://store.student.co.il/en/system/ajax', data=form_fields)
    return response.json()
def _parse_courses(content):
    """Extract the course options from a search-form answer.

    :param content: decoded answer; the HTML is read from ``content[1]["data"]``.
    :return: list of ``(text, value)`` tuples, one per ``option`` element
        under the element named ``course``.
    """
    markup = content[1]["data"]
    options = pq(markup)('[name=course]')('option')
    courses = []
    for option in options:
        courses.append((option.text, option.attrib['value']))
    return courses
def get_courses(session, faculty, department):
    """Return the courses offered for a faculty and, optionally, a department.

    :param session: session object used for the requests.
    :param faculty: faculty id to search in.
    :param department: department id, or a falsy value for the whole faculty.
    :return: list of ``(text, value)`` course tuples.
    """
    build_id = get_build_id(session)
    # The faculty-only request always goes first: a specific department
    # cannot be requested directly without asking for its faculty before.
    content = _request_course(session, build_id, faculty, None)
    if department:
        content = _request_course(session, build_id, faculty, department)
    return _parse_courses(content)
def get_tests(courses, faculty, department):
    """Collect the tests of every given course into a single list.

    :param courses: iterable of course values.
    :param faculty: faculty id forwarded to the per-course lookup.
    :param department: department id forwarded to the per-course lookup.
    :return: one flat list with the tests of all courses, in course order.
    """
    tests = []
    for course in courses:
        tests.extend(get_tests_for_course(course, faculty, department))
    return tests
def test_url(course, faculty, department): | |
faculty = faculty if faculty else "all" | |
department = department if department else "all" | |
return "https://store.student.co.il/en/text/tests/%s/%s/%s/all" % (faculty, department, course) | |
# One downloadable test as read from a row of the results table: five text
# fields followed by the link to the file.
TestInfo = namedtuple('TestInfo', ['name', 'lecturer', 'year', 'semester', 'term', 'link'])
def get_tests_for_course(course, faculty, department):
    """Fetch every test listed for one course, following the pager links.

    :param course: course value used to build the listing URL.
    :param faculty: faculty id used to build the listing URL.
    :param department: department id used to build the listing URL.
    :return: list of ``TestInfo`` entries from the first page and from every
        page linked by a ``.pager-item`` element.
    """
    first_page = requests.get(test_url(course, faculty, department), headers=BASE_HEADERS, verify=False)
    html = pq(first_page.text)
    links = _parse_links_page(html)

    pager = html('.pager')
    if not len(pager):
        # Single page of results: nothing more to fetch.
        return links

    for item in pager('.pager-item'):
        for anchor in pq(item)('a'):
            page_url = "https://store.student.co.il" + anchor.attrib['href']
            response = requests.get(page_url, headers=BASE_HEADERS, verify=False)
            links.extend(_parse_links_page(pq(response.text)))
    return links
def or_none(text):
    """Return ``text`` stripped of surrounding whitespace, or ``None`` when it is falsy.

    :param text: a string, or ``None``.
    :return: ``text.strip()`` for a non-empty string, otherwise ``None``.
    """
    return text.strip() if text else None
def _parse_links_page(html):
    """Turn the rows of a listing page's results table into ``TestInfo`` entries.

    :param html: parsed page to search for the ``.views-table`` element.
    :return: list of ``TestInfo``, one per table row that contains a link;
        an empty list when the page has no results table.
    """
    table = html('.views-table')
    if len(table) == 0:
        return []

    tests = []
    for row in table('tbody')('tr'):
        if len(pq(row)('a')) == 0:
            # Rows without any link carry nothing to download.
            continue
        name = row[0].text.strip()
        details = [or_none(row[column].text) for column in range(1, 5)]
        link = pq(row[5])('a').attr('href')
        tests.append(TestInfo._make([name] + details + [link]))
    return tests
def format_filename(proposed_filename):
    """Reduce a proposed file name to a restricted set of characters.

    Only ASCII letters, digits, Hebrew letters, spaces and ``-_.()`` are
    kept; the remaining spaces are then turned into underscores.

    :param proposed_filename: the name to clean up.
    :return: the cleaned file name.
    """
    allowed = "-_.() " + string.ascii_letters + string.digits + "פםןוטארקףךלחיעכגדשץתצמנהבסז"
    kept = [char for char in proposed_filename if char in allowed]
    return ''.join(kept).replace(' ', '_')
# Experimental | |
def gui():
    """Open the experimental Tk window that collects the download parameters.

    The window has entries for faculty, department, username, id and
    password, a text box for the filters and a "Download" button. Pressing
    the button asks for a target directory, writes the filters to a temporary
    file inside it and runs ``main`` with the collected values (the password
    is Base64-encoded first).
    """
    import tkinter
    import tkinter.filedialog

    def download():
        """Run a download with the values currently typed into the window."""
        directory = tkinter.filedialog.askdirectory()
        if not directory:
            # Nothing was chosen (the dialog was cancelled).
            return

        temp = os.path.join(directory, '__temp_filters_file.txt')
        with open(temp, 'wb') as f:
            f.write(filters_entry.get("1.0", tkinter.END).encode('utf-8'))
        args = {
            'filters': temp,
            'dir': directory,
            'username': username_entry.get(),
            'id': id_entry.get(),
            'password': base64.b64encode(password_entry.get().encode('utf-8')).decode('utf-8'),
            'department': department_entry.get(),
            'faculty': faculty_entry.get(),
            'verbose': False
        }
        try:
            main(namedtuple('Args', args.keys())(*args.values()))
        finally:
            # main() may raise or call sys.exit(); the temporary filters file
            # must not be left behind in the user's directory either way.
            os.remove(temp)

    master = tkinter.Tk()
    master.title('TAU tests bank downloader')

    tkinter.Label(master, text="Faculty").grid(row=0)
    tkinter.Label(master, text="Department").grid(row=1)
    tkinter.Label(master, text="Username").grid(row=2, pady=(20, 0))
    tkinter.Label(master, text="Id").grid(row=3)
    tkinter.Label(master, text="Password").grid(row=4)
    tkinter.Label(master, text="Filters").grid(row=5, pady=(20, 0))

    faculty_entry = tkinter.Entry(master)
    faculty_entry.grid(row=0, column=1)
    department_entry = tkinter.Entry(master)
    department_entry.grid(row=1, column=1)
    username_entry = tkinter.Entry(master)
    username_entry.grid(row=2, column=1, pady=(20, 0))
    id_entry = tkinter.Entry(master)
    id_entry.grid(row=3, column=1)
    # Mask the typed password on screen.
    password_entry = tkinter.Entry(master, show="*")
    password_entry.grid(row=4, column=1)
    filters_entry = tkinter.Text(master, width=40, height=5)
    filters_entry.tag_configure('tag-right', justify='right')
    filters_entry.grid(row=5, column=1, pady=(20, 0))

    tkinter.Button(master, text="Download", command=download).grid(row=6)
    tkinter.mainloop()
def main(args=None):
    """Log in, find the courses matching the filters and download their tests.

    :param args: object exposing ``filters``, ``verbose``, ``username``,
        ``id``, ``password``, ``faculty``, ``department`` and ``dir``. When
        omitted, the command line is used: a single ``gui`` argument opens
        the GUI instead, anything else is parsed as options.

    Exits with status 1 when the filters file holds no filters or when the
    login fails. Writes the details of the found tests to ``urls.json`` in
    the current directory and each test to a ``.pdf`` file under ``args.dir``.
    """
    if not args:
        if len(sys.argv) == 2 and sys.argv[1] == 'gui':
            gui()
            return
        args = parse_command_line_args()

    filters = get_filters(args)
    if not filters:
        # The filters are read from the file given by -filters, not stdin.
        print("error: no filters found in the filters file. Abort.")
        sys.exit(1)
    print("Applying for", len(filters), "filters...")

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    session = connect(args.username, args.id, args.password)
    if not session:
        print("Error: Login info is incorrect")
        sys.exit(1)

    courses = get_courses(session, args.faculty, args.department)
    print("There are", len(courses), "courses on list")

    # Keep the value of every course whose name contains at least one filter.
    filtered_courses = [value for name, value in courses
                        if any(keyword in name for keyword in filters)]
    print("But only", len(filtered_courses), "after the filter")

    tests = get_tests(filtered_courses, args.faculty, args.department)
    print(len(tests), "tests found, starts downloading them...")

    with open('urls.json', 'w') as url_file:
        # noinspection PyProtectedMember
        json.dump([test._asdict() for test in tests], url_file)

    if tests:
        # Created once, and only when there is something to store in it.
        os.makedirs(args.dir, exist_ok=True)

    for i, test in enumerate(tests):
        r = requests.get(test.link, verify=False)
        # Fields other than the name may be None; treating them as empty
        # avoids a TypeError while leaving complete names unchanged.
        details = (test.name, test.lecturer, test.year, test.semester, test.term)
        title = " ".join(part or "" for part in details)
        # The running index keeps tests with identical details apart.
        filename = os.path.join(args.dir, format_filename(title) + "_" + str(i) + '.pdf')
        # A str path lets Python pick the right file-system encoding; a
        # UTF-8 bytes path can produce mangled names on some platforms.
        with open(filename, 'wb') as f:
            f.write(r.content)
    print('Done! have a good night.')
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment