Last active
April 1, 2018 14:54
-
-
Save Xmoe/bc9f1e10a15a1a46297f02515a9381f2 to your computer and use it in GitHub Desktop.
Simulate the login process to eKVV in Python to obtain cookies.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from urllib.parse import urljoin

import requests_html
def login(username=None, password=None):
    """
    Log in to eKVV and return a session carrying the authentication cookies.

    The login is a multi-step handshake:

    1. GET the jsession_id_url, which hands out a jsession cookie (it is not
       yet confirmed whether the later steps strictly need it).
    2. GET the login page, which redirects to itself with additional data;
       the redirected URL is the one the credentials must be POSTed to.
    3. Hidden inside the login form is a one-time secret token called "nnc".
       It is extracted and POSTed together with the credentials.
    4. The server answers with a form (a file called "trust"). ALL of its
       input fields are POSTed to that form's target, which completes the
       login and yields the SP_AUTH cookie.

    :param username: login name; required
    :param password: password; required
    :return: A session with all the necessary context (cookies etc.) to make
        further requests to the server, or None when credentials are missing.
        The caller owns the returned session and should close it when done.
    :raises RuntimeError: if the login page does not contain the "nnc" token
    """
    if username is None or password is None:
        print("Cannot login without credentials!")
        return None

    jsession_id_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/Benvw_Loggedin.jsp"
    login_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/benvw_Login_MatrikelAct"

    # Deliberately not a ``with`` block: the session is handed to the caller,
    # so it must stay open on success and only be closed if the login fails.
    session = requests_html.HTMLSession()
    try:
        # only requested for the cookie it sets; the response body is not needed
        session.get(jsession_id_url)

        r = session.get(login_url)
        # after we have been redirected to the correct url, update the path
        login_url = r.url

        # find the first element with id "nnc" and retrieve its value
        token_field = r.html.find("#nnc", first=True)
        if token_field is None or "value" not in token_field.attrs:
            raise RuntimeError(
                "Login page did not contain the expected 'nnc' token: " + login_url
            )

        # construct the payload for POSTing
        payload = {
            "username": username,
            "password": password,
            "nnc": token_field.attrs["value"],
        }
        r = session.post(login_url, data=payload)

        # get all the input fields and values which authenticate us, so we can POST them
        submit_url, payload = form_crawler(r)
        session.post(submit_url, data=payload)
    except Exception:
        # do not leak the session when any step of the handshake fails
        session.close()
        raise

    return session
def form_crawler(response, selector="form"):
    """
    Collect the submit URL and the input fields of a form in an HTML response.

    :param response: response object; its ``html`` attribute is searched for
        the form and its ``url`` attribute is the base for the submit URL
    :param selector: CSS selector for the form; the first match is used
    :return: tuple ``(submit_url, input_fields)`` where ``input_fields`` maps
        each input's name to its value
    :raises ValueError: if no element matches ``selector``
    """
    # take the first form which fits the selector
    form = response.html.find(selector, first=True)
    if form is None:
        raise ValueError(
            f"No form matching selector {selector!r} found at {response.url}"
        )

    # Construct a dictionary of {name: value} for all inputs of the form.
    # Inputs without a name (e.g. plain submit buttons) are skipped, and a
    # missing value is sent as an empty string.
    input_fields = {
        elem.attrs["name"]: elem.attrs.get("value", "")
        for elem in form.find("input")
        if "name" in elem.attrs
    }

    # If the form has no action attribute, the data goes to the page itself
    # (urljoin with an empty string returns the base URL). Otherwise the
    # action is resolved against the page URL, so relative targets work and
    # absolute ones are used as they are.
    submit_url = urljoin(response.url, form.attrs.get("action", ""))

    return (submit_url, input_fields)
if __name__ == "__main__":
    # login_data.json is unpacked into login()'s keyword arguments, so it is
    # expected to hold a JSON object with "username" and "password" keys.
    with open("login_data.json", "r", encoding="utf-8") as file:
        credentials = json.load(file)

    session = login(**credentials)
    if session is None:
        # login() has already printed why it could not log in
        raise SystemExit(1)

    # release the session's connections once the script is done with it
    with session:
        # here you can do whatever you want
        other_url = "https://ekvv.uni-bielefeld.de/kvv_publ/publ/Stundenplan_Liste_Kompakt.jsp"
        r = session.get(other_url)
        print(r.text)
        print(session.cookies)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment