|
import json |
|
import re |
|
from ast import literal_eval |
|
from urllib import parse |
|
|
|
|
|
# Functions for reading and writing url params
|
def url_loads(url_params: str) -> dict:
    """Decode a URL query string written by ``url_dumps`` back into a dict.

    Each value is expected to be either a JSON string literal (this is how
    ``url_dumps`` writes top-level strings) or the text of a Python literal
    (numbers, booleans, ``None``, lists, dicts, ...).

    Args:
        url_params: The query string, without the leading ``?``.

    Returns:
        A dict mapping every parameter name to its decoded value.

    Raises:
        ValueError, SyntaxError: If a value is not a valid literal
            (both are propagated from ``ast.literal_eval``).
    """
    parsed = parse.parse_qs(url_params)

    def _decode(raw: str):
        # Top-level strings are JSON-encoded, so decode them as JSON first.
        # JSON writes characters outside the BMP as a surrogate pair of
        # \uXXXX escapes; a Python literal would keep them as two lone
        # surrogates instead of joining them into one character.
        if raw.startswith('"'):
            try:
                return json.loads(raw)
            except ValueError:
                # Not valid JSON (e.g. a Python-style string literal):
                # fall through to the generic literal parser.
                pass
        return literal_eval(raw)

    # parse_qs returns a dict of lists,
    # we don't allow url duplicates (e.g. ?a=1&a=2)
    # so we take the first item from each list
    return {k: _decode(v[0]) for k, v in parsed.items()}
|
|
|
|
|
def url_dumps(params: dict) -> str:
    """Encode a dict as a URL query string (without the leading ``?``).

    String values are first turned into JSON string literals: the extra
    quotes let the reader tell a string from any other type, and the JSON
    escaping keeps special characters recoverable. Every other value is
    handed to ``urlencode`` unchanged.

    Args:
        params: Mapping of parameter names to values.

    Returns:
        The percent-encoded query string.
    """
    encodable = {}
    for key, value in params.items():
        if isinstance(value, str):
            value = json.dumps(value)
        encodable[key] = value
    return parse.urlencode(encodable, encoding="utf-8")
|
|
|
|
|
# Dumping/extracting hidden params from html text (message.html_text).
# The idea is to create a link with the data encoded in its url params
# and hide it in the message text by making a zero-width space the only
# content of the link.

# Spelled as an escape sequence so the character is visible in the source;
# a literal U+200B cannot be told apart from an empty string when reading
# or copying the file.
# An alternative that was tried: "\u3164" (Hangul Filler).
ZERO_WIDTH_SPACE = "\u200b"

# "{url}" is left as a placeholder; the link content is the zero-width space.
HIDDEN_LINK_TEMPLATE = f'<a href="{{url}}">{ZERO_WIDTH_SPACE}</a>'

# The hidden link with a fixed base url; "{params}" is filled with "?<query>".
HIDDEN_CONTAINER = HIDDEN_LINK_TEMPLATE.format(
    url="http://t.me/x/hidden_container{params}"
)

# Layout of the full message: the hidden link followed by the visible text.
TEMPLATE = "{hidden_params}{text}"

# Regex counterpart of TEMPLATE. The literal pieces of the container are
# escaped so that regex metacharacters in them (the "." in "t.me") match
# only themselves; the query string and the visible text are captured in
# the named groups "hidden_params" and "text".
_CONTAINER_HEAD, _CONTAINER_TAIL = HIDDEN_CONTAINER.split("{params}")
REGEX = TEMPLATE.format(
    hidden_params=(
        re.escape(_CONTAINER_HEAD)
        + r"\?(?P<hidden_params>.+?)"
        + re.escape(_CONTAINER_TAIL)
    ),
    text=r"(?P<text>.*)",
)
|
|
|
|
|
def parse_hidden_params(html_text: str) -> tuple[dict, str]:
    """Split html text into its hidden params and the visible text.

    Args:
        html_text: Text that must start with the hidden container link
            (the whole string has to match ``REGEX``).

    Returns:
        A ``(hidden_params, text)`` tuple: the decoded params dict and the
        remainder of the text after the hidden link.

    Raises:
        ValueError: If the text does not start with a hidden container, or
            if the params inside it cannot be decoded.
    """
    m = re.fullmatch(REGEX, html_text, re.DOTALL)
    if not m:
        raise ValueError(f"Can't parse hidden container from text: {html_text}")

    # Both named groups are mandatory parts of REGEX, so a successful match
    # always provides them; only decoding the params can still fail.
    try:
        hidden_params = url_loads(m["hidden_params"])
    except (ValueError, SyntaxError) as err:
        # literal_eval reports malformed values with either exception type;
        # surface both as the ValueError this function raises for bad input.
        raise ValueError(
            f"Can't parse hidden params from text: {html_text}"
        ) from err

    return hidden_params, m["text"]
|
|
|
|
|
def dump_hidden_params(hidden_params: dict, text: str) -> str:
    """Prepend a hidden link carrying ``hidden_params`` to ``text``.

    Args:
        hidden_params: Data to encode into the url params of the hidden link.
        text: The visible text; it is inserted after the link as is.

    Returns:
        The text laid out according to ``TEMPLATE``.
    """
    query = "?" + url_dumps(hidden_params)
    container = HIDDEN_CONTAINER.format(params=query)
    return TEMPLATE.format(hidden_params=container, text=text)
|
|
|
|
|
def _run_demo() -> None:
    """Dump sample data into a text, parse it back and print each step."""
    sample = {
        "x": "y",
        "z": 1,
        "y": True,
        "array": [1, 2, 3, True, "1", "1", {"nested": {"even": "more"}}],
        "strange": '"enquoted" string with special characters: \t\n\r\\',
        "dict": {"a": "b", "c": ["d"]},
    }
    print("Data:", sample, sep="\n", end="\n\n")

    # Hide the sample inside a message text.
    message = dump_hidden_params(sample, "some free form text visible in a message")
    print("Text with hidden data:", message, sep="\n", end="\n\n")

    # Read it back; the visible text is not needed here.
    restored, _ = parse_hidden_params(message)
    print("Extracted data:", restored, sep="\n", end="\n\n")

    # Report whether the round trip preserved the data.
    print("Data is valid:", sample == restored)


if __name__ == "__main__":
    _run_demo()