Created
September 30, 2023 11:18
-
-
Save adlerweb/2be1a440bfadff1a521bfbb841c1d5ec to your computer and use it in GitHub Desktop.
Extract the target URL from a Mandrill tracking link. Based on https://gist.github.com/medmunds/1b696ee88ccb0480d71f
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import json | |
import base64 | |
import argparse | |
from binascii import Error as BinasciiError | |
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, parse_qsl | |
# Version string reported by the --version command line flag.
version = "0.1"
def urlsafe_base64_decode(s):
    """
    Decode a URL-safe base64 encoded string.

    Trailing equal signs that might have been stripped from the input are
    added back before decoding.
    (via https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/#urlsafe_base64_decode)

    Args:
        s: URL-safe base64 text (str), with or without "=" padding.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: If the input cannot be decoded as base64. The underlying
            decoder error is attached as the exception's cause.
    """
    data = s.encode()
    # Pad up to the next multiple of four characters. "-len % 4" is the number
    # of characters actually missing; padding by "len % 4" over-pads inputs
    # whose length is 3 mod 4 and only works because the decoder happens to
    # ignore excess "=" characters.
    padded = data + b"=" * (-len(data) % 4)
    try:
        return base64.urlsafe_b64decode(padded)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e) from e
def remove_utm(s):
    """
    Remove URL parameters starting with utm_, as these are usually only used
    for tracking.
    (see https://en.wikipedia.org/wiki/UTM_parameters)

    Args:
        s: The URL to clean, as a string.

    Returns:
        The URL with every query parameter whose name starts with "utm_"
        removed. All other parameters are kept in their original order; the
        query string is re-encoded with urlencode().
    """
    parsed_url = urlparse(s)

    # keep_blank_values=True so that non-tracking parameters with an empty
    # value (e.g. "?ref=&id=1") survive the round trip instead of being
    # silently dropped by parse_qsl().
    query_parameters = parse_qsl(parsed_url.query, keep_blank_values=True)
    filtered_parameters = [
        (key, value)
        for key, value in query_parameters
        if not key.startswith('utm_')
    ]

    # Reconstruct the URL with the filtered query parameters
    new_query = urlencode(filtered_parameters)
    return urlunparse(parsed_url._replace(query=new_query))
def mandrill_extract(tracking_url):
    """
    Recover the original target URL from a mandrill tracking link.
    (via https://gist.github.com/medmunds/1b696ee88ccb0480d71f)

    Returns a 5-tuple:
        account_id   -- second-to-last segment of the link's path
        base_url     -- last segment of the link's path
        data_field   -- raw value of the first "p" query parameter
        original_url -- the "url" entry of the decoded payload
        filtered_url -- original_url passed through remove_utm()

    A link without a "p" parameter, or with a payload lacking the expected
    keys, raises the underlying lookup/decoding exception.
    """
    link = urlparse(tracking_url)

    # The last two path segments carry the account ID and the base URL.
    segments = link.path.split('/')
    account_id = segments[-2]
    base_url = segments[-1]

    # "p" holds base64 encoded JSON, whose own "p" entry is again a JSON
    # document containing the target URL.
    data_field = parse_qs(link.query)['p'][0]
    outer = json.loads(urlsafe_base64_decode(data_field))
    inner = json.loads(outer['p'])
    original_url = inner['url']

    return account_id, base_url, data_field, original_url, remove_utm(original_url)
def main():
    """
    Command line entry point.

    Parses the command line, decodes the given mandrill tracking link and
    prints the result. Output format, in order of precedence:
        -j / --json        all fields as a JSON object
        -v / --verbose     one labelled line per field
        -u / --unfiltered  the decoded URL including tracking parameters
        (default)          the decoded URL with utm_ parameters removed

    Exits with status 0 after printing version info for --version, with
    status 2 (argparse usage error) if no tracking URL is given, and with
    status 1 if the link cannot be parsed.
    """
    parser = argparse.ArgumentParser(description='Decode a mandrill tracking link and reconstruct the original URL without tracking')
    # The positional is optional at the argparse level so that "--version"
    # can be used on its own; its presence is enforced below for every other
    # invocation.
    parser.add_argument('tracking_url', nargs='?', help='The tracking URL to process')
    parser.add_argument('--version', action='store_true', help='Show version info')
    parser.add_argument('-j', '--json', action='store_true', help='Output all fields as JSON')
    parser.add_argument('-v', '--verbose', action='store_true', help='Output all variants individually')
    parser.add_argument('-u', '--unfiltered', action='store_true', help='Output decoded link with tracking parameters')
    args = parser.parse_args()

    if args.version:
        print(f"Mandrill link decoder\n Version {version}")
        sys.exit(0)

    tracking_url = args.tracking_url
    if tracking_url is None:
        # Same message and exit status argparse uses for a missing positional.
        parser.error('the following arguments are required: tracking_url')

    try:
        account_id, base_url, data_field, original_url, filtered_url = mandrill_extract(tracking_url)
    except Exception as e:
        # Top-level boundary: any failure while decoding is reported to the
        # user instead of surfacing as a traceback.
        print("Unable to parse link; exiting.", file=sys.stderr)
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        result = {
            "AccountID": account_id,
            "BaseURL": base_url,
            "DataField": data_field,
            "OriginalURL": original_url,
            "FilteredURL": filtered_url
        }
        print(json.dumps(result, indent=4))
    elif args.verbose:
        print("Account ID:", account_id)
        print("Base URL:", base_url)
        print("Original URL:", original_url)
        print("Filtered URL:", filtered_url)
    elif args.unfiltered:
        print(original_url)
    else:
        print(filtered_url)
# Run the command line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment