Skip to content

Instantly share code, notes, and snippets.

@pwneddesal
Created April 20, 2021 07:56

Revisions

  1. pwneddesal created this gist Apr 20, 2021.
    40 changes: 40 additions & 0 deletions waybackurls.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,40 @@
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import requests
    import sys
    import json


    def waybackurls(host, with_subs):
        """Fetch archived URLs for *host* from the Wayback Machine CDX API.

        Args:
            host: Domain (optionally followed by a path prefix) to look up.
            with_subs: When true, query ``*.<host>/*`` so that subdomains are
                included; otherwise only ``<host>/*`` is queried.

        Returns:
            A list of one-element lists, each holding one archived URL (the
            ``original`` field requested via ``fl=original``), with the CDX
            header row removed. Empty when the archive has no captures.

        Raises:
            requests.HTTPError: If the CDX server answers with an error status.
        """
        # The two queries differ only in the wildcard subdomain prefix.
        target = ('*.%s' % host) if with_subs else host
        url = ('http://web.archive.org/cdx/search/cdx'
               '?url=%s/*&output=json&fl=original&collapse=urlkey' % target)
        r = requests.get(url)
        # Fail with a clear HTTP error instead of an opaque JSON decode error
        # when the server returns an error page.
        r.raise_for_status()
        results = r.json()
        # With output=json the first row is the field-name header
        # (["original"]). Skip only that row; skipping two rows would
        # silently drop the first real URL.
        return results[1:]


    if __name__ == '__main__':
        # Script entry point: look up archived URLs for the host given on the
        # command line and save them, one per line, under output/waybackurls/.

        # Local import: only the script entry point needs it.
        import os

        argc = len(sys.argv)
        if argc < 2:
            print('Usage:\n\tpython3 waybackurls.py <url> <include_subdomains:optional>')
            sys.exit()

        host = sys.argv[1]
        # The optional flag is argv[2], so its presence means argc == 3.
        # Any second positional argument turns subdomain inclusion on.
        with_subs = argc > 2

        urls = waybackurls(host, with_subs)
        if urls:
            # Each result row is a one-element list; emit one URL per line.
            string_urls = ''.join(row[0] + '\n' for row in urls)
            out_dir = 'output/waybackurls/'
            # Create the output directory on first run so open() does not fail
            # with FileNotFoundError.
            os.makedirs(out_dir, exist_ok=True)
            # NOTE: the file keeps its historical ".json" name although the
            # content is newline-separated plain text.
            filename = '%s-waybackurls.json' % host
            with open(out_dir + filename, 'w') as f:
                f.write(string_urls)
            print('[*] Saved results to %s' % filename)
        else:
            print('[-] Found nothing')