Last active
June 5, 2023 18:27
-
-
Save rene-d/8a5161f95365343e9a24c73640d12ff7 to your computer and use it in GitHub Desktop.
Chromium-like browsers cache viewer (Linux, macOS)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Chromium-like browsers cache viewer (Linux, macOS) | |
import argparse | |
import binascii | |
import struct | |
from datetime import datetime | |
from pathlib import Path | |
import traceback | |
from cryptography.x509 import load_der_x509_certificate # pip3 install cryptography | |
# Simple cache entry format
# https://chromium.googlesource.com/chromium/src/+/master/net/disk_cache/simple/simple_entry_format.h
kSimpleInitialMagicNumber = 0xFCFB_6D1B_A772_5C30  # first field of the SimpleFileHeader
kSimpleFinalMagicNumber = 0xF4FA_6F45_970D_41D8  # first field of every SimpleFileEOF
# Bits of the SimpleFileEOF "flags" field
FLAG_HAS_CRC32 = 1 << 0
FLAG_HAS_KEY_SHA256 = 1 << 1

# Subtracted from the cached timestamps before they are divided by 1e6 and
# handed to datetime.fromtimestamp()
# https://chromium.googlesource.com/chromium/src/+/master/base/time/time.h
kTimeTToMicrosecondsOffset = 11_644_473_600 * 1_000_000

# Bits of the "flags" word of the pickled HTTP response info (the low 8 bits
# hold a version number)
# https://chromium.googlesource.com/chromium/src/+/master/net/http/http_response_info.cc
RESPONSE_INFO_HAS_CERT = 1 << 8
RESPONSE_INFO_HAS_SECURITY_BITS = 1 << 9
RESPONSE_INFO_HAS_CERT_STATUS = 1 << 10
RESPONSE_INFO_HAS_VARY_DATA = 1 << 11
RESPONSE_INFO_TRUNCATED = 1 << 12
RESPONSE_INFO_WAS_SPDY = 1 << 13
RESPONSE_INFO_WAS_ALPN = 1 << 14
RESPONSE_INFO_WAS_PROXY = 1 << 15
RESPONSE_INFO_HAS_SSL_CONNECTION_STATUS = 1 << 16
RESPONSE_INFO_HAS_ALPN_NEGOTIATED_PROTOCOL = 1 << 17
RESPONSE_INFO_HAS_CONNECTION_INFO = 1 << 18
RESPONSE_INFO_USE_HTTP_AUTHENTICATION = 1 << 19
RESPONSE_INFO_HAS_SIGNED_CERTIFICATE_TIMESTAMPS = 1 << 20
RESPONSE_INFO_UNUSED_SINCE_PREFETCH = 1 << 21
RESPONSE_INFO_HAS_KEY_EXCHANGE_GROUP = 1 << 22
RESPONSE_INFO_PKP_BYPASSED = 1 << 23
RESPONSE_INFO_HAS_STALENESS = 1 << 24
RESPONSE_INFO_HAS_PEER_SIGNATURE_ALGORITHM = 1 << 25
RESPONSE_INFO_RESTRICTED_PREFETCH = 1 << 26
RESPONSE_INFO_HAS_DNS_ALIASES = 1 << 27
RESPONSE_INFO_SINGLE_KEYED_CACHE_ENTRY_UNUSABLE = 1 << 28
RESPONSE_INFO_ENCRYPTED_CLIENT_HELLO = 1 << 29
RESPONSE_INFO_BROWSER_RUN_ID = 1 << 30
# https://chromium.googlesource.com/chromium/src/+/master/net/ssl/ssl_connection_status_flags.h
SSL_CONNECTION_VERSION_SHIFT = 20
SSL_CONNECTION_VERSION_MASK = 7


def SSLConnectionStatusToVersion(connection_status):
    """Return the version field packed into *connection_status*.

    The field is SSL_CONNECTION_VERSION_MASK wide and starts at bit
    SSL_CONNECTION_VERSION_SHIFT.
    """
    shifted = connection_status >> SSL_CONNECTION_VERSION_SHIFT
    return shifted & SSL_CONNECTION_VERSION_MASK
def bits(a):
    """Return the indices of the set bits of *a* as a comma-separated string.

    Bit 0 is the least significant bit and indices are listed in ascending
    order; 0 yields an empty string.

    Raises:
        ValueError: if *a* is negative. Repeated floor division by 2 never
            brings a negative integer to 0, so such a value used to loop
            forever instead of returning.
    """
    if a < 0:
        raise ValueError(f"bits() requires a non-negative integer, got {a}")
    return ",".join(str(i) for i in range(a.bit_length()) if (a >> i) & 1)
class ChromiumCache:
    """Reader/printer for entry files of the Chromium "Simple" disk cache.

    Attributes (set directly by callers):
        verbose: when true, the decoded HTTP response info of each entry is
            printed in addition to its URL.
        show_url: list of substrings; when non-empty, only entries whose URL
            contains at least one of them are printed.
    """

    def __init__(self):
        self.verbose = False
        self.show_url = []

    def _require(self, condition, message):
        """Raise AssertionError(message) unless *condition* is true.

        Used instead of the ``assert`` statement so the format checks are not
        removed when Python runs with -O, while the exception type stays the
        one analyze() catches.
        """
        if not condition:
            raise AssertionError(message)

    def _read_u32(self, buf, offset):
        """Return (value, next_offset) for the little-endian uint32 at *offset*.

        Raises struct.error when fewer than 4 bytes are available.
        """
        (value,) = struct.unpack("<I", buf[offset : offset + 4])
        return value, offset + 4

    def _read_blob(self, buf, offset):
        """Return (payload, next_offset) for a uint32-length-prefixed field.

        The payload length is rounded up to a multiple of 4 to find where the
        next field starts.
        """
        length, start = self._read_u32(buf, offset)
        payload = buf[start : start + length]
        self._require(len(payload) == length, "length-prefixed field runs past the end of the file")
        return payload, start + (length + 3) // 4 * 4

    def extract(self, f: Path):
        """Parse one cache entry file and print what it contains.

        The URL is printed when show_url is empty or one of its patterns is a
        substring of the URL; the decoded response info follows only when
        verbose is set.

        Expected file layout (streams 0 and 1 of a Simple cache entry):
          - a SimpleFileHeader
          - the key
          - the data from stream 1
          - a SimpleFileEOF record for stream 1
          - the data from stream 0
          - the SHA256 of the key
          - a SimpleFileEOF record for stream 0

        Raises:
            AssertionError: a magic number, version, flag or computed offset
                does not match the layout above.
            struct.error: a fixed-size field extends past the end of the file.
            UnicodeDecodeError: the key, a header line or another text field
                cannot be decoded.
        """
        b = f.read_bytes()

        ##################################################################
        # SimpleFileHeader (24 bytes)
        magic, version, key_length, _, zero = struct.unpack("<QIIII", b[:24])
        self._require(magic == kSimpleInitialMagicNumber, "bad SimpleFileHeader magic number")
        self._require(version == 5, f"unsupported entry version {version}")
        self._require(zero == 0, "non-zero padding in SimpleFileHeader")

        ##################################################################
        # the key: the URL is its last space-separated field; keys carrying a
        # "_dk_" prefix are split at most twice so that the third field is
        # taken whole
        key = b[24 : 24 + key_length].decode()
        if key.startswith(("1/0/_dk_", "_dk_")):
            # [-1] equals the third field when it exists, and avoids an
            # IndexError on a key with fewer fields
            url = key.split(" ", maxsplit=2)[-1]
        else:
            url = key.split(" ")[-1]

        ##################################################################
        # SimpleFileEOF of stream0 (last 24 bytes of the file)
        magic, flags, _, stream0_length, zero = struct.unpack("<QIIII", b[-24:])
        self._require(magic == kSimpleFinalMagicNumber, "bad magic number in SimpleFileEOF of stream 0")
        self._require(
            flags == FLAG_HAS_CRC32 + FLAG_HAS_KEY_SHA256,
            f"unexpected flags {flags:#x} in SimpleFileEOF of stream 0",
        )
        self._require(zero == 0, "non-zero padding in SimpleFileEOF of stream 0")

        ##################################################################
        # SimpleFileEOF of stream1 (24 bytes), located by walking back from
        # the end: EOF0 (24) + key SHA256 (32) + stream0 + EOF1 (24)
        offset = len(b) - (24 + stream0_length + 32 + 24)
        magic, flags, _, stream1_length, zero = struct.unpack("<QIIII", b[offset : offset + 24])
        offset += 24  # skip the SimpleFileEOF of stream1: offset is now the start of stream0
        self._require(magic == kSimpleFinalMagicNumber, "bad magic number in SimpleFileEOF of stream 1")
        self._require(flags == FLAG_HAS_CRC32, f"unexpected flags {flags:#x} in SimpleFileEOF of stream 1")
        self._require(zero == 0, "non-zero padding in SimpleFileEOF of stream 1")
        self._require(
            offset == 24 + key_length + stream1_length + 24,
            "stream lengths are inconsistent with the file size",
        )
        # stream1 (the payload) occupies b[24 + key_length : 24 + key_length + stream1_length];
        # only its length is reported, the bytes themselves are not used here.

        wanted = not self.show_url or any(i in url for i in self.show_url)

        ##################################################################
        # stream0 (HTTP response info)
        if stream0_length == 0:
            if wanted:
                print(url)
                if self.verbose:
                    print(" stream0_length", stream0_length)
                    print(" stream1_length", stream1_length)
            return

        # header of stream0: cf. HttpResponseInfo::InitFromPickle
        length, flags, request_time, response_time = struct.unpack("<IIQQ", b[offset : offset + 24])
        self._require(stream0_length == length + 4, "pickle length does not match the stream 0 length")
        offset += 24

        # the HTTP headers: NUL-separated lines terminated by two NUL bytes
        raw_headers, offset = self._read_blob(b, offset)
        self._require(raw_headers[-2:] == b"\x00\x00", "HTTP headers are not terminated by two NUL bytes")
        http_headers = [line.decode() for line in raw_headers[:-2].split(b"\0")]

        if not wanted:
            return
        print(url)
        if not self.verbose:
            return

        request_time = datetime.fromtimestamp((request_time - kTimeTToMicrosecondsOffset) / 1_000_000)
        response_time = datetime.fromtimestamp((response_time - kTimeTToMicrosecondsOffset) / 1_000_000)

        print(" stream0_length", stream0_length)
        print(" stream1_length", stream1_length)
        print(" http_headers")
        for h in http_headers:
            print(f" {h}")
        print(" payload length", stream1_length, "bytes")
        print(" flags ", hex(flags), f"version:{flags & 0xFF} bits:{bits(flags & ~0xFF)}")
        print(" request_time ", request_time)
        print(" response_time ", response_time)

        if flags & RESPONSE_INFO_HAS_CERT:
            nb_certs, offset = self._read_u32(b, offset)
            for _ in range(nb_certs):
                # certificate in DER format
                der, offset = self._read_blob(b, offset)
                certificate = load_der_x509_certificate(der)
                print(
                    f" certificate ({len(der)} bytes) {certificate.issuer.rfc4514_string()} {certificate.not_valid_after}"
                )

        if flags & RESPONSE_INFO_HAS_CERT_STATUS:
            cert_status, offset = self._read_u32(b, offset)
            print(f" cert_status {hex(cert_status)}")

        # no field is decoded for this flag, so an entry carrying it cannot be parsed
        self._require(flags & RESPONSE_INFO_HAS_SECURITY_BITS == 0, "security bits are not supported")

        if flags & RESPONSE_INFO_HAS_SSL_CONNECTION_STATUS:
            connection_status, offset = self._read_u32(b, offset)
            print(
                " ssl_connection_status",
                hex(connection_status),
                f"version:{SSLConnectionStatusToVersion(connection_status)}",
            )

        self._require(
            flags & RESPONSE_INFO_HAS_SIGNED_CERTIFICATE_TIMESTAMPS == 0,
            "signed certificate timestamps are not supported",
        )

        # Read vary-data
        if flags & RESPONSE_INFO_HAS_VARY_DATA:
            # it is actually a MD5Digest
            # cf. https://chromium.googlesource.com/chromium/src/+/master/net/http/http_vary_data.h
            print(" vary_data", binascii.b2a_hex(b[offset : offset + 16]))
            offset += 16

        # Read socket_address.
        ip, offset = self._read_blob(b, offset)
        print(" ip", ip.decode())
        port, offset = self._read_u32(b, offset)
        print(" port", port)

        if flags & RESPONSE_INFO_HAS_ALPN_NEGOTIATED_PROTOCOL:
            alpn_negotiated_protocol, offset = self._read_blob(b, offset)
            print(" alpn_negotiated_protocol", alpn_negotiated_protocol.decode())

        if flags & RESPONSE_INFO_HAS_CONNECTION_INFO:
            connection_info, offset = self._read_u32(b, offset)
            print(" connection_info", hex(connection_info))

        if flags & RESPONSE_INFO_HAS_KEY_EXCHANGE_GROUP:
            key_exchange_group, offset = self._read_u32(b, offset)
            print(" key_exchange_group", hex(key_exchange_group))

        self._require(flags & RESPONSE_INFO_HAS_STALENESS == 0, "staleness is not supported")

        if flags & RESPONSE_INFO_HAS_PEER_SIGNATURE_ALGORITHM:
            peer_signature_algorithm, offset = self._read_u32(b, offset)
            print(" peer_signature_algorithm", hex(peer_signature_algorithm))

        if flags & RESPONSE_INFO_HAS_DNS_ALIASES:
            nb_dns, offset = self._read_u32(b, offset)
            for _ in range(nb_dns):
                dns, offset = self._read_blob(b, offset)
                print(" dns", dns.decode())

        # NOTE(review): RESPONSE_INFO_BROWSER_RUN_ID is defined in this file but
        # no field is read for it; if such entries store extra data they will
        # fail the check below - confirm against HttpResponseInfo::InitFromPickle.

        # everything up to the key SHA256 (32 bytes) and the final EOF record
        # (24 bytes) must have been consumed
        self._require(offset == len(b) - 24 - 32, "unparsed data left at the end of stream 0")

    def analyze(self, path):
        """Run extract() on every ``*_0`` file found directly in *path*.

        A leading ``~`` in *path* is expanded. On the first file that raises
        AssertionError, UnicodeDecodeError or struct.error, the traceback and
        the file name are printed and the program exits with status 1.
        """
        for f in Path(path).expanduser().glob("*_0"):
            try:
                self.extract(f)
            except (AssertionError, UnicodeDecodeError, struct.error):
                traceback.print_exc()
                print(f"error reading: {f}")
                raise SystemExit(1)

    def add_show_url(self, pattern):
        """Add *pattern* to the URL substrings that select which entries are printed."""
        self.show_url.append(pattern)
if __name__ == "__main__":
    # Command-line entry point: dump a single entry file (-f) or list every
    # entry of a cache directory, optionally filtered by URL substring (-u).
    parser = argparse.ArgumentParser(description="Chromium-like browsers cache viewer")
    parser.add_argument("-v", "--verbose", action="store_true", help="also print the decoded HTTP response info")
    parser.add_argument("-u", "--url", help="only show entries whose URL contains this substring")
    parser.add_argument("-f", "--file", type=Path, help="dump this single entry file verbosely and stop")
    parser.add_argument(
        "-d",
        "--cache-dir",
        default="~/.cache/chromium/Default/Cache",
        help="directory holding the *_0 entry files (default: %(default)s)",
    )
    args = parser.parse_args()

    bc = ChromiumCache()
    if args.file:
        # a single file is always dumped in full; -v, -u and -d are not consulted
        bc.verbose = True
        bc.extract(args.file)
    else:
        bc.verbose = args.verbose
        if args.url:
            bc.add_show_url(args.url)
        bc.analyze(args.cache_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment