Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Last active May 18, 2025 09:08
Show Gist options
  • Save UserUnknownFactor/653c6e07df920d2a253997b1b1860ccc to your computer and use it in GitHub Desktop.
Save UserUnknownFactor/653c6e07df920d2a253997b1b1860ccc to your computer and use it in GitHub Desktop.
Split a file into two by binary signature
import pefile
import sys
import os
import glob
def extract_exe_from_bundle(bundle_path, output_path=None, data_path=None):
    """Split a bundle into its PE executable and the data appended after it.

    The size of the executable is derived from the PE section table: the
    image ends where the raw data of the furthest-reaching section ends.
    Bytes ``[0, exe_size)`` of the bundle are written to ``output_path`` and
    every byte after that is written to ``data_path``.

    Args:
        bundle_path: Path of the bundled file to split.
        output_path: Destination of the executable part. Defaults to the
            bundle path with its extension replaced by ``_real.exe``.
        data_path: Destination of the trailing data. Defaults to the bundle
            path with its extension replaced by ``data.bin``.

    Returns:
        None. Any exception raised while parsing or copying is caught and
        reported on stdout as ``Error: ...``.
    """
    stem = os.path.splitext(bundle_path)[0]
    if output_path is None:
        output_path = stem + "_real.exe"
    if data_path is None:
        data_path = stem + "data.bin"

    # One chunk size for both copy loops (1 MiB).
    chunk_size = 1024 * 1024

    try:
        pe = pefile.PE(bundle_path, fast_load=True)
        try:
            if not pe.sections:
                raise ValueError("PE file has no sections")
            # Take the furthest raw-data end over ALL sections: the last
            # entry of the table is not guaranteed to be the one that
            # reaches furthest into the file (it may even have no raw data).
            exe_size = max(
                section.PointerToRawData + section.SizeOfRawData
                for section in pe.sections
            )
        finally:
            # Release the handle pefile holds on the bundle before reopening it.
            pe.close()

        with open(bundle_path, 'rb') as source:
            with open(output_path, 'wb') as dest:
                bytes_copied = 0
                while bytes_copied < exe_size:
                    chunk = source.read(min(chunk_size, exe_size - bytes_copied))
                    if not chunk:
                        break  # Bundle is shorter than the section table claims.
                    dest.write(chunk)
                    bytes_copied += len(chunk)
            print(f"Successfully extracted executable of size {bytes_copied} to:\n{output_path}")

            # `source` is now positioned right after the executable; the
            # remainder of the file is the appended data.
            with open(data_path, 'wb') as dest:
                bytes_copied = 0
                while True:
                    # Read full chunks here; the original reused the leftover
                    # size of the final executable chunk, which could be tiny
                    # (or undefined when exe_size was 0).
                    chunk = source.read(chunk_size)
                    if not chunk:
                        break
                    dest.write(chunk)
                    bytes_copied += len(chunk)
            print(f"Successfully extracted data of size {bytes_copied} to:\n{data_path}")
    except Exception as e:
        print(f"Error: {e}")
if __name__ == "__main__":
    # Command line: <bundle_path> [output_exe_path] [data_path]
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python extract_exe.py <bundle_path> [output_exe_path] [data_path]")

    # Without arguments the script still runs, falling back to a glob over
    # every .exe in the current directory.
    pattern = cli_args[0] if cli_args else "*.exe"
    exe_target = cli_args[1] if len(cli_args) >= 2 else None
    data_target = cli_args[2] if len(cli_args) >= 3 else None

    # Only the first file matching the pattern is processed.
    matches = glob.glob(pattern)
    if matches:
        extract_exe_from_bundle(matches[0], exe_target, data_target)
import argparse, shutil, os
def _copy_bytes(src, dst, count, chunk_size=1024 * 1024):
    """Copy up to ``count`` bytes from ``src`` to ``dst`` in bounded chunks."""
    remaining = count
    while remaining > 0:
        chunk = src.read(min(chunk_size, remaining))
        if not chunk:
            break
        dst.write(chunk)
        remaining -= len(chunk)


def _canonical_path(path):
    """Return a normalized absolute form of ``path`` for equality checks."""
    return os.path.normcase(os.path.realpath(path))


def find_and_dump(file_path, signature, ext1, ext2):
    """Split a file in two at the first occurrence of a byte signature.

    The file is scanned in fixed-size chunks, carrying over the tail of each
    chunk so that a signature straddling a chunk boundary is still found.
    Everything before the signature goes to one file; everything from the
    first byte of the signature to the end of the file goes to another.

    Args:
        file_path: Path to the file to search.
        signature: Byte signature to search for.
        ext1: Extension (including the dot) of the output that receives the
            content starting at the signature.
        ext2: Extension (including the dot) of the output that receives the
            content preceding the signature.

    Returns:
        True if the signature was found and both parts were written. False
        if the signature was not found, or if an output path would collide
        with the input file or with the other output (nothing is written in
        that case).
    """
    with open(file_path, 'rb') as f:
        buffer_size = 4096
        print(f"signature: {signature} buffer: {buffer_size}")

        # A match can start at most len(signature) - 1 bytes before the end
        # of a chunk and still be incomplete, so that is all we must carry.
        overlap = max(len(signature) - 1, 0)
        carry = b''

        while True:
            buffer = f.read(buffer_size)
            if not buffer:
                break

            window = carry + buffer
            pos = window.find(signature)
            if pos != -1:
                # `window` ends at the current file position, so its start
                # is f.tell() - len(window).
                file_pos = f.tell() - len(window) + pos

                base, _ = os.path.splitext(file_path)
                before_file_path = base + ext2
                after_file_path = base + ext1

                # Opening an output with 'wb' truncates it immediately; if it
                # is the input itself (or the other output) data is destroyed.
                source_key = _canonical_path(file_path)
                before_key = _canonical_path(before_file_path)
                after_key = _canonical_path(after_file_path)
                if source_key in (before_key, after_key) or before_key == after_key:
                    print(
                        "Refusing to write: output paths must differ from the "
                        f"input and from each other ({before_file_path}, {after_file_path})"
                    )
                    return False

                # Dump the content before the signature without loading it
                # into memory all at once.
                with open(before_file_path, 'wb') as before_file:
                    f.seek(0)
                    _copy_bytes(f, before_file, file_pos)
                print(f"Content before signature dumped to: {before_file_path}")

                # Dump the signature and everything after it.
                with open(after_file_path, 'wb') as after_file:
                    f.seek(file_pos)
                    shutil.copyfileobj(f, after_file)
                print(f"Content after signature dumped to: {after_file_path}")
                return True

            # Take the tail from `window`, not `buffer`, so carried bytes
            # survive a read shorter than the signature.
            carry = window[-overlap:] if overlap else b''

    # Signature not found
    print(f"Signature not found in {file_path}")
    return False
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description='Find a byte signature and dump content.')
    cli.add_argument('file', help='Path to the file to search.')
    cli.add_argument(
        '-s', '--signature',
        default=None,
        help='Byte signature to search for (e.g., "DE AD BE EF").',
    )
    cli.add_argument('-b', '--beforeext', default='.bin', help='Extension of the first file.')
    cli.add_argument('-a', '--aftertext', default='.xp3', help='Extension of the second file.')
    options = cli.parse_args()

    # A missing or empty -s falls back to the built-in default signature;
    # otherwise the hex string (spaces allowed) is converted to bytes.
    if options.signature:
        needle = bytes.fromhex(options.signature.replace(" ", ""))
    else:
        needle = b'XP3\r\n\x20\x0A\x1A\x8B\x67\x01'

    # find_and_dump takes the "after" extension first, then the "before" one.
    find_and_dump(options.file, needle, options.aftertext, options.beforeext)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment