Last active
May 18, 2025 09:08
-
-
Save UserUnknownFactor/653c6e07df920d2a253997b1b1860ccc to your computer and use it in GitHub Desktop.
Split a file into two by binary signature
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pefile | |
import sys | |
import os | |
import glob | |
def extract_exe_from_bundle(bundle_path, output_path=None, data_path=None):
    """Split a bundle into its leading PE executable and the data appended to it.

    The size of the executable is derived from the PE section table: it is the
    largest ``PointerToRawData + SizeOfRawData`` among the sections. Bytes up
    to that offset are written to ``output_path``; all remaining bytes are
    written to ``data_path``.

    Args:
        bundle_path: Path of the bundled file to split.
        output_path: Destination of the executable part. Defaults to the
            bundle path with its extension replaced by ``_real.exe``.
        data_path: Destination of the trailing data. Defaults to the bundle
            path with its extension replaced by ``data.bin``.

    Returns:
        None. Progress and any error are reported with ``print``; exceptions
        are not propagated to the caller.
    """
    base = os.path.splitext(bundle_path)[0]
    if output_path is None:
        output_path = base + "_real.exe"
    if data_path is None:
        data_path = base + "data.bin"

    # 1 MiB per read keeps memory use flat regardless of the bundle size.
    chunk_size = 1024 * 1024

    try:
        # fast_load skips the data directories; the section table is enough
        # to determine where the executable image ends in the file.
        pe = pefile.PE(bundle_path, fast_load=True)
        try:
            # Use the furthest section end rather than the last table entry:
            # the table order is not guaranteed to follow the file order.
            exe_size = max(
                section.PointerToRawData + section.SizeOfRawData
                for section in pe.sections
            )
        finally:
            # Release the file mapping held by pefile before reopening the file.
            pe.close()

        with open(bundle_path, 'rb') as source:
            with open(output_path, 'wb') as dest:
                bytes_copied = 0
                while bytes_copied < exe_size:
                    chunk = source.read(min(chunk_size, exe_size - bytes_copied))
                    if not chunk:
                        break  # File ended before the size the headers announce.
                    dest.write(chunk)
                    bytes_copied += len(chunk)
            print(f"Successfully extracted executable of size {bytes_copied} to:\n{output_path}")

            # `source` is now positioned right after the executable, so the
            # rest of the file is the appended data.
            with open(data_path, 'wb') as dest:
                bytes_copied = 0
                while True:
                    chunk = source.read(chunk_size)
                    if not chunk:
                        break
                    dest.write(chunk)
                    bytes_copied += len(chunk)
            print(f"Successfully extracted data of size {bytes_copied} to:\n{data_path}")
    except Exception as e:
        # Top-level boundary of a command-line tool: report and carry on.
        print(f"Error: {e}")
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python extract_exe.py <bundle_path> [output_exe_path] [data_path]")

    # Without an explicit bundle path, fall back to any .exe in the current
    # directory; the argument may itself be a glob pattern.
    pattern = sys.argv[1] if len(sys.argv) > 1 else "*.exe"
    output_path = sys.argv[2] if len(sys.argv) > 2 else None
    data_path = sys.argv[3] if len(sys.argv) > 3 else None

    matches = glob.glob(pattern)
    if matches:
        # Only the first match is processed.
        extract_exe_from_bundle(matches[0], output_path, data_path)
    else:
        # Say so explicitly instead of exiting without any output.
        print(f"No file matches: {pattern}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, shutil, os | |
def find_and_dump(file_path, signature, ext1, ext2):
    """Split a file in two at the first occurrence of a byte signature.

    The bytes that precede the signature are written to ``<base><ext2>``. The
    bytes from the start of the signature (signature included) to the end of
    the file are written to ``<base><ext1>``. ``<base>`` is ``file_path``
    without its extension.

    Args:
        file_path: Path to the file to search.
        signature: Byte signature to search for.
        ext1: Extension of the output file that starts at the signature.
        ext2: Extension of the output file holding everything before it.

    Returns:
        True if the signature was found and both parts were written. False if
        the signature is absent, or if an output path coincides with the input
        file or with the other output file.
    """
    base, _ = os.path.splitext(file_path)
    before_file_path = base + ext2
    after_file_path = base + ext1

    def _canonical(path):
        return os.path.normcase(os.path.abspath(path))

    # Opening an output with 'wb' truncates it at once. If it is the input
    # file, the data would be destroyed before it is copied; if both outputs
    # are the same file, the second would overwrite the first.
    distinct_paths = {
        _canonical(file_path),
        _canonical(before_file_path),
        _canonical(after_file_path),
    }
    if len(distinct_paths) < 3:
        print(
            f"Refusing to overwrite: outputs {before_file_path} and "
            f"{after_file_path} must differ from each other and from {file_path}"
        )
        return False

    buffer_size = 4096
    copy_size = 1024 * 1024
    # A match may straddle two reads, so keep the last len(signature) - 1
    # bytes seen so far and prepend them to the next read.
    overlap = len(signature) - 1

    with open(file_path, 'rb') as f:
        print(f"signature: {signature} buffer: {buffer_size}")
        file_pos = -1
        tail = b''
        buffer = f.read(buffer_size)
        while buffer:
            window = tail + buffer
            pos = window.find(signature)
            if pos != -1:
                # `window` ends at the current read position of the file.
                file_pos = f.tell() - len(window) + pos
                break
            # Slice the whole window, not just the latest read, so signatures
            # longer than one buffer can still be matched.
            tail = window[-overlap:] if overlap > 0 else b''
            buffer = f.read(buffer_size)

        if file_pos < 0:
            print(f"Signature not found in {file_path}")
            return False

        # Dump the content before the signature, in chunks so a large prefix
        # is never held in memory at once.
        with open(before_file_path, 'wb') as before_file:
            f.seek(0)
            remaining = file_pos
            while remaining > 0:
                chunk = f.read(min(copy_size, remaining))
                if not chunk:
                    break
                before_file.write(chunk)
                remaining -= len(chunk)
        print(f"Content before signature dumped to: {before_file_path}")

        # Dump the signature and everything that follows it.
        with open(after_file_path, 'wb') as after_file:
            f.seek(file_pos)
            shutil.copyfileobj(f, after_file)
        print(f"Content after signature dumped to: {after_file_path}")
        return True
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Find a byte signature and dump content.')
    parser.add_argument('file', help='Path to the file to search.')
    parser.add_argument('-s', '--signature', default=None, help='Byte signature to search for (e.g., "DE AD BE EF").')
    parser.add_argument('-b', '--beforeext', default='.bin', help='Extension of the first file.')
    # The option spelling "--aftertext" is kept as-is so existing command
    # lines keep working.
    parser.add_argument('-a', '--aftertext', default='.xp3', help='Extension of the second file.')
    args = parser.parse_args()

    # Convert the signature string to a bytes object; without -s the built-in
    # default signature is used.
    if args.signature:
        try:
            signature_bytes = bytes.fromhex(args.signature.replace(" ", ""))
        except ValueError as err:
            # Report malformed hex as a usage error rather than a traceback.
            parser.error(f"invalid hex signature {args.signature!r}: {err}")
    else:
        signature_bytes = b'XP3\r\n\x20\x0A\x1A\x8B\x67\x01'

    find_and_dump(args.file, signature_bytes, args.aftertext, args.beforeext)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment