lab_copier.py: Enhanced implementation of labcopy in Python for 'Grim Fandango' and 'Escape from Monkey Island'
#! /usr/bin/env python3
# lab_copier.py: Modern implementation of labcopy in Python
#
# Rewrites size-forged LAB files to represent their actual size.
# E.g., for usage and proper game detection in ScummVM.
#
# Primary use is for "Grim Fandango" LAB files, but it handles
# "Escape from Monkey Island" files too.
#
# (LAB files are encoded in the LucasArts Binary archive format)
#
# Sample:
# Note: integers are LE encoded
#
# 00000000: 4c41 424e LABN ; 'LABN' file id
# 00000004: 4141 4141 AAAA ; reserved (copy as is)
# 00000008: 0300 0000 .... ; count of nested files
# 0000000c: 3200 0000 2... ; length of concatenated nested filename strings
# 00000010: 0000 0000 .... ; offset to file A filename (Grim only)
# 00000014: 7200 0000 r... ; offset to file A data
# 00000018: 0400 0000 .... ; bytesize of file A
# 0000001c: 4242 4242 BBBB ; reserved (usually 0x00, copy as is)
# 00000020: 0a00 0000 .... ; offset to file B filename
# 00000024: 7600 0000 v... ; offset to file B data
# 00000028: 0800 0000 .... ; bytesize of file B
# 0000002c: 4242 4242 BBBB ; reserved
# 00000030: 1b00 0000 .... ; offset to file C filename
# 00000034: 7e00 0000 ~... ; offset to file C data
# 00000038: 0c00 0000 .... ; bytesize of file C
# 0000003c: 4242 4242 BBBB ; reserved
# 00000040: 6669 6c65 file ; filenames concatenated, filename A from pos 0x40
# 00000044: 5f41 2e77 _A.w ; (0x40 = 16 + 3 files * 16)
# 00000048: 6176 0061 av.a ; 0x40+0x0a: B filename
# 0000004c: 6e6f 7468 noth
# 00000050: 6572 6669 erfi
# 00000054: 6c65 5f42 le_B
# 00000058: 2e6a 7067 .jpg
# 0000005c: 0079 6574 .yet ; 0x40+0x1b: C filename
# 00000060: 616e 6f74 anot
# 00000064: 6865 7266 herf
# 00000068: 696c 655f ile_
# 0000006c: 432e 7478 C.tx
# 00000070: 7400 4343 t.CC ; 0x72: start of file A data
# 00000074: 4343 4444 CCDD ; 0x76: start of file B data
# 00000078: 4444 4444 DDDD
# 0000007c: 4444 4545 DDEE ; 0x7e: start of file C data
# 00000080: 4545 4545 EEEE
# 00000084: 4545 4545 EEEE
# 00000088: 4545 EE
#
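# For illustration, the 16-byte directory entry of file B in the sample
# above (bytes 0x20..0x2f) unpacks via struct.unpack("<IIII", ...) to
# (fname_offset=0x0a, start=0x76, size=0x08, reserved=0x42424242), i.e.
# the same four fields used by decode_and_copy_labentry_info() below.
#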
# Inspired by the discontinued labcopy C++ implementation:
# https://github.com/klusark/residual-tools/blob/master/tools/labcopy.cpp
#
# (C) 2021 Gemba
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
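#
# Example invocations (illustrative; the LAB file names are placeholders):
#   ./lab_copier.py DATA000.LAB -l            # list nested files while copying
#   ./lab_copier.py MOVIE01.LAB -o fixed.lab  # write rewritten archive to fixed.lab
#   ./lab_copier.py DATA000.LAB -f -s         # rewrite in place, skip if MD5 unchanged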
import argparse
import shutil
import struct
import sys
from collections import namedtuple
from hashlib import md5

OUTFILE_SUFFIX = "_rewritten"
OFFENDING_NESTED_FILES = ["cp_0_intha.bm"]  # Grim
EMI_FN_TABLE_OFFSET = 0x13d0f


def copy_lab(in_data, list_content=False):
    """Read LAB file and read nested file parameters.
    Return byte data for out file."""
    hdr = in_data.read(16)
    if hdr[:4].decode("ascii") != "LABN":
        print("[!] Not a LAB file")
        sys.exit(1)
    num_entries = int.from_bytes(hdr[8:12], byteorder='little', signed=False)
    nested_filenames_size = int.from_bytes(
        hdr[12:16], byteorder='little', signed=False)

    # read next four bytes to detect EMI or Grim LAB file
    offset_nested_fn_bytes = in_data.read(4)
    nested_fn_table_offset = int.from_bytes(
        offset_nested_fn_bytes, byteorder='little', signed=False)
    is_emi = nested_fn_table_offset > 0

    nested_fn_array, nested_fn_bytes = get_nested_filenames(
        in_data, num_entries, nested_fn_table_offset, nested_filenames_size,
        is_emi)
    if num_entries != len(nested_fn_array):
        print("[!] Filename table size does not match count of nested files.")
        sys.exit(1)

    # write header
    out_data = bytearray(hdr)
    lab_entries = decode_and_copy_labentry_info(
        in_data, out_data, nested_fn_array, offset_nested_fn_bytes, is_emi)

    # write nested filenames as-is
    if is_emi:
        # copy blob back with offset
        copy_nestedfile(in_data, out_data,
                        nested_fn_table_offset - EMI_FN_TABLE_OFFSET,
                        nested_filenames_size)
    else:
        out_data.extend(nested_fn_bytes)

    if list_content:
        print("[*] Size [bytes] File")
        print("    ------------ ------------")

    # write nested filedata
    size_tot = 0
    for lab_entry in lab_entries:
        if list_content:
            print(f"    {lab_entry['size']:>12} {lab_entry['nested_fn']}")
            size_tot += lab_entry['size']
        if lab_entry['nested_fn'] in OFFENDING_NESTED_FILES:
            print(f"[*] Skipping offending file '{lab_entry['nested_fn']}'.")
            continue
        copy_nestedfile(in_data, out_data,
                        lab_entry['start'], lab_entry['size'])

    if list_content:
        s = "" if len(lab_entries) == 1 else "s"
        print("    ------------ ------------")
        print(f"    {size_tot:>12} {len(lab_entries)} file{s}")

    return out_data


def get_nested_filenames(in_data, num_entries, nested_fn_table_offset,
                         nested_filenames_size, is_emi):
    """Get an array of filenames from the archive's filename 'table'.
    The 'table' contains the filenames, each null terminated, as bytes.
    The returned array consists of ASCII formatted strings."""
    # seek fwd to retrieve nested filenames
    if is_emi:
        # Escape from Monkey Island
        in_data.seek(nested_fn_table_offset - EMI_FN_TABLE_OFFSET)
    else:
        # Grim Fandango
        # +1 for header
        in_data.seek((num_entries + 1) * 16, 0)
    nested_filenames_bytes = in_data.read(nested_filenames_size)

    if is_emi:
        nested_fns_decoded = bytearray()
        for b in nested_filenames_bytes:
            nested_fns_decoded.append(b ^ 0x96 if b else b)
    else:
        nested_fns_decoded = nested_filenames_bytes

    # cut off surplus null byte, assume ASCII filenames
    return (nested_fns_decoded[:-1].decode("ascii").split('\x00'),
            nested_filenames_bytes)
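

# Note on the EMI handling above (illustrative): the EMI filename table is
# obfuscated by XOR-ing every non-null byte with 0x96, e.g.
# 0xf7 ^ 0x96 == 0x61 ('a'); null bytes are kept untouched so they still
# terminate the individual filenames.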


def decode_and_copy_labentry_info(in_data, out_data, nested_fn_array,
                                  offset_nested_fn_bytes, is_emi):
    """Decodes each lab entry meta info and copies it to out."""
    # reset seek to first lab_entry
    in_data.seek(20 if is_emi else 16, 0)
    _lab_entry = namedtuple('lab_entry', 'fname_offset start size reserved')
    lab_entries = []
    if is_emi:
        # write out bytes 16 to 19
        out_data.extend(offset_nested_fn_bytes)
    for nested_fn in nested_fn_array:
        entry = in_data.read(16)
        out_data.extend(entry)
        data = struct.unpack("<IIII", entry)
        lab_entry = _lab_entry._asdict(_lab_entry._make(data))
        lab_entry['nested_fn'] = nested_fn
        # print(lab_entry)
        lab_entries.append(lab_entry)
    return lab_entries


def copy_nestedfile(in_data, out_data, offset, size):
    """Copy a nested file of a LAB file to the target byte array."""
    in_data.seek(offset, 0)
    data = in_data.read(size)
    # don't assume offset is strictly ordered
    add_len = offset + size - len(out_data)
    if add_len > 0:
        out_data.extend(b'\x00' * add_len)
    for idx, b in enumerate(data):
        out_data[offset + idx] = b
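

# Note on copy_nestedfile() above: nested files are written at their
# absolute offsets and need not arrive in offset order. E.g. with the
# sample layout above, copying file C (offset 0x7e, size 0x0c) before
# file B would first zero-extend out_data, leaving the bytes before 0x7e
# zero-filled for now; copying file B (offset 0x76, size 0x08) afterwards
# then fills bytes 0x76..0x7d in place.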


def init_cli_parser():
    """Init command line argument parser."""
    parser = argparse.ArgumentParser(
        description='Restores a size-forged LAB file to its pristine size.')
    parser.add_argument("in_file", help="input LAB file")
    parser.add_argument("-o", "--outfile", help="file to write to; if not "
                        f"given, in_file plus suffix '{OUTFILE_SUFFIX}' is "
                        "used unless overwrite is set",
                        type=str, dest='out_file')
    parser.add_argument("-f", "--overwrite", help="overwrite input file",
                        action="store_true", default=False)
    parser.add_argument("-l", "--list", help="list nested files identified in"
                        " the LAB file", action="store_true", default=False)
    parser.add_argument("-s", "--skipidentical", help="skip writing of "
                        "out_file if its hash is identical to in_file",
                        action='store_true', default=False,
                        dest='skip_identical')
    parser.add_argument("-b", "--md5bytes", help="number of bytes used to "
                        "calculate the MD5 (default: full file)",
                        type=int, dest='md5_bytes', default=0)
    return parser


if __name__ == "__main__":
    parser = init_cli_parser()
    args = parser.parse_args()
    in_file = args.in_file
    overwrite = args.overwrite

    if args.out_file and overwrite:
        print("[*] Parameter outfile will be ignored, because overwrite"
              " (=same file) is set.")
    out_file = args.out_file if not overwrite else None
    if not out_file:
        out_file = f"{in_file}{OUTFILE_SUFFIX}"

    with open(in_file, 'rb') as in_data:
        print(f"[+] Opened '{in_file}'")
        size = -1 if not args.md5_bytes else args.md5_bytes
        md5_infile = md5(in_data.read(size)).hexdigest()
        in_data.seek(0)
        print("[+] Copying ...")
        out_data = copy_lab(in_data, args.list)

    md5_bytes = len(out_data) if not args.md5_bytes else args.md5_bytes
    md5_outfile = md5(out_data[:md5_bytes]).hexdigest()
    mib = md5_bytes / 1024.0 / 1024

    if args.skip_identical and md5_outfile == md5_infile:
        print("[+] Skip identical: No outfile written, MD5 is identical to"
              " infile.")
    else:
        with open(out_file, 'wb') as of:
            of.write(out_data)
        if overwrite:
            shutil.move(out_file, in_file)
            out_file = in_file
        print(f"[+] ... written to '{out_file}'")

    print(f"[*] MD5 {md5_outfile} over {md5_bytes} bytes ({mib:.3f} MiB).")
    print("[*] Done.")


# run with pytest-3
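# Illustrative invocations (assuming pytest is installed), e.g.:
#   pytest-3 lab_copier.py
#   python3 -m pytest lab_copier.py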
def test_copy_lab():
    from os import remove
    md5_expected = "776138f5eb890123c49bb3046d7dd4f5"
    f = _create_dummy_labfile(
        ["file_A.wav", "anotherfile_B.jpg", "yetanotherfile_C.txt"])
    with open(f.name, 'rb') as test_data:
        md5_real = md5(copy_lab(test_data))
    remove(f.name)
    assert md5_expected == md5_real.hexdigest()


def test_copy_lab_remove_nested_file():
    from os import remove
    # breaks if OFFENDING_NESTED_FILES is modified
    md5_expected = "c46fcc87f65df671ac93e9319f779f88"
    f = _create_dummy_labfile(
        ["file_A.wav"] + OFFENDING_NESTED_FILES + ["yetanotherfile_C.txt"])
    with open(f.name, 'rb') as test_data:
        md5_real = md5(copy_lab(test_data))
    remove(f.name)
    assert md5_expected == md5_real.hexdigest()


def _create_dummy_labfile(filenames):
    from tempfile import NamedTemporaryFile
    f = NamedTemporaryFile(mode='wb', delete=False)
    f.write(b"LABN")
    f.write(b"AAAA")  # reserved
    file_count = len(filenames)
    f.write(int.to_bytes(file_count, length=4, byteorder='little',
                         signed=False))

    # seek past the header and the lab entries
    nested_fn_list_start = (file_count + 1) * 16
    f.seek(nested_fn_list_start, 0)
    nested_fn_length = 0
    # write concatenated list of filenames, 0x00 terminated
    for fn in filenames:
        f.write(bytearray(fn, "ascii"))
        f.write(b'\x00')
        nested_fn_length += len(fn) + 1

    # write length of concatenated nested filenames in header
    f.seek(12, 0)
    f.write(int.to_bytes(nested_fn_length, length=4,
                         byteorder='little', signed=False))

    sizes_sum = 0
    # start of binary blob of all nested file content
    raw_data_start = nested_fn_list_start + nested_fn_length
    # lab entries from byte 16 onwards
    fsizes = []
    for le_meta in range(file_count):
        if le_meta > 0:
            # offset to nested filename string
            fn_offset = int.to_bytes(len(''.join(filenames[0:le_meta])),
                                     length=4, byteorder='little',
                                     signed=False)
            f.write(fn_offset)
        else:
            f.write(b'\x00' * 4)  # Grim Fandango only
        nested_file_data_start = int.to_bytes(raw_data_start + sizes_sum,
                                              length=4, byteorder='little',
                                              signed=False)
        f.write(nested_file_data_start)
        fsize = 4 + 4 * le_meta  # sample size
        sizes_sum = sizes_sum + fsize
        fsizes.append(fsize)
        size = int.to_bytes(fsize, length=4,
                            byteorder='little', signed=False)
        f.write(size)
        f.write(b"BBBB")  # reserved

    buf = bytearray()
    for idx, fsize in enumerate(fsizes):
        buf.extend(bytes(chr(0x43 + idx), 'ascii') * fsize)
    # per file raw data after filename list
    f.seek(raw_data_start, 0)
    f.write(buf)
    f.close()
    return f