Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save bakueikozo/e41a7a788fe19335b5a1ebc7248c4ca5 to your computer and use it in GitHub Desktop.

Select an option

Save bakueikozo/e41a7a788fe19335b5a1ebc7248c4ca5 to your computer and use it in GitHub Desktop.
・MIDIデータを含むバイナリファイルからSMFファイルを出力する フォーマットは未知なのでこれから解析していく
・フォルダ内にある拡張子tmpのファイルが解析対象になる このファイルは先頭部分に一定サイズのヘッダがあり、続いてMIDIイベントタイムスタンプまたはデルタタイムを含むインデックスエリア、そのあとにタイムスタンプの無いMIDIイベントの羅列が含まれる タイムスタンプデータをもとに、適切な数のMIDIイベントを拾い上げて、タイムスタンプ付きのMIDIデータに再構成する必要がある
・仮の構造として、0x78バイトまでは未解析のヘッダとして取り扱う タイムスタンプブロックが始まり、「8バイトのイベントサイズ構造体(先頭4バイトは未解析のビッグエンディアンのDWORD値、2バイトでイベントのバイトサイズ、2バイトは未定義)、続いて8バイトのタイムコード(先頭2バイトは未解析、2バイトのイベントコードA、2バイトのデルタタイム、2バイトのイベントコードB)」の16バイトセットが続く イベントコードBが0003になったところでタイムスタンプは終わる
・Timestamp blockのlogのINFOで、何バイト目を処理中なのかを表示して また、仮に10ブロック読んだところでいったん強制終了して
イベントサイズ2バイト、他のもBigEndian
・0003のコード取得後、4バイトの未解析データをスキップすると、0xF0が見えるはず ここをMIDIデータの先頭とし、各タイムスタンプブロックに記述されたイベントサイズのバイト数を割り当てていく
・10ブロックリミットを解除して動作が正しいか確認する
・タイムスタンプブロックに割り当てられたMIDIイベントをログに出して
・ログが長くなってしまったので、タイムスタンプに割り当てられたMIDIイベントを表示する際には最初の10ブロックだけに限定して
・ブロック数制限を外して、代わりにログをファイルに出力してみるにはどうすればいい
・NameError: name 'logger' is not defined
・指定したログファイルが空になってしまう。また、コンソール出力はなしでいい
・タイムスタンプブロックに割り当てられるイベントサイズはMIDIイベントの個数ではなく、イベントを構成するバイト数のこと
・イベントブロック一つに対して、既定のバイト数読み込んだらMIDIデータのカーソルはそのバイト分進めて、次のイベントブロックを処理し、そこに割り当てる いまの動作は違う気がする
・MIDIデータにランニングステータスが使用されているように見える そのためUnknownと表示されているようだ ランニングステータスを解釈できるようにしつつ、MIDIイベントとして破綻したデータが発見されたら直ちにそのバイト位置を表示して異常終了して
・Eventの表示の際に、ファイル内何バイト目から拾ったデータなのかをhex表示して
・Final MIDI data position: 等もファイルオフセット16進で表示して
・デルタタイムの処理を明確にする タイムスタンプブロックに入っているデルタタイムは、そのブロック処理後に現在再生Tickに追加される これをもとに、MIDIイベントの再生ポジションを設定し、表示して この時点で処理可能ならSMFのためのストリームに設定してエクスポートしてみて
・SMF出力時の時間分解能の値を48に
・absolute tickのカウントアップは、次のTimeBlockを読み込んだ時で、同じTimeBlock内にあるMIDIイベントは同一absolute timeにする
・0x48から始まる2バイト、BEがSMFの分解能になる
・4バイトだと思っていたが、1バイト目はどうやらフラグの可能性がある 下位3バイトだけをテンポとし、1バイト目はフラグとして扱ってみて また、フラグがあったブロックはそれもプロパティとして
・テンポデータはBPMではなく、SMFで使われる内部単位そのものである これをそのまま保存し、メタイベントとして出力すればいい
・イベントが重なってわかりにくいので、出力フォーマットをFormat1にする 各チャンネルのデータを各トラックに割り当てて
・MIDIシーケンサで開くと異常なフォーマットといわれてしまう
#!/usr/bin/env python3
"""
Specialized MIDI extractor for .tmp files with custom format.
This script handles the specific structure:
- 0x78 bytes of unparsed header
- Timestamp blocks starting with 16-byte sets:
- 8 bytes: event size structure (4 bytes unparsed BE, 2 bytes event size, 2 bytes undefined)
- 8 bytes: timecode (2 bytes unparsed, 2 bytes event code A, 2 bytes delta time, 2 bytes event code B)
- Timestamp blocks end when event code B becomes 0x0003
- Raw MIDI events follow
"""
import os
import struct
import argparse
import logging
from dataclasses import dataclass
from typing import List, Tuple, Optional, Dict
from pathlib import Path
# Create logger instance
logger = logging.getLogger(__name__)
def setup_logging(log_file: Optional[str] = None, log_level: str = 'INFO'):
    """Configure the module logger to write to an optional log file.

    No console handler is attached.  When ``log_file`` is falsy the logger
    ends up with no handler of its own.

    Args:
        log_file: Path of the log file to (re)create, or None/empty for none.
        log_level: Accepted for backward compatibility.  It was only used by
            the console handler, which is disabled, so it has no effect.

    Returns:
        The configured module-level logger.
    """
    # Close handlers left over from a previous call before dropping them;
    # clearing the list alone would keep the old log file open.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    if log_file:
        file_handler = logging.FileHandler(log_file, mode='w', encoding='utf-8')
        file_handler.setLevel(logging.DEBUG)  # the file receives every level
        file_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        )
        logger.addHandler(file_handler)

    # The logger itself must let DEBUG through so the file handler sees it.
    logger.setLevel(logging.DEBUG)
    return logger
@dataclass
class TMPHeader:
    """Fixed layout constants for the start of a .tmp file."""
    # Size in bytes of the leading header region (not parsed yet).
    header_size: int = 0x78
    # File offset of the first timestamp block; defaults to the header size.
    timestamp_start: int = header_size
@dataclass
class TMPTimestampBlock:
    """One 16-byte timestamp entry, stored as its two 8-byte halves."""
    # First half: flag byte, 3 tempo bytes, 2-byte event size, 2 bytes undefined.
    event_size_struct: bytes
    # Second half: 2 bytes unparsed, event code A, delta time, event code B.
    timecode: bytes

    @staticmethod
    def _be16(buffer: bytes, offset: int) -> int:
        """Read an unsigned big-endian 16-bit value at ``offset``."""
        (value,) = struct.unpack_from('>H', buffer, offset)
        return value

    @property
    def flag(self) -> int:
        """First byte of the event-size half, treated as a flag."""
        return self.event_size_struct[0]

    @property
    def tempo_data(self) -> int:
        """Bytes 1-3 of the event-size half as a big-endian 24-bit value."""
        (first_dword,) = struct.unpack_from('>I', self.event_size_struct, 0)
        # Mask off the top byte, which is the flag rather than tempo data.
        return first_dword & 0x00FFFFFF

    @property
    def event_size(self) -> int:
        """Big-endian 16-bit event size (bytes 4-5 of the event-size half)."""
        return self._be16(self.event_size_struct, 4)

    @property
    def event_code_a(self) -> int:
        """Big-endian 16-bit event code A (bytes 2-3 of the timecode)."""
        return self._be16(self.timecode, 2)

    @property
    def delta_time(self) -> int:
        """Big-endian 16-bit delta time (bytes 4-5 of the timecode)."""
        return self._be16(self.timecode, 4)

    @property
    def event_code_b(self) -> int:
        """Big-endian 16-bit event code B (bytes 6-7 of the timecode)."""
        return self._be16(self.timecode, 6)

    def get_tempo_microseconds(self) -> int:
        """Return ``tempo_data`` unchanged; it is used as-is for the SMF tempo."""
        return self.tempo_data
@dataclass
class MIDIEvent:
    """A parsed MIDI event plus its timing and the bytes it came from."""
    delta_time: int      # delta time in ticks, written in front of the event
    absolute_tick: int   # tick position of the event
    event_type: int      # high nibble of the status byte, or 0xFF for meta events
    channel: int
    data: bytes          # data bytes without the status byte
    raw_bytes: bytes     # bytes exactly as taken from the input
    file_order: int = 0  # sequence number used to keep the original order

    @property
    def note_name(self) -> str:
        """Note name plus ``note // 12`` (e.g. ``C5`` for note 60); "" for non-note events."""
        if self.event_type not in (0x8, 0x9) or len(self.data) < 1:
            return ""
        number = self.data[0]
        if not 0 <= number <= 127:
            return f"Note {number} (Invalid)"
        octave, semitone = divmod(number, 12)
        names = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")
        return f"{names[semitone]}{octave}"

    @property
    def pitch(self) -> int:
        """Note number of a Note On/Off event, or -1 when not applicable."""
        is_note = self.event_type in (0x8, 0x9)
        return self.data[0] if is_note and len(self.data) >= 1 else -1

    @property
    def velocity(self) -> int:
        """Velocity of a Note On/Off event, or -1 when not applicable."""
        is_note = self.event_type in (0x8, 0x9)
        return self.data[1] if is_note and len(self.data) >= 2 else -1
class TMPFileAnalyzer:
"""Analyzer for .tmp files with custom MIDI format"""
def __init__(self, file_path: str):
self.file_path = file_path
self.data = None
self.header = None
self.timestamp_blocks = []
self.midi_events = []
self.midi_data_start = 0
# Initialize running status variables
self._last_event_type = None
self._last_channel = None
self._event_counter = 0 # Added: counter for maintaining file order
def load_file(self):
    """Read the whole input file into ``self.data``.

    Returns:
        True when the file was read; False when any exception occurred
        (the error is logged).
    """
    try:
        with open(self.file_path, 'rb') as source:
            self.data = source.read()
            logger.info(f"Loaded file: {self.file_path} ({len(self.data)} bytes)")
    except Exception as e:
        logger.error(f"Failed to load file: {e}")
        return False
    return True
def analyze_header(self) -> bool:
    """Check that the file can hold the fixed 0x78-byte header and record it.

    Returns:
        True when ``self.header`` was set; False when no data is loaded,
        the data is shorter than 0x78 bytes, or an exception occurred.
    """
    # ``self.data`` is None until load_file() has run; len(None) would raise,
    # so measure the data defensively before building the message.
    available = len(self.data) if self.data else 0
    if available < 0x78:
        logger.error(f"File too small: {available} < 0x78 bytes")
        return False
    try:
        self.header = TMPHeader()
        logger.info(f"Header size: 0x{self.header.header_size:02X} bytes")
        logger.info(f"Timestamp blocks start at: 0x{self.header.timestamp_start:02X}")
        return True
    except Exception as e:
        logger.error(f"Header analysis failed: {e}")
        return False
def parse_timestamp_blocks(self) -> bool:
    """Read 16-byte timestamp blocks until event code B equals 0x0003.

    Parsing starts at ``self.header.timestamp_start``.  The terminating block
    is not stored; the 4 bytes that follow it are skipped and the resulting
    offset becomes ``self.midi_data_start``.  If the file ends before a
    terminator is seen, a warning is logged and the offset reached so far is
    used instead.

    Returns:
        True when parsing ran to completion; False when the header has not
        been analysed or an exception occurred.
    """
    if not self.header:
        # The missing prerequisite here is the header, not the blocks.
        logger.error("Header not analyzed yet")
        return False
    try:
        current_offset = self.header.timestamp_start
        self.timestamp_blocks = []
        end_marker_found = False
        logger.info(f"Starting timestamp block parsing at offset 0x{current_offset:04X}")
        while current_offset + 16 <= len(self.data):
            block_data = self.data[current_offset:current_offset + 16]
            block = TMPTimestampBlock(
                event_size_struct=block_data[:8],
                timecode=block_data[8:16]
            )
            if block.event_code_b == 0x0003:
                logger.info(f"Found end marker at offset 0x{current_offset:04X} (event code B = 0x0003)")
                # Step over the marker block itself, then over 4 unparsed bytes.
                current_offset += 16 + 4
                logger.info(f"Skipped 4 bytes after end marker, now at offset 0x{current_offset:04X}")
                end_marker_found = True
                break
            self.timestamp_blocks.append(block)
            logger.info(f"Processing timestamp block {len(self.timestamp_blocks)} at offset 0x{current_offset:02X}: "
                        f"flag=0x{block.flag:02X}, "
                        f"tempo_data={block.tempo_data} μs/qn, "
                        f"event_size={block.event_size}, "
                        f"event_code_a=0x{block.event_code_a:04X}, "
                        f"delta_time={block.delta_time}, "
                        f"event_code_b=0x{block.event_code_b:04X}")
            current_offset += 16
        if not end_marker_found:
            # Without a terminator the MIDI start offset is only a guess.
            logger.warning(f"No end marker (event code B = 0x0003) found before end of file; "
                           f"assuming MIDI data starts at offset 0x{current_offset:04X}")
        self.midi_data_start = current_offset
        logger.info(f"Parsed {len(self.timestamp_blocks)} timestamp blocks")
        logger.info(f"MIDI data starts at offset 0x{self.midi_data_start:04X}")
        return True
    except Exception as e:
        logger.error(f"Timestamp block parsing failed: {e}")
        return False
def parse_midi_events(self) -> bool:
    """Split the raw MIDI byte stream into events using the timestamp blocks.

    Each timestamp block claims ``event_size`` bytes of the MIDI data that
    starts at ``self.midi_data_start``.  Every event parsed from those bytes
    gets the tick position that was current when the block started; the
    block's delta time is added to the tick counter only after the block has
    been processed.  Tempo meta events are appended afterwards by
    ``_detect_tempo_changes``.

    Returns:
        True on success.  False when no timestamp blocks are available, when
        an event cannot be parsed (the file offset is logged), or when an
        unexpected exception occurs.
    """
    if not self.timestamp_blocks:
        logger.error("Timestamp blocks not parsed yet")
        return False
    try:
        midi_data = self.data[self.midi_data_start:]
        self.midi_events = []
        # A fresh parse must not inherit running status from an earlier run.
        self._last_event_type = None
        self._last_channel = None
        current_pos = 0   # cursor relative to the start of the MIDI data
        current_tick = 0
        for i, block in enumerate(self.timestamp_blocks):
            bytes_to_parse = block.event_size
            bytes_parsed = 0
            block_start_index = len(self.midi_events)
            event_offsets = []  # file offset of every event in this block
            logger.debug(f"Processing timestamp block {i}: {bytes_to_parse} bytes, delta_time={block.delta_time}")
            logger.info(f"Starting timestamp block {i} at MIDI data position 0x{current_pos:04X} (file offset: 0x{self.midi_data_start + current_pos:04X})")
            logger.info(f"Current tick position: {current_tick}")
            while bytes_parsed < bytes_to_parse and current_pos < len(midi_data):
                event = self._parse_midi_event(midi_data, current_pos)
                if not event:
                    logger.error(f"Failed to parse MIDI event at position {current_pos}")
                    logger.error(f"Aborting due to corrupted MIDI data at offset 0x{self.midi_data_start + current_pos:04X}")
                    return False
                # Only the first event of a block carries the block's delta
                # time; all events of a block share one absolute tick.
                event.delta_time = 0 if event_offsets else block.delta_time
                event.absolute_tick = current_tick
                event_file_offset = self.midi_data_start + current_pos
                event_offsets.append(event_file_offset)
                self.midi_events.append(event)
                event_bytes = len(event.raw_bytes)
                bytes_parsed += event_bytes
                current_pos += event_bytes
                logger.debug(f"Parsed event {len(self.midi_events) - 1}: type=0x{event.event_type:02X}, "
                             f"channel={event.channel}, delta={event.delta_time}, absolute_tick={event.absolute_tick}, "
                             f"bytes={event_bytes}, file_offset=0x{event_file_offset:04X}")
            events_in_block = len(event_offsets)
            if bytes_parsed > bytes_to_parse:
                # An event crossing the block boundary hints at a misparse.
                logger.warning(f"Timestamp block {i}: last event overran the block by {bytes_parsed - bytes_to_parse} bytes")
            elif bytes_parsed < bytes_to_parse:
                logger.warning(f"Reached end of MIDI data while parsing block {i}")
            logger.info(f"Timestamp block {i}: parsed {bytes_parsed}/{bytes_to_parse} bytes, {events_in_block} events")
            logger.info(f"MIDI data cursor advanced to position 0x{current_pos:04X} (file offset: 0x{self.midi_data_start + current_pos:04X})")
            logger.info(f"Tick position for this block: {current_tick}")
            logger.info(f" Events for timestamp block {i} (delta_time={block.delta_time}):")
            for j, event_file_offset in enumerate(event_offsets, start=block_start_index):
                logger.info(self._format_event_log(j, self.midi_events[j], event_file_offset))
            # The block's delta time takes effect for the next block.
            current_tick += block.delta_time
            logger.info(f"Tick position advanced to: {current_tick} (for next block)")
        logger.info(f"Total MIDI events parsed: {len(self.midi_events)}")
        logger.info(f"Final MIDI data position: 0x{current_pos:04X} (file offset: 0x{self.midi_data_start + current_pos:04X})")
        logger.info(f"Final tick position: {current_tick}")
        logger.info("Detecting tempo changes...")
        self._detect_tempo_changes()
        logger.info(f"Total events after tempo detection: {len(self.midi_events)}")
        return True
    except Exception as e:
        logger.error(f"MIDI event parsing failed: {e}")
        return False

def _format_event_log(self, index: int, event, file_offset: int) -> str:
    """Build the one-line log description of a parsed event."""
    event_type_name = self._get_event_type_name(event.event_type)
    event_info = (f" Event {index}: {event_type_name} (type=0x{event.event_type:02X}, "
                  f"channel={event.channel}, delta_time={event.delta_time}, absolute_tick={event.absolute_tick}, "
                  f"file_offset=0x{file_offset:04X}, file_order={event.file_order}")
    if event.event_type in (0x8, 0x9):  # Note Off, Note On
        if event.note_name:
            event_info += f", pitch={event.pitch} ({event.note_name})"
        if event.velocity >= 0:
            event_info += f", velocity={event.velocity}"
        # A Note On with velocity 0 acts as a Note Off.
        if event.event_type == 0x9 and event.velocity == 0:
            event_info += " [Note Off]"
    # Type 0xF with channel 0 is SysEx (status 0xF0), not a meta event, so no
    # meta-type or tempo interpretation is attempted on its payload.
    event_info += f", data={event.data.hex()})"
    return event_info
def _parse_midi_event(self, data: bytes, offset: int) -> Optional[MIDIEvent]:
    """Parse one MIDI event starting at ``data[offset]``.

    A first byte below 0x80 is treated as running status and reuses the type
    and channel of the most recent channel message.  SysEx (0xF0) extends up
    to and including the next 0xF7.  A 0xFF status byte is rejected as an
    unknown event type.

    Args:
        data: The raw MIDI byte stream.
        offset: Index into ``data`` of the event's first byte.

    Returns:
        A ``MIDIEvent`` with ``delta_time`` and ``absolute_tick`` set to 0
        (the caller fills them in), or None when the event is invalid or
        truncated.  ``raw_bytes`` holds exactly the bytes consumed, so a
        running-status event has no status byte in it.
    """
    if offset >= len(data):
        return None
    try:
        first_byte = data[offset]
        if first_byte < 0x80:
            # Data byte in status position: running status.
            event_type = getattr(self, '_last_event_type', None)
            channel = getattr(self, '_last_channel', None)
            if event_type is None or channel is None:
                logger.error(f"Running status detected but no previous event at offset 0x{offset:04X}")
                return None
            data_start = offset
        else:
            data_start = offset + 1
            if first_byte == 0xFF:
                event_type, channel = 0xFF, 0
            else:
                event_type = (first_byte >> 4) & 0x0F
                channel = first_byte & 0x0F
            if event_type <= 0xE:
                # Only channel messages establish running status.
                self._last_event_type = event_type
                self._last_channel = channel
            elif first_byte < 0xF8:
                # SysEx and System Common messages cancel running status;
                # real-time bytes (0xF8 and above) leave it untouched.
                self._last_event_type = None
                self._last_channel = None

        channel_data_lengths = {0x8: 2, 0x9: 2, 0xA: 2, 0xB: 2, 0xC: 1, 0xD: 1, 0xE: 2}
        if event_type in channel_data_lengths:
            data_length = channel_data_lengths[event_type]
        elif event_type == 0xF:
            if channel == 0:
                # SysEx: everything up to and including the terminating 0xF7.
                end_pos = data.find(0xF7, data_start)
                if end_pos < 0:
                    logger.error(f"SysEx event not terminated with 0xF7 at offset 0x{offset:04X}")
                    return None
                data_length = end_pos - data_start + 1
            else:
                # NOTE(review): every other system status (0xF1-0xFE) is
                # assumed to carry one data byte, as before; the MIDI spec
                # defines 0, 1 or 2 depending on the status - confirm.
                data_length = 1
        else:
            # Reached for 0xFF; meta events are not supported in this stream.
            logger.error(f"Unknown MIDI event type 0x{event_type:02X} at offset 0x{offset:04X}")
            return None

        if data_start + data_length > len(data):
            logger.error(f"MIDI event data truncated at offset 0x{offset:04X}, expected {data_length} bytes but only {len(data) - data_start} available")
            return None
        event_data = data[data_start:data_start + data_length]
        raw_bytes = data[offset:data_start + data_length]

        # Data bytes of channel messages must have the top bit clear.
        if event_type in (0x8, 0x9, 0xA, 0xB, 0xE):
            if event_data[0] > 0x7F or event_data[1] > 0x7F:
                logger.error(f"Invalid MIDI data bytes at offset 0x{offset:04X}: {event_data.hex()}")
                return None
        elif event_type in (0xC, 0xD):
            if event_data[0] > 0x7F:
                logger.error(f"Invalid MIDI data byte at offset 0x{offset:04X}: {event_data.hex()}")
                return None

        self._event_counter += 1
        return MIDIEvent(
            delta_time=0,       # set by the caller
            absolute_tick=0,    # set by the caller
            event_type=event_type,
            channel=channel,
            data=event_data,
            raw_bytes=raw_bytes,
            file_order=self._event_counter
        )
    except Exception as e:
        logger.error(f"Event parsing failed at offset 0x{offset:04X}: {e}")
        return None
def _get_event_type_name(self, event_type: int) -> str:
"""Get human-readable name for MIDI event type"""
event_names = {
0x8: "Note Off",
0x9: "Note On",
0xA: "Polyphonic Key Pressure",
0xB: "Control Change",
0xC: "Program Change",
0xD: "Channel Pressure",
0xE: "Pitch Bend",
0xF: "System Message"
}
return event_names.get(event_type, f"Unknown (0x{event_type:02X})")
def _get_meta_event_name(self, meta_type: int) -> str:
"""Get human-readable name for Meta Event type"""
meta_names = {
0x51: "Tempo",
0x2F: "End of Track",
0x58: "Time Signature",
0x59: "Key Signature",
0x00: "Sequence Number",
0x01: "Text",
0x02: "Copyright",
0x03: "Track Name",
0x04: "Instrument Name",
0x05: "Lyric",
0x06: "Marker",
0x07: "Cue Point"
}
return meta_names.get(meta_type, f"Meta (0x{meta_type:02X})")
def generate_smf(self, output_path: str) -> bool:
    """Write the parsed events as a Format 1 Standard MIDI File.

    Events are sorted by absolute tick (note-offs first within a tick, then
    original order).  Meta events (type 0xFF) go into the first track; all
    other events are grouped into one track per ``channel`` value.  Delta
    times are recomputed per track from the absolute ticks, overwriting each
    event's ``delta_time``.

    Args:
        output_path: Destination file path.

    Returns:
        True when the file was written; False when there are no events or an
        exception occurred (logged).
    """
    if not self.midi_events:
        logger.error("No MIDI events to write")
        return False
    try:
        def sort_key(event):
            # Note Off (or Note On with velocity 0) sorts before other events
            # at the same tick; file order breaks the remaining ties.
            is_note_off = (event.event_type == 0x8) or (event.event_type == 0x9 and event.velocity == 0)
            return (event.absolute_tick, not is_note_off, event.file_order)

        sorted_events = sorted(self.midi_events, key=sort_key)
        tracks = {}
        meta_events = []
        for event in sorted_events:
            if event.event_type == 0xFF:
                meta_events.append(event)
            else:
                tracks.setdefault(event.channel, []).append(event)

        # Convert absolute ticks to per-track delta times.
        for track_events in list(tracks.values()) + [meta_events]:
            previous_tick = 0
            for event in track_events:
                event.delta_time = event.absolute_tick - previous_tick
                previous_tick = event.absolute_tick

        logger.info(f"Separated {len(sorted_events)} events into {len(tracks)} channels + meta events")
        logger.info(f"Tick range: {sorted_events[0].absolute_tick} to {sorted_events[-1].absolute_tick}")
        for channel, track_events in tracks.items():
            logger.info(f"Channel {channel}: {len(track_events)} events")
            if track_events:
                logger.info(f" First event: {self._get_event_type_name(track_events[0].event_type)} at tick {track_events[0].absolute_tick}")
                logger.info(f" Last event: {self._get_event_type_name(track_events[-1].event_type)} at tick {track_events[-1].absolute_tick}")

        with open(output_path, 'wb') as f:
            self._write_mthd_chunk(f, len(tracks) + 1)  # +1 for the meta track
            # Always write the meta track, even when it is empty, so the
            # number of MTrk chunks matches the count in the header.
            self._write_mtrk_chunk(f, meta_events, "Meta Events")
            for channel in sorted(tracks):
                self._write_mtrk_chunk(f, tracks[channel], f"Channel {channel}")

        logger.info(f"SMF Format 1 file generated: {output_path}")
        if self._verify_smf_structure(output_path):
            logger.info("SMF file structure verification passed")
        else:
            logger.warning("SMF file structure verification failed - file may have issues")
        return True
    except Exception as e:
        logger.error(f"SMF generation failed: {e}")
        return False
def _get_time_division(self) -> int:
    """Return the SMF time division read from file offset 0x48.

    The value is a big-endian unsigned 16-bit integer.  The default of 48 is
    returned when the data is too short, when reading fails, or when the
    stored value is 0 (a division of 0 ticks is not usable in an SMF header).
    """
    try:
        if len(self.data) < 0x48 + 2:
            logger.warning("File too short to read time division at offset 0x48, using default value 48")
            return 48
        (time_division,) = struct.unpack_from('>H', self.data, 0x48)
        logger.info(f"Read time division from file offset 0x48: {time_division}")
        if time_division == 0:
            logger.warning("Time division at offset 0x48 is 0, using default value 48")
            return 48
        if time_division & 0x8000:
            # Bit 15 switches the SMF division field to SMPTE format.
            logger.warning(f"Time division 0x{time_division:04X} has bit 15 set; SMF readers will treat it as SMPTE timing")
        return time_division
    except Exception as e:
        logger.warning(f"Failed to read time division from offset 0x48: {e}, using default value 48")
        return 48
def _write_mthd_chunk(self, f, track_count: int):
    """Write the SMF header chunk (``MThd``, Format 1) to ``f``.

    Args:
        f: Binary file object to write to.
        track_count: Number of track chunks declared in the header.
    """
    f.write(b'MThd')
    # The MThd payload is always 6 bytes long.
    f.write((6).to_bytes(4, 'big'))
    division = self._get_time_division()
    # Format 1 (multi-track), track count, time division.
    f.write(struct.pack('>3H', 1, track_count, division))
def _write_mtrk_chunk(self, f, events: List[MIDIEvent], track_name: str = ""):
"""Write MIDI track chunk"""
# MTrk signature
f.write(b'MTrk')
# Track length (placeholder, will update later)
track_start = f.tell()
f.write(struct.pack('>I', 0))
# Write track name if provided
if track_name:
# Write track name meta event at tick 0
f.write(b'\x00') # Delta time = 0
f.write(b'\xFF\x03') # Meta event: Track Name
name_bytes = track_name.encode('utf-8')
self._write_vlq(f, len(name_bytes))
f.write(name_bytes)
# Write events
for event in events:
# Write delta time as VLQ
self._write_vlq(f, event.delta_time)
# Write event data
f.write(event.raw_bytes)
# Write end of track
self._write_vlq(f, 0)
f.write(b'\xFF\x2F\x00')
# Update track length (track length is the number of bytes in the track data)
track_end = f.tell()
track_length = track_end - track_start - 4 # Subtract 4 for the length field itself
f.seek(track_start)
f.write(struct.pack('>I', track_length))
f.seek(track_end)
def _write_vlq(self, f, value: int):
"""Write variable-length quantity"""
if value < 0x80:
f.write(bytes([value]))
elif value < 0x4000:
f.write(bytes([0x80 | (value >> 7), value & 0x7F]))
elif value < 0x200000:
f.write(bytes([0x80 | (value >> 14), 0x80 | ((value >> 7) & 0x7F), value & 0x7F]))
else:
f.write(bytes([0x80 | (value >> 21), 0x80 | ((value >> 14) & 0x7F), 0x80 | ((value >> 7) & 0x7F), value & 0x7F]))
def _verify_smf_structure(self, file_path: str) -> bool:
    """Check the chunk layout of a generated Format 1 SMF.

    Verifies the ``MThd`` signature, the header length (6), the format (1),
    and that every declared track starts with ``MTrk`` and lies completely
    inside the file.  Track contents are not inspected.

    Args:
        file_path: Path of the SMF to check.

    Returns:
        True when all checks pass; False otherwise (the reason is logged).
    """
    try:
        with open(file_path, 'rb') as f:
            # The total size is needed to notice truncated chunks; seeking
            # past the end of a file does not fail by itself.
            file_size = f.seek(0, os.SEEK_END)
            f.seek(0)
            mthd_signature = f.read(4)
            if mthd_signature != b'MThd':
                logger.error(f"Invalid MThd signature: {mthd_signature.hex()}")
                return False
            header_length = struct.unpack('>I', f.read(4))[0]
            if header_length != 6:
                logger.error(f"Invalid header length: {header_length}")
                return False
            format_type, track_count, time_division = struct.unpack('>HHH', f.read(6))
            logger.info(f"SMF Header - Format: {format_type}, Tracks: {track_count}, Time Division: {time_division}")
            if format_type != 1:
                logger.error(f"Expected Format 1, got: {format_type}")
                return False
            for track_num in range(track_count):
                mtrk_signature = f.read(4)
                if mtrk_signature != b'MTrk':
                    logger.error(f"Invalid MTrk signature in track {track_num}: {mtrk_signature.hex()}")
                    return False
                track_length = struct.unpack('>I', f.read(4))[0]
                logger.info(f"Track {track_num}: length = {track_length} bytes")
                remaining = file_size - f.tell()
                if track_length > remaining:
                    logger.error(f"Track {track_num} is truncated: {track_length} bytes declared, {remaining} available")
                    return False
                f.seek(track_length, os.SEEK_CUR)
            if f.tell() != file_size:
                logger.warning(f"{file_size - f.tell()} unexpected bytes after the last track")
        logger.info("SMF file structure verification completed successfully")
        return True
    except Exception as e:
        logger.error(f"SMF verification failed: {e}")
        return False
def analyze_file(self) -> bool:
    """Run every analysis stage in order, stopping at the first failure.

    Returns:
        True when loading, header analysis, timestamp parsing and MIDI event
        parsing all succeeded; False as soon as one of them fails.
    """
    logger.info(f"Starting analysis of {self.file_path}")
    stage_names = ("load_file", "analyze_header", "parse_timestamp_blocks", "parse_midi_events")
    for stage_name in stage_names:
        if not getattr(self, stage_name)():
            return False
    logger.info("File analysis completed successfully")
    return True
def export_to_smf(self, output_path: str) -> bool:
    """Write the parsed events to ``output_path`` as an SMF.

    Returns:
        The result of ``generate_smf``, or False when no events were parsed.
    """
    if self.midi_events:
        return self.generate_smf(output_path)
    logger.error("No data to export")
    return False
def _create_tempo_event(self, tempo_microseconds: int, absolute_tick: int) -> MIDIEvent:
    """Build a Set Tempo meta event (``FF 51 03 tt tt tt``).

    Args:
        tempo_microseconds: Tempo value; its low three bytes are stored
            big-endian in the event.
        absolute_tick: Tick position assigned to the event.

    Returns:
        A ``MIDIEvent`` of type 0xFF whose ``file_order`` is the current
        event counter; the counter is incremented afterwards.
    """
    # Pack as 32 bits and keep the low three bytes.
    payload = struct.pack('>I', tempo_microseconds)[-3:]
    tempo_event = MIDIEvent(
        delta_time=0,  # assigned later
        absolute_tick=absolute_tick,
        event_type=0xFF,
        channel=0,  # meta events use channel 0
        data=payload,
        raw_bytes=b'\xFF\x51\x03' + payload,
        file_order=self._event_counter,
    )
    self._event_counter += 1
    return tempo_event
def _detect_tempo_changes(self) -> None:
    """Append a tempo meta event wherever a block's tempo value changes.

    Blocks are walked in order while the tick position is accumulated from
    their delta times.  A tempo event is appended to ``self.midi_events`` for
    the first block and for every block whose ``tempo_data`` differs from the
    previous one.
    """
    active_tempo = None
    tick = 0
    for index, block in enumerate(self.timestamp_blocks):
        tempo = block.tempo_data
        # ``active_tempo`` is None before the first block, which also differs.
        if tempo != active_tempo:
            if active_tempo is None:
                logger.info(f"Initial tempo set at block {index}: {tempo} μs/qn (tick: {tick}, flag: 0x{block.flag:02X})")
            else:
                logger.info(f"Tempo change detected at block {index}: {active_tempo} -> {tempo} μs/qn (tick: {tick}, flag: 0x{block.flag:02X})")
            self.midi_events.append(
                self._create_tempo_event(block.get_tempo_microseconds(), tick)
            )
            active_tempo = tempo
        # The block's delta time applies to the blocks that follow it.
        tick += block.delta_time
    if active_tempo is None:
        return
    # Report the flag of the last block that carries the final tempo.
    last_block = next(
        (candidate for candidate in reversed(self.timestamp_blocks)
         if candidate.tempo_data == active_tempo),
        None,
    )
    if last_block:
        logger.info(f"Final tempo: {active_tempo} μs/qn (flag: 0x{last_block.flag:02X})")
    else:
        logger.info(f"Final tempo: {active_tempo} μs/qn")
def main():
    """Command-line entry point: analyse a .tmp file and export it as SMF.

    Returns:
        Process exit status: 0 on success, 1 when analysis or export failed.
    """
    parser = argparse.ArgumentParser(description='Extract MIDI data from .tmp files with custom format')
    parser.add_argument('input_file', help='Input .tmp file path')
    parser.add_argument('-o', '--output', help='Output SMF file path (default: input_name.mid)')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose logging')
    parser.add_argument('--debug', action='store_true', help='Enable debug logging')
    parser.add_argument('-l', '--log-file', help='Log file path for detailed logging output')
    args = parser.parse_args()

    # --verbose is accepted but maps to the same level as the default.
    log_level = 'DEBUG' if args.debug else 'INFO'
    setup_logging(args.log_file, log_level)
    if args.log_file:
        logger.info(f"Detailed logging enabled - output will be saved to: {args.log_file}")
        logger.debug("Log file test message")
        logger.info("Log file is working correctly")

    # Default output: the input path with its suffix replaced by .mid.
    if not args.output:
        args.output = Path(args.input_file).with_suffix('.mid')

    analyzer = TMPFileAnalyzer(args.input_file)
    if not analyzer.analyze_file():
        logger.error("File analysis failed")
        return 1
    logger.info("File analysis completed successfully")
    if not analyzer.export_to_smf(args.output):
        logger.error("Failed to export SMF file")
        return 1
    logger.info(f"Successfully exported {args.input_file} to {args.output}")
    logger.info("SMF file is ready for playback")
    return 0


if __name__ == '__main__':
    # exit() is injected by the site module and may be absent (e.g. with -S);
    # raising SystemExit works everywhere and needs no import.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment