Skip to content

Instantly share code, notes, and snippets.

@spicyjpeg
Last active August 1, 2024 04:44
Show Gist options
  • Save spicyjpeg/a46d73d28780c626eb7646d285e0f301 to your computer and use it in GitHub Desktop.
Save spicyjpeg/a46d73d28780c626eb7646d285e0f301 to your computer and use it in GitHub Desktop.
MIDI loop unrolling tool for Beatnik MIDI files
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""MIDI loop unrolling tool for Beatnik MIDI files
A simple (and ridiculously specific) command-line tool to "unroll" looping MIDI
files into a regular full-length MIDI file. This is a *very* specific format
used by the Beatnik MIDI engine that allows for MIDI files to be "compressed" by
storing patterns rather than full tracks for each instrument, in a similar way
to how modern DAWs like FL Studio allow for a song to be arranged from patterns.
The way the format works is actually fairly trivial. Each pattern is stored as a
separate track in the MIDI file and starts with a bunch of CC messages
containing the playlist/order list, i.e. a list of timestamps the pattern should
be played at. In particular, CC85 is used to store the total duration of the
entire arrangement, while CC87 is used to add an entry to the current track's
order list. Once these events are extracted, unrolling the file is simply a
matter of copying each pattern's events and pasting them at each timestamp
listed in its order list.
The only devices I know of that actually support and use this format are WebTV
boxes, and even then I was only able to test this script on one file (namely,
the music that plays when the "Connecting to WebTV" screen is shown). The actual
reason I am publishing it is because the MIDI parsing/serialization classes are
fairly generic and can be used for other purposes; feel free to rip them off as
long as you give credit.
"""
__version__ = "0.1.0"
__author__ = "spicyjpeg"
from argparse import ArgumentParser, FileType
from struct import Struct
from enum import IntEnum
## Big-endian base 128 value reader/writer (used by MIDI classes)
def _parseBEB128(data, maxLength = 8):
    """Decode a big-endian base 128 (variable-length) integer.

    Each byte contributes its low 7 bits to the value, most significant group
    first; a byte whose top bit is clear terminates the value.

    Returns a ( value, bytesConsumed ) tuple. Raises ValueError if none of the
    first maxLength bytes terminates the value; indexing past the end of a
    shorter buffer raises IndexError.
    """
    accumulator = 0
    consumed    = 0

    while consumed < maxLength:
        byte      = data[consumed]
        consumed += 1

        accumulator = (accumulator << 7) | (byte & 0x7f)

        # A clear top bit marks the final byte of the value.
        if (byte & 0x80) == 0:
            return accumulator, consumed

    raise ValueError(f"value is larger than {maxLength} bytes")
def _toBEB128(value, maxLength = 8):
    """Encode an integer as a big-endian base 128 (variable-length) value.

    Returns a bytearray in which every byte except the last has its top bit
    set. Raises ValueError if value is negative or does not fit in maxLength
    bytes.
    """
    if value < 0:
        raise ValueError("value must be positive")

    # Collect 7-bit groups starting from the least significant one, then emit
    # them in reverse so the most significant group comes first.
    septets = []

    for _ in range(maxLength):
        septets.append(value & 0x7f)
        value >>= 7

        if value == 0:
            encoded = bytearray(septet | 0x80 for septet in reversed(septets))
            # The last byte carries no continuation flag.
            encoded[-1] &= 0x7f
            return encoded

    raise ValueError(f"value is larger than {maxLength} bytes")
## MIDI event class
class EventType(IntEnum):
    """Event kind, taken from the upper 4 bits of a MIDI status byte."""

    # Channel voice messages
    NOTE_OFF        = 0x8
    NOTE_ON         = 0x9
    POLY_AFTERTOUCH = 0xa
    CONTROLLER      = 0xb
    PROGRAM_CHANGE  = 0xc
    AFTERTOUCH      = 0xd
    PITCH_BEND      = 0xe

    # Everything else; the lower 4 bits of the status byte then select a
    # SystemEventType instead of a channel.
    SYSTEM_EVENT    = 0xf
class SystemEventType(IntEnum):
    """Kind of system event, taken from the lower 4 bits of the status byte."""

    SYSEX      = 0x0 # Status byte 0xf0
    RAW_SYSEX  = 0x7 # Status byte 0xf7
    META_EVENT = 0xf # Status byte 0xff
class MetaEventType(IntEnum):
    """Type byte of a meta event (the byte following a 0xff status byte)."""

    SEQUENCE_NUM   = 0x00
    TEXT           = 0x01
    COPYRIGHT      = 0x02
    TRACK_NAME     = 0x03
    INST_NAME      = 0x04
    LYRIC          = 0x05
    MARKER         = 0x06
    CUE_POINT      = 0x07
    # Program/device name and MIDI port are not part of the original SMF 1.0
    # list but show up in many real-world files.
    PROGRAM_NAME   = 0x08
    DEVICE_NAME    = 0x09
    CHANNEL_PREFIX = 0x20
    MIDI_PORT      = 0x21
    END_OF_TRACK   = 0x2f
    TEMPO          = 0x51
    # "SMTPE" is a historical misspelling, kept as the canonical member so
    # existing references (and the member's .name) do not change. The correctly
    # spelled SMPTE_OFFSET is an alias for the same value.
    SMTPE_OFFSET   = 0x54
    SMPTE_OFFSET   = 0x54
    TIME_SIGNATURE = 0x58
    KEY_SIGNATURE  = 0x59
    OTHER          = 0x7f
class MIDIEvent:
def __init__(self, timestamp, status, param):
self.timestamp = timestamp
self.channel = status & 0xf
self.eventType = EventType(status >> 4)
self.systemEventType = None
self.metaEventType = None
match self.eventType:
case EventType.SYSTEM_EVENT:
self.systemEventType = SystemEventType(self.channel)
match self.systemEventType:
case SystemEventType.SYSEX | SystemEventType.RAW_SYSEX:
dataLength, length = _parseBEB128(param)
self.length = dataLength + length
case SystemEventType.META_EVENT:
self.metaEventType = MetaEventType(param[0])
self.length = param[1] + 2
case EventType.PROGRAM_CHANGE | EventType.AFTERTOUCH:
self.length = 1
case _:
self.length = 2
self.param = bytes(param[0:self.length])
def getStatus(self):
return self.channel | (self.eventType << 4)
def copy(self, timestampOffset = 0):
return MIDIEvent(
self.timestamp + timestampOffset, self.getStatus(), self.param
)
def serialize(self, lastTimestamp = None):
if lastTimestamp is None:
data = bytearray()
else:
data = bytearray(_toBEB128(self.timestamp - lastTimestamp))
data.append(self.getStatus())
data.extend(self.param)
return data
## MIDI track data parser/generator
class MIDITrack:
    """A list of MIDIEvent objects plus the total duration of the track.

    Attributes:
        duration: Timestamp (in ticks) at which the end-of-track event is
                  placed when serializing.
        events:   The track's events, excluding the end-of-track meta event.
    """

    def __init__(self, duration = 0):
        self.duration = duration
        self.events   = []

    def _getEndOfTrack(self):
        """Create the end-of-track meta event, timestamped at self.duration."""
        return MIDIEvent(
            self.duration,
            SystemEventType.META_EVENT | (EventType.SYSTEM_EVENT << 4),
            ( MetaEventType.END_OF_TRACK, 0 )
        )

    def parse(self, data):
        """Replace the track's contents with events parsed from data.

        data is the body of a track chunk (without its header) and must be a
        bytes-like object. Parsing stops at the first end-of-track meta event,
        which is not stored but whose timestamp becomes self.duration. Raises
        RuntimeError if a data byte is found before any status byte.
        """
        self.duration = 0
        self.events   = []

        # Slicing a memoryview does not copy, unlike slicing bytes; this avoids
        # duplicating the remainder of the track for every event.
        view   = memoryview(data)
        offset = 0
        status = None

        while offset < len(view):
            delta, length  = _parseBEB128(view[offset:])
            self.duration += delta
            offset        += length

            # A byte with the top bit set is a new status byte; otherwise the
            # previous status is reused (running status).
            if view[offset] & 0x80:
                status  = view[offset]
                offset += 1
            elif status is None:
                raise RuntimeError("running status is not available")

            event   = MIDIEvent(self.duration, status, view[offset:])
            offset += event.length

            if event.metaEventType == MetaEventType.END_OF_TRACK:
                break
            # System events reset the running status.
            if event.eventType == EventType.SYSTEM_EVENT:
                status = None

            self.events.append(event)

    def serialize(self, timestampOffset = 0):
        """Return the track body (without chunk header) as a bytearray.

        The first event's delta time is computed relative to timestampOffset.
        An end-of-track meta event is appended automatically.
        """
        data          = bytearray()
        lastTimestamp = timestampOffset

        for event in self.events:
            data.extend(event.serialize(lastTimestamp))
            lastTimestamp = event.timestamp

        data.extend(self._getEndOfTrack().serialize(lastTimestamp))
        return data
## MIDI file parser/generator
# File header chunk, big-endian: 4-byte magic, 32-bit header length (checked
# to be 6 when parsing), then three 16-bit fields (format type, number of
# tracks, division).
_FILE_HEADER = Struct("> 4s I 3H")
_FILE_MAGIC = b"MThd"
# Track chunk header, big-endian: 4-byte magic followed by the 32-bit length of
# the track data that follows.
_TRACK_HEADER = Struct("> 4s I")
_TRACK_MAGIC = b"MTrk"
class MIDIFile:
    """In-memory representation of a MIDI file.

    Attributes:
        formatType: Format field from the file header.
        division:   Division field from the file header.
        metaTrack:  The first track of the file (None until one is parsed or
                    assigned).
        tracks:     All remaining tracks, in file order.
    """

    def __init__(self, formatType = 1, division = 0):
        self.formatType = formatType
        self.division   = division
        self.metaTrack  = None
        self.tracks     = []

    def parse(self, data):
        """Load the header and all tracks from the contents of a MIDI file.

        Raises RuntimeError if the file or a track has an invalid magic, or if
        the header length field is not 6.
        """
        (
            fileMagic,
            headerLength,
            self.formatType,
            trackCount,
            self.division
        ) = _FILE_HEADER.unpack_from(data, 0)
        cursor = _FILE_HEADER.size

        if fileMagic != _FILE_MAGIC:
            raise RuntimeError(f"invalid file magic: {fileMagic}")
        if headerLength != 6:
            raise RuntimeError(f"invalid file header length: {headerLength}")

        self.tracks = []

        for _ in range(trackCount):
            chunkMagic, chunkLength = _TRACK_HEADER.unpack_from(data, cursor)
            cursor                 += _TRACK_HEADER.size

            if chunkMagic != _TRACK_MAGIC:
                raise RuntimeError(f"invalid track magic: {chunkMagic}")

            chunkEnd = cursor + chunkLength
            parsed   = MIDITrack()
            parsed.parse(data[cursor:chunkEnd])
            self.tracks.append(parsed)
            cursor   = chunkEnd

        # The first track is kept separately from the others.
        self.metaTrack = self.tracks.pop(0)

    def serialize(self):
        """Return the whole file (header, meta track, tracks) as a bytearray."""
        allTracks = [ self.metaTrack, *self.tracks ]
        output    = bytearray(_FILE_HEADER.pack(
            _FILE_MAGIC, 6, self.formatType, len(self.tracks) + 1, self.division
        ))

        for track in allTracks:
            body    = track.serialize()
            output += _TRACK_HEADER.pack(_TRACK_MAGIC, len(body))
            output += body

        return output
## Pattern parsing helper
class Pattern:
    """Events of one pattern track plus the slots it should be played at.

    Attributes:
        metaEvents: Meta events found in the track.
        events:     All other events of the track.
        orderList:  Set of order list entries collected for this pattern.
    """

    def __init__(self):
        self.metaEvents, self.events = [], []
        self.orderList               = set()
class BeatnikCC(IntEnum):
    """Controller numbers used as Beatnik loop markers."""

    ARRANGEMENT_DURATION = 85 # Total duration of the arrangement
    ORDER_LIST_EXCLUDE   = 86
    ORDER_LIST_INCLUDE   = 87 # Adds an entry to the track's order list
def _importPatterns(tracks, keepMarkers = False):
    """Split each track into a Pattern and collect the Beatnik loop markers.

    Returns a ( patterns, numPatterns, duration ) tuple, where patterns holds
    one Pattern per track (in the same order), numPatterns is the value of the
    arrangement duration marker plus one and duration is the longest track
    duration. Loop marker events are dropped from the patterns unless
    keepMarkers is set.

    Raises RuntimeError if the tracks disagree on the arrangement duration or
    if no arrangement duration marker is present at all.
    """
    patterns    = []
    numPatterns = None
    duration    = 0

    for track in tracks:
        duration = max(duration, track.duration)
        pattern  = Pattern()
        patterns.append(pattern)

        for event in track.events:
            # Meta events (track names and so on) are set aside; they are going
            # to be kept as-is in the generated tracks.
            if event.metaEventType is not None:
                pattern.metaEvents.append(event)
                continue

            isMarker = False

            if event.eventType == EventType.CONTROLLER:
                controller, value = event.param

                if controller == BeatnikCC.ARRANGEMENT_DURATION:
                    isMarker = True

                    if numPatterns is None:
                        numPatterns = value + 1
                    elif numPatterns != (value + 1):
                        raise RuntimeError(
                            "conflicting arrangement duration values found"
                        )
                elif controller == BeatnikCC.ORDER_LIST_INCLUDE:
                    isMarker = True
                    pattern.orderList.add(value)

                # NOTE: BeatnikCC.ORDER_LIST_EXCLUDE is not handled; such events
                # are treated like any other controller event.

            if isMarker and not keepMarkers:
                continue

            pattern.events.append(event)

    if numPatterns is None:
        raise RuntimeError(
            "no arrangement duration value found (file has no loop markers?)"
        )

    return patterns, numPatterns, duration
## Main
def _createParser():
    """Build the command-line argument parser for the tool.

    The default help option is disabled and re-added manually so that it shows
    up under the "Tool options" group.
    """
    description = (
        "Unrolls a looping MIDI file containing Beatnik loop markers into "
        "a regular MIDI file."
    )
    parser = ArgumentParser(description = description, add_help = False)

    toolOptions = parser.add_argument_group("Tool options")
    toolOptions.add_argument(
        "-h", "--help",
        action = "help",
        help   = "Show this help message and exit"
    )

    conversionOptions = parser.add_argument_group("Conversion options")
    conversionOptions.add_argument(
        "-k", "--keep-markers",
        action = "store_true",
        help   = "Do not remove loop markers from output file"
    )
    conversionOptions.add_argument(
        "-s", "--strip-meta",
        action = "store_true",
        help   = "Remove all meta events (track names, etc.) from each track"
    )

    filePaths = parser.add_argument_group("File paths")
    filePaths.add_argument(
        "inputFile",
        type = FileType("rb"),
        help = "Path to input MIDI file"
    )
    filePaths.add_argument(
        "outputFile",
        type = FileType("wb"),
        help = "Path to unrolled MIDI file to generate"
    )

    return parser
def main():
    """Parse the command line, unroll the input file and write the result.

    Every track after the first one is rebuilt from its pattern: the pattern's
    events are pasted once for each entry in its order list, offset by the
    entry index times the pattern duration.
    """
    parser = _createParser()
    args   = parser.parse_args()

    midiFile = MIDIFile()
    with args.inputFile as _file:
        midiFile.parse(_file.read())

    patterns, numPatterns, duration = _importPatterns(
        midiFile.tracks, args.keep_markers
    )
    totalDuration               = numPatterns * duration
    midiFile.metaTrack.duration = totalDuration

    for track, pattern in zip(midiFile.tracks, patterns):
        events = []

        # Copy all meta events over, then copy the pattern multiple times (one
        # for each time this pattern is listed in the order list).
        if not args.strip_meta:
            events.extend(event.copy() for event in pattern.metaEvents)

        for index in sorted(pattern.orderList):
            offset = index * duration
            events.extend(event.copy(offset) for event in pattern.events)

        # Meta events keep their original timestamps, which may be later than
        # the first pasted events. Events must be in chronological order to be
        # serialized (delta times cannot be negative), so sort them; the sort
        # is stable, so events sharing a timestamp keep their relative order.
        events.sort(key = lambda event: event.timestamp)

        track.events   = events
        track.duration = totalDuration

    with args.outputFile as _file:
        _file.write(midiFile.serialize())

if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment