JazzJackrabbit Community Forums - View Single Post

InfamousStar · Oct 6, 2015, 08:13 AM

I wrote a Python script to extract the sound effects from Anims.j2a. Information on Jazz2's sample format is scarce, so I ended up relying heavily on DJazz's PHP code. Anyway, here it is:

Code:

#!/usr/bin/env python2

import wave

from os import mkdir
from StringIO import StringIO
from struct import unpack
from zlib import decompress

# These functions read the next 8/16/32 bits of a file and return them as a
# signed little-endian int.
def readInt8(f):
    """Read 1 byte from f and return it as a signed 8-bit int."""
    (value,) = unpack('<b', f.read(1))
    return value
def readInt16(f):
    """Read 2 bytes from f and return them as a signed little-endian int."""
    (value,) = unpack('<h', f.read(2))
    return value
def readInt32(f):
    """Read 4 bytes from f and return them as a signed little-endian int."""
    (value,) = unpack('<l', f.read(4))
    return value


def parseAlibHeader(j2a):
    """Read the library header at the start of a j2a file into a dict.

    The file position is moved to 0 first, and the fields are then consumed
    in the order listed below.

    Keys of the returned dict:
    "magic number" -- first 4 bytes; 'ALIB' identifies the file format
    "unknown1" -- 0x00beba00; purpose unknown
    "header size" -- size of this header, in bytes
    "version" -- 0x0200; probably means v2.0
    "unknown2" -- 0x1808; purpose unknown
    "file size" -- size of the file, as recorded in the header
    "crc32" -- CRC32 hash of file
    "set count" -- number of animation sets in file
    "set offsets" -- list with one byte offset per animation set
    """

    j2a.seek(0)

    # Each read advances the file position, so the order of these statements
    # is the layout of the header.
    magicNumber = j2a.read(4)
    unknown1 = readInt32(j2a)
    headerSize = readInt32(j2a)
    version = readInt16(j2a)
    unknown2 = readInt16(j2a)
    fileSize = readInt32(j2a)
    crc32 = readInt32(j2a)
    setCount = readInt32(j2a)

    # The offset table follows immediately: one 32-bit entry per set.
    setOffsets = []
    for _ in range(setCount):
        setOffsets.append(readInt32(j2a))

    return {
        "magic number": magicNumber,
        "unknown1": unknown1,
        "header size": headerSize,
        "version": version,
        "unknown2": unknown2,
        "file size": fileSize,
        "crc32": crc32,
        "set count": setCount,
        "set offsets": setOffsets,
    }

def parseAnimHeader(j2a, offset):
    """Parses an animation set's header, and returns a dict of the contents.

    j2a -- seekable binary file object
    offset -- byte offset of the set's header within j2a

    Entries in returned dict:
    "magic number" -- 'ANIM'; marks start of set
    "animation count" -- number of animations in this set
    "sample count" -- number of sound samples in set
    "frame count" -- total number of frames in all animations in set
    "prior sample count" -- total number of samples before this set
    "animation info",
    "frame info",
    "image data",
    "sample data" -- dicts detailing the size of each part of the set

    The rest of the set is compressed with zlib, so the last 4 entries above
    contain these entries:
    "compressed" -- actual size in file
    "uncompressed" -- size when uncompressed

    Raises struct.error if fewer than 44 bytes are available at offset.
    """

    sections = ("animation info", "frame info", "image data", "sample data")

    j2a.seek(offset)

    # The header is one 44-byte little-endian record. The three counts are
    # unpacked as unsigned (B, B, H): a count can never be negative, and a
    # signed read would turn e.g. 200 animations into -56.
    fields = unpack('<4sBBHl8l', j2a.read(44))

    header = {
        "magic number": fields[0],
        "animation count": fields[1],
        "sample count": fields[2],
        "frame count": fields[3],
        "prior sample count": fields[4],
    }

    # The remaining eight values are (compressed, uncompressed) size pairs,
    # one pair per section, in section order.
    sizes = fields[5:]
    for key, compressed, uncompressed in zip(sections, sizes[0::2], sizes[1::2]):
        header[key] = {
            "compressed": compressed,
            "uncompressed": uncompressed,
        }

    return header

def sampleDataOffset(setOffset, animHeader):
    """Compute where an animation set's compressed sample data begins.

    setOffset -- byte offset of the set within the file
    animHeader -- dict as returned by parseAnimHeader

    The sample data is the last section of a set, so its position is the set
    offset plus the 44-byte header plus the compressed sizes of the three
    sections stored before it.
    """

    headerSize = 44
    precedingSections = ("animation info", "frame info", "image data")
    compressedTotal = sum(
        animHeader[name]["compressed"] for name in precedingSections
    )

    return setOffset + headerSize + compressedTotal

def decompressInFile(f, offset, size):
    """Inflate `size` bytes of zlib data found at `offset` in f.

    Moves f's position to offset, reads size bytes, and returns the
    decompressed result wrapped in a StringIO positioned at its start.
    """

    f.seek(offset)
    compressed = f.read(size)
    inflated = decompress(compressed)
    return StringIO(inflated)

def parseSample(data, offset):
    """Read the sample record that starts at `offset` in `data`.

    data -- seekable file-like object holding decompressed sample data
    offset -- byte offset of the record within data

    Keys of the returned dict:
    "size" -- combined size of all entries
    "chunk id" -- 'RIFF'; identifies chunk type
    "chunk size" -- size of everything below this, except "padding3"
    "format" -- 'AS  '; identifies format contained in RIFF
    "samp subchunk" -- dict containing format info (see below)
    "data" -- waveform data of sample
    "padding3" -- 8 bytes of padding

    Keys of the "samp subchunk" dict:
    "id" -- 'SAMP'; identifies header
    "padding1" -- 48 bytes of padding (probably)
    "data size" -- size of data, in bytes
    "padding2" -- 8 bytes of padding
    "sample rate" -- samples per second
    """

    data.seek(offset)

    # Every read advances the position, so the statements below must stay in
    # file order.
    totalSize = readInt32(data)
    chunkId = data.read(4)
    chunkSize = readInt32(data)
    riffFormat = data.read(4)

    subchunkId = data.read(4)
    padding1 = data.read(48)
    dataSize = readInt32(data)
    padding2 = data.read(8)
    sampleRate = readInt32(data)

    waveform = data.read(dataSize)
    padding3 = data.read(8)

    return {
        "size": totalSize,
        "chunk id": chunkId,
        "chunk size": chunkSize,
        "format": riffFormat,
        "samp subchunk": {
            "id": subchunkId,
            "padding1": padding1,
            "data size": dataSize,
            "padding2": padding2,
            "sample rate": sampleRate,
        },
        "data": waveform,
        "padding3": padding3,
    }

def parseSampleData(data, sampleCount):
    """Parse `sampleCount` consecutive samples from `data` into a list.

    data -- seekable file-like object holding decompressed sample data
    sampleCount -- number of samples to read

    Each element is a dict as returned by parseSample.
    """

    data.seek(0)

    # The position left behind by parseSample is not trusted: in at least one
    # place it does not land on the next sample. The start of each sample is
    # therefore computed from the "size" field of the one before it.
    parsed = []
    nextOffset = 0
    for _ in range(sampleCount):
        current = parseSample(data, nextOffset)
        parsed.append(current)
        nextOffset += current["size"]

    return parsed

def writeWav2(filename, sample):
    """Write a parsed sample to `filename` as a mono, 16-bit WAV file.

    filename -- output path, passed straight to wave.open; an existing file
                is overwritten
    sample -- dict as returned by parseSample; only
              sample["samp subchunk"]["sample rate"] and sample["data"] are
              used

    Raises KeyError if sample lacks either entry; in that case no file is
    created or truncated.
    """

    # Look both values up before opening the output, so a malformed sample
    # fails without leaving an empty file behind.
    sampleRate = sample["samp subchunk"]["sample rate"]
    frames = sample["data"]

    # todo: don't overwrite file if it already exists
    wavFile = wave.open(filename, 'w')
    try:
        # the 1st two are hardcoded until I find this info in the sample
        wavFile.setnchannels(1)
        wavFile.setsampwidth(2)
        wavFile.setframerate(sampleRate)

        wavFile.writeframes(frames)
    finally:
        # Always release the file handle, even if a setter or the write fails.
        wavFile.close()


def main():
    """Extract every sound sample in Anims.j2a into the "sounds" directory.

    Reads Anims.j2a from the current directory and writes one file per sample,
    named sounds/sfx_<set number>_<sample number>.wav. Progress is printed to
    stdout. The "sounds" directory is created if it does not exist yet.

    Raises whatever open() raises if Anims.j2a cannot be opened.
    """

    from os.path import isdir  # local import: only needed here

    filename = "Anims.j2a"  # I can probably just leave this hardcoded

    # Binary mode is required: the file is parsed byte by byte, and text mode
    # on Windows would translate line endings and stop at the first 0x1A.
    with open(filename, 'rb') as j2a:
        alibHeader = parseAlibHeader(j2a)  # find animation sets
        # todo: verify file with checksum

        print("{} found.".format(filename))
        print("Total size: {} bytes".format(alibHeader["file size"]))
        print("{} animation sets found.\n".format(alibHeader["set count"]))

        # Reuse the output directory if a previous run already created it;
        # any other failure (permissions, a plain file in the way) still
        # propagates.
        try:
            mkdir("sounds")
        except OSError:
            if not isdir("sounds"):
                raise

        wavName = "sounds/sfx_{}_{}.wav"

        # for each set
        for setNum, offset in enumerate(alibHeader["set offsets"]):
            animHeader = parseAnimHeader(j2a, offset)  # find sample data

            print("Decompressing data for set {}...".format(setNum))

            # find samples
            decompressed = decompressInFile(
                j2a,
                sampleDataOffset(offset, animHeader),
                animHeader["sample data"]["compressed"])
            samples = parseSampleData(decompressed, animHeader["sample count"])

            print("{} samples found. Writing to disk...".format(len(samples)))

            # write samples
            for sampleNum, sample in enumerate(samples):
                writeWav2(wavName.format(setNum, sampleNum), sample)

# Run the extraction only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()