I wrote a Python script to extract the sound effects from Anims.j2a. Information on Jazz2's sample format is scarce, so I ended up relying heavily on DJazz's PHP code. Anyway, here it is:
Code:
#!/usr/bin/env python2
import wave
from os import mkdir
from StringIO import StringIO
from struct import unpack
from zlib import decompress
# These functions return the next 8/16/32 bits of a file as a little endian int.
def readInt8(f):
    """Read one byte from f and return it as a signed 8-bit integer."""
    (value,) = unpack('<b', f.read(1))
    return value
def readInt16(f):
    """Read two bytes from f; return them as a signed little endian int."""
    (value,) = unpack('<h', f.read(2))
    return value
def readInt32(f):
    """Read four bytes from f; return them as a signed little endian int."""
    (value,) = unpack('<l', f.read(4))
    return value
def parseAlibHeader(j2a):
    """Parses a j2a file's header, and returns a dict of the contents.

    The file is rewound to offset 0 before reading. All integers are read
    as signed little endian values.

    Entries in returned dict:
    "magic number" -- 'ALIB'; identifies file format
    "unknown1" -- 0x00beba00; purpose unknown
    "header size" -- size of this header, in bytes
    "version" -- 0x0200; probably means v2.0
    "unknown2" -- 0x1808; purpose unknown
    "file size" -- obvious
    "crc32" -- CRC32 hash of file
    "set count" -- number of animation sets in file
    "set offsets" -- list containing the offsets for each set, in bytes

    Raises struct.error if the file ends before the header is complete.
    """
    j2a.seek(0)
    # Fixed-size part of the header, in file order: a 4-byte magic string,
    # two int32s, two int16s and three int32s (28 bytes in total).
    fixedKeys = (
        "magic number",
        "unknown1",
        "header size",
        "version",
        "unknown2",
        "file size",
        "crc32",
        "set count",
    )
    header = dict(zip(fixedKeys, unpack('<4sllhhlll', j2a.read(28))))
    # The offset table follows immediately: one int32 per animation set.
    setCount = header["set count"]
    if setCount > 0:
        offsets = unpack('<%dl' % setCount, j2a.read(4 * setCount))
    else:
        # A zero (or invalid negative) count means there is no table to read.
        offsets = ()
    header["set offsets"] = list(offsets)
    return header
def parseAnimHeader(j2a, offset):
    """Parses an animation set's header, and returns a dict of the contents.

    j2a is positioned at offset before reading, and 44 bytes are consumed.

    Entries in returned dict:
    "magic number" -- 'ANIM'; marks start of set
    "animation count" -- number of animations in this set
    "sample count" -- number of sound samples in set
    "frame count" -- total number of frames in all animations in set
    "prior sample count" -- total number of samples before this set
    "animation info",
    "frame info",
    "image data",
    "sample data" -- dicts detailing the size of each part of the set

    The rest of the set is compressed with zlib, so the last 4 entries above
    contain these entries:
    "compressed" -- actual size in file
    "uncompressed" -- size when uncompressed

    Raises struct.error if fewer than 44 bytes are available at offset.
    """
    j2a.seek(offset)
    # The two one-byte counts are read unsigned ('B'): a count cannot be
    # negative, and a signed read would turn any value above 127 into a
    # negative number, making callers silently skip the set's samples.
    fields = unpack('<4sBBhl8l', j2a.read(44))
    header = {
        "magic number": fields[0],
        "animation count": fields[1],
        "sample count": fields[2],
        "frame count": fields[3],
        "prior sample count": fields[4],
    }
    # Remaining eight int32s are (compressed, uncompressed) pairs, one pair
    # per section, stored in this order.
    sizes = fields[5:]
    sections = ("animation info", "frame info", "image data", "sample data")
    for index, key in enumerate(sections):
        header[key] = {
            "compressed": sizes[2 * index],
            "uncompressed": sizes[2 * index + 1],
        }
    return header
def sampleDataOffset(setOffset, animHeader):
    """Returns the offset of an animation set's sample data.

    setOffset -- offset of the set's header within the file, in bytes
    animHeader -- dict as returned by parseAnimHeader

    The sample data is the last section of a set, so its offset is the set
    offset plus the header size plus the compressed sizes of the three
    sections stored before it.
    """
    headerSize = 44  # animHeader is 44 bytes
    precedingSections = ("animation info", "frame info", "image data")
    precedingSize = sum(
        animHeader[key]["compressed"] for key in precedingSections
    )
    return setOffset + headerSize + precedingSize
def decompressInFile(f, offset, size):
    """Decompresses portion of file, returning result as a StringIO.

    f -- file object to read from; it is left positioned after the bytes read
    offset -- position of the zlib stream within f, in bytes
    size -- number of compressed bytes to read

    Raises zlib.error if the bytes read are not a complete zlib stream.
    """
    f.seek(offset)
    compressed = f.read(size)
    return StringIO(decompress(compressed))
def parseSample(data, offset):
    """Parses a sample, returning a dict of the contents.

    data is positioned at offset before reading. Integers are read as
    signed little endian 32-bit values.

    Entries in returned dict:
    "size" -- combined size of all entries
    "chunk id" -- 'RIFF'; identifies chunk type
    "chunk size" -- size of everything below this, except "padding3"
    "format" -- 'AS '; identifies format contained in RIFF
    "samp subchunk" -- dict containing format info
    "data" -- waveform data of sample
    "padding3" -- 8 bytes of padding

    "samp subchunk" contains these entries:
    "id" -- 'SAMP'; identifies header
    "padding1" -- 48 bytes of padding (probably)
    "data size" -- size of data, in bytes
    "padding2" -- 8 bytes of padding
    "sample rate" -- samples per second

    Raises struct.error if fewer than 84 bytes are available at offset.
    """
    data.seek(offset)
    # Everything before the waveform is a fixed 84-byte prefix, in this
    # order: size, chunk id, chunk size, format, SAMP id, padding1,
    # data size, padding2, sample rate.
    (size, chunkId, chunkSize, formatId, sampId, padding1, dataSize,
     padding2, sampleRate) = unpack('<l4sl4s4s48sl8sl', data.read(84))
    sample = {
        "size": size,
        "chunk id": chunkId,
        "chunk size": chunkSize,
        "format": formatId,
        "samp subchunk": {
            "id": sampId,
            "padding1": padding1,
            "data size": dataSize,
            "padding2": padding2,
            "sample rate": sampleRate,
        },
    }
    # The waveform length comes from the subchunk, then 8 trailing bytes.
    sample["data"] = data.read(dataSize)
    sample["padding3"] = data.read(8)
    return sample
def parseSampleData(data, sampleCount):
    """Parses sample data, returning a list of samples.

    data -- seekable file-like object holding the decompressed sample data
    sampleCount -- number of samples to parse from the start of data

    See parseSample for info on elements.
    """
    samples = []
    data.seek(0)
    # In theory, parsing one sample should put me at the start of the next.
    # Unfortunately, there's at least one place where this fails, so I have to
    # keep track of the offset, manually.
    offset = 0
    for _ in range(sampleCount):
        sample = parseSample(data, offset)
        samples.append(sample)
        # Each sample records its own total size; use it to find the next.
        offset += sample["size"]
    return samples
def writeWav2(filename, sample):
    """Writes a parsed sample to filename as a mono, 16-bit WAV file.

    filename -- path of the WAV file to create (passed to wave.open)
    sample -- dict as returned by parseSample; its "data" entry and the
              "sample rate" of its "samp subchunk" are used
    """
    # todo: don't overwrite file if it already exists
    wavFile = wave.open(filename, 'w')
    try:
        # the 1st two are hardcoded until I find this info in the sample
        wavFile.setnchannels(1)
        wavFile.setsampwidth(2)
        wavFile.setframerate(sample["samp subchunk"]["sample rate"])
        wavFile.writeframes(sample["data"])
    finally:
        # Always release the file, even if a bad sample makes a call above
        # raise.
        wavFile.close()
def main():
    """Extracts every sound sample from Anims.j2a into sounds/ as WAV files.

    Reads the archive header to locate each animation set, decompresses the
    sample data of every set, and writes each sample to
    sounds/sfx_<set>_<sample>.wav.
    """
    from os.path import isdir  # only needed for the directory check below

    filename = "Anims.j2a"  # I can probably just leave this hardcoded
    # todo: throw error if file not found
    # The archive is binary data: open it in binary mode so that no newline
    # translation is applied to the bytes read.
    with open(filename, 'rb') as j2a:
        alibHeader = parseAlibHeader(j2a)  # find animation sets
        # todo: verify file with checksum
        print("{} found.".format(filename))
        print("Total size: {} bytes".format(alibHeader["file size"]))
        print("{} animation sets found.\n".format(alibHeader["set count"]))
        # Re-running the script must not crash on the directory left by a
        # previous run; any other mkdir failure is still reported.
        try:
            mkdir("sounds")
        except OSError:
            if not isdir("sounds"):
                raise
        wavName = "sounds/sfx_{}_{}.wav"
        # for each set
        for setNum, offset in enumerate(alibHeader["set offsets"]):
            animHeader = parseAnimHeader(j2a, offset)  # find sample data
            print("Decompressing data for set {}...".format(setNum))
            # find samples
            decompressed = decompressInFile(
                j2a,
                sampleDataOffset(offset, animHeader),
                animHeader["sample data"]["compressed"])
            samples = parseSampleData(decompressed,
                                      animHeader["sample count"])
            print("{} samples found. Writing to disk...".format(len(samples)))
            # write samples
            for sampleNum, sample in enumerate(samples):
                writeWav2(wavName.format(setNum, sampleNum), sample)
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|