Implement WaveFileReader

This commit is contained in:
Daniel Wolf 2019-11-04 20:23:32 +01:00
parent a84c3a0146
commit 59845f9094
29 changed files with 868 additions and 0 deletions

View File

@ -0,0 +1,616 @@
package com.rhubarb_lip_sync.audio
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import java.io.EOFException
import java.io.FileNotFoundException
import java.io.IOException
import java.io.RandomAccessFile
import java.lang.RuntimeException
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.nio.charset.StandardCharsets
import java.nio.file.Path
/**
* Creates an [AudioSource] representing the specified WAVE file.
*
* @throws FileNotFoundException if the specified file does not exist.
* @throws InvalidWaveFileException if the specified file is not a valid WAVE file.
* @throws UnsupportedWaveFileException if the specified file cannot be read due to limitations in the reader.
*/
suspend fun createWaveFileReader(path: Path): AutoCloseableAudioSource = withContext(Dispatchers.IO) {
val file = RandomAccessFile(path.toString(), "r")
val waveFileInfo = getWaveFileInfo(LittleEndianReader(file))
return@withContext when (waveFileInfo.sampleFormat) {
SampleFormat.UInt8 -> UInt8WaveFileReader(waveFileInfo, file)
SampleFormat.Int16 -> Int16WaveFileReader(waveFileInfo, file)
SampleFormat.Int24 -> Int24WaveFileReader(waveFileInfo, file)
SampleFormat.Int32 -> Int32WaveFileReader(waveFileInfo, file)
SampleFormat.Float32 -> Float32WaveFileReader(waveFileInfo, file)
SampleFormat.Float64 -> Float64WaveFileReader(waveFileInfo, file)
}
}
/** Indicates that a file is not a valid WAVE file. */
class InvalidWaveFileException(details: String? = null)
: IOException("File is not a valid WAVE file.${details?.let { " $it" } ?: ""}")
/** Indicates that a WAVE file cannot be read due to limitations in the reader. */
class UnsupportedWaveFileException(details: String)
: IOException("WAVE file cannot be read. $details")
private abstract class AbstractWaveFileReader(
protected val waveFileInfo: WaveFileInfo,
private val file: RandomAccessFile
) : AutoCloseableAudioSource {
override val sampleRate: Int = waveFileInfo.frameRate
override val size: Int = waveFileInfo.frameCount
protected fun readByteBuffer(startFrameIndex: Int, endFrameIndex: Int): ByteBuffer {
if (startFrameIndex < 0 || startFrameIndex > endFrameIndex || endFrameIndex > waveFileInfo.frameCount) {
throw RuntimeException("Cannot read from frame $startFrameIndex to $endFrameIndex in ${waveFileInfo.frameCount}-frame file.")
}
val bufferSize = (endFrameIndex - startFrameIndex).toLong() * waveFileInfo.bytesPerFrame
if (bufferSize > Int.MAX_VALUE) {
throw RuntimeException("Cannot acquire $bufferSize-byte buffer.")
}
val buffer = ByteBuffer.allocate(bufferSize.toInt())
buffer.order(ByteOrder.LITTLE_ENDIAN)
file.channel.read(buffer, waveFileInfo.dataOffset + startFrameIndex * waveFileInfo.bytesPerFrame)
buffer.position(0)
return buffer
}
override fun close() {
file.close()
}
}
private class UInt8WaveFileReader(waveFileInfo: WaveFileInfo, file: RandomAccessFile) :
AbstractWaveFileReader(waveFileInfo, file)
{
override suspend fun getSamples(start: Int, end: Int): SampleArray = withContext(Dispatchers.IO) {
val sampleBuffer = readByteBuffer(start, end)
val frameCount = end - start
val channelCount = waveFileInfo.channelCount
val result = SampleArray(frameCount)
val factor = 1.0f / 0x80 / channelCount
for (frameIndex in 0 until frameCount) {
var frameSum = 0.0f
repeat (channelCount) {
frameSum += sampleBuffer.getUByte().toInt() - 0x80
}
val frame = frameSum * factor
result[frameIndex] = frame
}
return@withContext result
}
}
private class Int16WaveFileReader(waveFileInfo: WaveFileInfo, file: RandomAccessFile) :
AbstractWaveFileReader(waveFileInfo, file)
{
override suspend fun getSamples(start: Int, end: Int): SampleArray = withContext(Dispatchers.IO) {
val sampleBuffer = readByteBuffer(start, end).asShortBuffer()
val frameCount = end - start
val channelCount = waveFileInfo.channelCount
val result = SampleArray(frameCount)
val factor = 1.0f / 0x8000 / channelCount
for (frameIndex in 0 until frameCount) {
var frameSum = 0.0f
repeat (channelCount) {
frameSum += sampleBuffer.get()
}
val frame = frameSum * factor
result[frameIndex] = frame
}
return@withContext result
}
}
private class Int24WaveFileReader(waveFileInfo: WaveFileInfo, file: RandomAccessFile) :
AbstractWaveFileReader(waveFileInfo, file)
{
override suspend fun getSamples(start: Int, end: Int): SampleArray = withContext(Dispatchers.IO) {
val sampleBuffer = readByteBuffer(start, end)
val frameCount = end - start
val channelCount = waveFileInfo.channelCount
val result = SampleArray(frameCount)
val factor = 1.0f / 0x800000 / channelCount
for (frameIndex in 0 until frameCount) {
var frameSum = 0.0f
repeat (channelCount) {
frameSum += sampleBuffer.getInt24()
}
val frame = frameSum * factor
result[frameIndex] = frame
}
return@withContext result
}
}
private class Int32WaveFileReader(waveFileInfo: WaveFileInfo, file: RandomAccessFile) :
AbstractWaveFileReader(waveFileInfo, file)
{
override suspend fun getSamples(start: Int, end: Int): SampleArray = withContext(Dispatchers.IO) {
val sampleBuffer = readByteBuffer(start, end).asIntBuffer()
val frameCount = end - start
val channelCount = waveFileInfo.channelCount
val result = SampleArray(frameCount)
val factor = 1.0f / 0x80000000 / channelCount
for (frameIndex in 0 until frameCount) {
var frameSum = 0.0f
repeat (channelCount) {
frameSum += sampleBuffer.get()
}
val frame = frameSum * factor
result[frameIndex] = frame
}
return@withContext result
}
}
private class Float32WaveFileReader(waveFileInfo: WaveFileInfo, file: RandomAccessFile) :
AbstractWaveFileReader(waveFileInfo, file)
{
override suspend fun getSamples(start: Int, end: Int): SampleArray = withContext(Dispatchers.IO) {
val sampleBuffer = readByteBuffer(start, end).asFloatBuffer()
val frameCount = end - start
val channelCount = waveFileInfo.channelCount
val result = SampleArray(frameCount)
val factor = 1.0f / channelCount
for (frameIndex in 0 until frameCount) {
var frameSum = 0.0f
repeat (channelCount) {
frameSum += sampleBuffer.get()
}
val frame = frameSum * factor
result[frameIndex] = frame
}
return@withContext result
}
}
private class Float64WaveFileReader(waveFileInfo: WaveFileInfo, file: RandomAccessFile) :
AbstractWaveFileReader(waveFileInfo, file)
{
override suspend fun getSamples(start: Int, end: Int): SampleArray = withContext(Dispatchers.IO) {
val sampleBuffer = readByteBuffer(start, end).asDoubleBuffer()
val frameCount = end - start
val channelCount = waveFileInfo.channelCount
val result = SampleArray(frameCount)
val factor = 1.0 / channelCount
for (frameIndex in 0 until frameCount) {
var frameSum = 0.0
repeat (channelCount) {
frameSum += sampleBuffer.get()
}
val frame = frameSum * factor
result[frameIndex] = frame.toFloat()
}
return@withContext result
}
}
private enum class SampleFormat {
UInt8,
Int16,
Int24,
Int32,
Float32,
Float64
}
private data class WaveFileInfo(
val sampleFormat: SampleFormat,
val channelCount: Int,
// The number of frames per second, one frame consisting of one sample per channel
val frameRate: Int,
val frameCount: Int,
val bytesPerFrame: Int,
// The offset, in bytes, of the raw audio data within the file
val dataOffset: Long
)
private fun getWaveFileInfo(reader: LittleEndianReader): WaveFileInfo {
try {
val rootChunkId = reader.readFourCC()
if (rootChunkId != "RIFF") {
throw InvalidWaveFileException()
}
val rootChunkSize: Long = reader.readDWord()
val expectedFileSize: Long = reader.position + rootChunkSize
if (reader.size < expectedFileSize) {
throw InvalidWaveFileException("File is incomplete.")
}
val waveId = reader.readFourCC()
if (waveId != "WAVE") {
throw InvalidWaveFileException("File format is not WAVE, but $waveId.")
}
data class FormatInfo(val sampleFormat: SampleFormat, val channelCount: Int, val frameRate: Int, val bytesPerFrame: Int)
data class DataInfo(val dataOffset: Long, val dataByteCount: Long)
var formatInfo: FormatInfo? = null
var dataInfo: DataInfo? = null
while (reader.position < expectedFileSize && (formatInfo == null || dataInfo == null)) {
val chunkId = reader.readFourCC()
val chunkSize: Long = reader.readDWord()
val chunkEnd: Long = roundUpToEven(reader.position + chunkSize)
when (chunkId) {
"fmt " -> {
// Format chunk
var codec = Codec(reader.readWord())
val channelCount = reader.readWord().also {
// This is no technical limitation, just common sense.
val maxChannelCount = 6
if (it > maxChannelCount) {
throw UnsupportedWaveFileException("Channel count $it exceeds maximum value of $maxChannelCount.")
}
}
val frameRate = reader.readDWord().let { longFrameRate ->
// Technically, it would be enough to check that the frame rate doesn't
// exceed `Int.MAX_VALUE`.
val maxSupportedFrameRate = 192_000
if (longFrameRate > maxSupportedFrameRate) {
// Although it's technically the *frame* rate, use the more common term
// *sample* rate for error message.
throw UnsupportedWaveFileException("Sample rate $longFrameRate exceeds maximum of $maxSupportedFrameRate.")
}
return@let longFrameRate.toInt()
}
reader.skipBytes(4) // Skip bytes per second
val bytesPerFrame = reader.readWord()
val bitsPerSampleOnDisk = reader.readWord()
var bitsPerSample = bitsPerSampleOnDisk
if (chunkSize > 16) {
val extensionSize = reader.readWord()
if (extensionSize >= 22) {
// Read extension fields
bitsPerSample = reader.readWord()
reader.skipBytes(4) // Skip channel mask
val codecOverride = Codec(reader.readWord())
if (codec == Codec.Extensible) {
codec = codecOverride
}
}
}
val (sampleFormat, bytesPerSample) = when (codec) {
Codec.Pcm -> when (bitsPerSample) {
8 -> Pair(SampleFormat.UInt8, 1)
16 -> Pair(SampleFormat.Int16, 2)
24 -> Pair(SampleFormat.Int24, 3)
32 -> Pair(SampleFormat.Int32, 4)
else -> throw UnsupportedWaveFileException("Unsupported bit depth for $codec data: $bitsPerSample bits per sample.")
}
Codec.Float -> when (bitsPerSample) {
32 -> Pair(SampleFormat.Float32, 4)
64 -> Pair(SampleFormat.Float64, 8)
else -> throw UnsupportedWaveFileException("Unsupported bit depth for $codec data: $bitsPerSample bits per sample.")
}
else -> throw UnsupportedWaveFileException(
"Unsupported audio codec: $codec. Only the uncompressed codecs ${Codec.Pcm} and ${Codec.Float} are supported.")
}
if (bytesPerFrame != bytesPerSample * channelCount) {
throw UnsupportedWaveFileException("Unsupported sample structure.")
}
formatInfo = FormatInfo(sampleFormat, channelCount, frameRate, bytesPerFrame)
}
"data" -> {
// Data chunk
val dataOffset: Long = reader.position
val dataByteCount = chunkSize
dataInfo = DataInfo(dataOffset, dataByteCount)
}
}
reader.position = chunkEnd
}
if (formatInfo == null) throw InvalidWaveFileException("Missing format chunk.")
if (dataInfo == null) throw InvalidWaveFileException("Missing data chunk.")
val frameCount: Long = dataInfo.dataByteCount / formatInfo.bytesPerFrame
if (frameCount > Int.MAX_VALUE) {
throw UnsupportedWaveFileException("Cannot read audio file with more than ${Int.MAX_VALUE} samples per channel.")
}
return WaveFileInfo(
formatInfo.sampleFormat,
formatInfo.channelCount,
formatInfo.frameRate,
frameCount.toInt(),
formatInfo.bytesPerFrame,
dataInfo.dataOffset
)
} catch (e: EOFException) {
throw InvalidWaveFileException("Unexpected end of file.")
}
}
private fun roundUpToEven(i: Long) = (i + 1) and 1.inv()
class LittleEndianReader(private val file: RandomAccessFile) {
private fun readByte(): Int = file.readUnsignedByte()
/** Reads an unsigned 16-bit word and returns it as an Int. */
fun readWord(): Int = readByte() or (readByte() shl 8)
/** Reads an unsigned 32-bit double-word and returns it as a Long. */
fun readDWord(): Long =
(readByte() or (readByte() shl 8) or (readByte() shl 16) or (readByte() shl 24)).toUInt().toLong()
fun readFourCC(): String {
val bytes = ByteArray(4)
file.readFully(bytes)
return String(bytes, StandardCharsets.US_ASCII)
}
val size: Long = file.length()
var position: Long
get() = file.filePointer
set(value) = file.seek(value)
fun skipBytes(byteCount: Int) = file.skipBytes(byteCount)
}
private fun ByteBuffer.getUByte() = this.get().toUByte()
private fun ByteBuffer.getInt24() =
((getUByte().toInt() shl 8) or (getUByte().toInt() shl 16) or (getUByte().toInt() shl 24)) shr 8
private inline class Codec(val value: Int) {
companion object {
val Pcm = Codec(0x0001)
val Float = Codec(0x0003)
val Extensible = Codec(0xfffe)
}
override fun toString() = when (value) {
0x0001 -> "PCM"
0x0002 -> "Microsoft ADPCM"
0x0003 -> "IEEE Float"
0x0004 -> "Compaq VSELP"
0x0005 -> "IBM CVSD"
0x0006 -> "Microsoft A-law"
0x0007 -> "Microsoft µ-law"
0x0008 -> "Microsoft DTS"
0x0009 -> "DRM"
0x000a -> "WMA 9 Speech"
0x000b -> "Microsoft Windows Media RT Voice"
0x0010 -> "OKI-ADPCM"
0x0011 -> "Intel IMA/DVI-ADPCM"
0x0012 -> "Videologic Mediaspace ADPCM"
0x0013 -> "Sierra ADPCM"
0x0014 -> "Antex G.723 ADPCM"
0x0015 -> "DSP Solutions DIGISTD"
0x0016 -> "DSP Solutions DIGIFIX"
0x0017 -> "Dialoic OKI ADPCM"
0x0018 -> "Media Vision ADPCM"
0x0019 -> "HP CU"
0x001a -> "HP Dynamic Voice"
0x0020 -> "Yamaha ADPCM"
0x0021 -> "SONARC Speech Compression"
0x0022 -> "DSP Group True Speech"
0x0023 -> "Echo Speech Corp."
0x0024 -> "Virtual Music Audiofile AF36"
0x0025 -> "Audio Processing Tech."
0x0026 -> "Virtual Music Audiofile AF10"
0x0027 -> "Aculab Prosody 1612"
0x0028 -> "Merging Tech. LRC"
0x0030 -> "Dolby AC2"
0x0031 -> "Microsoft GSM610"
0x0032 -> "MSN Audio"
0x0033 -> "Antex ADPCME"
0x0034 -> "Control Resources VQLPC"
0x0035 -> "DSP Solutions DIGIREAL"
0x0036 -> "DSP Solutions DIGIADPCM"
0x0037 -> "Control Resources CR10"
0x0038 -> "Natural MicroSystems VBX ADPCM"
0x0039 -> "Crystal Semiconductor IMA ADPCM"
0x003a -> "Echo Speech ECHOSC3"
0x003b -> "Rockwell ADPCM"
0x003c -> "Rockwell DIGITALK"
0x003d -> "Xebec Multimedia"
0x0040 -> "Antex G.721 ADPCM"
0x0041 -> "Antex G.728 CELP"
0x0042 -> "Microsoft MSG723"
0x0043 -> "IBM AVC ADPCM"
0x0045 -> "ITU-T G.726"
0x0050 -> "Microsoft MPEG"
0x0051 -> "RT23 or PAC"
0x0052 -> "InSoft RT24"
0x0053 -> "InSoft PAC"
0x0055 -> "MP3"
0x0059 -> "Cirrus"
0x0060 -> "Cirrus Logic"
0x0061 -> "ESS Tech. PCM"
0x0062 -> "Voxware Inc."
0x0063 -> "Canopus ATRAC"
0x0064 -> "APICOM G.726 ADPCM"
0x0065 -> "APICOM G.722 ADPCM"
0x0066 -> "Microsoft DSAT"
0x0067 -> "Micorsoft DSAT DISPLAY"
0x0069 -> "Voxware Byte Aligned"
0x0070 -> "Voxware AC8"
0x0071 -> "Voxware AC10"
0x0072 -> "Voxware AC16"
0x0073 -> "Voxware AC20"
0x0074 -> "Voxware MetaVoice"
0x0075 -> "Voxware MetaSound"
0x0076 -> "Voxware RT29HW"
0x0077 -> "Voxware VR12"
0x0078 -> "Voxware VR18"
0x0079 -> "Voxware TQ40"
0x007a -> "Voxware SC3"
0x007b -> "Voxware SC3"
0x0080 -> "Soundsoft"
0x0081 -> "Voxware TQ60"
0x0082 -> "Microsoft MSRT24"
0x0083 -> "AT&T G.729A"
0x0084 -> "Motion Pixels MVI MV12"
0x0085 -> "DataFusion G.726"
0x0086 -> "DataFusion GSM610"
0x0088 -> "Iterated Systems Audio"
0x0089 -> "Onlive"
0x008a -> "Multitude, Inc. FT SX20"
0x008b -> "Infocom ITS A/S G.721 ADPCM"
0x008c -> "Convedia G729"
0x008d -> "Not specified congruency, Inc."
0x0091 -> "Siemens SBC24"
0x0092 -> "Sonic Foundry Dolby AC3 APDIF"
0x0093 -> "MediaSonic G.723"
0x0094 -> "Aculab Prosody 8kbps"
0x0097 -> "ZyXEL ADPCM"
0x0098 -> "Philips LPCBB"
0x0099 -> "Studer Professional Audio Packed"
0x00a0 -> "Malden PhonyTalk"
0x00a1 -> "Racal Recorder GSM"
0x00a2 -> "Racal Recorder G720.a"
0x00a3 -> "Racal G723.1"
0x00a4 -> "Racal Tetra ACELP"
0x00b0 -> "NEC AAC NEC Corporation"
0x00ff -> "AAC"
0x0100 -> "Rhetorex ADPCM"
0x0101 -> "IBM u-Law"
0x0102 -> "IBM a-Law"
0x0103 -> "IBM ADPCM"
0x0111 -> "Vivo G.723"
0x0112 -> "Vivo Siren"
0x0120 -> "Philips Speech Processing CELP"
0x0121 -> "Philips Speech Processing GRUNDIG"
0x0123 -> "Digital G.723"
0x0125 -> "Sanyo LD ADPCM"
0x0130 -> "Sipro Lab ACEPLNET"
0x0131 -> "Sipro Lab ACELP4800"
0x0132 -> "Sipro Lab ACELP8V3"
0x0133 -> "Sipro Lab G.729"
0x0134 -> "Sipro Lab G.729A"
0x0135 -> "Sipro Lab Kelvin"
0x0136 -> "VoiceAge AMR"
0x0140 -> "Dictaphone G.726 ADPCM"
0x0150 -> "Qualcomm PureVoice"
0x0151 -> "Qualcomm HalfRate"
0x0155 -> "Ring Zero Systems TUBGSM"
0x0160 -> "Microsoft Audio1"
0x0161 -> "Windows Media Audio V2 V7 V8 V9 / DivX audio (WMA) / Alex AC3 Audio"
0x0162 -> "Windows Media Audio Professional V9"
0x0163 -> "Windows Media Audio Lossless V9"
0x0164 -> "WMA Pro over S/PDIF"
0x0170 -> "UNISYS NAP ADPCM"
0x0171 -> "UNISYS NAP ULAW"
0x0172 -> "UNISYS NAP ALAW"
0x0173 -> "UNISYS NAP 16K"
0x0174 -> "MM SYCOM ACM SYC008 SyCom Technologies"
0x0175 -> "MM SYCOM ACM SYC701 G726L SyCom Technologies"
0x0176 -> "MM SYCOM ACM SYC701 CELP54 SyCom Technologies"
0x0177 -> "MM SYCOM ACM SYC701 CELP68 SyCom Technologies"
0x0178 -> "Knowledge Adventure ADPCM"
0x0180 -> "Fraunhofer IIS MPEG2AAC"
0x0190 -> "Digital Theater Systems DTS DS"
0x0200 -> "Creative Labs ADPCM"
0x0202 -> "Creative Labs FASTSPEECH8"
0x0203 -> "Creative Labs FASTSPEECH10"
0x0210 -> "UHER ADPCM"
0x0215 -> "Ulead DV ACM"
0x0216 -> "Ulead DV ACM"
0x0220 -> "Quarterdeck Corp."
0x0230 -> "I-Link VC"
0x0240 -> "Aureal Semiconductor Raw Sport"
0x0241 -> "ESST AC3"
0x0250 -> "Interactive Products HSX"
0x0251 -> "Interactive Products RPELP"
0x0260 -> "Consistent CS2"
0x0270 -> "Sony SCX"
0x0271 -> "Sony SCY"
0x0272 -> "Sony ATRAC3"
0x0273 -> "Sony SPC"
0x0280 -> "TELUM Telum Inc."
0x0281 -> "TELUMIA Telum Inc."
0x0285 -> "Norcom Voice Systems ADPCM"
0x0300 -> "Fujitsu FM TOWNS SND"
0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308 -> "Fujitsu (not specified)"
0x0350 -> "Micronas Semiconductors, Inc. Development"
0x0351 -> "Micronas Semiconductors, Inc. CELP833"
0x0400 -> "Brooktree Digital"
0x0401 -> "Intel Music Coder (IMC)"
0x0402 -> "Ligos Indeo Audio"
0x0450 -> "QDesign Music"
0x0500 -> "On2 VP7 On2 Technologies"
0x0501 -> "On2 VP6 On2 Technologies"
0x0680 -> "AT&T VME VMPCM"
0x0681 -> "AT&T TCP"
0x0700 -> "YMPEG Alpha (dummy for MPEG-2 compressor)"
0x08ae -> "ClearJump LiteWave (lossless)"
0x1000 -> "Olivetti GSM"
0x1001 -> "Olivetti ADPCM"
0x1002 -> "Olivetti CELP"
0x1003 -> "Olivetti SBC"
0x1004 -> "Olivetti OPR"
0x1100 -> "Lernout & Hauspie"
0x1101 -> "Lernout & Hauspie CELP codec"
0x1102, 0x1103, 0x1104 -> "Lernout & Hauspie SBC codec"
0x1400 -> "Norris Comm. Inc."
0x1401 -> "ISIAudio"
0x1500 -> "AT&T Soundspace Music Compression"
0x181c -> "VoxWare RT24 speech codec"
0x181e -> "Lucent elemedia AX24000P Music codec"
0x1971 -> "Sonic Foundry LOSSLESS"
0x1979 -> "Innings Telecom Inc. ADPCM"
0x1c07 -> "Lucent SX8300P speech codec"
0x1c0c -> "Lucent SX5363S G.723 compliant codec"
0x1f03 -> "CUseeMe DigiTalk (ex-Rocwell)"
0x1fc4 -> "NCT Soft ALF2CD ACM"
0x2000 -> "FAST Multimedia DVM"
0x2001 -> "Dolby DTS (Digital Theater System)"
0x2002 -> "RealAudio 1 / 2 14.4"
0x2003 -> "RealAudio 1 / 2 28.8"
0x2004 -> "RealAudio G2 / 8 Cook (low bitrate)"
0x2005 -> "RealAudio 3 / 4 / 5 Music (DNET)"
0x2006 -> "RealAudio 10 AAC (RAAC)"
0x2007 -> "RealAudio 10 AAC+ (RACP)"
0x2500 -> "Reserved range to 0x2600 Microsoft"
0x3313 -> "makeAVIS (ffvfw fake AVI sound from AviSynth scripts)"
0x4143 -> "Divio MPEG-4 AAC audio"
0x4201 -> "Nokia adaptive multirate"
0x4243 -> "Divio G726 Divio, Inc."
0x434c -> "LEAD Speech"
0x564c -> "LEAD Vorbis"
0x5756 -> "WavPack Audio"
0x674f -> "Ogg Vorbis (mode 1)"
0x6750 -> "Ogg Vorbis (mode 2)"
0x6751 -> "Ogg Vorbis (mode 3)"
0x676f -> "Ogg Vorbis (mode 1+)"
0x6770 -> "Ogg Vorbis (mode 2+)"
0x6771 -> "Ogg Vorbis (mode 3+)"
0x7000 -> "3COM NBX 3Com Corporation"
0x706d -> "FAAD AAC"
0x7a21 -> "GSM-AMR (CBR, no SID)"
0x7a22 -> "GSM-AMR (VBR, including SID)"
0xa100 -> "Comverse Infosys Ltd. G723 1"
0xa101 -> "Comverse Infosys Ltd. AVQSBC"
0xa102 -> "Comverse Infosys Ltd. OLDSBC"
0xa103 -> "Symbol Technologies G729A"
0xa104 -> "VoiceAge AMR WB VoiceAge Corporation"
0xa105 -> "Ingenient Technologies Inc. G726"
0xa106 -> "ISO/MPEG-4 advanced audio Coding"
0xa107 -> "Encore Software Ltd G726"
0xa109 -> "Speex ACM Codec xiph.org"
0xdfac -> "DebugMode SonicFoundry Vegas FrameServer ACM Codec"
0xf1ac -> "Free Lossless Audio Codec FLAC"
0xfffe -> "Extensible"
0xffff -> "Development"
else -> "0x%04X".format(value)
}
}

View File

@ -0,0 +1,248 @@
package com.rhubarb_lip_sync.audio
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.api.Assertions.assertThatCode
import org.assertj.core.api.Assertions.assertThatThrownBy
import org.assertj.core.api.Assertions.within
import org.spekframework.spek2.Spek
import org.spekframework.spek2.style.specification.describe
import java.io.FileNotFoundException
import java.io.IOException
import java.io.RandomAccessFile
import java.lang.Integer.max
import java.lang.RuntimeException
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.Paths
import java.nio.file.StandardCopyOption
import kotlin.math.PI
import kotlin.math.abs
import kotlin.math.sin
object WaveFileReaderTest: Spek({
describe("createWaveFileReader") {
it("throws FileNotFoundException if the file does not exist") {
assertThatThrownBy { runBlocking { createWaveFileReader(Paths.get("this file does not exist")) } }
.isInstanceOf(FileNotFoundException::class.java)
}
it("throws InvalidWaveFileException if the file has zero bytes") {
assertThatThrownBy { runBlocking { createWaveFileReader(getAudioFilePath("zero-bytes.wav")) } }
.isInstanceOf(InvalidWaveFileException::class.java)
.hasMessage("File is not a valid WAVE file. Unexpected end of file.")
}
it("throws InvalidWaveFileException if the file is incomplete") {
assertThatThrownBy { runBlocking { createWaveFileReader(getAudioFilePath("incomplete.wav")) } }
.isInstanceOf(InvalidWaveFileException::class.java)
.hasMessage("File is not a valid WAVE file. File is incomplete.")
}
it("throws InvalidWaveFileException if the file format does not match") {
assertThatThrownBy { runBlocking { createWaveFileReader(getAudioFilePath("sine-triangle.ogg")) } }
.isInstanceOf(InvalidWaveFileException::class.java)
.hasMessage("File is not a valid WAVE file.")
}
it("throws UnsupportedWaveFileException if the encoding is not supported") {
assertThatThrownBy { runBlocking { createWaveFileReader(getAudioFilePath("sine-triangle-flac-ffmpeg.wav")) } }
.isInstanceOf(UnsupportedWaveFileException::class.java)
.hasMessage("WAVE file cannot be read. Unsupported audio codec: Free Lossless Audio Codec FLAC. Only the uncompressed codecs PCM and IEEE Float are supported.")
}
it("throws UnsupportedWaveFileException if the codec is unknown") {
assertThatThrownBy { runBlocking { createWaveFileReader(getAudioFilePath("sine-triangle-vorbis-ffmpeg.wav")) } }
.isInstanceOf(UnsupportedWaveFileException::class.java)
.hasMessage("WAVE file cannot be read. Unsupported audio codec: 0x566F. Only the uncompressed codecs PCM and IEEE Float are supported.")
}
it("does not throw on valid file") {
assertThatCode {
runBlocking { createWaveFileReader(getAudioFilePath("sine-triangle-float32-ffmpeg.wav")) }
}.doesNotThrowAnyException()
}
it("throws on invalid zero-length file") {
// Zero-length files created by Adobe Audition are missing their data chunk
assertThatThrownBy {
runBlocking { createWaveFileReader(getAudioFilePath("zero-frames-audition.wav")) }
}.hasMessage("File is not a valid WAVE file. Missing data chunk.")
}
it("does not throw on valid zero-length file") {
assertThatCode {
runBlocking { createWaveFileReader(getAudioFilePath("zero-frames-ffmpeg.wav")) }
}.doesNotThrowAnyException()
}
listOf(
"sine-triangle-uint8-audition.wav",
"sine-triangle-uint8-ffmpeg.wav",
"sine-triangle-uint8-soundforge.wav",
"sine-triangle-int16-audacity.wav",
"sine-triangle-int16-audition.wav",
"sine-triangle-int16-ffmpeg.wav",
"sine-triangle-int16-soundforge.wav",
"sine-triangle-int24-audacity.wav",
"sine-triangle-int24-audition.wav",
"sine-triangle-int24-ffmpeg.wav",
"sine-triangle-int24-soundforge.wav",
"sine-triangle-int32-ffmpeg.wav",
"sine-triangle-int32-soundforge.wav",
"sine-triangle-float32-audacity.wav",
"sine-triangle-float32-audition.wav",
"sine-triangle-float32-ffmpeg.wav",
"sine-triangle-float32-soundforge.wav",
"sine-triangle-float64-ffmpeg.wav"
).forEach { fileName ->
val path = getAudioFilePath(fileName)
describe("reader for $fileName") {
describe("sampleRate") {
it("is 48000 Hz") {
val reader = runBlocking { createWaveFileReader(path) }
assertThat(reader.sampleRate).isEqualTo(48000)
}
}
val expectedSampleCount = 10 * 48000
describe("size") {
it("is $expectedSampleCount samples") {
val reader = runBlocking { createWaveFileReader(path) }
assertThat(reader.size).isEqualTo(expectedSampleCount)
}
}
val epsilon = when {
"uint8" in fileName -> 1.0f / 0x80
"int16" in fileName -> 1.0f / 0x8000
"int24" in fileName -> 1.0f / 0x800000
"int32" in fileName -> 1.0f / 0x80000000
"float" in fileName -> 0.0f
else -> throw RuntimeException("File name $fileName does not contain sample format information.")
}
describe("getSamples") {
it("throws if arguments are out of bounds") {
val reader = runBlocking { createWaveFileReader(path) }
assertThatThrownBy { runBlocking { reader.getSamples(-1, 1) } }
.hasMessage("Cannot read from frame -1 to 1 in 480000-frame file.")
assertThatThrownBy { runBlocking { reader.getSamples(10, 9) } }
.hasMessage("Cannot read from frame 10 to 9 in 480000-frame file.")
assertThatThrownBy { runBlocking { reader.getSamples(reader.size - 1, reader.size + 1) } }
.hasMessage("Cannot read from frame 479999 to 480001 in 480000-frame file.")
}
it("correctly reads the entire file") {
runBlocking {
val reader = createWaveFileReader(path)
val samples = reader.getSamples(0, reader.size)
for (sampleIndex in samples.indices) {
assertThat(samples[sampleIndex])
.describedAs("sample $sampleIndex")
.isCloseTo(getSineTriangleSample(sampleIndex), within(epsilon))
}
}
}
it("correctly reads overlapping blocks in parallel") {
runBlocking {
val reader = createWaveFileReader(path)
val blockCount = 5
val overlap = 10
coroutineScope {
for (i in 0 until blockCount) {
launch {
val start = max(0, reader.size * i / blockCount - overlap)
val end = reader.size * (i + 1) / blockCount
val samples = reader.getSamples(start, end)
for (sampleIndex in start until end ) {
assertThat(samples[sampleIndex -start])
.describedAs("sample $sampleIndex")
.isCloseTo(getSineTriangleSample(sampleIndex), within(epsilon))
}
}
}
}
}
}
it("allows reading zero samples") {
val reader = runBlocking { createWaveFileReader(path) }
for (sampleIndex in listOf(0, 10, expectedSampleCount - 1, expectedSampleCount)) {
assertThat(runBlocking { reader.getSamples(sampleIndex, sampleIndex) })
.isEqualTo(SampleArray(0))
}
}
}
describe("close") {
it("frees all file handles") {
// Open a temporary copy
val tempPath = createTempFile().toPath()
Files.copy(path, tempPath, StandardCopyOption.REPLACE_EXISTING)
runBlocking { createWaveFileReader(tempPath) }.use {
// File cannot be deleted while open
assertThatThrownBy { Files.delete(tempPath) }.isInstanceOf(IOException::class.java)
}
// File can be deleted once closed
assertThatCode { Files.delete(tempPath) }.doesNotThrowAnyException()
}
it("does not throw if invoked repeatedly") {
val reader = runBlocking { createWaveFileReader(path) }
repeat(3) {
assertThatCode { reader.close() }.doesNotThrowAnyException()
}
}
}
}
}
}
})
fun getAudioFilePath(pathSuffix: String): Path {
val resource = WaveFileReaderTest::class.java.classLoader.getResource("audio/WaveFileReader/$pathSuffix")
?: throw Exception("Could not locate resource $pathSuffix.")
return Paths.get(resource.toURI())
}
// Returns one downmixed sample of the test signal.
fun getSineTriangleSample(sampleIndex: Int): Float {
val (left, right) = getSineTriangleFrame(sampleIndex)
return (left + right) / 2.0f
}
// Returns the left and right sample of one frame of the test signal.
// The test signal consists of a 1 kHz sine wave (at 48 kHz sample rate) on the left channel and a
// 1 kHz triangle wave on the right channel.
fun getSineTriangleFrame(sampleIndex: Int): Pair<Float, Float> {
val period = 48 // Samples per period
val t: Float = sampleIndex.rem(period).toFloat() / period // [0, 1[
// The test signal contains a sine wave on one channel, a triangle wave on the other.
// Return the downmixed mono result.
val sine = sin(t * 2 * PI.toFloat())
val triangle = abs((t + 0.75f).rem(1f) * 4f - 2f) - 1f
return Pair(sine, triangle)
}
// Generates the raw test signal file from which all other files are derived.
// Output format is big-endian 32-bit IEEE floats, stereo, 48,000 Hz.
// FFmpeg command line: `ffmpeg -f f32be -ar 48000 -ac 2 -i sine-triangle.raw -acodec pcm_f32le sine-triangle-float32-ffmpeg.wav`
@Suppress("unused")
fun generateRawFile() {
RandomAccessFile("${System.getProperty("user.home")}/sine-triangle.raw", "rw").use { file ->
for (sampleIndex in 0 until 48 * 1000 * 10) {
val (left, right) = getSineTriangleFrame(sampleIndex)
file.writeFloat(left)
file.writeFloat(right)
}
}
}

View File

@ -0,0 +1,4 @@
This directory contains test files for the WAVE file reader.
All files starting with _sine-rect_ contain the same 10-second stereo signal sampled at 48,000 Hz. The left channel contains a 1 kHz sine wave, the right channel contains a 1 kHz triangle wave. As those signals are strictly periodic, Git can compress these files very efficiently.