Indent code files with spaces rather than tabs

This commit is contained in:
Daniel Wolf 2024-12-09 08:31:59 +01:00
parent 71259421a9
commit b365c4c1d5
147 changed files with 8098 additions and 8096 deletions

View File

@ -14,17 +14,17 @@ add_subdirectory("extras/EsotericSoftwareSpine")
# Install misc. files # Install misc. files
install( install(
FILES README.adoc LICENSE.md CHANGELOG.md FILES README.adoc LICENSE.md CHANGELOG.md
DESTINATION . DESTINATION .
) )
# Configure CPack # Configure CPack
function(get_short_system_name variable) function(get_short_system_name variable)
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
set(${variable} "macOS" PARENT_SCOPE) set(${variable} "macOS" PARENT_SCOPE)
else() else()
set(${variable} "${CMAKE_SYSTEM_NAME}" PARENT_SCOPE) set(${variable} "${CMAKE_SYSTEM_NAME}" PARENT_SCOPE)
endif() endif()
endfunction() endfunction()
set(CPACK_PACKAGE_NAME ${appName}) set(CPACK_PACKAGE_NAME ${appName})

View File

@ -1,11 +1,11 @@
cmake_minimum_required(VERSION 3.2) cmake_minimum_required(VERSION 3.2)
set(afterEffectsFiles set(afterEffectsFiles
"Rhubarb Lip Sync.jsx" "Rhubarb Lip Sync.jsx"
"README.adoc" "README.adoc"
) )
install( install(
FILES ${afterEffectsFiles} FILES ${afterEffectsFiles}
DESTINATION "extras/AdobeAfterEffects" DESTINATION "extras/AdobeAfterEffects"
) )

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +1,18 @@
cmake_minimum_required(VERSION 3.2) cmake_minimum_required(VERSION 3.2)
add_custom_target( add_custom_target(
rhubarbForSpine ALL rhubarbForSpine ALL
"./gradlew" "build" "./gradlew" "build"
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMENT "Building Rhubarb for Spine through Gradle." COMMENT "Building Rhubarb for Spine through Gradle."
) )
install( install(
DIRECTORY "build/libs/" DIRECTORY "build/libs/"
DESTINATION "extras/EsotericSoftwareSpine" DESTINATION "extras/EsotericSoftwareSpine"
) )
install( install(
FILES README.adoc FILES README.adoc
DESTINATION "extras/EsotericSoftwareSpine" DESTINATION "extras/EsotericSoftwareSpine"
) )

View File

@ -14,112 +14,112 @@ import tornadofx.setValue
import java.util.concurrent.ExecutorService import java.util.concurrent.ExecutorService
class AnimationFileModel(val parentModel: MainModel, animationFilePath: Path, private val executor: ExecutorService) { class AnimationFileModel(val parentModel: MainModel, animationFilePath: Path, private val executor: ExecutorService) {
val spineJson = SpineJson(animationFilePath) val spineJson = SpineJson(animationFilePath)
val slotsProperty = SimpleObjectProperty<ObservableList<String>>() val slotsProperty = SimpleObjectProperty<ObservableList<String>>()
private var slots: ObservableList<String> by slotsProperty private var slots: ObservableList<String> by slotsProperty
val mouthSlotProperty: SimpleStringProperty = SimpleStringProperty().alsoListen { val mouthSlotProperty: SimpleStringProperty = SimpleStringProperty().alsoListen {
val mouthSlot = this.mouthSlot val mouthSlot = this.mouthSlot
val mouthNaming = if (mouthSlot != null) val mouthNaming = if (mouthSlot != null)
MouthNaming.guess(spineJson.getSlotAttachmentNames(mouthSlot)) MouthNaming.guess(spineJson.getSlotAttachmentNames(mouthSlot))
else null else null
this.mouthNaming = mouthNaming this.mouthNaming = mouthNaming
mouthShapes = if (mouthSlot != null && mouthNaming != null) { mouthShapes = if (mouthSlot != null && mouthNaming != null) {
val mouthNames = spineJson.getSlotAttachmentNames(mouthSlot) val mouthNames = spineJson.getSlotAttachmentNames(mouthSlot)
MouthShape.values().filter { mouthNames.contains(mouthNaming.getName(it)) } MouthShape.values().filter { mouthNames.contains(mouthNaming.getName(it)) }
} else listOf() } else listOf()
mouthSlotError = if (mouthSlot != null) mouthSlotError = if (mouthSlot != null)
null null
else else
"No slot with mouth drawings specified." "No slot with mouth drawings specified."
} }
private var mouthSlot: String? by mouthSlotProperty private var mouthSlot: String? by mouthSlotProperty
val mouthSlotErrorProperty = SimpleStringProperty() val mouthSlotErrorProperty = SimpleStringProperty()
private var mouthSlotError: String? by mouthSlotErrorProperty private var mouthSlotError: String? by mouthSlotErrorProperty
val mouthNamingProperty = SimpleObjectProperty<MouthNaming>() val mouthNamingProperty = SimpleObjectProperty<MouthNaming>()
private var mouthNaming: MouthNaming? by mouthNamingProperty private var mouthNaming: MouthNaming? by mouthNamingProperty
val mouthShapesProperty = SimpleObjectProperty<List<MouthShape>>().alsoListen { val mouthShapesProperty = SimpleObjectProperty<List<MouthShape>>().alsoListen {
mouthShapesError = getMouthShapesErrorString() mouthShapesError = getMouthShapesErrorString()
} }
var mouthShapes: List<MouthShape> by mouthShapesProperty var mouthShapes: List<MouthShape> by mouthShapesProperty
private set private set
val mouthShapesErrorProperty = SimpleStringProperty() val mouthShapesErrorProperty = SimpleStringProperty()
private var mouthShapesError: String? by mouthShapesErrorProperty private var mouthShapesError: String? by mouthShapesErrorProperty
val audioFileModelsProperty = SimpleListProperty<AudioFileModel>( val audioFileModelsProperty = SimpleListProperty<AudioFileModel>(
spineJson.audioEvents spineJson.audioEvents
.map { event -> .map { event ->
var audioFileModel: AudioFileModel? = null var audioFileModel: AudioFileModel? = null
val reportResult: (List<MouthCue>) -> Unit = val reportResult: (List<MouthCue>) -> Unit =
{ result -> saveAnimation(audioFileModel!!.animationName, event.name, result) } { result -> saveAnimation(audioFileModel!!.animationName, event.name, result) }
audioFileModel = AudioFileModel(event, this, executor, reportResult) audioFileModel = AudioFileModel(event, this, executor, reportResult)
return@map audioFileModel return@map audioFileModel
} }
.asObservable() .asObservable()
) )
val audioFileModels: ObservableList<AudioFileModel> by audioFileModelsProperty val audioFileModels: ObservableList<AudioFileModel> by audioFileModelsProperty
val busyProperty = SimpleBooleanProperty().apply { val busyProperty = SimpleBooleanProperty().apply {
bind(object : BooleanBinding() { bind(object : BooleanBinding() {
init { init {
for (audioFileModel in audioFileModels) { for (audioFileModel in audioFileModels) {
super.bind(audioFileModel.busyProperty) super.bind(audioFileModel.busyProperty)
} }
} }
override fun computeValue(): Boolean { override fun computeValue(): Boolean {
return audioFileModels.any { it.busy } return audioFileModels.any { it.busy }
} }
}) })
} }
val busy by busyProperty val busy by busyProperty
val validProperty = SimpleBooleanProperty().apply { val validProperty = SimpleBooleanProperty().apply {
val errorProperties = arrayOf(mouthSlotErrorProperty, mouthShapesErrorProperty) val errorProperties = arrayOf(mouthSlotErrorProperty, mouthShapesErrorProperty)
bind(object : BooleanBinding() { bind(object : BooleanBinding() {
init { init {
super.bind(*errorProperties) super.bind(*errorProperties)
} }
override fun computeValue(): Boolean { override fun computeValue(): Boolean {
return errorProperties.all { it.value == null } return errorProperties.all { it.value == null }
} }
}) })
} }
private fun saveAnimation(animationName: String, audioEventName: String, mouthCues: List<MouthCue>) { private fun saveAnimation(animationName: String, audioEventName: String, mouthCues: List<MouthCue>) {
spineJson.createOrUpdateAnimation(mouthCues, audioEventName, animationName, mouthSlot!!, mouthNaming!!) spineJson.createOrUpdateAnimation(mouthCues, audioEventName, animationName, mouthSlot!!, mouthNaming!!)
spineJson.save() spineJson.save()
} }
init { init {
slots = spineJson.slots.asObservable() slots = spineJson.slots.asObservable()
mouthSlot = spineJson.guessMouthSlot() mouthSlot = spineJson.guessMouthSlot()
} }
private fun getMouthShapesErrorString(): String? { private fun getMouthShapesErrorString(): String? {
val missingBasicShapes = MouthShape.basicShapes val missingBasicShapes = MouthShape.basicShapes
.filter{ !mouthShapes.contains(it) } .filter{ !mouthShapes.contains(it) }
if (missingBasicShapes.isEmpty()) return null if (missingBasicShapes.isEmpty()) return null
val result = StringBuilder() val result = StringBuilder()
val missingShapesString = missingBasicShapes.joinToString() val missingShapesString = missingBasicShapes.joinToString()
result.appendln( result.appendln(
if (missingBasicShapes.count() > 1) if (missingBasicShapes.count() > 1)
"Mouth shapes $missingShapesString are missing." "Mouth shapes $missingShapesString are missing."
else else
"Mouth shape $missingShapesString is missing." "Mouth shape $missingShapesString is missing."
) )
val first = MouthShape.basicShapes.first() val first = MouthShape.basicShapes.first()
val last = MouthShape.basicShapes.last() val last = MouthShape.basicShapes.last()
result.append("At least the basic mouth shapes $first-$last need corresponding image attachments.") result.append("At least the basic mouth shapes $first-$last need corresponding image attachments.")
return result.toString() return result.toString()
} }
} }

View File

@ -16,181 +16,181 @@ import java.util.concurrent.ExecutorService
import java.util.concurrent.Future import java.util.concurrent.Future
class AudioFileModel( class AudioFileModel(
audioEvent: SpineJson.AudioEvent, audioEvent: SpineJson.AudioEvent,
private val parentModel: AnimationFileModel, private val parentModel: AnimationFileModel,
private val executor: ExecutorService, private val executor: ExecutorService,
private val reportResult: (List<MouthCue>) -> Unit private val reportResult: (List<MouthCue>) -> Unit
) { ) {
private val spineJson = parentModel.spineJson private val spineJson = parentModel.spineJson
private val audioFilePath: Path = spineJson.audioDirectoryPath.resolve(audioEvent.relativeAudioFilePath) private val audioFilePath: Path = spineJson.audioDirectoryPath.resolve(audioEvent.relativeAudioFilePath)
val eventNameProperty = SimpleStringProperty(audioEvent.name) val eventNameProperty = SimpleStringProperty(audioEvent.name)
val eventName: String by eventNameProperty val eventName: String by eventNameProperty
val displayFilePathProperty = SimpleStringProperty(audioEvent.relativeAudioFilePath) val displayFilePathProperty = SimpleStringProperty(audioEvent.relativeAudioFilePath)
val animationNameProperty = SimpleStringProperty().apply { val animationNameProperty = SimpleStringProperty().apply {
val mainModel = parentModel.parentModel val mainModel = parentModel.parentModel
bind(object : ObjectBinding<String>() { bind(object : ObjectBinding<String>() {
init { init {
super.bind( super.bind(
mainModel.animationPrefixProperty, mainModel.animationPrefixProperty,
eventNameProperty, eventNameProperty,
mainModel.animationSuffixProperty mainModel.animationSuffixProperty
) )
} }
override fun computeValue(): String { override fun computeValue(): String {
return mainModel.animationPrefix + eventName + mainModel.animationSuffix return mainModel.animationPrefix + eventName + mainModel.animationSuffix
} }
}) })
} }
val animationName: String by animationNameProperty val animationName: String by animationNameProperty
val dialogProperty = SimpleStringProperty(audioEvent.dialog) val dialogProperty = SimpleStringProperty(audioEvent.dialog)
private val dialog: String? by dialogProperty private val dialog: String? by dialogProperty
val animationProgressProperty = SimpleObjectProperty<Double?>(null) val animationProgressProperty = SimpleObjectProperty<Double?>(null)
var animationProgress: Double? by animationProgressProperty var animationProgress: Double? by animationProgressProperty
private set private set
private val animatedProperty = SimpleBooleanProperty().apply { private val animatedProperty = SimpleBooleanProperty().apply {
bind(object : ObjectBinding<Boolean>() { bind(object : ObjectBinding<Boolean>() {
init { init {
super.bind(animationNameProperty, parentModel.spineJson.animationNames) super.bind(animationNameProperty, parentModel.spineJson.animationNames)
} }
override fun computeValue(): Boolean { override fun computeValue(): Boolean {
return parentModel.spineJson.animationNames.contains(animationName) return parentModel.spineJson.animationNames.contains(animationName)
} }
}) })
} }
private var animated by animatedProperty private var animated by animatedProperty
private val futureProperty = SimpleObjectProperty<Future<*>?>() private val futureProperty = SimpleObjectProperty<Future<*>?>()
private var future by futureProperty private var future by futureProperty
val audioFileStateProperty = SimpleObjectProperty<AudioFileState>().apply { val audioFileStateProperty = SimpleObjectProperty<AudioFileState>().apply {
bind(object : ObjectBinding<AudioFileState>() { bind(object : ObjectBinding<AudioFileState>() {
init { init {
super.bind(animatedProperty, futureProperty, animationProgressProperty) super.bind(animatedProperty, futureProperty, animationProgressProperty)
} }
override fun computeValue(): AudioFileState { override fun computeValue(): AudioFileState {
return if (future != null) { return if (future != null) {
if (animationProgress != null) if (animationProgress != null)
if (future!!.isCancelled) if (future!!.isCancelled)
AudioFileState(AudioFileStatus.Canceling) AudioFileState(AudioFileStatus.Canceling)
else else
AudioFileState(AudioFileStatus.Animating, animationProgress) AudioFileState(AudioFileStatus.Animating, animationProgress)
else else
AudioFileState(AudioFileStatus.Pending) AudioFileState(AudioFileStatus.Pending)
} else { } else {
if (animated) if (animated)
AudioFileState(AudioFileStatus.Done) AudioFileState(AudioFileStatus.Done)
else else
AudioFileState(AudioFileStatus.NotAnimated) AudioFileState(AudioFileStatus.NotAnimated)
} }
} }
}) })
} }
val busyProperty = SimpleBooleanProperty().apply { val busyProperty = SimpleBooleanProperty().apply {
bind(object : BooleanBinding() { bind(object : BooleanBinding() {
init { init {
super.bind(futureProperty) super.bind(futureProperty)
} }
override fun computeValue(): Boolean { override fun computeValue(): Boolean {
return future != null return future != null
} }
}) })
} }
val busy by busyProperty val busy by busyProperty
val actionLabelProperty = SimpleStringProperty().apply { val actionLabelProperty = SimpleStringProperty().apply {
bind(object : StringBinding() { bind(object : StringBinding() {
init { init {
super.bind(futureProperty) super.bind(futureProperty)
} }
override fun computeValue(): String { override fun computeValue(): String {
return if (future != null) return if (future != null)
"Cancel" "Cancel"
else else
"Animate" "Animate"
} }
}) })
} }
fun performAction() { fun performAction() {
if (future == null) { if (future == null) {
if (animated) { if (animated) {
Alert(Alert.AlertType.CONFIRMATION).apply { Alert(Alert.AlertType.CONFIRMATION).apply {
headerText = "Animation '$animationName' already exists." headerText = "Animation '$animationName' already exists."
contentText = "Do you want to replace the existing animation?" contentText = "Do you want to replace the existing animation?"
val result = showAndWait() val result = showAndWait()
if (result.get() != ButtonType.OK) { if (result.get() != ButtonType.OK) {
return return
} }
} }
} }
startAnimation() startAnimation()
} else { } else {
cancelAnimation() cancelAnimation()
} }
} }
private fun startAnimation() { private fun startAnimation() {
val wrapperTask = Runnable { val wrapperTask = Runnable {
val recognizer = parentModel.parentModel.recognizer.value val recognizer = parentModel.parentModel.recognizer.value
val extendedMouthShapes = parentModel.mouthShapes.filter { it.isExtended }.toSet() val extendedMouthShapes = parentModel.mouthShapes.filter { it.isExtended }.toSet()
val reportProgress: (Double?) -> Unit = { val reportProgress: (Double?) -> Unit = {
progress -> runAndWait { this@AudioFileModel.animationProgress = progress } progress -> runAndWait { this@AudioFileModel.animationProgress = progress }
} }
val rhubarbTask = RhubarbTask(audioFilePath, recognizer, dialog, extendedMouthShapes, reportProgress) val rhubarbTask = RhubarbTask(audioFilePath, recognizer, dialog, extendedMouthShapes, reportProgress)
try { try {
try { try {
val result = rhubarbTask.call() val result = rhubarbTask.call()
runAndWait { runAndWait {
reportResult(result) reportResult(result)
} }
} finally { } finally {
runAndWait { runAndWait {
animationProgress = null animationProgress = null
future = null future = null
} }
} }
} catch (e: InterruptedException) { } catch (e: InterruptedException) {
} catch (e: Exception) { } catch (e: Exception) {
e.printStackTrace(System.err) e.printStackTrace(System.err)
Platform.runLater { Platform.runLater {
Alert(Alert.AlertType.ERROR).apply { Alert(Alert.AlertType.ERROR).apply {
headerText = "Error performing lip sync for event '$eventName'." headerText = "Error performing lip sync for event '$eventName'."
contentText = if (e is EndUserException) contentText = if (e is EndUserException)
e.message e.message
else else
("An internal error occurred.\n" ("An internal error occurred.\n"
+ "Please report an issue, including the following information.\n" + "Please report an issue, including the following information.\n"
+ getStackTrace(e)) + getStackTrace(e))
show() show()
} }
} }
} }
} }
future = executor.submit(wrapperTask) future = executor.submit(wrapperTask)
} }
private fun cancelAnimation() { private fun cancelAnimation() {
future?.cancel(true) future?.cancel(true)
} }
} }
enum class AudioFileStatus { enum class AudioFileStatus {
NotAnimated, NotAnimated,
Pending, Pending,
Animating, Animating,
Canceling, Canceling,
Done Done
} }
data class AudioFileState(val status: AudioFileStatus, val progress: Double? = null) data class AudioFileState(val status: AudioFileStatus, val progress: Double? = null)

View File

@ -14,67 +14,67 @@ import tornadofx.rectangle
import tornadofx.removeFromParent import tornadofx.removeFromParent
fun renderErrorIndicator(): Node { fun renderErrorIndicator(): Node {
return Group().apply { return Group().apply {
isManaged = false isManaged = false
circle { circle {
radius = 7.0 radius = 7.0
fill = Color.ORANGERED fill = Color.ORANGERED
} }
rectangle { rectangle {
x = -1.0 x = -1.0
y = -5.0 y = -5.0
width = 2.0 width = 2.0
height = 7.0 height = 7.0
fill = Color.WHITE fill = Color.WHITE
} }
rectangle { rectangle {
x = -1.0 x = -1.0
y = 3.0 y = 3.0
width = 2.0 width = 2.0
height = 2.0 height = 2.0
fill = Color.WHITE fill = Color.WHITE
} }
} }
} }
fun Parent.errorProperty() : StringProperty { fun Parent.errorProperty() : StringProperty {
return properties.getOrPut("rhubarb.errorProperty", { return properties.getOrPut("rhubarb.errorProperty", {
val errorIndicator: Node = renderErrorIndicator() val errorIndicator: Node = renderErrorIndicator()
val tooltip = Tooltip() val tooltip = Tooltip()
val property = SimpleStringProperty() val property = SimpleStringProperty()
fun updateTooltipVisibility() { fun updateTooltipVisibility() {
if (tooltip.text.isNotEmpty() && isFocused) { if (tooltip.text.isNotEmpty() && isFocused) {
val bounds = localToScreen(boundsInLocal) val bounds = localToScreen(boundsInLocal)
tooltip.show(scene.window, bounds.minX + 5, bounds.maxY + 2) tooltip.show(scene.window, bounds.minX + 5, bounds.maxY + 2)
} else { } else {
tooltip.hide() tooltip.hide()
} }
} }
focusedProperty().addListener({ focusedProperty().addListener({
_: ObservableValue<out Boolean>, _: Boolean, _: Boolean -> _: ObservableValue<out Boolean>, _: Boolean, _: Boolean ->
updateTooltipVisibility() updateTooltipVisibility()
}) })
property.addListener({ property.addListener({
_: ObservableValue<out String?>, _: String?, newValue: String? -> _: ObservableValue<out String?>, _: String?, newValue: String? ->
if (newValue != null) { if (newValue != null) {
this.addChildIfPossible(errorIndicator) this.addChildIfPossible(errorIndicator)
tooltip.text = newValue tooltip.text = newValue
Tooltip.install(this, tooltip) Tooltip.install(this, tooltip)
updateTooltipVisibility() updateTooltipVisibility()
} else { } else {
errorIndicator.removeFromParent() errorIndicator.removeFromParent()
tooltip.text = "" tooltip.text = ""
tooltip.hide() tooltip.hide()
Tooltip.uninstall(this, tooltip) Tooltip.uninstall(this, tooltip)
updateTooltipVisibility() updateTooltipVisibility()
} }
}) })
return@getOrPut property return@getOrPut property
}) as StringProperty }) as StringProperty
} }

View File

@ -8,18 +8,18 @@ import java.lang.reflect.Method
import javax.swing.ImageIcon import javax.swing.ImageIcon
class MainApp : App(MainView::class) { class MainApp : App(MainView::class) {
override fun start(stage: Stage) { override fun start(stage: Stage) {
super.start(stage) super.start(stage)
setIcon() setIcon()
} }
private fun setIcon() { private fun setIcon() {
// Set icon for windows // Set icon for windows
for (iconSize in listOf(16, 32, 48, 256)) { for (iconSize in listOf(16, 32, 48, 256)) {
addStageIcon(Image(this.javaClass.getResourceAsStream("/icon-$iconSize.png"))) addStageIcon(Image(this.javaClass.getResourceAsStream("/icon-$iconSize.png")))
} }
// OS X requires the dock icon to be changed separately. // OS X requires the dock icon to be changed separately.
// Not all JDKs contain the class com.apple.eawt.Application, so we have to use reflection. // Not all JDKs contain the class com.apple.eawt.Application, so we have to use reflection.
val classLoader = this.javaClass.classLoader val classLoader = this.javaClass.classLoader
try { try {
@ -37,6 +37,6 @@ class MainApp : App(MainView::class) {
} catch (e: Exception) { } catch (e: Exception) {
// Works only on OS X // Works only on OS X
} }
} }
} }

View File

@ -13,51 +13,51 @@ import java.nio.file.Paths
import java.util.concurrent.ExecutorService import java.util.concurrent.ExecutorService
class MainModel(private val executor: ExecutorService) { class MainModel(private val executor: ExecutorService) {
val filePathStringProperty = SimpleStringProperty(getDefaultPathString()).alsoListen { value -> val filePathStringProperty = SimpleStringProperty(getDefaultPathString()).alsoListen { value ->
filePathError = getExceptionMessage { filePathError = getExceptionMessage {
animationFileModel = null animationFileModel = null
if (value.isNullOrBlank()) { if (value.isNullOrBlank()) {
throw EndUserException("No input file specified.") throw EndUserException("No input file specified.")
} }
val path = try { val path = try {
val trimmed = value.removeSurrounding("\"") val trimmed = value.removeSurrounding("\"")
Paths.get(trimmed) Paths.get(trimmed)
} catch (e: InvalidPathException) { } catch (e: InvalidPathException) {
throw EndUserException("Not a valid file path.") throw EndUserException("Not a valid file path.")
} }
if (!Files.exists(path)) { if (!Files.exists(path)) {
throw EndUserException("File does not exist.") throw EndUserException("File does not exist.")
} }
animationFileModel = AnimationFileModel(this, path, executor) animationFileModel = AnimationFileModel(this, path, executor)
} }
} }
val filePathErrorProperty = SimpleStringProperty() val filePathErrorProperty = SimpleStringProperty()
private var filePathError: String? by filePathErrorProperty private var filePathError: String? by filePathErrorProperty
val animationFileModelProperty = SimpleObjectProperty<AnimationFileModel?>() val animationFileModelProperty = SimpleObjectProperty<AnimationFileModel?>()
var animationFileModel by animationFileModelProperty var animationFileModel by animationFileModelProperty
private set private set
val recognizersProperty = SimpleObjectProperty<ObservableList<Recognizer>>(FXCollections.observableArrayList( val recognizersProperty = SimpleObjectProperty<ObservableList<Recognizer>>(FXCollections.observableArrayList(
Recognizer("pocketSphinx", "PocketSphinx (use for English recordings)"), Recognizer("pocketSphinx", "PocketSphinx (use for English recordings)"),
Recognizer("phonetic", "Phonetic (use for non-English recordings)") Recognizer("phonetic", "Phonetic (use for non-English recordings)")
)) ))
private var recognizers: ObservableList<Recognizer> by recognizersProperty private var recognizers: ObservableList<Recognizer> by recognizersProperty
val recognizerProperty = SimpleObjectProperty<Recognizer>(recognizers[0]) val recognizerProperty = SimpleObjectProperty<Recognizer>(recognizers[0])
var recognizer: Recognizer by recognizerProperty var recognizer: Recognizer by recognizerProperty
val animationPrefixProperty = SimpleStringProperty("say_") val animationPrefixProperty = SimpleStringProperty("say_")
var animationPrefix: String by animationPrefixProperty var animationPrefix: String by animationPrefixProperty
val animationSuffixProperty = SimpleStringProperty("") val animationSuffixProperty = SimpleStringProperty("")
var animationSuffix: String by animationSuffixProperty var animationSuffix: String by animationSuffixProperty
private fun getDefaultPathString() = FX.application.parameters.raw.firstOrNull() private fun getDefaultPathString() = FX.application.parameters.raw.firstOrNull()
} }
class Recognizer(val value: String, val description: String) class Recognizer(val value: String, val description: String)

View File

@ -23,235 +23,235 @@ import java.io.File
import java.util.concurrent.Executors import java.util.concurrent.Executors
class MainView : View() { class MainView : View() {
private val executor = Executors.newSingleThreadExecutor() private val executor = Executors.newSingleThreadExecutor()
private val mainModel = MainModel(executor) private val mainModel = MainModel(executor)
init { init {
title = "Rhubarb Lip Sync for Spine" title = "Rhubarb Lip Sync for Spine"
} }
override val root = form { override val root = form {
var filePathTextField: TextField? = null var filePathTextField: TextField? = null
var filePathButton: Button? = null var filePathButton: Button? = null
val fileModelProperty = mainModel.animationFileModelProperty val fileModelProperty = mainModel.animationFileModelProperty
minWidth = 800.0 minWidth = 800.0
prefWidth = 1000.0 prefWidth = 1000.0
fieldset("Settings") { fieldset("Settings") {
disableProperty().bind(fileModelProperty.select { it!!.busyProperty }) disableProperty().bind(fileModelProperty.select { it!!.busyProperty })
field("Spine JSON file") { field("Spine JSON file") {
filePathTextField = textfield { filePathTextField = textfield {
textProperty().bindBidirectional(mainModel.filePathStringProperty) textProperty().bindBidirectional(mainModel.filePathStringProperty)
errorProperty().bind(mainModel.filePathErrorProperty) errorProperty().bind(mainModel.filePathErrorProperty)
} }
filePathButton = button("...") filePathButton = button("...")
} }
field("Mouth slot") { field("Mouth slot") {
combobox<String> { combobox<String> {
itemsProperty().bind(fileModelProperty.select { it!!.slotsProperty }) itemsProperty().bind(fileModelProperty.select { it!!.slotsProperty })
valueProperty().bindBidirectional(fileModelProperty.select { it!!.mouthSlotProperty }) valueProperty().bindBidirectional(fileModelProperty.select { it!!.mouthSlotProperty })
errorProperty().bind(fileModelProperty.select { it!!.mouthSlotErrorProperty }) errorProperty().bind(fileModelProperty.select { it!!.mouthSlotErrorProperty })
} }
} }
field("Mouth naming") { field("Mouth naming") {
label { label {
textProperty().bind( textProperty().bind(
fileModelProperty fileModelProperty
.select { it!!.mouthNamingProperty } .select { it!!.mouthNamingProperty }
.select { SimpleStringProperty(it.displayString) } .select { SimpleStringProperty(it.displayString) }
) )
} }
} }
field("Mouth shapes") { field("Mouth shapes") {
hbox { hbox {
errorProperty().bind(fileModelProperty.select { it!!.mouthShapesErrorProperty }) errorProperty().bind(fileModelProperty.select { it!!.mouthShapesErrorProperty })
gridpane { gridpane {
hgap = 10.0 hgap = 10.0
vgap = 3.0 vgap = 3.0
row { row {
label("Basic:") label("Basic:")
for (shape in MouthShape.basicShapes) { for (shape in MouthShape.basicShapes) {
renderShapeCheckbox(shape, fileModelProperty, this) renderShapeCheckbox(shape, fileModelProperty, this)
} }
} }
row { row {
label("Extended:") label("Extended:")
for (shape in MouthShape.extendedShapes) { for (shape in MouthShape.extendedShapes) {
renderShapeCheckbox(shape, fileModelProperty, this) renderShapeCheckbox(shape, fileModelProperty, this)
} }
} }
} }
} }
} }
field("Dialog recognizer") { field("Dialog recognizer") {
combobox<Recognizer> { combobox<Recognizer> {
itemsProperty().bind(mainModel.recognizersProperty) itemsProperty().bind(mainModel.recognizersProperty)
this.converter = object : StringConverter<Recognizer>() { this.converter = object : StringConverter<Recognizer>() {
override fun toString(recognizer: Recognizer?): String { override fun toString(recognizer: Recognizer?): String {
return recognizer?.description ?: "" return recognizer?.description ?: ""
} }
override fun fromString(string: String?): Recognizer { override fun fromString(string: String?): Recognizer {
throw NotImplementedError() throw NotImplementedError()
} }
} }
valueProperty().bindBidirectional(mainModel.recognizerProperty) valueProperty().bindBidirectional(mainModel.recognizerProperty)
} }
} }
field("Animation naming") { field("Animation naming") {
textfield { textfield {
maxWidth = 100.0 maxWidth = 100.0
textProperty().bindBidirectional(mainModel.animationPrefixProperty) textProperty().bindBidirectional(mainModel.animationPrefixProperty)
} }
label("<audio event name>") label("<audio event name>")
textfield { textfield {
maxWidth = 100.0 maxWidth = 100.0
textProperty().bindBidirectional(mainModel.animationSuffixProperty) textProperty().bindBidirectional(mainModel.animationSuffixProperty)
} }
} }
} }
fieldset("Audio events") { fieldset("Audio events") {
tableview<AudioFileModel> { tableview<AudioFileModel> {
placeholder = Label("There are no events with associated audio files.") placeholder = Label("There are no events with associated audio files.")
columnResizePolicy = SmartResize.POLICY columnResizePolicy = SmartResize.POLICY
column("Event", AudioFileModel::eventNameProperty) column("Event", AudioFileModel::eventNameProperty)
.weightedWidth(1.0) .weightedWidth(1.0)
column("Animation name", AudioFileModel::animationNameProperty) column("Animation name", AudioFileModel::animationNameProperty)
.weightedWidth(1.0) .weightedWidth(1.0)
column("Audio file", AudioFileModel::displayFilePathProperty) column("Audio file", AudioFileModel::displayFilePathProperty)
.weightedWidth(1.0) .weightedWidth(1.0)
column("Dialog", AudioFileModel::dialogProperty).apply { column("Dialog", AudioFileModel::dialogProperty).apply {
weightedWidth(3.0) weightedWidth(3.0)
// Make dialog column wrap // Make dialog column wrap
setCellFactory { tableColumn -> setCellFactory { tableColumn ->
return@setCellFactory TableCell<AudioFileModel, String>().also { cell -> return@setCellFactory TableCell<AudioFileModel, String>().also { cell ->
cell.graphic = Text().apply { cell.graphic = Text().apply {
textProperty().bind(cell.itemProperty()) textProperty().bind(cell.itemProperty())
fillProperty().bind(cell.textFillProperty()) fillProperty().bind(cell.textFillProperty())
val widthProperty = tableColumn.widthProperty() val widthProperty = tableColumn.widthProperty()
.minus(cell.paddingLeftProperty) .minus(cell.paddingLeftProperty)
.minus(cell.paddingRightProperty) .minus(cell.paddingRightProperty)
wrappingWidthProperty().bind(widthProperty) wrappingWidthProperty().bind(widthProperty)
} }
cell.prefHeight = Control.USE_COMPUTED_SIZE cell.prefHeight = Control.USE_COMPUTED_SIZE
} }
} }
} }
column("Status", AudioFileModel::audioFileStateProperty).apply { column("Status", AudioFileModel::audioFileStateProperty).apply {
weightedWidth(1.0) weightedWidth(1.0)
setCellFactory { setCellFactory {
return@setCellFactory object : TableCell<AudioFileModel, AudioFileState>() { return@setCellFactory object : TableCell<AudioFileModel, AudioFileState>() {
override fun updateItem(state: AudioFileState?, empty: Boolean) { override fun updateItem(state: AudioFileState?, empty: Boolean) {
super.updateItem(state, empty) super.updateItem(state, empty)
graphic = if (state != null) { graphic = if (state != null) {
when (state.status) { when (state.status) {
AudioFileStatus.NotAnimated -> Text("Not animated").apply { AudioFileStatus.NotAnimated -> Text("Not animated").apply {
fill = Color.GRAY fill = Color.GRAY
} }
AudioFileStatus.Pending, AudioFileStatus.Pending,
AudioFileStatus.Animating -> HBox().apply { AudioFileStatus.Animating -> HBox().apply {
val progress: Double? = state.progress val progress: Double? = state.progress
val indeterminate = -1.0 val indeterminate = -1.0
val bar = progressbar(progress ?: indeterminate) { val bar = progressbar(progress ?: indeterminate) {
maxWidth = Double.MAX_VALUE maxWidth = Double.MAX_VALUE
} }
HBox.setHgrow(bar, Priority.ALWAYS) HBox.setHgrow(bar, Priority.ALWAYS)
hbox { hbox {
minWidth = 30.0 minWidth = 30.0
if (progress != null) { if (progress != null) {
text("${(progress * 100).toInt()}%") { text("${(progress * 100).toInt()}%") {
alignment = Pos.BASELINE_RIGHT alignment = Pos.BASELINE_RIGHT
} }
} }
} }
} }
AudioFileStatus.Canceling -> Text("Canceling") AudioFileStatus.Canceling -> Text("Canceling")
AudioFileStatus.Done -> Text("Done").apply { AudioFileStatus.Done -> Text("Done").apply {
font = Font.font(font.family, FontWeight.BOLD, font.size) font = Font.font(font.family, FontWeight.BOLD, font.size)
} }
} }
} else null } else null
} }
} }
} }
} }
column("", AudioFileModel::actionLabelProperty).apply { column("", AudioFileModel::actionLabelProperty).apply {
weightedWidth(1.0) weightedWidth(1.0)
// Show button // Show button
setCellFactory { setCellFactory {
return@setCellFactory object : TableCell<AudioFileModel, String>() { return@setCellFactory object : TableCell<AudioFileModel, String>() {
override fun updateItem(item: String?, empty: Boolean) { override fun updateItem(item: String?, empty: Boolean) {
super.updateItem(item, empty) super.updateItem(item, empty)
graphic = if (!empty) graphic = if (!empty)
Button(item).apply { Button(item).apply {
this.maxWidth = Double.MAX_VALUE this.maxWidth = Double.MAX_VALUE
setOnAction { setOnAction {
val audioFileModel = this@tableview.items[index] val audioFileModel = this@tableview.items[index]
audioFileModel.performAction() audioFileModel.performAction()
} }
val invalidProperty: Property<Boolean> = fileModelProperty val invalidProperty: Property<Boolean> = fileModelProperty
.select { it!!.validProperty } .select { it!!.validProperty }
.select { SimpleBooleanProperty(!it) } .select { SimpleBooleanProperty(!it) }
disableProperty().bind(invalidProperty) disableProperty().bind(invalidProperty)
} }
else else
null null
} }
} }
} }
} }
itemsProperty().bind(fileModelProperty.select { it!!.audioFileModelsProperty }) itemsProperty().bind(fileModelProperty.select { it!!.audioFileModelsProperty })
} }
} }
onDragOver = EventHandler<DragEvent> { event -> onDragOver = EventHandler<DragEvent> { event ->
if (event.dragboard.hasFiles() && mainModel.animationFileModel?.busy != true) { if (event.dragboard.hasFiles() && mainModel.animationFileModel?.busy != true) {
event.acceptTransferModes(TransferMode.COPY) event.acceptTransferModes(TransferMode.COPY)
event.consume() event.consume()
} }
} }
onDragDropped = EventHandler<DragEvent> { event -> onDragDropped = EventHandler<DragEvent> { event ->
if (event.dragboard.hasFiles() && mainModel.animationFileModel?.busy != true) { if (event.dragboard.hasFiles() && mainModel.animationFileModel?.busy != true) {
filePathTextField!!.text = event.dragboard.files.firstOrNull()?.path filePathTextField!!.text = event.dragboard.files.firstOrNull()?.path
event.isDropCompleted = true event.isDropCompleted = true
event.consume() event.consume()
} }
} }
whenUndocked { whenUndocked {
executor.shutdownNow() executor.shutdownNow()
} }
filePathButton!!.onAction = EventHandler<ActionEvent> { filePathButton!!.onAction = EventHandler<ActionEvent> {
val fileChooser = FileChooser().apply { val fileChooser = FileChooser().apply {
title = "Open Spine JSON file" title = "Open Spine JSON file"
extensionFilters.addAll( extensionFilters.addAll(
FileChooser.ExtensionFilter("Spine JSON file (*.json)", "*.json"), FileChooser.ExtensionFilter("Spine JSON file (*.json)", "*.json"),
FileChooser.ExtensionFilter("All files (*.*)", "*.*") FileChooser.ExtensionFilter("All files (*.*)", "*.*")
) )
val lastDirectory = filePathTextField!!.text?.let { File(it).parentFile } val lastDirectory = filePathTextField!!.text?.let { File(it).parentFile }
if (lastDirectory != null && lastDirectory.isDirectory) { if (lastDirectory != null && lastDirectory.isDirectory) {
initialDirectory = lastDirectory initialDirectory = lastDirectory
} }
} }
val file = fileChooser.showOpenDialog(this@MainView.primaryStage) val file = fileChooser.showOpenDialog(this@MainView.primaryStage)
if (file != null) { if (file != null) {
filePathTextField!!.text = file.path filePathTextField!!.text = file.path
} }
} }
} }
// Renders a per-shape indicator label inside the given parent node, bound to the
// current animation file model's set of used mouth shapes.
// NOTE(review): despite the name, this creates a bound label, not a checkbox control.
private fun renderShapeCheckbox(shape: MouthShape, fileModelProperty: SimpleObjectProperty<AnimationFileModel?>, parent: EventTarget) {
    parent.label {
        textProperty().bind(
            fileModelProperty
                .select { it!!.mouthShapesProperty }
                .select { mouthShapes ->
                    // Hair space visually separates the shape letter from the status mark.
                    val hairSpace = "\u200A"
                    // NOTE(review): the two string literals below appear to have lost
                    // non-ASCII mark characters (likely check/cross marks) during text
                    // extraction — verify against version control before relying on them.
                    val result = shape.toString() + hairSpace + if (mouthShapes.contains(shape)) "" else ""
                    return@select SimpleStringProperty(result)
                }
        )
    }
}
} }

View File

@ -4,52 +4,52 @@ import java.util.*
/**
 * Describes how mouth attachment names are built: a fixed prefix, a fixed suffix,
 * and the casing used for the shape letter in between (e.g. "mouth_a" or "Mouth-A").
 */
class MouthNaming(private val prefix: String, private val suffix: String, private val mouthShapeCasing: MouthShapeCasing) {

    companion object {
        /**
         * Infers a naming scheme from a list of existing mouth attachment names by
         * taking their common prefix and suffix and inspecting the casing of the
         * remaining shape part of the first name.
         */
        fun guess(mouthNames: List<String>): MouthNaming {
            if (mouthNames.isEmpty()) {
                return MouthNaming("", "", guessMouthShapeCasing(""))
            }
            val sharedPrefix = mouthNames.commonPrefix
            val sharedSuffix = mouthNames.commonSuffix
            val firstName = mouthNames.first()
            // If prefix and suffix would overlap, there is no distinct shape part;
            // fall back to a prefix-only scheme.
            if (sharedPrefix.length + sharedSuffix.length >= firstName.length) {
                return MouthNaming(sharedPrefix, "", guessMouthShapeCasing(""))
            }
            val shapePart = firstName.substring(
                sharedPrefix.length,
                firstName.length - sharedSuffix.length)
            return MouthNaming(sharedPrefix, sharedSuffix, guessMouthShapeCasing(shapePart))
        }

        // A blank or lower-case-leading shape part means lower-case naming.
        private fun guessMouthShapeCasing(shapeName: String): MouthShapeCasing {
            return when {
                shapeName.isBlank() -> MouthShapeCasing.Lower
                shapeName[0].isLowerCase() -> MouthShapeCasing.Lower
                else -> MouthShapeCasing.Upper
            }
        }
    }

    /** Returns the attachment name for the given mouth shape under this scheme. */
    fun getName(mouthShape: MouthShape): String {
        val shapePart = when (mouthShapeCasing) {
            MouthShapeCasing.Upper -> mouthShape.toString()
            else -> mouthShape.toString().toLowerCase(Locale.ROOT)
        }
        return "$prefix$shapePart$suffix"
    }

    /** Human-readable description of the scheme, quoted, with a casing placeholder. */
    val displayString: String get() {
        val casingPlaceholder =
            if (mouthShapeCasing == MouthShapeCasing.Upper) "<UPPER-CASE SHAPE NAME>"
            else "<lower-case shape name>"
        return "\"$prefix$casingPlaceholder$suffix\""
    }
}
/** Casing used for the shape part of mouth attachment names. */
enum class MouthShapeCasing {
    Upper,
    Lower
}

View File

@ -1,19 +1,19 @@
package com.rhubarb_lip_sync.rhubarb_for_spine package com.rhubarb_lip_sync.rhubarb_for_spine
/**
 * The mouth shapes supported by Rhubarb Lip Sync.
 * The first [basicShapeCount] values are the basic shapes; the rest are extended.
 */
enum class MouthShape {
    A, B, C, D, E, F, G, H, X;

    // True for the basic shapes (the first six ordinals).
    val isBasic: Boolean
        get() = this.ordinal < basicShapeCount

    // True for the optional extended shapes.
    val isExtended: Boolean
        get() = !this.isBasic

    companion object {
        const val basicShapeCount = 6

        val basicShapes = MouthShape.values().take(basicShapeCount)

        val extendedShapes = MouthShape.values().drop(basicShapeCount)
    }
}

View File

@ -10,157 +10,157 @@ import java.nio.file.Path
import java.util.concurrent.Callable import java.util.concurrent.Callable
/**
 * Runs the Rhubarb Lip Sync command-line tool on one audio file and returns the
 * resulting mouth cues.
 *
 * Rhubarb is started as an external process in machine-readable mode; its stderr
 * carries one JSON status object per line (type "progress", "success", or "failure"),
 * which this task parses to drive [reportProgress] and detect completion.
 * The task is cancelable: interrupting the thread forcibly terminates the process.
 */
class RhubarbTask(
    val audioFilePath: Path,
    val recognizer: String,
    val dialog: String?,
    val extendedMouthShapes: Set<MouthShape>,
    val reportProgress: (Double?) -> Unit
) : Callable<List<MouthCue>> {

    override fun call(): List<MouthCue> {
        if (Thread.currentThread().isInterrupted) {
            throw InterruptedException()
        }
        if (!Files.exists(audioFilePath)) {
            throw EndUserException("File '$audioFilePath' does not exist.")
        }
        // Dialog text (if any) and Rhubarb's JSON output are exchanged via temp files.
        val dialogFile = if (dialog != null) TemporaryTextFile(dialog) else null
        val outputFile = TemporaryTextFile()
        dialogFile.use { outputFile.use {
            val processBuilder = ProcessBuilder(createProcessBuilderArgs(dialogFile?.filePath)).apply {
                // Redirect stdout to a file rather than reading the pipe.
                // See http://java-monitor.com/forum/showthread.php?t=4067
                redirectOutput(outputFile.filePath.toFile())
            }
            val process: Process = processBuilder.start()
            val stderr = BufferedReader(InputStreamReader(process.errorStream, StandardCharsets.UTF_8))
            try {
                // Read status lines until Rhubarb reports success or failure.
                while (true) {
                    val line = stderr.interruptibleReadLine()
                    val message = parseJsonObject(line)
                    when (message.string("type")!!) {
                        "progress" -> {
                            reportProgress(message.double("value")!!)
                        }
                        "success" -> {
                            reportProgress(1.0)
                            val resultString = String(Files.readAllBytes(outputFile.filePath), StandardCharsets.UTF_8)
                            return parseRhubarbResult(resultString)
                        }
                        "failure" -> {
                            throw EndUserException(message.string("reason") ?: "Rhubarb failed without reason.")
                        }
                    }
                }
            } catch (e: InterruptedException) {
                // Cancellation: kill the external process, then propagate.
                process.destroyForcibly()
                throw e
            } catch (e: EOFException) {
                // stderr closed without a terminal "success"/"failure" message.
                throw EndUserException("Rhubarb terminated unexpectedly.")
            } finally {
                process.waitFor()
            }
        }}
        // Unreachable in practice: the loop above only exits via return or throw.
        throw EndUserException("Audio file processing terminated in an unexpected way.")
    }

    // Converts Rhubarb's JSON output ({"mouthCues": [{"start": ..., "value": ...}, ...]})
    // into a list of MouthCue objects.
    private fun parseRhubarbResult(jsonString: String): List<MouthCue> {
        val json = parseJsonObject(jsonString)
        val mouthCues = json.array<JsonObject>("mouthCues")!!
        return mouthCues.map { mouthCue ->
            val time = mouthCue.double("start")!!
            val mouthShape = MouthShape.valueOf(mouthCue.string("value")!!)
            return@map MouthCue(time, mouthShape)
        }
    }

    private val jsonParser = JsonParser.default()

    private fun parseJsonObject(jsonString: String): JsonObject {
        return jsonParser.parse(StringReader(jsonString)) as JsonObject
    }

    // Builds the Rhubarb command line. The dialog file argument is optional;
    // the audio file path always comes last.
    private fun createProcessBuilderArgs(dialogFilePath: Path?): List<String> {
        val extendedMouthShapesString =
            if (extendedMouthShapes.any()) extendedMouthShapes.joinToString(separator = "")
            else "\"\""
        return mutableListOf(
            rhubarbBinFilePath.toString(),
            "--machineReadable",
            "--recognizer", recognizer,
            "--exportFormat", "json",
            "--extendedShapes", extendedMouthShapesString
        ).apply {
            if (dialogFilePath != null) {
                addAll(listOf(
                    "--dialogFile", dialogFilePath.toString()
                ))
            }
        }.apply {
            add(audioFilePath.toString())
        }
    }

    // Directory containing this GUI's own binary (JAR or class files).
    private val guiBinDirectory: Path by lazy {
        val path = urlToPath(getLocation(RhubarbTask::class.java))
        return@lazy if (Files.isDirectory(path)) path.parent else path
    }

    // Locates the Rhubarb executable by walking up from the GUI's directory.
    private val rhubarbBinFilePath: Path by lazy {
        val rhubarbBinName = if (IS_OS_WINDOWS) "rhubarb.exe" else "rhubarb"
        var currentDirectory: Path? = guiBinDirectory
        while (currentDirectory != null) {
            val candidate: Path = currentDirectory.resolve(rhubarbBinName)
            if (Files.exists(candidate)) {
                return@lazy candidate
            }
            currentDirectory = currentDirectory.parent
        }
        throw EndUserException("Could not find Rhubarb Lip Sync executable '$rhubarbBinName'."
            + " Expected to find it in '$guiBinDirectory' or any directory above.")
    }

    // RAII-style temp file that is deleted when closed.
    private class TemporaryTextFile(text: String = "") : AutoCloseable {
        val filePath: Path = Files.createTempFile(null, null).also {
            Files.write(it, text.toByteArray(StandardCharsets.UTF_8))
        }

        override fun close() {
            Files.delete(filePath)
        }
    }

    // Same as readLine, but can be interrupted.
    // Note that this function handles linebreak characters differently from readLine.
    // It only consumes the first linebreak character before returning and swallows any leading
    // linebreak characters.
    // This behavior is much easier to implement and doesn't make any difference for our purposes.
    private fun BufferedReader.interruptibleReadLine(): String {
        val result = StringBuilder()
        while (true) {
            val char = interruptibleReadChar()
            if (char == '\r' || char == '\n') {
                if (result.isNotEmpty()) return result.toString()
            } else {
                result.append(char)
            }
        }
    }

    // Polls the reader so the thread's interrupted flag is honored between characters.
    private fun BufferedReader.interruptibleReadChar(): Char {
        while (true) {
            if (Thread.currentThread().isInterrupted) {
                throw InterruptedException()
            }
            if (ready()) {
                val result: Int = read()
                if (result == -1) {
                    throw EOFException()
                }
                return result.toChar()
            }
            Thread.yield()
        }
    }
}

View File

@ -7,157 +7,157 @@ import java.nio.file.Files
import java.nio.file.Path import java.nio.file.Path
/**
 * Wraps a Spine skeleton JSON export: validates it on load, exposes the parts the
 * GUI needs (slots, audio events, frame rate), and writes mouth animations back.
 *
 * Requires the export to include nonessential data (images/audio paths) and to sit
 * next to the original Spine project so relative paths resolve.
 */
class SpineJson(private val filePath: Path) {
    private val fileDirectoryPath: Path = filePath.parent
    private val json: JsonObject
    private val skeleton: JsonObject

    init {
        if (!Files.exists(filePath)) {
            throw EndUserException("File '$filePath' does not exist.")
        }
        try {
            json = Parser.default().parse(filePath.toString()) as JsonObject
        } catch (e: Exception) {
            throw EndUserException("Wrong file format. This is not a valid JSON file.")
        }
        skeleton = json.obj("skeleton") ?: throw EndUserException("JSON file is corrupted.")
        validateProperties()
    }

    // Accessing these properties throws an EndUserException if the file is unusable,
    // so evaluating them here fails fast at load time.
    private fun validateProperties() {
        imagesDirectoryPath
        audioDirectoryPath
    }

    private val imagesDirectoryPath: Path get() {
        val relativeImagesDirectory = skeleton.string("images")
            ?: throw EndUserException("JSON file is incomplete: Images path is missing."
                + " Make sure to check 'Nonessential data' when exporting.")
        val imagesDirectoryPath = fileDirectoryPath.resolve(relativeImagesDirectory).normalize()
        if (!Files.exists(imagesDirectoryPath)) {
            throw EndUserException("Could not find images directory relative to the JSON file."
                + " Make sure the JSON file is in the same directory as the original Spine file.")
        }
        return imagesDirectoryPath
    }

    val audioDirectoryPath: Path get() {
        val relativeAudioDirectory = skeleton.string("audio")
            ?: throw EndUserException("JSON file is incomplete: Audio path is missing."
                + " Make sure to check 'Nonessential data' when exporting.")
        val audioDirectoryPath = fileDirectoryPath.resolve(relativeAudioDirectory).normalize()
        if (!Files.exists(audioDirectoryPath)) {
            throw EndUserException("Could not find audio directory relative to the JSON file."
                + " Make sure the JSON file is in the same directory as the original Spine file.")
        }
        return audioDirectoryPath
    }

    // Spine defaults to 30 fps when the export doesn't specify one.
    val frameRate: Double get() {
        return skeleton.double("fps") ?: 30.0
    }

    val slots: List<String> get() {
        val slots = json.array("slots") ?: listOf<JsonObject>()
        return slots.mapNotNull { it.string("name") }
    }

    // Best-effort guess: prefer a slot whose name contains "mouth", else the first slot.
    fun guessMouthSlot(): String? {
        return slots.firstOrNull { it.contains("mouth", ignoreCase = true) }
            ?: slots.firstOrNull()
    }

    data class AudioEvent(val name: String, val relativeAudioFilePath: String, val dialog: String?)

    // All events that reference an audio file; the optional "string" field is used
    // as dialog text.
    val audioEvents: List<AudioEvent> get() {
        val events = json.obj("events") ?: JsonObject()
        val result = mutableListOf<AudioEvent>()
        for ((name, value) in events) {
            if (value !is JsonObject) throw EndUserException("Invalid event found.")
            val relativeAudioFilePath = value.string("audio") ?: continue
            val dialog = value.string("string")
            result.add(AudioEvent(name, relativeAudioFilePath, dialog))
        }
        return result
    }

    fun getSlotAttachmentNames(slotName: String): List<String> {
        @Suppress("UNCHECKED_CAST")
        // "skins" is an object in older exports and an array in newer ones.
        val skins: Collection<JsonObject> = when (val skinsObject = json["skins"]) {
            is JsonObject -> skinsObject.values as Collection<JsonObject>
            is JsonArray<*> -> skinsObject as Collection<JsonObject>
            else -> emptyList()
        }

        // Get attachment names for all skins
        return skins
            .flatMap { skin ->
                skin.obj(slotName)?.keys?.toList()
                    ?: skin.obj("attachments")?.obj(slotName)?.keys?.toList()
                    ?: emptyList<String>()
            }
            .distinct()
    }

    val animationNames = observableSet<String>(
        json.obj("animations")?.map{ it.key }?.toMutableSet() ?: mutableSetOf()
    )

    // Writes (or overwrites) an animation that keys the mouth slot's attachment per
    // frame and fires the originating event at time 0.
    fun createOrUpdateAnimation(mouthCues: List<MouthCue>, eventName: String, animationName: String,
        mouthSlot: String, mouthNaming: MouthNaming
    ) {
        if (!json.containsKey("animations")) {
            json["animations"] = JsonObject()
        }
        val animations: JsonObject = json.obj("animations")!!

        // Round times to full frames. Always round down.
        // If events coincide, prefer the latest one.
        val keyframes = mutableMapOf<Int, MouthShape>()
        for (mouthCue in mouthCues) {
            val frameNumber = (mouthCue.time * frameRate).toInt()
            keyframes[frameNumber] = mouthCue.mouthShape
        }

        animations[animationName] = JsonObject().apply {
            this["slots"] = JsonObject().apply {
                this[mouthSlot] = JsonObject().apply {
                    this["attachment"] = JsonArray(
                        keyframes
                            .toSortedMap()
                            .map { (frameNumber, mouthShape) ->
                                JsonObject().apply {
                                    this["time"] = frameNumber / frameRate
                                    this["name"] = mouthNaming.getName(mouthShape)
                                }
                            }
                    )
                }
            }
            this["events"] = JsonArray(
                JsonObject().apply {
                    this["time"] = 0.0
                    this["name"] = eventName
                    this["string"] = ""
                }
            )
        }

        animationNames.add(animationName)
    }

    override fun toString(): String {
        return json.toJsonString(prettyPrint = true)
    }

    fun save() {
        Files.write(filePath, listOf(toString()), StandardCharsets.UTF_8)
    }
}

View File

@ -24,37 +24,37 @@ import java.nio.file.Paths
* @param c The class whose location is desired. * @param c The class whose location is desired.
*/ */
fun getLocation(c: Class<*>): URL {
    // First attempt: the protection domain's code source usually knows where the
    // class was loaded from.
    try {
        val codeSourceLocation = c.protectionDomain.codeSource.location
        if (codeSourceLocation != null) return codeSourceLocation
    } catch (e: SecurityException) {
        // Cannot access protection domain
    } catch (e: NullPointerException) {
        // Protection domain or code source is null
    }

    // The easy way failed, so we try the hard way. We ask for the class
    // itself as a resource, then strip the class's path from the URL string,
    // leaving the base path.

    // Resolve the class file as a resource
    val classResource = c.getResource(c.simpleName + ".class")
        ?: throw Exception("Cannot find class resource.")
    val resourceUrl = classResource.toString()
    val expectedSuffix = c.canonicalName.replace('.', '/') + ".class"
    if (!resourceUrl.endsWith(expectedSuffix)) throw Exception("Malformed URL.")

    // Strip the class's path from the URL string
    var basePath = resourceUrl.substring(0, resourceUrl.length - expectedSuffix.length)
    // remove the "jar:" prefix and "!/" suffix, if present
    if (basePath.startsWith("jar:")) basePath = basePath.substring(4, basePath.length - 2)
    return URL(basePath)
}
/** /**
@ -64,29 +64,29 @@ fun getLocation(c: Class<*>): URL {
* @return A file path suitable for use with e.g. [FileInputStream] * @return A file path suitable for use with e.g. [FileInputStream]
*/ */
fun urlToPath(url: URL): Path {
    var pathString = url.toString()

    if (pathString.startsWith("jar:")) {
        // Remove "jar:" prefix and "!/" suffix
        val separatorIndex = pathString.indexOf("!/")
        pathString = pathString.substring(4, separatorIndex)
    }

    try {
        // On Windows, drive-letter URLs like "file:C:/..." need an extra slash to parse.
        if (IS_OS_WINDOWS && pathString.matches("file:[A-Za-z]:.*".toRegex())) {
            pathString = "file:/" + pathString.substring(5)
        }
        return Paths.get(URL(pathString).toURI())
    } catch (e: MalformedURLException) {
        // URL is not completely well-formed.
    } catch (e: URISyntaxException) {
        // URL is not completely well-formed.
    }

    if (pathString.startsWith("file:")) {
        // Pass through the URL as-is, minus "file:" prefix
        pathString = pathString.substring(5)
        return Paths.get(pathString)
    }
    throw IllegalArgumentException("Invalid URL: $url")
}

View File

@ -3,5 +3,5 @@ package com.rhubarb_lip_sync.rhubarb_for_spine
import javafx.application.Application import javafx.application.Application
fun main(args: Array<String>) { fun main(args: Array<String>) {
Application.launch(MainApp::class.java, *args) Application.launch(MainApp::class.java, *args)
} }

View File

@ -8,31 +8,31 @@ import java.io.PrintWriter
import java.io.StringWriter import java.io.StringWriter
val List<String>.commonPrefix: String get() { val List<String>.commonPrefix: String get() {
return if (isEmpty()) "" else this.reduce { result, string -> result.commonPrefixWith(string) } return if (isEmpty()) "" else this.reduce { result, string -> result.commonPrefixWith(string) }
} }
val List<String>.commonSuffix: String get() { val List<String>.commonSuffix: String get() {
return if (isEmpty()) "" else this.reduce { result, string -> result.commonSuffixWith(string) } return if (isEmpty()) "" else this.reduce { result, string -> result.commonSuffixWith(string) }
} }
fun <TValue, TProperty : Property<TValue>> TProperty.alsoListen(listener: (TValue) -> Unit) : TProperty { fun <TValue, TProperty : Property<TValue>> TProperty.alsoListen(listener: (TValue) -> Unit) : TProperty {
// Notify the listener of the initial value. // Notify the listener of the initial value.
// If we did this synchronously, the listener's state would have to be fully initialized the // If we did this synchronously, the listener's state would have to be fully initialized the
// moment this function is called. So calling this function during object initialization might // moment this function is called. So calling this function during object initialization might
// result in access to uninitialized state. // result in access to uninitialized state.
Platform.runLater { listener(this.value) } Platform.runLater { listener(this.value) }
addListener({ _, _, newValue -> listener(newValue)}) addListener({ _, _, newValue -> listener(newValue)})
return this return this
} }
fun getExceptionMessage(action: () -> Unit): String? { fun getExceptionMessage(action: () -> Unit): String? {
try { try {
action() action()
} catch (e: Exception) { } catch (e: Exception) {
return e.message return e.message
} }
return null return null
} }
/** /**
@ -44,32 +44,32 @@ fun getExceptionMessage(action: () -> Unit): String? {
* @throws Throwable An exception occurred in the run method of the Runnable * @throws Throwable An exception occurred in the run method of the Runnable
*/ */
fun runAndWait(action: () -> Unit) { fun runAndWait(action: () -> Unit) {
if (Platform.isFxApplicationThread()) { if (Platform.isFxApplicationThread()) {
action() action()
} else { } else {
val lock = ReentrantLock() val lock = ReentrantLock()
lock.withLock { lock.withLock {
val doneCondition = lock.newCondition() val doneCondition = lock.newCondition()
var throwable: Throwable? = null var throwable: Throwable? = null
Platform.runLater { Platform.runLater {
lock.withLock { lock.withLock {
try { try {
action() action()
} catch (e: Throwable) { } catch (e: Throwable) {
throwable = e throwable = e
} finally { } finally {
doneCondition.signal() doneCondition.signal()
} }
} }
} }
doneCondition.await() doneCondition.await()
throwable?.let { throw it } throwable?.let { throw it }
} }
} }
} }
fun getStackTrace(e: Exception): String { fun getStackTrace(e: Exception): String {
val stringWriter = StringWriter() val stringWriter = StringWriter()
e.printStackTrace(PrintWriter(stringWriter)) e.printStackTrace(PrintWriter(stringWriter))
return stringWriter.toString() return stringWriter.toString()
} }

View File

@ -7,63 +7,63 @@ import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.api.Assertions.catchThrowable import org.assertj.core.api.Assertions.catchThrowable
class SpineJsonTest { class SpineJsonTest {
@Nested @Nested
inner class `file format 3_7` { inner class `file format 3_7` {
@Test @Test
fun `correctly reads valid file`() { fun `correctly reads valid file`() {
val path = Paths.get("src/test/data/jsonFiles/matt-3.7.json").toAbsolutePath() val path = Paths.get("src/test/data/jsonFiles/matt-3.7.json").toAbsolutePath()
val spine = SpineJson(path) val spine = SpineJson(path)
assertThat(spine.audioDirectoryPath) assertThat(spine.audioDirectoryPath)
.isEqualTo(Paths.get("src/test/data/jsonFiles/audio").toAbsolutePath()) .isEqualTo(Paths.get("src/test/data/jsonFiles/audio").toAbsolutePath())
assertThat(spine.frameRate).isEqualTo(30.0) assertThat(spine.frameRate).isEqualTo(30.0)
assertThat(spine.slots).containsExactly("legs", "torso", "head", "mouth") assertThat(spine.slots).containsExactly("legs", "torso", "head", "mouth")
assertThat(spine.guessMouthSlot()).isEqualTo("mouth") assertThat(spine.guessMouthSlot()).isEqualTo("mouth")
assertThat(spine.audioEvents).containsExactly( assertThat(spine.audioEvents).containsExactly(
SpineJson.AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null), SpineJson.AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null),
SpineJson.AudioEvent("2-it's-a-tool", "2-it's-a-tool.wav", null), SpineJson.AudioEvent("2-it's-a-tool", "2-it's-a-tool.wav", null),
SpineJson.AudioEvent("3-and-now-you-can", "3-and-now-you-can.wav", null) SpineJson.AudioEvent("3-and-now-you-can", "3-and-now-you-can.wav", null)
) )
assertThat(spine.getSlotAttachmentNames("mouth")).isEqualTo(('a'..'h').map{ "mouth_$it" }) assertThat(spine.getSlotAttachmentNames("mouth")).isEqualTo(('a'..'h').map{ "mouth_$it" })
assertThat(spine.animationNames).containsExactly("shake_head", "walk") assertThat(spine.animationNames).containsExactly("shake_head", "walk")
} }
@Test @Test
fun `throws on file without nonessential data`() { fun `throws on file without nonessential data`() {
val path = Paths.get("src/test/data/jsonFiles/matt-3.7-essential.json").toAbsolutePath() val path = Paths.get("src/test/data/jsonFiles/matt-3.7-essential.json").toAbsolutePath()
val throwable = catchThrowable { SpineJson(path) } val throwable = catchThrowable { SpineJson(path) }
assertThat(throwable) assertThat(throwable)
.hasMessage("JSON file is incomplete: Images path is missing. Make sure to check 'Nonessential data' when exporting.") .hasMessage("JSON file is incomplete: Images path is missing. Make sure to check 'Nonessential data' when exporting.")
} }
} }
@Nested @Nested
inner class `file format 3_8` { inner class `file format 3_8` {
@Test @Test
fun `correctly reads valid file`() { fun `correctly reads valid file`() {
val path = Paths.get("src/test/data/jsonFiles/matt-3.8.json").toAbsolutePath() val path = Paths.get("src/test/data/jsonFiles/matt-3.8.json").toAbsolutePath()
val spine = SpineJson(path) val spine = SpineJson(path)
assertThat(spine.audioDirectoryPath) assertThat(spine.audioDirectoryPath)
.isEqualTo(Paths.get("src/test/data/jsonFiles/audio").toAbsolutePath()) .isEqualTo(Paths.get("src/test/data/jsonFiles/audio").toAbsolutePath())
assertThat(spine.frameRate).isEqualTo(30.0) assertThat(spine.frameRate).isEqualTo(30.0)
assertThat(spine.slots).containsExactly("legs", "torso", "head", "mouth") assertThat(spine.slots).containsExactly("legs", "torso", "head", "mouth")
assertThat(spine.guessMouthSlot()).isEqualTo("mouth") assertThat(spine.guessMouthSlot()).isEqualTo("mouth")
assertThat(spine.audioEvents).containsExactly( assertThat(spine.audioEvents).containsExactly(
SpineJson.AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null), SpineJson.AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null),
SpineJson.AudioEvent("2-it's-a-tool", "2-it's-a-tool.wav", null), SpineJson.AudioEvent("2-it's-a-tool", "2-it's-a-tool.wav", null),
SpineJson.AudioEvent("3-and-now-you-can", "3-and-now-you-can.wav", null) SpineJson.AudioEvent("3-and-now-you-can", "3-and-now-you-can.wav", null)
) )
assertThat(spine.getSlotAttachmentNames("mouth")).isEqualTo(('a'..'h').map{ "mouth_$it" }) assertThat(spine.getSlotAttachmentNames("mouth")).isEqualTo(('a'..'h').map{ "mouth_$it" })
assertThat(spine.animationNames).containsExactly("shake_head", "walk") assertThat(spine.animationNames).containsExactly("shake_head", "walk")
} }
@Test @Test
fun `throws on file without nonessential data`() { fun `throws on file without nonessential data`() {
val path = Paths.get("src/test/data/jsonFiles/matt-3.8-essential.json").toAbsolutePath() val path = Paths.get("src/test/data/jsonFiles/matt-3.8-essential.json").toAbsolutePath()
val throwable = catchThrowable { SpineJson(path) } val throwable = catchThrowable { SpineJson(path) }
assertThat(throwable) assertThat(throwable)
.hasMessage("JSON file is incomplete: Images path is missing. Make sure to check 'Nonessential data' when exporting.") .hasMessage("JSON file is incomplete: Images path is missing. Make sure to check 'Nonessential data' when exporting.")
} }
} }
} }

View File

@ -1,14 +1,14 @@
cmake_minimum_required(VERSION 3.2) cmake_minimum_required(VERSION 3.2)
set(vegasFiles set(vegasFiles
"Debug Rhubarb.cs" "Debug Rhubarb.cs"
"Debug Rhubarb.cs.config" "Debug Rhubarb.cs.config"
"Import Rhubarb.cs" "Import Rhubarb.cs"
"Import Rhubarb.cs.config" "Import Rhubarb.cs.config"
"README.adoc" "README.adoc"
) )
install( install(
FILES ${vegasFiles} FILES ${vegasFiles}
DESTINATION "extras/MagixVegas" DESTINATION "extras/MagixVegas"
) )

View File

@ -17,329 +17,329 @@ using ScriptPortal.Vegas; // For older versions, this should say Sony.Vegas
using Region = ScriptPortal.Vegas.Region; // For older versions, this should say Sony.Vegas.Region using Region = ScriptPortal.Vegas.Region; // For older versions, this should say Sony.Vegas.Region
public class EntryPoint { public class EntryPoint {
public void FromVegas(Vegas vegas) { public void FromVegas(Vegas vegas) {
Config config = Config.Load(); Config config = Config.Load();
ImportDialog importDialog = new ImportDialog(config, delegate { Import(config, vegas); }); ImportDialog importDialog = new ImportDialog(config, delegate { Import(config, vegas); });
importDialog.ShowDialog(); importDialog.ShowDialog();
config.Save(); config.Save();
} }
private void Import(Config config, Vegas vegas) { private void Import(Config config, Vegas vegas) {
Project project = vegas.Project; Project project = vegas.Project;
// Clear markers and regions // Clear markers and regions
if (config.ClearMarkers) { if (config.ClearMarkers) {
project.Markers.Clear(); project.Markers.Clear();
} }
if (config.ClearRegions) { if (config.ClearRegions) {
project.Regions.Clear(); project.Regions.Clear();
} }
// Load log file // Load log file
if (!File.Exists(config.LogFile)) { if (!File.Exists(config.LogFile)) {
throw new Exception("Log file does not exist."); throw new Exception("Log file does not exist.");
} }
Dictionary<EventType, List<TimedEvent>> timedEvents = ParseLogFile(config); Dictionary<EventType, List<TimedEvent>> timedEvents = ParseLogFile(config);
// Add markers/regions // Add markers/regions
foreach (EventType eventType in timedEvents.Keys) { foreach (EventType eventType in timedEvents.Keys) {
foreach (Visualization visualization in config.Visualizations) { foreach (Visualization visualization in config.Visualizations) {
if (visualization.EventType != eventType) continue; if (visualization.EventType != eventType) continue;
List<TimedEvent> filteredEvents = FilterEvents(timedEvents[eventType], visualization.Regex); List<TimedEvent> filteredEvents = FilterEvents(timedEvents[eventType], visualization.Regex);
foreach (TimedEvent timedEvent in filteredEvents) { foreach (TimedEvent timedEvent in filteredEvents) {
Timecode start = Timecode.FromSeconds(timedEvent.Start); Timecode start = Timecode.FromSeconds(timedEvent.Start);
Timecode end = Timecode.FromSeconds(timedEvent.End); Timecode end = Timecode.FromSeconds(timedEvent.End);
Timecode length = end - start; Timecode length = end - start;
if (config.LoopRegionOnly) { if (config.LoopRegionOnly) {
Timecode loopRegionStart = vegas.Transport.LoopRegionStart; Timecode loopRegionStart = vegas.Transport.LoopRegionStart;
Timecode loopRegionEnd = loopRegionStart + vegas.Transport.LoopRegionLength; Timecode loopRegionEnd = loopRegionStart + vegas.Transport.LoopRegionLength;
if (start < loopRegionStart || start > loopRegionEnd || end < loopRegionStart || end > loopRegionEnd) { if (start < loopRegionStart || start > loopRegionEnd || end < loopRegionStart || end > loopRegionEnd) {
continue; continue;
} }
} }
switch (visualization.VisualizationType) { switch (visualization.VisualizationType) {
case VisualizationType.Marker: case VisualizationType.Marker:
project.Markers.Add(new Marker(start, timedEvent.Value)); project.Markers.Add(new Marker(start, timedEvent.Value));
break; break;
case VisualizationType.Region: case VisualizationType.Region:
project.Regions.Add(new Region(start, length, timedEvent.Value)); project.Regions.Add(new Region(start, length, timedEvent.Value));
break; break;
} }
} }
} }
} }
} }
private List<TimedEvent> FilterEvents(List<TimedEvent> timedEvents, Regex filterRegex) { private List<TimedEvent> FilterEvents(List<TimedEvent> timedEvents, Regex filterRegex) {
if (filterRegex == null) return timedEvents; if (filterRegex == null) return timedEvents;
StringBuilder stringBuilder = new StringBuilder(); StringBuilder stringBuilder = new StringBuilder();
Dictionary<int, TimedEvent> timedEventsByCharPosition = new Dictionary<int, TimedEvent>(); Dictionary<int, TimedEvent> timedEventsByCharPosition = new Dictionary<int, TimedEvent>();
foreach (TimedEvent timedEvent in timedEvents) { foreach (TimedEvent timedEvent in timedEvents) {
string inAngleBrackets = "<" + timedEvent.Value + ">"; string inAngleBrackets = "<" + timedEvent.Value + ">";
for (int charPosition = stringBuilder.Length; for (int charPosition = stringBuilder.Length;
charPosition < stringBuilder.Length + inAngleBrackets.Length; charPosition < stringBuilder.Length + inAngleBrackets.Length;
charPosition++) { charPosition++) {
timedEventsByCharPosition[charPosition] = timedEvent; timedEventsByCharPosition[charPosition] = timedEvent;
} }
stringBuilder.Append(inAngleBrackets); stringBuilder.Append(inAngleBrackets);
} }
MatchCollection matches = filterRegex.Matches(stringBuilder.ToString()); MatchCollection matches = filterRegex.Matches(stringBuilder.ToString());
List<TimedEvent> result = new List<TimedEvent>(); List<TimedEvent> result = new List<TimedEvent>();
foreach (Match match in matches) { foreach (Match match in matches) {
if (match.Length == 0) continue; if (match.Length == 0) continue;
for (int charPosition = match.Index; charPosition < match.Index + match.Length; charPosition++) { for (int charPosition = match.Index; charPosition < match.Index + match.Length; charPosition++) {
TimedEvent matchedEvent = timedEventsByCharPosition[charPosition]; TimedEvent matchedEvent = timedEventsByCharPosition[charPosition];
if (!result.Contains(matchedEvent)) { if (!result.Contains(matchedEvent)) {
result.Add(matchedEvent); result.Add(matchedEvent);
} }
} }
} }
return result; return result;
} }
private static Dictionary<EventType, List<TimedEvent>> ParseLogFile(Config config) { private static Dictionary<EventType, List<TimedEvent>> ParseLogFile(Config config) {
string[] lines = File.ReadAllLines(config.LogFile); string[] lines = File.ReadAllLines(config.LogFile);
Regex structuredLogLine = new Regex(@"##(\w+)\[(\d*\.\d*)-(\d*\.\d*)\]: (.*)"); Regex structuredLogLine = new Regex(@"##(\w+)\[(\d*\.\d*)-(\d*\.\d*)\]: (.*)");
Dictionary<EventType, List<TimedEvent>> timedEvents = new Dictionary<EventType, List<TimedEvent>>(); Dictionary<EventType, List<TimedEvent>> timedEvents = new Dictionary<EventType, List<TimedEvent>>();
foreach (string line in lines) { foreach (string line in lines) {
Match match = structuredLogLine.Match(line); Match match = structuredLogLine.Match(line);
if (!match.Success) continue; if (!match.Success) continue;
EventType eventType = (EventType) Enum.Parse(typeof(EventType), match.Groups[1].Value, true); EventType eventType = (EventType) Enum.Parse(typeof(EventType), match.Groups[1].Value, true);
double start = double.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture); double start = double.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture);
double end = double.Parse(match.Groups[3].Value, CultureInfo.InvariantCulture); double end = double.Parse(match.Groups[3].Value, CultureInfo.InvariantCulture);
string value = match.Groups[4].Value; string value = match.Groups[4].Value;
if (!timedEvents.ContainsKey(eventType)) { if (!timedEvents.ContainsKey(eventType)) {
timedEvents[eventType] = new List<TimedEvent>(); timedEvents[eventType] = new List<TimedEvent>();
} }
timedEvents[eventType].Add(new TimedEvent(eventType, start, end, value)); timedEvents[eventType].Add(new TimedEvent(eventType, start, end, value));
} }
return timedEvents; return timedEvents;
} }
} }
public class TimedEvent { public class TimedEvent {
private readonly EventType eventType; private readonly EventType eventType;
private readonly double start; private readonly double start;
private readonly double end; private readonly double end;
private readonly string value; private readonly string value;
public TimedEvent(EventType eventType, double start, double end, string value) { public TimedEvent(EventType eventType, double start, double end, string value) {
this.eventType = eventType; this.eventType = eventType;
this.start = start; this.start = start;
this.end = end; this.end = end;
this.value = value; this.value = value;
} }
public EventType EventType { public EventType EventType {
get { return eventType; } get { return eventType; }
} }
public double Start { public double Start {
get { return start; } get { return start; }
} }
public double End { public double End {
get { return end; } get { return end; }
} }
public string Value { public string Value {
get { return value; } get { return value; }
} }
} }
public class Config { public class Config {
private string logFile; private string logFile;
private bool clearMarkers; private bool clearMarkers;
private bool clearRegions; private bool clearRegions;
private bool loopRegionOnly; private bool loopRegionOnly;
private List<Visualization> visualizations = new List<Visualization>(); private List<Visualization> visualizations = new List<Visualization>();
[DisplayName("Log File")] [DisplayName("Log File")]
[Description("A log file generated by Rhubarb Lip Sync.")] [Description("A log file generated by Rhubarb Lip Sync.")]
[Editor(typeof(FileNameEditor), typeof(UITypeEditor))] [Editor(typeof(FileNameEditor), typeof(UITypeEditor))]
public string LogFile { public string LogFile {
get { return logFile; } get { return logFile; }
set { logFile = value; } set { logFile = value; }
} }
[DisplayName("Clear Markers")] [DisplayName("Clear Markers")]
[Description("Clear all markers in the current project.")] [Description("Clear all markers in the current project.")]
public bool ClearMarkers { public bool ClearMarkers {
get { return clearMarkers; } get { return clearMarkers; }
set { clearMarkers = value; } set { clearMarkers = value; }
} }
[DisplayName("Clear Regions")] [DisplayName("Clear Regions")]
[Description("Clear all regions in the current project.")] [Description("Clear all regions in the current project.")]
public bool ClearRegions { public bool ClearRegions {
get { return clearRegions; } get { return clearRegions; }
set { clearRegions = value; } set { clearRegions = value; }
} }
[DisplayName("Loop region only")] [DisplayName("Loop region only")]
[Description("Adds regions or markers to the loop region only.")] [Description("Adds regions or markers to the loop region only.")]
public bool LoopRegionOnly { public bool LoopRegionOnly {
get { return loopRegionOnly; } get { return loopRegionOnly; }
set { loopRegionOnly = value; } set { loopRegionOnly = value; }
} }
[DisplayName("Visualization rules")] [DisplayName("Visualization rules")]
[Description("Specify how to visualize various log events.")] [Description("Specify how to visualize various log events.")]
[Editor(typeof(CollectionEditor), typeof(UITypeEditor))] [Editor(typeof(CollectionEditor), typeof(UITypeEditor))]
[XmlIgnore] [XmlIgnore]
public List<Visualization> Visualizations { public List<Visualization> Visualizations {
get { return visualizations; } get { return visualizations; }
set { visualizations = value; } set { visualizations = value; }
} }
[Browsable(false)] [Browsable(false)]
public Visualization[] VisualizationArray { public Visualization[] VisualizationArray {
get { return visualizations.ToArray(); } get { return visualizations.ToArray(); }
set { visualizations = new List<Visualization>(value); } set { visualizations = new List<Visualization>(value); }
} }
private static string ConfigFileName { private static string ConfigFileName {
get { get {
string folder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData); string folder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
return Path.Combine(folder, "DebugRhubarbSettings.xml"); return Path.Combine(folder, "DebugRhubarbSettings.xml");
} }
} }
public static Config Load() { public static Config Load() {
try { try {
XmlSerializer serializer = new XmlSerializer(typeof(Config)); XmlSerializer serializer = new XmlSerializer(typeof(Config));
using (FileStream file = File.OpenRead(ConfigFileName)) { using (FileStream file = File.OpenRead(ConfigFileName)) {
return (Config) serializer.Deserialize(file); return (Config) serializer.Deserialize(file);
} }
} catch (Exception) { } catch (Exception) {
return new Config(); return new Config();
} }
} }
public void Save() { public void Save() {
XmlSerializer serializer = new XmlSerializer(typeof(Config)); XmlSerializer serializer = new XmlSerializer(typeof(Config));
using (StreamWriter file = File.CreateText(ConfigFileName)) { using (StreamWriter file = File.CreateText(ConfigFileName)) {
XmlWriterSettings settings = new XmlWriterSettings(); XmlWriterSettings settings = new XmlWriterSettings();
settings.Indent = true; settings.Indent = true;
settings.IndentChars = "\t"; settings.IndentChars = "\t";
using (XmlWriter writer = XmlWriter.Create(file, settings)) { using (XmlWriter writer = XmlWriter.Create(file, settings)) {
serializer.Serialize(writer, this); serializer.Serialize(writer, this);
} }
} }
} }
} }
public class Visualization { public class Visualization {
private EventType eventType; private EventType eventType;
private string regexString; private string regexString;
private VisualizationType visualizationType = VisualizationType.Marker; private VisualizationType visualizationType = VisualizationType.Marker;
[DisplayName("Event Type")] [DisplayName("Event Type")]
[Description("The type of event to visualize.")] [Description("The type of event to visualize.")]
public EventType EventType { public EventType EventType {
get { return eventType; } get { return eventType; }
set { eventType = value; } set { eventType = value; }
} }
[DisplayName("Regular Expression")] [DisplayName("Regular Expression")]
[Description("A regular expression used to filter events. Leave empty to disable filtering.\nInput is a string of events in angle brackets. Example: '<AO>(?=<T>)' finds every AO phone followed by a T phone.")] [Description("A regular expression used to filter events. Leave empty to disable filtering.\nInput is a string of events in angle brackets. Example: '<AO>(?=<T>)' finds every AO phone followed by a T phone.")]
public string RegexString { public string RegexString {
get { return regexString; } get { return regexString; }
set { regexString = value; } set { regexString = value; }
} }
[Browsable(false)] [Browsable(false)]
public Regex Regex { public Regex Regex {
get { return string.IsNullOrEmpty(RegexString) ? null : new Regex(RegexString); } get { return string.IsNullOrEmpty(RegexString) ? null : new Regex(RegexString); }
} }
[DisplayName("Visualization Type")] [DisplayName("Visualization Type")]
[Description("Specify how to visualize events.")] [Description("Specify how to visualize events.")]
public VisualizationType VisualizationType { public VisualizationType VisualizationType {
get { return visualizationType; } get { return visualizationType; }
set { visualizationType = value; } set { visualizationType = value; }
} }
public override string ToString() { public override string ToString() {
return string.Format("{0} -> {1}", EventType, VisualizationType); return string.Format("{0} -> {1}", EventType, VisualizationType);
} }
} }
public enum EventType { public enum EventType {
Utterance, Utterance,
Word, Word,
RawPhone, RawPhone,
Phone, Phone,
Shape, Shape,
Segment Segment
} }
public enum VisualizationType { public enum VisualizationType {
None, None,
Marker, Marker,
Region Region
} }
public delegate void ImportAction(); public delegate void ImportAction();
public class ImportDialog : Form { public class ImportDialog : Form {
private readonly Config config; private readonly Config config;
private readonly ImportAction import; private readonly ImportAction import;
public ImportDialog(Config config, ImportAction import) { public ImportDialog(Config config, ImportAction import) {
this.config = config; this.config = config;
this.import = import; this.import = import;
SuspendLayout(); SuspendLayout();
InitializeComponent(); InitializeComponent();
ResumeLayout(false); ResumeLayout(false);
} }
private void InitializeComponent() { private void InitializeComponent() {
// Configure dialog // Configure dialog
Text = "Debug Rhubarb"; Text = "Debug Rhubarb";
Size = new Size(600, 400); Size = new Size(600, 400);
Font = new Font(Font.FontFamily, 10); Font = new Font(Font.FontFamily, 10);
// Add property grid // Add property grid
PropertyGrid propertyGrid1 = new PropertyGrid(); PropertyGrid propertyGrid1 = new PropertyGrid();
propertyGrid1.SelectedObject = config; propertyGrid1.SelectedObject = config;
Controls.Add(propertyGrid1); Controls.Add(propertyGrid1);
propertyGrid1.Dock = DockStyle.Fill; propertyGrid1.Dock = DockStyle.Fill;
// Add button panel // Add button panel
FlowLayoutPanel buttonPanel = new FlowLayoutPanel(); FlowLayoutPanel buttonPanel = new FlowLayoutPanel();
buttonPanel.FlowDirection = FlowDirection.RightToLeft; buttonPanel.FlowDirection = FlowDirection.RightToLeft;
buttonPanel.AutoSize = true; buttonPanel.AutoSize = true;
buttonPanel.Dock = DockStyle.Bottom; buttonPanel.Dock = DockStyle.Bottom;
Controls.Add(buttonPanel); Controls.Add(buttonPanel);
// Add Cancel button // Add Cancel button
Button cancelButton1 = new Button(); Button cancelButton1 = new Button();
cancelButton1.Text = "Cancel"; cancelButton1.Text = "Cancel";
cancelButton1.DialogResult = DialogResult.Cancel; cancelButton1.DialogResult = DialogResult.Cancel;
buttonPanel.Controls.Add(cancelButton1); buttonPanel.Controls.Add(cancelButton1);
CancelButton = cancelButton1; CancelButton = cancelButton1;
// Add OK button // Add OK button
Button okButton1 = new Button(); Button okButton1 = new Button();
okButton1.Text = "OK"; okButton1.Text = "OK";
okButton1.Click += OkButtonClickedHandler; okButton1.Click += OkButtonClickedHandler;
buttonPanel.Controls.Add(okButton1); buttonPanel.Controls.Add(okButton1);
AcceptButton = okButton1; AcceptButton = okButton1;
} }
private void OkButtonClickedHandler(object sender, EventArgs e) { private void OkButtonClickedHandler(object sender, EventArgs e) {
try { try {
import(); import();
DialogResult = DialogResult.OK; DialogResult = DialogResult.OK;
} catch (Exception exception) { } catch (Exception exception) {
MessageBox.Show(exception.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error); MessageBox.Show(exception.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
} }
} }
} }

View File

@ -14,161 +14,161 @@ using System.Xml.Serialization;
using ScriptPortal.Vegas; // For older versions, this should say Sony.Vegas using ScriptPortal.Vegas; // For older versions, this should say Sony.Vegas
public class EntryPoint { public class EntryPoint {
public void FromVegas(Vegas vegas) { public void FromVegas(Vegas vegas) {
Config config = Config.Load(); Config config = Config.Load();
ImportDialog importDialog = new ImportDialog(config, delegate { Import(config, vegas); }); ImportDialog importDialog = new ImportDialog(config, delegate { Import(config, vegas); });
importDialog.ShowDialog(); importDialog.ShowDialog();
config.Save(); config.Save();
} }
private void Import(Config config, Vegas vegas) { private void Import(Config config, Vegas vegas) {
// Load XML file // Load XML file
if (!File.Exists(config.XmlFile)) { if (!File.Exists(config.XmlFile)) {
throw new Exception("XML file does not exist."); throw new Exception("XML file does not exist.");
} }
XmlDocument xmlDocument = new XmlDocument(); XmlDocument xmlDocument = new XmlDocument();
xmlDocument.Load(config.XmlFile); xmlDocument.Load(config.XmlFile);
// Determine image file names // Determine image file names
XmlNodeList mouthCueElements = xmlDocument.SelectNodes("//mouthCue"); XmlNodeList mouthCueElements = xmlDocument.SelectNodes("//mouthCue");
List<string> shapeNames = new List<string>(); List<string> shapeNames = new List<string>();
foreach (XmlElement mouthCueElement in mouthCueElements) { foreach (XmlElement mouthCueElement in mouthCueElements) {
if (!shapeNames.Contains(mouthCueElement.InnerText)) { if (!shapeNames.Contains(mouthCueElement.InnerText)) {
shapeNames.Add(mouthCueElement.InnerText); shapeNames.Add(mouthCueElement.InnerText);
} }
} }
Dictionary<string, string> imageFileNames = GetImageFileNames(config.OneImageFile, shapeNames.ToArray()); Dictionary<string, string> imageFileNames = GetImageFileNames(config.OneImageFile, shapeNames.ToArray());
// Create new project // Create new project
bool promptSave = !config.DiscardChanges; bool promptSave = !config.DiscardChanges;
bool showDialog = false; bool showDialog = false;
Project project = new Project(promptSave, showDialog); Project project = new Project(promptSave, showDialog);
// Set frame size // Set frame size
Bitmap testImage = new Bitmap(config.OneImageFile); Bitmap testImage = new Bitmap(config.OneImageFile);
project.Video.Width = testImage.Width; project.Video.Width = testImage.Width;
project.Video.Height = testImage.Height; project.Video.Height = testImage.Height;
// Set frame rate // Set frame rate
if (config.FrameRate < 0.1 || config.FrameRate > 100) { if (config.FrameRate < 0.1 || config.FrameRate > 100) {
throw new Exception("Invalid frame rate."); throw new Exception("Invalid frame rate.");
} }
project.Video.FrameRate = config.FrameRate; project.Video.FrameRate = config.FrameRate;
// Set other video settings // Set other video settings
project.Video.FieldOrder = VideoFieldOrder.ProgressiveScan; project.Video.FieldOrder = VideoFieldOrder.ProgressiveScan;
project.Video.PixelAspectRatio = 1; project.Video.PixelAspectRatio = 1;
// Add video track with images // Add video track with images
VideoTrack videoTrack = vegas.Project.AddVideoTrack(); VideoTrack videoTrack = vegas.Project.AddVideoTrack();
foreach (XmlElement mouthCueElement in mouthCueElements) { foreach (XmlElement mouthCueElement in mouthCueElements) {
Timecode start = GetTimecode(mouthCueElement.Attributes["start"]); Timecode start = GetTimecode(mouthCueElement.Attributes["start"]);
Timecode length = GetTimecode(mouthCueElement.Attributes["end"]) - start; Timecode length = GetTimecode(mouthCueElement.Attributes["end"]) - start;
VideoEvent videoEvent = videoTrack.AddVideoEvent(start, length); VideoEvent videoEvent = videoTrack.AddVideoEvent(start, length);
Media imageMedia = new Media(imageFileNames[mouthCueElement.InnerText]); Media imageMedia = new Media(imageFileNames[mouthCueElement.InnerText]);
videoEvent.AddTake(imageMedia.GetVideoStreamByIndex(0)); videoEvent.AddTake(imageMedia.GetVideoStreamByIndex(0));
} }
// Add audio track with original sound file // Add audio track with original sound file
AudioTrack audioTrack = vegas.Project.AddAudioTrack(); AudioTrack audioTrack = vegas.Project.AddAudioTrack();
Media audioMedia = new Media(xmlDocument.SelectSingleNode("//soundFile").InnerText); Media audioMedia = new Media(xmlDocument.SelectSingleNode("//soundFile").InnerText);
AudioEvent audioEvent = audioTrack.AddAudioEvent(new Timecode(0), audioMedia.Length); AudioEvent audioEvent = audioTrack.AddAudioEvent(new Timecode(0), audioMedia.Length);
audioEvent.AddTake(audioMedia.GetAudioStreamByIndex(0)); audioEvent.AddTake(audioMedia.GetAudioStreamByIndex(0));
} }
private static Timecode GetTimecode(XmlAttribute valueAttribute) { private static Timecode GetTimecode(XmlAttribute valueAttribute) {
double seconds = Double.Parse(valueAttribute.Value, CultureInfo.InvariantCulture); double seconds = Double.Parse(valueAttribute.Value, CultureInfo.InvariantCulture);
return Timecode.FromSeconds(seconds); return Timecode.FromSeconds(seconds);
} }
private Dictionary<string, string> GetImageFileNames(string oneImageFile, string[] shapeNames) { private Dictionary<string, string> GetImageFileNames(string oneImageFile, string[] shapeNames) {
if (oneImageFile == null) { if (oneImageFile == null) {
throw new Exception("Image file name not set."); throw new Exception("Image file name not set.");
} }
Regex nameRegex = new Regex(@"(?<=-)([^-]*)(?=\.[^.]+$)"); Regex nameRegex = new Regex(@"(?<=-)([^-]*)(?=\.[^.]+$)");
if (!nameRegex.IsMatch(oneImageFile)) { if (!nameRegex.IsMatch(oneImageFile)) {
throw new Exception("Image file name doesn't have expected format."); throw new Exception("Image file name doesn't have expected format.");
} }
Dictionary<string, string> result = new Dictionary<string, string>(); Dictionary<string, string> result = new Dictionary<string, string>();
foreach (string shapeName in shapeNames) { foreach (string shapeName in shapeNames) {
string imageFileName = nameRegex.Replace(oneImageFile, shapeName); string imageFileName = nameRegex.Replace(oneImageFile, shapeName);
if (!File.Exists(imageFileName)) { if (!File.Exists(imageFileName)) {
throw new Exception(string.Format("Image file '{0}' not found.", imageFileName)); throw new Exception(string.Format("Image file '{0}' not found.", imageFileName));
} }
result[shapeName] = imageFileName; result[shapeName] = imageFileName;
} }
return result; return result;
} }
} }
public class Config { public class Config {
private string xmlFile; private string xmlFile;
private string oneImageFile; private string oneImageFile;
private double frameRate = 100; private double frameRate = 100;
private bool discardChanges = false; private bool discardChanges = false;
[DisplayName("XML File")] [DisplayName("XML File")]
[Description("An XML file generated by Rhubarb Lip Sync.")] [Description("An XML file generated by Rhubarb Lip Sync.")]
[Editor(typeof(XmlFileEditor), typeof(UITypeEditor))] [Editor(typeof(XmlFileEditor), typeof(UITypeEditor))]
public string XmlFile { public string XmlFile {
get { return xmlFile; } get { return xmlFile; }
set { xmlFile = value; } set { xmlFile = value; }
} }
[DisplayName("One image file")] [DisplayName("One image file")]
[Description("Any image file out of the set of image files representing the mouth chart.")] [Description("Any image file out of the set of image files representing the mouth chart.")]
[Editor(typeof(FileNameEditor), typeof(UITypeEditor))] [Editor(typeof(FileNameEditor), typeof(UITypeEditor))]
public string OneImageFile { public string OneImageFile {
get { return oneImageFile; } get { return oneImageFile; }
set { oneImageFile = value; } set { oneImageFile = value; }
} }
[DisplayName("Frame rate")] [DisplayName("Frame rate")]
[Description("The frame rate for the new project.")] [Description("The frame rate for the new project.")]
public double FrameRate { public double FrameRate {
get { return frameRate; } get { return frameRate; }
set { frameRate = value; } set { frameRate = value; }
} }
[DisplayName("Discard Changes")] [DisplayName("Discard Changes")]
[Description("Discard all changes to the current project without prompting to save.")] [Description("Discard all changes to the current project without prompting to save.")]
public bool DiscardChanges { public bool DiscardChanges {
get { return discardChanges; } get { return discardChanges; }
set { discardChanges = value; } set { discardChanges = value; }
} }
private static string ConfigFileName { private static string ConfigFileName {
get { get {
string folder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData); string folder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
return Path.Combine(folder, "ImportRhubarbSettings.xml"); return Path.Combine(folder, "ImportRhubarbSettings.xml");
} }
} }
public static Config Load() { public static Config Load() {
try { try {
XmlSerializer serializer = new XmlSerializer(typeof(Config)); XmlSerializer serializer = new XmlSerializer(typeof(Config));
using (FileStream file = File.OpenRead(ConfigFileName)) { using (FileStream file = File.OpenRead(ConfigFileName)) {
return (Config) serializer.Deserialize(file); return (Config) serializer.Deserialize(file);
} }
} catch (Exception) { } catch (Exception) {
return new Config(); return new Config();
} }
} }
public void Save() { public void Save() {
XmlSerializer serializer = new XmlSerializer(typeof(Config)); XmlSerializer serializer = new XmlSerializer(typeof(Config));
using (StreamWriter file = File.CreateText(ConfigFileName)) { using (StreamWriter file = File.CreateText(ConfigFileName)) {
XmlWriterSettings settings = new XmlWriterSettings(); XmlWriterSettings settings = new XmlWriterSettings();
settings.Indent = true; settings.Indent = true;
settings.IndentChars = "\t"; settings.IndentChars = "\t";
using (XmlWriter writer = XmlWriter.Create(file, settings)) { using (XmlWriter writer = XmlWriter.Create(file, settings)) {
serializer.Serialize(writer, this); serializer.Serialize(writer, this);
} }
} }
} }
} }
@ -176,58 +176,58 @@ public delegate void ImportAction();
public class ImportDialog : Form { public class ImportDialog : Form {
private readonly Config config; private readonly Config config;
private readonly ImportAction import; private readonly ImportAction import;
public ImportDialog(Config config, ImportAction import) { public ImportDialog(Config config, ImportAction import) {
this.config = config; this.config = config;
this.import = import; this.import = import;
SuspendLayout(); SuspendLayout();
InitializeComponent(); InitializeComponent();
ResumeLayout(false); ResumeLayout(false);
} }
private void InitializeComponent() { private void InitializeComponent() {
// Configure dialog // Configure dialog
Text = "Import Rhubarb"; Text = "Import Rhubarb";
Size = new Size(600, 400); Size = new Size(600, 400);
Font = new Font(Font.FontFamily, 10); Font = new Font(Font.FontFamily, 10);
// Add property grid // Add property grid
PropertyGrid propertyGrid1 = new PropertyGrid(); PropertyGrid propertyGrid1 = new PropertyGrid();
propertyGrid1.SelectedObject = config; propertyGrid1.SelectedObject = config;
Controls.Add(propertyGrid1); Controls.Add(propertyGrid1);
propertyGrid1.Dock = DockStyle.Fill; propertyGrid1.Dock = DockStyle.Fill;
// Add button panel // Add button panel
FlowLayoutPanel buttonPanel = new FlowLayoutPanel(); FlowLayoutPanel buttonPanel = new FlowLayoutPanel();
buttonPanel.FlowDirection = FlowDirection.RightToLeft; buttonPanel.FlowDirection = FlowDirection.RightToLeft;
buttonPanel.AutoSize = true; buttonPanel.AutoSize = true;
buttonPanel.Dock = DockStyle.Bottom; buttonPanel.Dock = DockStyle.Bottom;
Controls.Add(buttonPanel); Controls.Add(buttonPanel);
// Add Cancel button // Add Cancel button
Button cancelButton1 = new Button(); Button cancelButton1 = new Button();
cancelButton1.Text = "Cancel"; cancelButton1.Text = "Cancel";
cancelButton1.DialogResult = DialogResult.Cancel; cancelButton1.DialogResult = DialogResult.Cancel;
buttonPanel.Controls.Add(cancelButton1); buttonPanel.Controls.Add(cancelButton1);
CancelButton = cancelButton1; CancelButton = cancelButton1;
// Add OK button // Add OK button
Button okButton1 = new Button(); Button okButton1 = new Button();
okButton1.Text = "OK"; okButton1.Text = "OK";
okButton1.Click += OkButtonClickedHandler; okButton1.Click += OkButtonClickedHandler;
buttonPanel.Controls.Add(okButton1); buttonPanel.Controls.Add(okButton1);
AcceptButton = okButton1; AcceptButton = okButton1;
} }
private void OkButtonClickedHandler(object sender, EventArgs e) { private void OkButtonClickedHandler(object sender, EventArgs e) {
try { try {
import(); import();
DialogResult = DialogResult.OK; DialogResult = DialogResult.OK;
} catch (Exception exception) { } catch (Exception exception) {
MessageBox.Show(exception.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error); MessageBox.Show(exception.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
} }
} }
} }

View File

@ -11,37 +11,37 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Enable POSIX threads # Enable POSIX threads
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
endif() endif()
# Use static run-time # Use static run-time
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_compile_options(/MT$<$<CONFIG:Debug>:d>) add_compile_options(/MT$<$<CONFIG:Debug>:d>)
endif() endif()
# Set global flags and define flags variables for later use # Set global flags and define flags variables for later use
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(enableWarningsFlags "-Wall;-Wextra") set(enableWarningsFlags "-Wall;-Wextra")
set(disableWarningsFlags "-w") set(disableWarningsFlags "-w")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
set(enableWarningsFlags "/W4") set(enableWarningsFlags "/W4")
set(disableWarningsFlags "/W0") set(disableWarningsFlags "/W0")
# Disable warning C4456: declaration of '...' hides previous local declaration # Disable warning C4456: declaration of '...' hides previous local declaration
# I'm doing that on purpose. # I'm doing that on purpose.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4458") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4458")
# Assume UTF-8 encoding for source files and encode string constants in UTF-8 # Assume UTF-8 encoding for source files and encode string constants in UTF-8
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")
endif() endif()
# Use UTF-8 throughout # Use UTF-8 throughout
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_compile_options("/utf-8") add_compile_options("/utf-8")
endif() endif()
if(${UNIX}) if(${UNIX})
add_definitions(-DHAVE_UNISTD_H) add_definitions(-DHAVE_UNISTD_H)
endif() endif()
# Enable project folders # Enable project folders
@ -69,9 +69,9 @@ set_target_properties(cppFormat PROPERTIES FOLDER lib)
FILE(GLOB_RECURSE sphinxbaseFiles "lib/sphinxbase-rev13216/src/libsphinxbase/*.c") FILE(GLOB_RECURSE sphinxbaseFiles "lib/sphinxbase-rev13216/src/libsphinxbase/*.c")
add_library(sphinxbase ${sphinxbaseFiles}) add_library(sphinxbase ${sphinxbaseFiles})
target_include_directories(sphinxbase SYSTEM PUBLIC target_include_directories(sphinxbase SYSTEM PUBLIC
"lib/sphinxbase-rev13216/include" "lib/sphinxbase-rev13216/include"
"lib/sphinxbase-rev13216/src" "lib/sphinxbase-rev13216/src"
"lib/sphinx_config" "lib/sphinx_config"
) )
target_compile_options(sphinxbase PRIVATE ${disableWarningsFlags}) target_compile_options(sphinxbase PRIVATE ${disableWarningsFlags})
target_compile_definitions(sphinxbase PUBLIC __SPHINXBASE_EXPORT_H__=1 SPHINXBASE_EXPORT=) # Compile as static lib target_compile_definitions(sphinxbase PUBLIC __SPHINXBASE_EXPORT_H__=1 SPHINXBASE_EXPORT=) # Compile as static lib
@ -81,8 +81,8 @@ set_target_properties(sphinxbase PROPERTIES FOLDER lib)
FILE(GLOB pocketSphinxFiles "lib/pocketsphinx-rev13216/src/libpocketsphinx/*.c") FILE(GLOB pocketSphinxFiles "lib/pocketsphinx-rev13216/src/libpocketsphinx/*.c")
add_library(pocketSphinx ${pocketSphinxFiles}) add_library(pocketSphinx ${pocketSphinxFiles})
target_include_directories(pocketSphinx SYSTEM PUBLIC target_include_directories(pocketSphinx SYSTEM PUBLIC
"lib/pocketsphinx-rev13216/include" "lib/pocketsphinx-rev13216/include"
"lib/pocketsphinx-rev13216/src/libpocketsphinx" "lib/pocketsphinx-rev13216/src/libpocketsphinx"
) )
target_link_libraries(pocketSphinx sphinxbase) target_link_libraries(pocketSphinx sphinxbase)
target_compile_options(pocketSphinx PRIVATE ${disableWarningsFlags}) target_compile_options(pocketSphinx PRIVATE ${disableWarningsFlags})
@ -108,23 +108,23 @@ include_directories(SYSTEM "lib/gsl/include")
# ... WebRTC # ... WebRTC
set(webRtcFiles set(webRtcFiles
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/cross_correlation.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/cross_correlation.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/division_operations.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/division_operations.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/downsample_fast.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/downsample_fast.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/energy.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/energy.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/get_scaling_square.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/get_scaling_square.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/min_max_operations.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/min_max_operations.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_48khz.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_48khz.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_by_2_internal.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_by_2_internal.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_fractional.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_fractional.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/spl_init.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/spl_init.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/spl_inl.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/spl_inl.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/vector_scaling_operations.c lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/vector_scaling_operations.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_core.c lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_core.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_filterbank.c lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_filterbank.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_gmm.c lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_gmm.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_sp.c lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_sp.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/webrtc_vad.c lib/webrtc-8d2248ff/webrtc/common_audio/vad/webrtc_vad.c
) )
add_library(webRtc ${webRtcFiles}) add_library(webRtc ${webRtcFiles})
target_include_directories(webRtc SYSTEM PUBLIC "lib/webrtc-8d2248ff") target_include_directories(webRtc SYSTEM PUBLIC "lib/webrtc-8d2248ff")
@ -133,7 +133,7 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
target_compile_options(webRtc PRIVATE -pthread -lpthread) target_compile_options(webRtc PRIVATE -pthread -lpthread)
endif() endif()
if (NOT WIN32) if (NOT WIN32)
target_compile_definitions(webRtc PRIVATE WEBRTC_POSIX) target_compile_definitions(webRtc PRIVATE WEBRTC_POSIX)
endif() endif()
set_target_properties(webRtc PROPERTIES FOLDER lib) set_target_properties(webRtc PROPERTIES FOLDER lib)
@ -144,76 +144,76 @@ set_target_properties(whereami PROPERTIES FOLDER lib)
# ... Flite # ... Flite
set(fliteFiles set(fliteFiles
lib/flite-1.4/lang/cmulex/cmu_lex.c lib/flite-1.4/lang/cmulex/cmu_lex.c
lib/flite-1.4/lang/cmulex/cmu_lex_data.c lib/flite-1.4/lang/cmulex/cmu_lex_data.c
lib/flite-1.4/lang/cmulex/cmu_lex_entries.c lib/flite-1.4/lang/cmulex/cmu_lex_entries.c
lib/flite-1.4/lang/cmulex/cmu_lts_model.c lib/flite-1.4/lang/cmulex/cmu_lts_model.c
lib/flite-1.4/lang/cmulex/cmu_lts_rules.c lib/flite-1.4/lang/cmulex/cmu_lts_rules.c
lib/flite-1.4/lang/cmulex/cmu_postlex.c lib/flite-1.4/lang/cmulex/cmu_postlex.c
lib/flite-1.4/lang/usenglish/us_aswd.c lib/flite-1.4/lang/usenglish/us_aswd.c
lib/flite-1.4/lang/usenglish/us_dur_stats.c lib/flite-1.4/lang/usenglish/us_dur_stats.c
lib/flite-1.4/lang/usenglish/us_durz_cart.c lib/flite-1.4/lang/usenglish/us_durz_cart.c
lib/flite-1.4/lang/usenglish/us_expand.c lib/flite-1.4/lang/usenglish/us_expand.c
lib/flite-1.4/lang/usenglish/us_f0_model.c lib/flite-1.4/lang/usenglish/us_f0_model.c
lib/flite-1.4/lang/usenglish/us_f0lr.c lib/flite-1.4/lang/usenglish/us_f0lr.c
lib/flite-1.4/lang/usenglish/us_ffeatures.c lib/flite-1.4/lang/usenglish/us_ffeatures.c
lib/flite-1.4/lang/usenglish/us_gpos.c lib/flite-1.4/lang/usenglish/us_gpos.c
lib/flite-1.4/lang/usenglish/us_int_accent_cart.c lib/flite-1.4/lang/usenglish/us_int_accent_cart.c
lib/flite-1.4/lang/usenglish/us_int_tone_cart.c lib/flite-1.4/lang/usenglish/us_int_tone_cart.c
lib/flite-1.4/lang/usenglish/us_nums_cart.c lib/flite-1.4/lang/usenglish/us_nums_cart.c
lib/flite-1.4/lang/usenglish/us_phoneset.c lib/flite-1.4/lang/usenglish/us_phoneset.c
lib/flite-1.4/lang/usenglish/us_phrasing_cart.c lib/flite-1.4/lang/usenglish/us_phrasing_cart.c
lib/flite-1.4/lang/usenglish/us_pos_cart.c lib/flite-1.4/lang/usenglish/us_pos_cart.c
lib/flite-1.4/lang/usenglish/us_text.c lib/flite-1.4/lang/usenglish/us_text.c
lib/flite-1.4/lang/usenglish/usenglish.c lib/flite-1.4/lang/usenglish/usenglish.c
lib/flite-1.4/src/audio/au_none.c lib/flite-1.4/src/audio/au_none.c
lib/flite-1.4/src/audio/au_streaming.c lib/flite-1.4/src/audio/au_streaming.c
lib/flite-1.4/src/audio/audio.c lib/flite-1.4/src/audio/audio.c
lib/flite-1.4/src/hrg/cst_ffeature.c lib/flite-1.4/src/hrg/cst_ffeature.c
lib/flite-1.4/src/hrg/cst_item.c lib/flite-1.4/src/hrg/cst_item.c
lib/flite-1.4/src/hrg/cst_relation.c lib/flite-1.4/src/hrg/cst_relation.c
lib/flite-1.4/src/hrg/cst_utterance.c lib/flite-1.4/src/hrg/cst_utterance.c
lib/flite-1.4/src/lexicon/cst_lexicon.c lib/flite-1.4/src/lexicon/cst_lexicon.c
lib/flite-1.4/src/lexicon/cst_lts.c lib/flite-1.4/src/lexicon/cst_lts.c
lib/flite-1.4/src/regex/cst_regex.c lib/flite-1.4/src/regex/cst_regex.c
lib/flite-1.4/src/regex/regexp.c lib/flite-1.4/src/regex/regexp.c
lib/flite-1.4/src/speech/cst_lpcres.c lib/flite-1.4/src/speech/cst_lpcres.c
lib/flite-1.4/src/speech/cst_track.c lib/flite-1.4/src/speech/cst_track.c
lib/flite-1.4/src/speech/cst_wave.c lib/flite-1.4/src/speech/cst_wave.c
lib/flite-1.4/src/speech/cst_wave_io.c lib/flite-1.4/src/speech/cst_wave_io.c
lib/flite-1.4/src/speech/cst_wave_utils.c lib/flite-1.4/src/speech/cst_wave_utils.c
lib/flite-1.4/src/speech/rateconv.c lib/flite-1.4/src/speech/rateconv.c
lib/flite-1.4/src/stats/cst_cart.c lib/flite-1.4/src/stats/cst_cart.c
lib/flite-1.4/src/synth/cst_ffeatures.c lib/flite-1.4/src/synth/cst_ffeatures.c
lib/flite-1.4/src/synth/cst_phoneset.c lib/flite-1.4/src/synth/cst_phoneset.c
lib/flite-1.4/src/synth/cst_synth.c lib/flite-1.4/src/synth/cst_synth.c
lib/flite-1.4/src/synth/cst_utt_utils.c lib/flite-1.4/src/synth/cst_utt_utils.c
lib/flite-1.4/src/synth/cst_voice.c lib/flite-1.4/src/synth/cst_voice.c
lib/flite-1.4/src/synth/flite.c lib/flite-1.4/src/synth/flite.c
lib/flite-1.4/src/utils/cst_alloc.c lib/flite-1.4/src/utils/cst_alloc.c
lib/flite-1.4/src/utils/cst_endian.c lib/flite-1.4/src/utils/cst_endian.c
lib/flite-1.4/src/utils/cst_error.c lib/flite-1.4/src/utils/cst_error.c
lib/flite-1.4/src/utils/cst_features.c lib/flite-1.4/src/utils/cst_features.c
lib/flite-1.4/src/utils/cst_file_stdio.c lib/flite-1.4/src/utils/cst_file_stdio.c
lib/flite-1.4/src/utils/cst_string.c lib/flite-1.4/src/utils/cst_string.c
lib/flite-1.4/src/utils/cst_tokenstream.c lib/flite-1.4/src/utils/cst_tokenstream.c
lib/flite-1.4/src/utils/cst_val.c lib/flite-1.4/src/utils/cst_val.c
lib/flite-1.4/src/utils/cst_val_const.c lib/flite-1.4/src/utils/cst_val_const.c
lib/flite-1.4/src/utils/cst_val_user.c lib/flite-1.4/src/utils/cst_val_user.c
lib/flite-1.4/src/utils/cst_val_user.c lib/flite-1.4/src/utils/cst_val_user.c
) )
add_library(flite ${fliteFiles}) add_library(flite ${fliteFiles})
target_include_directories(flite SYSTEM PUBLIC target_include_directories(flite SYSTEM PUBLIC
"lib/flite-1.4/include" "lib/flite-1.4/include"
"lib/flite-1.4" "lib/flite-1.4"
) )
target_compile_options(flite PRIVATE ${disableWarningsFlags}) target_compile_options(flite PRIVATE ${disableWarningsFlags})
set_target_properties(flite PROPERTIES FOLDER lib) set_target_properties(flite PROPERTIES FOLDER lib)
# ... UTF8-CPP # ... UTF8-CPP
add_library(utfcpp add_library(utfcpp
lib/header-only.c lib/header-only.c
lib/utfcpp-2.3.5/source/utf8.h lib/utfcpp-2.3.5/source/utf8.h
) )
target_include_directories(utfcpp SYSTEM PUBLIC "lib/utfcpp-2.3.5/source") target_include_directories(utfcpp SYSTEM PUBLIC "lib/utfcpp-2.3.5/source")
target_compile_options(utfcpp PRIVATE ${disableWarningsFlags}) target_compile_options(utfcpp PRIVATE ${disableWarningsFlags})
@ -221,8 +221,8 @@ set_target_properties(utfcpp PROPERTIES FOLDER lib)
# ... utf8proc # ... utf8proc
add_library(utf8proc add_library(utf8proc
lib/utf8proc-2.2.0/utf8proc.c lib/utf8proc-2.2.0/utf8proc.c
lib/utf8proc-2.2.0/utf8proc.h lib/utf8proc-2.2.0/utf8proc.h
) )
target_include_directories(utf8proc SYSTEM PUBLIC "lib/utf8proc-2.2.0") target_include_directories(utf8proc SYSTEM PUBLIC "lib/utf8proc-2.2.0")
target_compile_options(utf8proc PRIVATE ${disableWarningsFlags}) target_compile_options(utf8proc PRIVATE ${disableWarningsFlags})
@ -231,9 +231,9 @@ set_target_properties(utf8proc PROPERTIES FOLDER lib)
# ... Ogg # ... Ogg
add_library(ogg add_library(ogg
lib/ogg-1.3.3/include/ogg/ogg.h lib/ogg-1.3.3/include/ogg/ogg.h
lib/ogg-1.3.3/src/bitwise.c lib/ogg-1.3.3/src/bitwise.c
lib/ogg-1.3.3/src/framing.c lib/ogg-1.3.3/src/framing.c
) )
target_include_directories(ogg SYSTEM PUBLIC "lib/ogg-1.3.3/include") target_include_directories(ogg SYSTEM PUBLIC "lib/ogg-1.3.3/include")
target_compile_options(ogg PRIVATE ${disableWarningsFlags}) target_compile_options(ogg PRIVATE ${disableWarningsFlags})
@ -241,30 +241,30 @@ set_target_properties(ogg PROPERTIES FOLDER lib)
# ... Vorbis # ... Vorbis
add_library(vorbis add_library(vorbis
lib/vorbis-1.3.6/include/vorbis/vorbisfile.h lib/vorbis-1.3.6/include/vorbis/vorbisfile.h
lib/vorbis-1.3.6/lib/bitrate.c lib/vorbis-1.3.6/lib/bitrate.c
lib/vorbis-1.3.6/lib/block.c lib/vorbis-1.3.6/lib/block.c
lib/vorbis-1.3.6/lib/codebook.c lib/vorbis-1.3.6/lib/codebook.c
lib/vorbis-1.3.6/lib/envelope.c lib/vorbis-1.3.6/lib/envelope.c
lib/vorbis-1.3.6/lib/floor0.c lib/vorbis-1.3.6/lib/floor0.c
lib/vorbis-1.3.6/lib/floor1.c lib/vorbis-1.3.6/lib/floor1.c
lib/vorbis-1.3.6/lib/info.c lib/vorbis-1.3.6/lib/info.c
lib/vorbis-1.3.6/lib/lpc.c lib/vorbis-1.3.6/lib/lpc.c
lib/vorbis-1.3.6/lib/lsp.c lib/vorbis-1.3.6/lib/lsp.c
lib/vorbis-1.3.6/lib/mapping0.c lib/vorbis-1.3.6/lib/mapping0.c
lib/vorbis-1.3.6/lib/mdct.c lib/vorbis-1.3.6/lib/mdct.c
lib/vorbis-1.3.6/lib/psy.c lib/vorbis-1.3.6/lib/psy.c
lib/vorbis-1.3.6/lib/registry.c lib/vorbis-1.3.6/lib/registry.c
lib/vorbis-1.3.6/lib/res0.c lib/vorbis-1.3.6/lib/res0.c
lib/vorbis-1.3.6/lib/sharedbook.c lib/vorbis-1.3.6/lib/sharedbook.c
lib/vorbis-1.3.6/lib/smallft.c lib/vorbis-1.3.6/lib/smallft.c
lib/vorbis-1.3.6/lib/synthesis.c lib/vorbis-1.3.6/lib/synthesis.c
lib/vorbis-1.3.6/lib/vorbisfile.c lib/vorbis-1.3.6/lib/vorbisfile.c
lib/vorbis-1.3.6/lib/window.c lib/vorbis-1.3.6/lib/window.c
) )
target_include_directories(vorbis SYSTEM PUBLIC "lib/vorbis-1.3.6/include") target_include_directories(vorbis SYSTEM PUBLIC "lib/vorbis-1.3.6/include")
target_link_libraries(vorbis target_link_libraries(vorbis
ogg ogg
) )
target_compile_options(vorbis PRIVATE ${disableWarningsFlags}) target_compile_options(vorbis PRIVATE ${disableWarningsFlags})
set_target_properties(vorbis PROPERTIES FOLDER lib) set_target_properties(vorbis PROPERTIES FOLDER lib)
@ -275,303 +275,303 @@ include_directories("src")
# ... rhubarb-animation # ... rhubarb-animation
add_library(rhubarb-animation add_library(rhubarb-animation
src/animation/animationRules.cpp src/animation/animationRules.cpp
src/animation/animationRules.h src/animation/animationRules.h
src/animation/mouthAnimation.cpp src/animation/mouthAnimation.cpp
src/animation/mouthAnimation.h src/animation/mouthAnimation.h
src/animation/pauseAnimation.cpp src/animation/pauseAnimation.cpp
src/animation/pauseAnimation.h src/animation/pauseAnimation.h
src/animation/roughAnimation.cpp src/animation/roughAnimation.cpp
src/animation/roughAnimation.h src/animation/roughAnimation.h
src/animation/ShapeRule.cpp src/animation/ShapeRule.cpp
src/animation/ShapeRule.h src/animation/ShapeRule.h
src/animation/shapeShorthands.h src/animation/shapeShorthands.h
src/animation/staticSegments.cpp src/animation/staticSegments.cpp
src/animation/staticSegments.h src/animation/staticSegments.h
src/animation/targetShapeSet.cpp src/animation/targetShapeSet.cpp
src/animation/targetShapeSet.h src/animation/targetShapeSet.h
src/animation/timingOptimization.cpp src/animation/timingOptimization.cpp
src/animation/timingOptimization.h src/animation/timingOptimization.h
src/animation/tweening.cpp src/animation/tweening.cpp
src/animation/tweening.h src/animation/tweening.h
) )
target_include_directories(rhubarb-animation PRIVATE "src/animation") target_include_directories(rhubarb-animation PRIVATE "src/animation")
target_link_libraries(rhubarb-animation target_link_libraries(rhubarb-animation
rhubarb-core rhubarb-core
rhubarb-logging rhubarb-logging
rhubarb-time rhubarb-time
) )
# ... rhubarb-audio # ... rhubarb-audio
add_library(rhubarb-audio add_library(rhubarb-audio
src/audio/AudioClip.cpp src/audio/AudioClip.cpp
src/audio/AudioClip.h src/audio/AudioClip.h
src/audio/audioFileReading.cpp src/audio/audioFileReading.cpp
src/audio/audioFileReading.h src/audio/audioFileReading.h
src/audio/AudioSegment.cpp src/audio/AudioSegment.cpp
src/audio/AudioSegment.h src/audio/AudioSegment.h
src/audio/DcOffset.cpp src/audio/DcOffset.cpp
src/audio/DcOffset.h src/audio/DcOffset.h
src/audio/ioTools.h src/audio/ioTools.h
src/audio/OggVorbisFileReader.cpp src/audio/OggVorbisFileReader.cpp
src/audio/OggVorbisFileReader.h src/audio/OggVorbisFileReader.h
src/audio/processing.cpp src/audio/processing.cpp
src/audio/processing.h src/audio/processing.h
src/audio/SampleRateConverter.cpp src/audio/SampleRateConverter.cpp
src/audio/SampleRateConverter.h src/audio/SampleRateConverter.h
src/audio/voiceActivityDetection.cpp src/audio/voiceActivityDetection.cpp
src/audio/voiceActivityDetection.h src/audio/voiceActivityDetection.h
src/audio/WaveFileReader.cpp src/audio/WaveFileReader.cpp
src/audio/WaveFileReader.h src/audio/WaveFileReader.h
src/audio/waveFileWriting.cpp src/audio/waveFileWriting.cpp
src/audio/waveFileWriting.h src/audio/waveFileWriting.h
) )
target_include_directories(rhubarb-audio PRIVATE "src/audio") target_include_directories(rhubarb-audio PRIVATE "src/audio")
target_link_libraries(rhubarb-audio target_link_libraries(rhubarb-audio
webRtc webRtc
vorbis vorbis
rhubarb-logging rhubarb-logging
rhubarb-time rhubarb-time
rhubarb-tools rhubarb-tools
) )
# ... rhubarb-core # ... rhubarb-core
configure_file(src/core/appInfo.cpp.in appInfo.cpp ESCAPE_QUOTES) configure_file(src/core/appInfo.cpp.in appInfo.cpp ESCAPE_QUOTES)
add_library(rhubarb-core add_library(rhubarb-core
${CMAKE_CURRENT_BINARY_DIR}/appInfo.cpp ${CMAKE_CURRENT_BINARY_DIR}/appInfo.cpp
src/core/appInfo.h src/core/appInfo.h
src/core/Phone.cpp src/core/Phone.cpp
src/core/Phone.h src/core/Phone.h
src/core/Shape.cpp src/core/Shape.cpp
src/core/Shape.h src/core/Shape.h
) )
target_include_directories(rhubarb-core PRIVATE "src/core") target_include_directories(rhubarb-core PRIVATE "src/core")
target_link_libraries(rhubarb-core target_link_libraries(rhubarb-core
rhubarb-tools rhubarb-tools
) )
# ... rhubarb-exporters # ... rhubarb-exporters
add_library(rhubarb-exporters add_library(rhubarb-exporters
src/exporters/DatExporter.cpp src/exporters/DatExporter.cpp
src/exporters/DatExporter.h src/exporters/DatExporter.h
src/exporters/Exporter.h src/exporters/Exporter.h
src/exporters/exporterTools.cpp src/exporters/exporterTools.cpp
src/exporters/exporterTools.h src/exporters/exporterTools.h
src/exporters/JsonExporter.cpp src/exporters/JsonExporter.cpp
src/exporters/JsonExporter.h src/exporters/JsonExporter.h
src/exporters/TsvExporter.cpp src/exporters/TsvExporter.cpp
src/exporters/TsvExporter.h src/exporters/TsvExporter.h
src/exporters/XmlExporter.cpp src/exporters/XmlExporter.cpp
src/exporters/XmlExporter.h src/exporters/XmlExporter.h
) )
target_include_directories(rhubarb-exporters PRIVATE "src/exporters") target_include_directories(rhubarb-exporters PRIVATE "src/exporters")
target_link_libraries(rhubarb-exporters target_link_libraries(rhubarb-exporters
rhubarb-animation rhubarb-animation
rhubarb-core rhubarb-core
rhubarb-time rhubarb-time
) )
# ... rhubarb-lib # ... rhubarb-lib
add_library(rhubarb-lib add_library(rhubarb-lib
src/lib/rhubarbLib.cpp src/lib/rhubarbLib.cpp
src/lib/rhubarbLib.h src/lib/rhubarbLib.h
) )
target_include_directories(rhubarb-lib PRIVATE "src/lib") target_include_directories(rhubarb-lib PRIVATE "src/lib")
target_link_libraries(rhubarb-lib target_link_libraries(rhubarb-lib
rhubarb-animation rhubarb-animation
rhubarb-audio rhubarb-audio
rhubarb-core rhubarb-core
rhubarb-recognition rhubarb-recognition
rhubarb-time rhubarb-time
rhubarb-tools rhubarb-tools
) )
# ... rhubarb-logging # ... rhubarb-logging
add_library(rhubarb-logging add_library(rhubarb-logging
src/logging/Entry.cpp src/logging/Entry.cpp
src/logging/Entry.h src/logging/Entry.h
src/logging/Formatter.h src/logging/Formatter.h
src/logging/formatters.cpp src/logging/formatters.cpp
src/logging/formatters.h src/logging/formatters.h
src/logging/Level.cpp src/logging/Level.cpp
src/logging/Level.h src/logging/Level.h
src/logging/logging.cpp src/logging/logging.cpp
src/logging/logging.h src/logging/logging.h
src/logging/Sink.h src/logging/Sink.h
src/logging/sinks.cpp src/logging/sinks.cpp
src/logging/sinks.h src/logging/sinks.h
) )
target_include_directories(rhubarb-logging PRIVATE "src/logging") target_include_directories(rhubarb-logging PRIVATE "src/logging")
target_link_libraries(rhubarb-logging target_link_libraries(rhubarb-logging
rhubarb-tools rhubarb-tools
) )
# ... rhubarb-recognition # ... rhubarb-recognition
add_library(rhubarb-recognition add_library(rhubarb-recognition
src/recognition/g2p.cpp src/recognition/g2p.cpp
src/recognition/g2p.h src/recognition/g2p.h
src/recognition/languageModels.cpp src/recognition/languageModels.cpp
src/recognition/languageModels.h src/recognition/languageModels.h
src/recognition/PhoneticRecognizer.cpp src/recognition/PhoneticRecognizer.cpp
src/recognition/PhoneticRecognizer.h src/recognition/PhoneticRecognizer.h
src/recognition/PocketSphinxRecognizer.cpp src/recognition/PocketSphinxRecognizer.cpp
src/recognition/PocketSphinxRecognizer.h src/recognition/PocketSphinxRecognizer.h
src/recognition/pocketSphinxTools.cpp src/recognition/pocketSphinxTools.cpp
src/recognition/pocketSphinxTools.h src/recognition/pocketSphinxTools.h
src/recognition/Recognizer.h src/recognition/Recognizer.h
src/recognition/tokenization.cpp src/recognition/tokenization.cpp
src/recognition/tokenization.h src/recognition/tokenization.h
) )
target_include_directories(rhubarb-recognition PRIVATE "src/recognition") target_include_directories(rhubarb-recognition PRIVATE "src/recognition")
target_link_libraries(rhubarb-recognition target_link_libraries(rhubarb-recognition
flite flite
pocketSphinx pocketSphinx
rhubarb-audio rhubarb-audio
rhubarb-core rhubarb-core
rhubarb-logging rhubarb-logging
) )
# ... rhubarb-time # ... rhubarb-time
add_library(rhubarb-time add_library(rhubarb-time
src/time/BoundedTimeline.h src/time/BoundedTimeline.h
src/time/centiseconds.cpp src/time/centiseconds.cpp
src/time/centiseconds.h src/time/centiseconds.h
src/time/ContinuousTimeline.h src/time/ContinuousTimeline.h
src/time/Timed.h src/time/Timed.h
src/time/timedLogging.h src/time/timedLogging.h
src/time/Timeline.h src/time/Timeline.h
src/time/TimeRange.cpp src/time/TimeRange.cpp
src/time/TimeRange.h src/time/TimeRange.h
) )
target_include_directories(rhubarb-time PRIVATE "src/time") target_include_directories(rhubarb-time PRIVATE "src/time")
target_link_libraries(rhubarb-time target_link_libraries(rhubarb-time
cppFormat cppFormat
rhubarb-logging rhubarb-logging
) )
# ... rhubarb-tools # ... rhubarb-tools
add_library(rhubarb-tools add_library(rhubarb-tools
src/tools/array.h src/tools/array.h
src/tools/EnumConverter.h src/tools/EnumConverter.h
src/tools/exceptions.cpp src/tools/exceptions.cpp
src/tools/exceptions.h src/tools/exceptions.h
src/tools/fileTools.cpp src/tools/fileTools.cpp
src/tools/fileTools.h src/tools/fileTools.h
src/tools/Lazy.h src/tools/Lazy.h
src/tools/nextCombination.h src/tools/nextCombination.h
src/tools/NiceCmdLineOutput.cpp src/tools/NiceCmdLineOutput.cpp
src/tools/NiceCmdLineOutput.h src/tools/NiceCmdLineOutput.h
src/tools/ObjectPool.h src/tools/ObjectPool.h
src/tools/pairs.h src/tools/pairs.h
src/tools/parallel.h src/tools/parallel.h
src/tools/platformTools.cpp src/tools/platformTools.cpp
src/tools/platformTools.h src/tools/platformTools.h
src/tools/progress.cpp src/tools/progress.cpp
src/tools/progress.h src/tools/progress.h
src/tools/ProgressBar.cpp src/tools/ProgressBar.cpp
src/tools/ProgressBar.h src/tools/ProgressBar.h
src/tools/stringTools.cpp src/tools/stringTools.cpp
src/tools/stringTools.h src/tools/stringTools.h
src/tools/TablePrinter.cpp src/tools/TablePrinter.cpp
src/tools/TablePrinter.h src/tools/TablePrinter.h
src/tools/textFiles.cpp src/tools/textFiles.cpp
src/tools/textFiles.h src/tools/textFiles.h
src/tools/tools.cpp src/tools/tools.cpp
src/tools/tools.h src/tools/tools.h
src/tools/tupleHash.h src/tools/tupleHash.h
) )
target_include_directories(rhubarb-tools PRIVATE "src/tools") target_include_directories(rhubarb-tools PRIVATE "src/tools")
target_link_libraries(rhubarb-tools target_link_libraries(rhubarb-tools
cppFormat cppFormat
whereami whereami
utfcpp utfcpp
utf8proc utf8proc
) )
# Define Rhubarb executable # Define Rhubarb executable
add_executable(rhubarb add_executable(rhubarb
src/rhubarb/main.cpp src/rhubarb/main.cpp
src/rhubarb/ExportFormat.cpp src/rhubarb/ExportFormat.cpp
src/rhubarb/ExportFormat.h src/rhubarb/ExportFormat.h
src/rhubarb/RecognizerType.cpp src/rhubarb/RecognizerType.cpp
src/rhubarb/RecognizerType.h src/rhubarb/RecognizerType.h
src/rhubarb/semanticEntries.cpp src/rhubarb/semanticEntries.cpp
src/rhubarb/semanticEntries.h src/rhubarb/semanticEntries.h
src/rhubarb/sinks.cpp src/rhubarb/sinks.cpp
src/rhubarb/sinks.h src/rhubarb/sinks.h
) )
target_include_directories(rhubarb PUBLIC "src/rhubarb") target_include_directories(rhubarb PUBLIC "src/rhubarb")
target_link_libraries(rhubarb target_link_libraries(rhubarb
rhubarb-exporters rhubarb-exporters
rhubarb-lib rhubarb-lib
) )
target_compile_options(rhubarb PUBLIC ${enableWarningsFlags}) target_compile_options(rhubarb PUBLIC ${enableWarningsFlags})
# Define test project # Define test project
#include_directories("${gtest_SOURCE_DIR}/include") #include_directories("${gtest_SOURCE_DIR}/include")
set(TEST_FILES set(TEST_FILES
tests/stringToolsTests.cpp tests/stringToolsTests.cpp
tests/TimelineTests.cpp tests/TimelineTests.cpp
tests/BoundedTimelineTests.cpp tests/BoundedTimelineTests.cpp
tests/ContinuousTimelineTests.cpp tests/ContinuousTimelineTests.cpp
tests/pairsTests.cpp tests/pairsTests.cpp
tests/tokenizationTests.cpp tests/tokenizationTests.cpp
tests/g2pTests.cpp tests/g2pTests.cpp
tests/LazyTests.cpp tests/LazyTests.cpp
tests/WaveFileReaderTests.cpp tests/WaveFileReaderTests.cpp
) )
add_executable(runTests ${TEST_FILES}) add_executable(runTests ${TEST_FILES})
target_link_libraries(runTests target_link_libraries(runTests
gtest gtest
gmock gmock
gmock_main gmock_main
rhubarb-recognition rhubarb-recognition
rhubarb-time rhubarb-time
rhubarb-audio rhubarb-audio
) )
# Copies the specified files in a post-build event, then installs them # Copies the specified files in a post-build event, then installs them
function(copy_and_install sourceGlob relativeTargetDirectory) function(copy_and_install sourceGlob relativeTargetDirectory)
# Set `sourcePaths` # Set `sourcePaths`
file(GLOB sourcePaths "${sourceGlob}") file(GLOB sourcePaths "${sourceGlob}")
foreach(sourcePath ${sourcePaths}) foreach(sourcePath ${sourcePaths})
if(NOT IS_DIRECTORY ${sourcePath}) if(NOT IS_DIRECTORY ${sourcePath})
# Set `fileName` # Set `fileName`
get_filename_component(fileName "${sourcePath}" NAME) get_filename_component(fileName "${sourcePath}" NAME)
# Copy file during build # Copy file during build
add_custom_command(TARGET rhubarb POST_BUILD add_custom_command(TARGET rhubarb POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${sourcePath}" "$<TARGET_FILE_DIR:rhubarb>/${relativeTargetDirectory}/${fileName}" COMMAND ${CMAKE_COMMAND} -E copy "${sourcePath}" "$<TARGET_FILE_DIR:rhubarb>/${relativeTargetDirectory}/${fileName}"
COMMENT "Creating '${relativeTargetDirectory}/${fileName}'" COMMENT "Creating '${relativeTargetDirectory}/${fileName}'"
) )
# Install file # Install file
install( install(
FILES "${sourcePath}" FILES "${sourcePath}"
DESTINATION "${relativeTargetDirectory}" DESTINATION "${relativeTargetDirectory}"
) )
endif() endif()
endforeach() endforeach()
endfunction() endfunction()
# Copies the specified files in a post-build event # Copies the specified files in a post-build event
function(copy sourceGlob relativeTargetDirectory) function(copy sourceGlob relativeTargetDirectory)
# Set `sourcePaths` # Set `sourcePaths`
file(GLOB sourcePaths "${sourceGlob}") file(GLOB sourcePaths "${sourceGlob}")
foreach(sourcePath ${sourcePaths}) foreach(sourcePath ${sourcePaths})
if(NOT IS_DIRECTORY ${sourcePath}) if(NOT IS_DIRECTORY ${sourcePath})
# Set `fileName` # Set `fileName`
get_filename_component(fileName "${sourcePath}" NAME) get_filename_component(fileName "${sourcePath}" NAME)
# Copy file during build # Copy file during build
add_custom_command(TARGET rhubarb POST_BUILD add_custom_command(TARGET rhubarb POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${sourcePath}" "$<TARGET_FILE_DIR:rhubarb>/${relativeTargetDirectory}/${fileName}" COMMAND ${CMAKE_COMMAND} -E copy "${sourcePath}" "$<TARGET_FILE_DIR:rhubarb>/${relativeTargetDirectory}/${fileName}"
COMMENT "Creating '${relativeTargetDirectory}/${fileName}'" COMMENT "Creating '${relativeTargetDirectory}/${fileName}'"
) )
endif() endif()
endforeach() endforeach()
endfunction() endfunction()
copy_and_install("lib/pocketsphinx-rev13216/model/en-us/*" "res/sphinx") copy_and_install("lib/pocketsphinx-rev13216/model/en-us/*" "res/sphinx")
@ -580,7 +580,7 @@ copy_and_install("lib/cmusphinx-en-us-5.2/*" "res/sphinx/acoustic-model")
copy_and_install("tests/resources/*" "tests/resources") copy_and_install("tests/resources/*" "tests/resources")
install( install(
TARGETS rhubarb TARGETS rhubarb
RUNTIME RUNTIME
DESTINATION . DESTINATION .
) )

View File

@ -8,79 +8,79 @@ using boost::adaptors::transformed;
template<typename T, bool AutoJoin> template<typename T, bool AutoJoin>
ContinuousTimeline<optional<T>, AutoJoin> boundedTimelinetoContinuousOptional( ContinuousTimeline<optional<T>, AutoJoin> boundedTimelinetoContinuousOptional(
const BoundedTimeline<T, AutoJoin>& timeline const BoundedTimeline<T, AutoJoin>& timeline
) { ) {
return { return {
timeline.getRange(), timeline.getRange(),
boost::none, boost::none,
timeline | transformed([](const Timed<T>& timedValue) { timeline | transformed([](const Timed<T>& timedValue) {
return Timed<optional<T>>(timedValue.getTimeRange(), timedValue.getValue()); return Timed<optional<T>>(timedValue.getTimeRange(), timedValue.getValue());
}) })
}; };
} }
ShapeRule::ShapeRule( ShapeRule::ShapeRule(
ShapeSet shapeSet, ShapeSet shapeSet,
optional<Phone> phone, optional<Phone> phone,
TimeRange phoneTiming TimeRange phoneTiming
) : ) :
shapeSet(std::move(shapeSet)), shapeSet(std::move(shapeSet)),
phone(std::move(phone)), phone(std::move(phone)),
phoneTiming(phoneTiming) phoneTiming(phoneTiming)
{} {}
ShapeRule ShapeRule::getInvalid() { ShapeRule ShapeRule::getInvalid() {
return { {}, boost::none, { 0_cs, 0_cs } }; return { {}, boost::none, { 0_cs, 0_cs } };
} }
bool ShapeRule::operator==(const ShapeRule& rhs) const { bool ShapeRule::operator==(const ShapeRule& rhs) const {
return shapeSet == rhs.shapeSet && phone == rhs.phone && phoneTiming == rhs.phoneTiming; return shapeSet == rhs.shapeSet && phone == rhs.phone && phoneTiming == rhs.phoneTiming;
} }
bool ShapeRule::operator!=(const ShapeRule& rhs) const { bool ShapeRule::operator!=(const ShapeRule& rhs) const {
return !operator==(rhs); return !operator==(rhs);
} }
bool ShapeRule::operator<(const ShapeRule& rhs) const { bool ShapeRule::operator<(const ShapeRule& rhs) const {
return shapeSet < rhs.shapeSet return shapeSet < rhs.shapeSet
|| phone < rhs.phone || phone < rhs.phone
|| phoneTiming.getStart() < rhs.phoneTiming.getStart() || phoneTiming.getStart() < rhs.phoneTiming.getStart()
|| phoneTiming.getEnd() < rhs.phoneTiming.getEnd(); || phoneTiming.getEnd() < rhs.phoneTiming.getEnd();
} }
ContinuousTimeline<ShapeRule> getShapeRules(const BoundedTimeline<Phone>& phones) { ContinuousTimeline<ShapeRule> getShapeRules(const BoundedTimeline<Phone>& phones) {
// Convert to continuous timeline so that silences aren't skipped when iterating // Convert to continuous timeline so that silences aren't skipped when iterating
auto continuousPhones = boundedTimelinetoContinuousOptional(phones); auto continuousPhones = boundedTimelinetoContinuousOptional(phones);
// Create timeline of shape rules // Create timeline of shape rules
ContinuousTimeline<ShapeRule> shapeRules( ContinuousTimeline<ShapeRule> shapeRules(
phones.getRange(), phones.getRange(),
{ { Shape::X }, boost::none, { 0_cs, 0_cs } } { { Shape::X }, boost::none, { 0_cs, 0_cs } }
); );
centiseconds previousDuration = 0_cs; centiseconds previousDuration = 0_cs;
for (const auto& timedPhone : continuousPhones) { for (const auto& timedPhone : continuousPhones) {
optional<Phone> phone = timedPhone.getValue(); optional<Phone> phone = timedPhone.getValue();
const centiseconds duration = timedPhone.getDuration(); const centiseconds duration = timedPhone.getDuration();
if (phone) { if (phone) {
// Animate one phone // Animate one phone
Timeline<ShapeSet> phoneShapeSets = getShapeSets(*phone, duration, previousDuration); Timeline<ShapeSet> phoneShapeSets = getShapeSets(*phone, duration, previousDuration);
// Result timing is relative to phone. Make absolute. // Result timing is relative to phone. Make absolute.
phoneShapeSets.shift(timedPhone.getStart()); phoneShapeSets.shift(timedPhone.getStart());
// Copy to timeline. // Copy to timeline.
// Later shape sets may overwrite earlier ones if overlapping. // Later shape sets may overwrite earlier ones if overlapping.
for (const auto& timedShapeSet : phoneShapeSets) { for (const auto& timedShapeSet : phoneShapeSets) {
shapeRules.set( shapeRules.set(
timedShapeSet.getTimeRange(), timedShapeSet.getTimeRange(),
ShapeRule(timedShapeSet.getValue(), phone, timedPhone.getTimeRange()) ShapeRule(timedShapeSet.getValue(), phone, timedPhone.getTimeRange())
); );
} }
} }
previousDuration = duration; previousDuration = duration;
} }
return shapeRules; return shapeRules;
} }

View File

@ -7,17 +7,17 @@
#include "time/TimeRange.h" #include "time/TimeRange.h"
struct ShapeRule { struct ShapeRule {
ShapeSet shapeSet; ShapeSet shapeSet;
boost::optional<Phone> phone; boost::optional<Phone> phone;
TimeRange phoneTiming; TimeRange phoneTiming;
ShapeRule(ShapeSet shapeSet, boost::optional<Phone> phone, TimeRange phoneTiming); ShapeRule(ShapeSet shapeSet, boost::optional<Phone> phone, TimeRange phoneTiming);
static ShapeRule getInvalid(); static ShapeRule getInvalid();
bool operator==(const ShapeRule&) const; bool operator==(const ShapeRule&) const;
bool operator!=(const ShapeRule&) const; bool operator!=(const ShapeRule&) const;
bool operator<(const ShapeRule&) const; bool operator<(const ShapeRule&) const;
}; };
// Returns shape rules for an entire timeline of phones. // Returns shape rules for an entire timeline of phones.

View File

@ -14,153 +14,153 @@ using std::map;
constexpr size_t shapeValueCount = static_cast<size_t>(Shape::EndSentinel); constexpr size_t shapeValueCount = static_cast<size_t>(Shape::EndSentinel);
Shape getBasicShape(Shape shape) { Shape getBasicShape(Shape shape) {
static constexpr array<Shape, shapeValueCount> basicShapes = static constexpr array<Shape, shapeValueCount> basicShapes =
make_array(A, B, C, D, E, F, A, C, A); make_array(A, B, C, D, E, F, A, C, A);
return basicShapes[static_cast<size_t>(shape)]; return basicShapes[static_cast<size_t>(shape)];
} }
Shape relax(Shape shape) { Shape relax(Shape shape) {
static constexpr array<Shape, shapeValueCount> relaxedShapes = static constexpr array<Shape, shapeValueCount> relaxedShapes =
make_array(A, B, B, C, C, B, X, B, X); make_array(A, B, B, C, C, B, X, B, X);
return relaxedShapes[static_cast<size_t>(shape)]; return relaxedShapes[static_cast<size_t>(shape)];
} }
Shape getClosestShape(Shape reference, ShapeSet shapes) { Shape getClosestShape(Shape reference, ShapeSet shapes) {
if (shapes.empty()) { if (shapes.empty()) {
throw std::invalid_argument("Cannot select from empty set of shapes."); throw std::invalid_argument("Cannot select from empty set of shapes.");
} }
// A matrix that for each shape contains all shapes in ascending order of effort required to // A matrix that for each shape contains all shapes in ascending order of effort required to
// move to them // move to them
constexpr static array<array<Shape, shapeValueCount>, shapeValueCount> effortMatrix = make_array( constexpr static array<array<Shape, shapeValueCount>, shapeValueCount> effortMatrix = make_array(
/* A */ make_array(A, X, G, B, C, H, E, D, F), /* A */ make_array(A, X, G, B, C, H, E, D, F),
/* B */ make_array(B, G, A, X, C, H, E, D, F), /* B */ make_array(B, G, A, X, C, H, E, D, F),
/* C */ make_array(C, H, B, G, D, A, X, E, F), /* C */ make_array(C, H, B, G, D, A, X, E, F),
/* D */ make_array(D, C, H, B, G, A, X, E, F), /* D */ make_array(D, C, H, B, G, A, X, E, F),
/* E */ make_array(E, C, H, B, G, A, X, D, F), /* E */ make_array(E, C, H, B, G, A, X, D, F),
/* F */ make_array(F, B, G, A, X, C, H, E, D), /* F */ make_array(F, B, G, A, X, C, H, E, D),
/* G */ make_array(G, A, B, C, H, X, E, D, F), /* G */ make_array(G, A, B, C, H, X, E, D, F),
/* H */ make_array(H, C, B, G, D, A, X, E, F), // Like C /* H */ make_array(H, C, B, G, D, A, X, E, F), // Like C
/* X */ make_array(X, A, G, B, C, H, E, D, F) // Like A /* X */ make_array(X, A, G, B, C, H, E, D, F) // Like A
); );
auto& closestShapes = effortMatrix.at(static_cast<size_t>(reference)); auto& closestShapes = effortMatrix.at(static_cast<size_t>(reference));
for (Shape closestShape : closestShapes) { for (Shape closestShape : closestShapes) {
if (shapes.find(closestShape) != shapes.end()) { if (shapes.find(closestShape) != shapes.end()) {
return closestShape; return closestShape;
} }
} }
throw std::invalid_argument("Unable to find closest shape."); throw std::invalid_argument("Unable to find closest shape.");
} }
optional<pair<Shape, TweenTiming>> getTween(Shape first, Shape second) { optional<pair<Shape, TweenTiming>> getTween(Shape first, Shape second) {
// Note that most of the following rules work in one direction only. // Note that most of the following rules work in one direction only.
// That's because in animation, the mouth should usually "pop" open without inbetweens, // That's because in animation, the mouth should usually "pop" open without inbetweens,
// then close slowly. // then close slowly.
static const map<pair<Shape, Shape>, pair<Shape, TweenTiming>> lookup { static const map<pair<Shape, Shape>, pair<Shape, TweenTiming>> lookup {
{ { D, A }, { C, TweenTiming::Early } }, { { D, A }, { C, TweenTiming::Early } },
{ { D, B }, { C, TweenTiming::Centered } }, { { D, B }, { C, TweenTiming::Centered } },
{ { D, G }, { C, TweenTiming::Early } }, { { D, G }, { C, TweenTiming::Early } },
{ { D, X }, { C, TweenTiming::Late } }, { { D, X }, { C, TweenTiming::Late } },
{ { C, F }, { E, TweenTiming::Centered } }, { { F, C }, { E, TweenTiming::Centered } }, { { C, F }, { E, TweenTiming::Centered } }, { { F, C }, { E, TweenTiming::Centered } },
{ { D, F }, { E, TweenTiming::Centered } }, { { D, F }, { E, TweenTiming::Centered } },
{ { H, F }, { E, TweenTiming::Late } }, { { F, H }, { E, TweenTiming::Early } } { { H, F }, { E, TweenTiming::Late } }, { { F, H }, { E, TweenTiming::Early } }
}; };
const auto it = lookup.find({ first, second }); const auto it = lookup.find({ first, second });
return it != lookup.end() ? it->second : optional<pair<Shape, TweenTiming>>(); return it != lookup.end() ? it->second : optional<pair<Shape, TweenTiming>>();
} }
Timeline<ShapeSet> getShapeSets(Phone phone, centiseconds duration, centiseconds previousDuration) { Timeline<ShapeSet> getShapeSets(Phone phone, centiseconds duration, centiseconds previousDuration) {
// Returns a timeline with a single shape set // Returns a timeline with a single shape set
const auto single = [duration](ShapeSet value) { const auto single = [duration](ShapeSet value) {
return Timeline<ShapeSet> { { 0_cs, duration, value } }; return Timeline<ShapeSet> { { 0_cs, duration, value } };
}; };
// Returns a timeline with two shape sets, timed as a diphthong // Returns a timeline with two shape sets, timed as a diphthong
const auto diphthong = [duration](ShapeSet first, ShapeSet second) { const auto diphthong = [duration](ShapeSet first, ShapeSet second) {
const centiseconds firstDuration = duration_cast<centiseconds>(duration * 0.6); const centiseconds firstDuration = duration_cast<centiseconds>(duration * 0.6);
return Timeline<ShapeSet> { return Timeline<ShapeSet> {
{ 0_cs, firstDuration, first }, { 0_cs, firstDuration, first },
{ firstDuration, duration, second } { firstDuration, duration, second }
}; };
}; };
// Returns a timeline with two shape sets, timed as a plosive // Returns a timeline with two shape sets, timed as a plosive
const auto plosive = [duration, previousDuration](ShapeSet first, ShapeSet second) { const auto plosive = [duration, previousDuration](ShapeSet first, ShapeSet second) {
const centiseconds minOcclusionDuration = 4_cs; const centiseconds minOcclusionDuration = 4_cs;
const centiseconds maxOcclusionDuration = 12_cs; const centiseconds maxOcclusionDuration = 12_cs;
const centiseconds occlusionDuration = const centiseconds occlusionDuration =
clamp(previousDuration / 2, minOcclusionDuration, maxOcclusionDuration); clamp(previousDuration / 2, minOcclusionDuration, maxOcclusionDuration);
return Timeline<ShapeSet> { return Timeline<ShapeSet> {
{ -occlusionDuration, 0_cs, first }, { -occlusionDuration, 0_cs, first },
{ 0_cs, duration, second } { 0_cs, duration, second }
}; };
}; };
// Returns the result of `getShapeSets` when called with identical arguments // Returns the result of `getShapeSets` when called with identical arguments
// except for a different phone. // except for a different phone.
const auto like = [duration, previousDuration](Phone referencePhone) { const auto like = [duration, previousDuration](Phone referencePhone) {
return getShapeSets(referencePhone, duration, previousDuration); return getShapeSets(referencePhone, duration, previousDuration);
}; };
static const ShapeSet any { A, B, C, D, E, F, G, H, X }; static const ShapeSet any { A, B, C, D, E, F, G, H, X };
static const ShapeSet anyOpen { B, C, D, E, F, G, H }; static const ShapeSet anyOpen { B, C, D, E, F, G, H };
// Note: // Note:
// The shapes {A, B, G, X} are very similar. You should avoid regular shape sets containing more // The shapes {A, B, G, X} are very similar. You should avoid regular shape sets containing more
// than one of these shapes. // than one of these shapes.
// Otherwise, the resulting shape may be more or less random and might not be a good fit. // Otherwise, the resulting shape may be more or less random and might not be a good fit.
// As an exception, a very flexible rule may contain *all* these shapes. // As an exception, a very flexible rule may contain *all* these shapes.
switch (phone) { switch (phone) {
case Phone::AO: return single({ E }); case Phone::AO: return single({ E });
case Phone::AA: return single({ D }); case Phone::AA: return single({ D });
case Phone::IY: return single({ B }); case Phone::IY: return single({ B });
case Phone::UW: return single({ F }); case Phone::UW: return single({ F });
case Phone::EH: return single({ C }); case Phone::EH: return single({ C });
case Phone::IH: return single({ B }); case Phone::IH: return single({ B });
case Phone::UH: return single({ F }); case Phone::UH: return single({ F });
case Phone::AH: return duration < 20_cs ? single({ C }) : single({ D }); case Phone::AH: return duration < 20_cs ? single({ C }) : single({ D });
case Phone::Schwa: return single({ B, C }); case Phone::Schwa: return single({ B, C });
case Phone::AE: return single({ C }); case Phone::AE: return single({ C });
case Phone::EY: return diphthong({ C }, { B }); case Phone::EY: return diphthong({ C }, { B });
case Phone::AY: return duration < 20_cs ? diphthong({ C }, { B }) : diphthong({ D }, { B }); case Phone::AY: return duration < 20_cs ? diphthong({ C }, { B }) : diphthong({ D }, { B });
case Phone::OW: return diphthong({ E }, { F }); case Phone::OW: return diphthong({ E }, { F });
case Phone::AW: return duration < 30_cs ? diphthong({ C }, { E }) : diphthong({ D }, { E }); case Phone::AW: return duration < 30_cs ? diphthong({ C }, { E }) : diphthong({ D }, { E });
case Phone::OY: return diphthong({ E }, { B }); case Phone::OY: return diphthong({ E }, { B });
case Phone::ER: return duration < 7_cs ? like(Phone::Schwa) : single({ E }); case Phone::ER: return duration < 7_cs ? like(Phone::Schwa) : single({ E });
case Phone::P: case Phone::P:
case Phone::B: return plosive({ A }, any); case Phone::B: return plosive({ A }, any);
case Phone::T: case Phone::T:
case Phone::D: return plosive({ B, F }, anyOpen); case Phone::D: return plosive({ B, F }, anyOpen);
case Phone::K: case Phone::K:
case Phone::G: return plosive({ B, C, E, F, H }, anyOpen); case Phone::G: return plosive({ B, C, E, F, H }, anyOpen);
case Phone::CH: case Phone::CH:
case Phone::JH: return single({ B, F }); case Phone::JH: return single({ B, F });
case Phone::F: case Phone::F:
case Phone::V: return single({ G }); case Phone::V: return single({ G });
case Phone::TH: case Phone::TH:
case Phone::DH: case Phone::DH:
case Phone::S: case Phone::S:
case Phone::Z: case Phone::Z:
case Phone::SH: case Phone::SH:
case Phone::ZH: return single({ B, F }); case Phone::ZH: return single({ B, F });
case Phone::HH: return single(any); // think "m-hm" case Phone::HH: return single(any); // think "m-hm"
case Phone::M: return single({ A }); case Phone::M: return single({ A });
case Phone::N: return single({ B, C, F, H }); case Phone::N: return single({ B, C, F, H });
case Phone::NG: return single({ B, C, E, F }); case Phone::NG: return single({ B, C, E, F });
case Phone::L: return duration < 20_cs ? single({ B, E, F, H }) : single({ H }); case Phone::L: return duration < 20_cs ? single({ B, E, F, H }) : single({ H });
case Phone::R: return single({ B, E, F }); case Phone::R: return single({ B, E, F });
case Phone::Y: return single({ B, C, F }); case Phone::Y: return single({ B, C, F });
case Phone::W: return single({ F }); case Phone::W: return single({ F });
case Phone::Breath: case Phone::Breath:
case Phone::Cough: case Phone::Cough:
case Phone::Smack: return single({ C }); case Phone::Smack: return single({ C });
case Phone::Noise: return single({ B }); case Phone::Noise: return single({ B });
default: throw std::invalid_argument("Unexpected phone."); default: throw std::invalid_argument("Unexpected phone.");
} }
} }

View File

@ -16,14 +16,14 @@ Shape getClosestShape(Shape reference, ShapeSet shapes);
// Indicates how to time a tween between two mouth shapes // Indicates how to time a tween between two mouth shapes
enum class TweenTiming { enum class TweenTiming {
// Tween should end at the original transition // Tween should end at the original transition
Early, Early,
// Tween should overlap both original mouth shapes equally // Tween should overlap both original mouth shapes equally
Centered, Centered,
// Tween should begin at the original transition // Tween should begin at the original transition
Late Late
}; };
// Returns the tween shape and timing to use to transition between the specified two mouth shapes. // Returns the tween shape and timing to use to transition between the specified two mouth shapes.

View File

@ -9,33 +9,33 @@
#include "staticSegments.h" #include "staticSegments.h"
JoiningContinuousTimeline<Shape> animate( JoiningContinuousTimeline<Shape> animate(
const BoundedTimeline<Phone>& phones, const BoundedTimeline<Phone>& phones,
const ShapeSet& targetShapeSet const ShapeSet& targetShapeSet
) { ) {
// Create timeline of shape rules // Create timeline of shape rules
ContinuousTimeline<ShapeRule> shapeRules = getShapeRules(phones); ContinuousTimeline<ShapeRule> shapeRules = getShapeRules(phones);
// Modify shape rules to only contain allowed shapes -- plus X, which is needed for pauses and // Modify shape rules to only contain allowed shapes -- plus X, which is needed for pauses and
// will be replaced later // will be replaced later
ShapeSet targetShapeSetPlusX = targetShapeSet; ShapeSet targetShapeSetPlusX = targetShapeSet;
targetShapeSetPlusX.insert(Shape::X); targetShapeSetPlusX.insert(Shape::X);
shapeRules = convertToTargetShapeSet(shapeRules, targetShapeSetPlusX); shapeRules = convertToTargetShapeSet(shapeRules, targetShapeSetPlusX);
// Animate in multiple steps // Animate in multiple steps
const auto performMainAnimationSteps = [&targetShapeSet](const auto& shapeRules) { const auto performMainAnimationSteps = [&targetShapeSet](const auto& shapeRules) {
JoiningContinuousTimeline<Shape> animation = animateRough(shapeRules); JoiningContinuousTimeline<Shape> animation = animateRough(shapeRules);
animation = optimizeTiming(animation); animation = optimizeTiming(animation);
animation = animatePauses(animation); animation = animatePauses(animation);
animation = insertTweens(animation); animation = insertTweens(animation);
animation = convertToTargetShapeSet(animation, targetShapeSet); animation = convertToTargetShapeSet(animation, targetShapeSet);
return animation; return animation;
}; };
const JoiningContinuousTimeline<Shape> result = const JoiningContinuousTimeline<Shape> result =
avoidStaticSegments(shapeRules, performMainAnimationSteps); avoidStaticSegments(shapeRules, performMainAnimationSteps);
for (const auto& timedShape : result) { for (const auto& timedShape : result) {
logTimedEvent("shape", timedShape); logTimedEvent("shape", timedShape);
} }
return result; return result;
} }

View File

@ -6,6 +6,6 @@
#include "targetShapeSet.h" #include "targetShapeSet.h"
JoiningContinuousTimeline<Shape> animate( JoiningContinuousTimeline<Shape> animate(
const BoundedTimeline<Phone>& phones, const BoundedTimeline<Phone>& phones,
const ShapeSet& targetShapeSet const ShapeSet& targetShapeSet
); );

View File

@ -2,47 +2,47 @@
#include "animationRules.h" #include "animationRules.h"
Shape getPauseShape(Shape previous, Shape next, centiseconds duration) { Shape getPauseShape(Shape previous, Shape next, centiseconds duration) {
// For very short pauses: Just hold the previous shape // For very short pauses: Just hold the previous shape
if (duration < 12_cs) { if (duration < 12_cs) {
return previous; return previous;
} }
// For short pauses: Relax the mouth // For short pauses: Relax the mouth
if (duration <= 35_cs) { if (duration <= 35_cs) {
// It looks odd if the pause shape is identical to the next shape. // It looks odd if the pause shape is identical to the next shape.
// Make sure we find a relaxed shape that's different from the next one. // Make sure we find a relaxed shape that's different from the next one.
for (Shape currentRelaxedShape = previous;;) { for (Shape currentRelaxedShape = previous;;) {
const Shape nextRelaxedShape = relax(currentRelaxedShape); const Shape nextRelaxedShape = relax(currentRelaxedShape);
if (nextRelaxedShape != next) { if (nextRelaxedShape != next) {
return nextRelaxedShape; return nextRelaxedShape;
} }
if (nextRelaxedShape == currentRelaxedShape) { if (nextRelaxedShape == currentRelaxedShape) {
// We're going in circles // We're going in circles
break; break;
} }
currentRelaxedShape = nextRelaxedShape; currentRelaxedShape = nextRelaxedShape;
} }
} }
// For longer pauses: Close the mouth // For longer pauses: Close the mouth
return Shape::X; return Shape::X;
} }
JoiningContinuousTimeline<Shape> animatePauses(const JoiningContinuousTimeline<Shape>& animation) { JoiningContinuousTimeline<Shape> animatePauses(const JoiningContinuousTimeline<Shape>& animation) {
JoiningContinuousTimeline<Shape> result(animation); JoiningContinuousTimeline<Shape> result(animation);
for_each_adjacent( for_each_adjacent(
animation.begin(), animation.begin(),
animation.end(), animation.end(),
[&](const Timed<Shape>& previous, const Timed<Shape>& pause, const Timed<Shape>& next) { [&](const Timed<Shape>& previous, const Timed<Shape>& pause, const Timed<Shape>& next) {
if (pause.getValue() != Shape::X) return; if (pause.getValue() != Shape::X) return;
result.set( result.set(
pause.getTimeRange(), pause.getTimeRange(),
getPauseShape(previous.getValue(), next.getValue(), pause.getDuration()) getPauseShape(previous.getValue(), next.getValue(), pause.getDuration())
); );
} }
); );
return result; return result;
} }

View File

@ -13,48 +13,48 @@
// So whenever we come across a one-shape vowel, we backtrack a little, spreading that shape to
// the left.
JoiningContinuousTimeline<Shape> animateRough(const ContinuousTimeline<ShapeRule>& shapeRules) {
    JoiningContinuousTimeline<Shape> animation(shapeRules.getRange(), Shape::X);

    Shape referenceShape = Shape::X;
    // Animate forwards
    centiseconds lastAnticipatedShapeStart = -1_cs;
    for (auto it = shapeRules.begin(); it != shapeRules.end(); ++it) {
        const ShapeRule shapeRule = it->getValue();
        const Shape shape = getClosestShape(referenceShape, shapeRule.shapeSet);
        animation.set(it->getTimeRange(), shape);
        // One-shape vowels are anticipated, i.e. animated backwards ahead of their start
        const bool anticipateShape = shapeRule.phone
            && isVowel(*shapeRule.phone)
            && shapeRule.shapeSet.size() == 1;
        if (anticipateShape) {
            // Animate backwards a little
            const Shape anticipatedShape = shape;
            const centiseconds anticipatedShapeStart = it->getStart();
            referenceShape = anticipatedShape;
            for (auto reverseIt = it; reverseIt != shapeRules.begin();) {
                --reverseIt;

                // Make sure we haven't animated too far back
                centiseconds anticipatingShapeStart = reverseIt->getStart();
                if (anticipatingShapeStart == lastAnticipatedShapeStart) break;
                const centiseconds maxAnticipationDuration = 20_cs;
                const centiseconds anticipationDuration =
                    anticipatedShapeStart - anticipatingShapeStart;
                if (anticipationDuration > maxAnticipationDuration) break;

                // Overwrite forward-animated shape with backwards-animated, anticipating shape
                const Shape anticipatingShape =
                    getClosestShape(referenceShape, reverseIt->getValue().shapeSet);
                animation.set(reverseIt->getTimeRange(), anticipatingShape);

                // Make sure the new, backwards-animated shape still resembles the anticipated shape
                if (getBasicShape(anticipatingShape) != getBasicShape(anticipatedShape)) break;

                referenceShape = anticipatingShape;
            }
            lastAnticipatedShapeStart = anticipatedShapeStart;
        }
        referenceShape = anticipateShape ? shape : relax(shape);
    }

    return animation;
}

View File

@ -6,71 +6,71 @@
using std::vector; using std::vector;
int getSyllableCount(const ContinuousTimeline<ShapeRule>& shapeRules, TimeRange timeRange) { int getSyllableCount(const ContinuousTimeline<ShapeRule>& shapeRules, TimeRange timeRange) {
if (timeRange.empty()) return 0; if (timeRange.empty()) return 0;
const auto begin = shapeRules.find(timeRange.getStart()); const auto begin = shapeRules.find(timeRange.getStart());
const auto end = std::next(shapeRules.find(timeRange.getEnd(), FindMode::SampleLeft)); const auto end = std::next(shapeRules.find(timeRange.getEnd(), FindMode::SampleLeft));
// Treat every vowel as one syllable // Treat every vowel as one syllable
int syllableCount = 0; int syllableCount = 0;
for (auto it = begin; it != end; ++it) { for (auto it = begin; it != end; ++it) {
const ShapeRule shapeRule = it->getValue(); const ShapeRule shapeRule = it->getValue();
// Disregard phones that are mostly outside the specified time range. // Disregard phones that are mostly outside the specified time range.
const centiseconds phoneMiddle = shapeRule.phoneTiming.getMiddle(); const centiseconds phoneMiddle = shapeRule.phoneTiming.getMiddle();
if (phoneMiddle < timeRange.getStart() || phoneMiddle >= timeRange.getEnd()) continue; if (phoneMiddle < timeRange.getStart() || phoneMiddle >= timeRange.getEnd()) continue;
auto phone = shapeRule.phone; auto phone = shapeRule.phone;
if (phone && isVowel(*phone)) { if (phone && isVowel(*phone)) {
++syllableCount; ++syllableCount;
} }
} }
return syllableCount; return syllableCount;
} }
// A static segment is a prolonged period during which the mouth shape doesn't change.
// Returns the time ranges of all segments of the animation that are long enough and
// contain enough syllables to look distractingly static.
vector<TimeRange> getStaticSegments(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const JoiningContinuousTimeline<Shape>& animation
) {
    // A static segment must contain a certain number of syllables to look distractingly static
    const int minSyllableCount = 3;
    // It must also have a minimum duration. The same number of syllables in fast speech usually
    // looks good.
    const centiseconds minDuration = 75_cs;

    vector<TimeRange> result;
    for (const auto& timedShape : animation) {
        const TimeRange timeRange = timedShape.getTimeRange();
        const bool isStatic = timeRange.getDuration() >= minDuration
            && getSyllableCount(shapeRules, timeRange) >= minSyllableCount;
        if (isStatic) {
            result.push_back(timeRange);
        }
    }

    return result;
}
// Indicates whether this shape rule can potentially be replaced by a modified version that breaks // Indicates whether this shape rule can potentially be replaced by a modified version that breaks
// up long static segments // up long static segments
bool canChange(const ShapeRule& rule) { bool canChange(const ShapeRule& rule) {
return rule.phone && isVowel(*rule.phone) && rule.shapeSet.size() == 1; return rule.phone && isVowel(*rule.phone) && rule.shapeSet.size() == 1;
} }
// Returns a new shape rule that is identical to the specified one, except that it leads to a
// slightly different visualization. Precondition: canChange(rule).
ShapeRule getChangedShapeRule(const ShapeRule& rule) {
    assert(canChange(rule));

    ShapeRule result(rule);
    // So far, I've only encountered B as a static shape.
    // If there is ever a problem with another static shape, this function can easily be extended.
    if (rule.shapeSet == ShapeSet { Shape::B }) {
        result.shapeSet = { Shape::C };
    }
    return result;
}
// Contains the start times of all rules to be changed // Contains the start times of all rules to be changed
@ -78,162 +78,162 @@ using RuleChanges = vector<centiseconds>;
// Replaces the indicated shape rules with slightly different ones, breaking up long static
// segments. `changes` contains the start times of the rules to change.
ContinuousTimeline<ShapeRule> applyChanges(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const RuleChanges& changes
) {
    ContinuousTimeline<ShapeRule> result(shapeRules);
    for (centiseconds changedRuleStart : changes) {
        const Timed<ShapeRule> timedOriginalRule = *shapeRules.get(changedRuleStart);
        const ShapeRule changedRule = getChangedShapeRule(timedOriginalRule.getValue());
        result.set(timedOriginalRule.getTimeRange(), changedRule);
    }
    return result;
}
class RuleChangeScenario { class RuleChangeScenario {
public: public:
RuleChangeScenario( RuleChangeScenario(
const ContinuousTimeline<ShapeRule>& originalRules, const ContinuousTimeline<ShapeRule>& originalRules,
const RuleChanges& changes, const RuleChanges& changes,
const AnimationFunction& animate const AnimationFunction& animate
) : ) :
changedRules(applyChanges(originalRules, changes)), changedRules(applyChanges(originalRules, changes)),
animation(animate(changedRules)), animation(animate(changedRules)),
staticSegments(getStaticSegments(changedRules, animation)) staticSegments(getStaticSegments(changedRules, animation))
{} {}
bool isBetterThan(const RuleChangeScenario& rhs) const { bool isBetterThan(const RuleChangeScenario& rhs) const {
// We want zero static segments // We want zero static segments
if (staticSegments.empty() && !rhs.staticSegments.empty()) return true; if (staticSegments.empty() && !rhs.staticSegments.empty()) return true;
// Short shapes are better than long ones. Minimize sum-of-squares. // Short shapes are better than long ones. Minimize sum-of-squares.
if (getSumOfShapeDurationSquares() < rhs.getSumOfShapeDurationSquares()) return true; if (getSumOfShapeDurationSquares() < rhs.getSumOfShapeDurationSquares()) return true;
return false; return false;
} }
int getStaticSegmentCount() const { int getStaticSegmentCount() const {
return static_cast<int>(staticSegments.size()); return static_cast<int>(staticSegments.size());
} }
ContinuousTimeline<ShapeRule> getChangedRules() const { ContinuousTimeline<ShapeRule> getChangedRules() const {
return changedRules; return changedRules;
} }
private: private:
ContinuousTimeline<ShapeRule> changedRules; ContinuousTimeline<ShapeRule> changedRules;
JoiningContinuousTimeline<Shape> animation; JoiningContinuousTimeline<Shape> animation;
vector<TimeRange> staticSegments; vector<TimeRange> staticSegments;
double getSumOfShapeDurationSquares() const { double getSumOfShapeDurationSquares() const {
return std::accumulate( return std::accumulate(
animation.begin(), animation.begin(),
animation.end(), animation.end(),
0.0, 0.0,
[](const double sum, const Timed<Shape>& timedShape) { [](const double sum, const Timed<Shape>& timedShape) {
const double duration = std::chrono::duration_cast<std::chrono::duration<double>>( const double duration = std::chrono::duration_cast<std::chrono::duration<double>>(
timedShape.getDuration() timedShape.getDuration()
).count(); ).count();
return sum + duration * duration; return sum + duration * duration;
} }
); );
} }
}; };
// Returns the start times of all shape rules that are candidates for modification
// (see canChange).
RuleChanges getPossibleRuleChanges(const ContinuousTimeline<ShapeRule>& shapeRules) {
    RuleChanges result;
    for (auto it = shapeRules.begin(); it != shapeRules.end(); ++it) {
        const ShapeRule rule = it->getValue();
        if (canChange(rule)) {
            result.push_back(it->getStart());
        }
    }
    return result;
}
// Tries combinations of rule changes (up to a fixed cap) and returns the set of shape rules
// whose resulting animation has the fewest static segments.
ContinuousTimeline<ShapeRule> fixStaticSegmentRules(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const AnimationFunction& animate
) {
    // The complexity of this function is exponential with the number of replacements.
    // So let's cap that value.
    const int maxReplacementCount = 3;

    // All potential changes
    const RuleChanges possibleRuleChanges = getPossibleRuleChanges(shapeRules);

    // Find best solution. Start with a single replacement, then increase as necessary.
    RuleChangeScenario bestScenario(shapeRules, {}, animate);
    for (
        int replacementCount = 1;
        bestScenario.getStaticSegmentCount() > 0
            && replacementCount <= std::min(static_cast<int>(possibleRuleChanges.size()), maxReplacementCount);
        ++replacementCount
    ) {
        // Only the first <replacementCount> elements of `currentRuleChanges` count
        auto currentRuleChanges(possibleRuleChanges);
        do {
            RuleChangeScenario currentScenario(
                shapeRules,
                { currentRuleChanges.begin(), currentRuleChanges.begin() + replacementCount },
                animate
            );
            if (currentScenario.isBetterThan(bestScenario)) {
                bestScenario = currentScenario;
            }
        } while (next_combination(
            currentRuleChanges.begin(),
            currentRuleChanges.begin() + replacementCount,
            currentRuleChanges.end()
        ));
    }

    return bestScenario.getChangedRules();
}
// Indicates whether the specified shape rule may result in different shapes depending on context // Indicates whether the specified shape rule may result in different shapes depending on context
bool isFlexible(const ShapeRule& rule) { bool isFlexible(const ShapeRule& rule) {
return rule.shapeSet.size() > 1; return rule.shapeSet.size() > 1;
} }
// Extends the specified time range until it starts and ends with a non-flexible shape rule, if
// possible. Stops at the timeline boundaries otherwise.
TimeRange extendToFixedRules(
    const TimeRange& timeRange,
    const ContinuousTimeline<ShapeRule>& shapeRules
) {
    auto first = shapeRules.find(timeRange.getStart());
    while (first != shapeRules.begin() && isFlexible(first->getValue())) {
        --first;
    }
    auto last = shapeRules.find(timeRange.getEnd(), FindMode::SampleLeft);
    while (std::next(last) != shapeRules.end() && isFlexible(last->getValue())) {
        ++last;
    }
    return { first->getStart(), last->getEnd() };
}
// Animates the given shape rules and, if the result contains distractingly static segments,
// modifies the rules within those segments and re-animates. Returns the final animation.
JoiningContinuousTimeline<Shape> avoidStaticSegments(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const AnimationFunction& animate
) {
    const auto animation = animate(shapeRules);
    const vector<TimeRange> staticSegments = getStaticSegments(shapeRules, animation);
    if (staticSegments.empty()) {
        return animation;
    }

    // Modify shape rules to eliminate static segments
    ContinuousTimeline<ShapeRule> fixedShapeRules(shapeRules);
    for (const TimeRange& staticSegment : staticSegments) {
        // Extend time range to the left and right so we don't lose adjacent rules that might
        // influence the animation
        const TimeRange extendedStaticSegment = extendToFixedRules(staticSegment, shapeRules);

        // Fix shape rules within the static segment
        const auto fixedSegmentShapeRules = fixStaticSegmentRules(
            { extendedStaticSegment, ShapeRule::getInvalid(), fixedShapeRules },
            animate
        );
        for (const auto& timedShapeRule : fixedSegmentShapeRules) {
            fixedShapeRules.set(timedShapeRule);
        }
    }

    return animate(fixedShapeRules);
}

View File

@ -13,6 +13,6 @@ using AnimationFunction = std::function<JoiningContinuousTimeline<Shape>(const C
// Static segments happen rather often. // Static segments happen rather often.
// See http://animateducated.blogspot.de/2016/10/lip-sync-animation-2.html?showComment=1478861729702#c2940729096183546458. // See http://animateducated.blogspot.de/2016/10/lip-sync-animation-2.html?showComment=1478861729702#c2940729096183546458.
JoiningContinuousTimeline<Shape> avoidStaticSegments( JoiningContinuousTimeline<Shape> avoidStaticSegments(
const ContinuousTimeline<ShapeRule>& shapeRules, const ContinuousTimeline<ShapeRule>& shapeRules,
const AnimationFunction& animate const AnimationFunction& animate
); );

View File

@ -1,48 +1,48 @@
#include "targetShapeSet.h" #include "targetShapeSet.h"
// Returns the specified shape if it is contained in the target shape set; otherwise falls back
// to its basic shape. Throws std::invalid_argument if even the basic shape is missing from the
// target set.
Shape convertToTargetShapeSet(Shape shape, const ShapeSet& targetShapeSet) {
    if (targetShapeSet.find(shape) != targetShapeSet.end()) {
        return shape;
    }
    const Shape basicShape = getBasicShape(shape);
    if (targetShapeSet.find(basicShape) == targetShapeSet.end()) {
        throw std::invalid_argument(
            fmt::format("Target shape set must contain basic shape {}.", basicShape));
    }
    return basicShape;
}
// Replaces each shape in the set with the closest shape that occurs in the target shape set.
ShapeSet convertToTargetShapeSet(const ShapeSet& shapes, const ShapeSet& targetShapeSet) {
    ShapeSet result;
    for (Shape shape : shapes) {
        result.insert(convertToTargetShapeSet(shape, targetShapeSet));
    }
    return result;
}
// Replaces each shape in each rule with the closest shape that occurs in the target shape set.
ContinuousTimeline<ShapeRule> convertToTargetShapeSet(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const ShapeSet& targetShapeSet
) {
    ContinuousTimeline<ShapeRule> result(shapeRules);
    for (const auto& timedShapeRule : shapeRules) {
        ShapeRule rule = timedShapeRule.getValue();
        rule.shapeSet = convertToTargetShapeSet(rule.shapeSet, targetShapeSet);
        result.set(timedShapeRule.getTimeRange(), rule);
    }
    return result;
}
// Replaces each shape in the specified animation with the closest shape that occurs in the
// target shape set.
JoiningContinuousTimeline<Shape> convertToTargetShapeSet(
    const JoiningContinuousTimeline<Shape>& animation,
    const ShapeSet& targetShapeSet
) {
    JoiningContinuousTimeline<Shape> result(animation);
    for (const auto& timedShape : animation) {
        result.set(
            timedShape.getTimeRange(),
            convertToTargetShapeSet(timedShape.getValue(), targetShapeSet)
        );
    }
    return result;
}

View File

@ -12,13 +12,13 @@ ShapeSet convertToTargetShapeSet(const ShapeSet& shapes, const ShapeSet& targetS
// Replaces each shape in each rule with the closest shape that occurs in the target shape set. // Replaces each shape in each rule with the closest shape that occurs in the target shape set.
ContinuousTimeline<ShapeRule> convertToTargetShapeSet( ContinuousTimeline<ShapeRule> convertToTargetShapeSet(
const ContinuousTimeline<ShapeRule>& shapeRules, const ContinuousTimeline<ShapeRule>& shapeRules,
const ShapeSet& targetShapeSet const ShapeSet& targetShapeSet
); );
// Replaces each shape in the specified animation with the closest shape that occurs in the target // Replaces each shape in the specified animation with the closest shape that occurs in the target
// shape set. // shape set.
JoiningContinuousTimeline<Shape> convertToTargetShapeSet( JoiningContinuousTimeline<Shape> convertToTargetShapeSet(
const JoiningContinuousTimeline<Shape>& animation, const JoiningContinuousTimeline<Shape>& animation,
const ShapeSet& targetShapeSet const ShapeSet& targetShapeSet
); );

View File

@ -9,255 +9,255 @@ using std::string;
using std::map; using std::map;
// Renders the animation's shape sequence as a space-separated string, e.g. "A B X".
// Used for logging.
string getShapesString(const JoiningContinuousTimeline<Shape>& shapes) {
    string result;
    for (const auto& timedShape : shapes) {
        if (!result.empty()) {
            result.append(" ");
        }
        result.append(boost::lexical_cast<std::string>(timedShape.getValue()));
    }
    return result;
}
// Picks the shape with the largest total duration in the timeline.
// Throws std::invalid_argument for an empty timeline.
Shape getRepresentativeShape(const JoiningTimeline<Shape>& timeline) {
    if (timeline.empty()) {
        throw std::invalid_argument("Cannot determine representative shape from empty timeline.");
    }

    // Collect candidate shapes with weights
    map<Shape, centiseconds> candidateShapeWeights;
    for (const auto& timedShape : timeline) {
        candidateShapeWeights[timedShape.getValue()] += timedShape.getDuration();
    }

    // Select shape with highest total duration within the candidate range
    const Shape bestShape = std::max_element(
        candidateShapeWeights.begin(), candidateShapeWeights.end(),
        [](auto a, auto b) { return a.second < b.second; }
    )->first;

    // Shapes C and D are similar, but D is more interesting.
    const bool substituteD = bestShape == Shape::C && candidateShapeWeights[Shape::D] > 0_cs;
    return substituteD ? Shape::D : bestShape;
}
struct ShapeReduction { struct ShapeReduction {
ShapeReduction(const JoiningTimeline<Shape>& sourceShapes) : ShapeReduction(const JoiningTimeline<Shape>& sourceShapes) :
sourceShapes(sourceShapes), sourceShapes(sourceShapes),
shape(getRepresentativeShape(sourceShapes)) {} shape(getRepresentativeShape(sourceShapes)) {}
ShapeReduction(const JoiningTimeline<Shape>& sourceShapes, TimeRange candidateRange) : ShapeReduction(const JoiningTimeline<Shape>& sourceShapes, TimeRange candidateRange) :
ShapeReduction(JoiningBoundedTimeline<Shape>(candidateRange, sourceShapes)) {} ShapeReduction(JoiningBoundedTimeline<Shape>(candidateRange, sourceShapes)) {}
JoiningTimeline<Shape> sourceShapes; JoiningTimeline<Shape> sourceShapes;
Shape shape; Shape shape;
}; };
// Returns a time range of candidate shapes for the next shape to draw.
// Guaranteed to be non-empty.
TimeRange getNextMinimalCandidateRange(const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange, const centiseconds writePosition) {
    if (sourceShapes.empty()) {
        throw std::invalid_argument("Cannot determine candidate range for empty source timeline.");
    }

    // Too short, and we get flickering. Too long, and too many shapes are lost.
    // Good values turn out to be 5 to 7 cs, with 7 cs sometimes looking just marginally better.
    const centiseconds minShapeDuration = 7_cs;

    // If the remaining time can hold more than one shape, but not two: split it evenly
    const centiseconds remainingTargetDuration = writePosition - targetRange.getStart();
    const bool canFitOneOrLess = remainingTargetDuration <= minShapeDuration;
    const bool canFitTwo = remainingTargetDuration >= 2 * minShapeDuration;
    const centiseconds duration = canFitOneOrLess || canFitTwo
        ? minShapeDuration
        : remainingTargetDuration / 2;

    TimeRange candidateRange(writePosition - duration, writePosition);
    if (writePosition == targetRange.getEnd()) {
        // This is the first iteration.
        // Extend the candidate range to the right in order to consider all source shapes after the
        // target range.
        candidateRange.setEndIfLater(sourceShapes.getRange().getEnd());
    }
    if (candidateRange.getStart() >= sourceShapes.getRange().getEnd()) {
        // We haven't reached the source range yet.
        // Extend the candidate range to the left in order to encompass the right-most source shape.
        candidateRange.setStart(sourceShapes.rbegin()->getStart());
    }
    if (candidateRange.getEnd() <= sourceShapes.getRange().getStart()) {
        // We're past the source range. This can happen in corner cases.
        // Extend the candidate range to the right in order to encompass the left-most source shape
        candidateRange.setEnd(sourceShapes.begin()->getEnd());
    }
    return candidateRange;
}
// Chooses the next shape reduction when animating backwards: compares the shortest-possible
// candidate range against one extended to fully encompass its left-most shape, and picks the
// extended one when it agrees with the minimal one or yields a genuinely distinct shape.
ShapeReduction getNextShapeReduction(
    const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange,
    centiseconds writePosition
) {
    // Determine the next time range of candidate shapes. Consider two scenarios:

    // ... the shortest-possible candidate range
    const ShapeReduction minReduction(sourceShapes,
        getNextMinimalCandidateRange(sourceShapes, targetRange, writePosition));

    // ... a candidate range extended to the left to fully encompass its left-most shape
    const ShapeReduction extendedReduction(sourceShapes,
        {
            minReduction.sourceShapes.begin()->getStart(),
            minReduction.sourceShapes.getRange().getEnd()
        }
    );

    // Determine the shape that might be picked *next* if we choose the shortest-possible candidate
    // range now
    const ShapeReduction nextReduction(
        sourceShapes,
        getNextMinimalCandidateRange(
            sourceShapes, targetRange, minReduction.sourceShapes.getRange().getStart())
    );

    const bool minEqualsExtended = minReduction.shape == extendedReduction.shape;
    const bool extendedIsSpecial = extendedReduction.shape != minReduction.shape
        && extendedReduction.shape != nextReduction.shape;

    return minEqualsExtended || extendedIsSpecial ? extendedReduction : minReduction;
}
// Modifies the timing of the given animation to fit into the specified target time range without
// jitter.
JoiningContinuousTimeline<Shape> retime(const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange) {
    logTimedEvent("segment", targetRange, getShapesString(sourceShapes));

    JoiningContinuousTimeline<Shape> result(targetRange, Shape::X);
    if (sourceShapes.empty()) return result;

    // The result is filled from right to left, one shape at a time.
    centiseconds penPosition = targetRange.getEnd();
    while (penPosition > targetRange.getStart()) {
        // Pick the shape to show next, possibly discarding shapes too short to register
        const ShapeReduction reduction =
            getNextShapeReduction(sourceShapes, targetRange, penPosition);

        // Work out the time range the chosen shape should occupy
        TimeRange shapeTargetRange(reduction.sourceShapes.getRange());
        if (shapeTargetRange.getStart() <= sourceShapes.getRange().getStart()) {
            // The left-most source shape has been consumed; fill the entire remaining target range.
            shapeTargetRange.setStartIfEarlier(targetRange.getStart());
        }
        shapeTargetRange.trimRight(penPosition);

        // Write the shape, then move the pen to its start
        result.set(shapeTargetRange, reduction.shape);
        penPosition = shapeTargetRange.getStart();
    }
    return result;
}
// Convenience overload: retimes the slice [sourceRange] of the given animation so that it fits
// into [targetRange].
JoiningContinuousTimeline<Shape> retime(
    const JoiningContinuousTimeline<Shape>& animation,
    TimeRange sourceRange,
    TimeRange targetRange
) {
    const JoiningContinuousTimeline<Shape> slice(sourceRange, Shape::X, animation);
    return retime(slice, targetRange);
}
// Coarse classification of mouth shapes used when optimizing animation timing.
enum class MouthState {
    Idle,   // no speech (mapped from shape X)
    Closed, // closed mouth (mapped from shape A)
    Open    // any other, open-mouth shape
};
// Adjusts the timing of the given animation so that every open or closed mouth segment is long
// enough to register visually, retiming segments rather than simply stretching them.
JoiningContinuousTimeline<Shape> optimizeTiming(const JoiningContinuousTimeline<Shape>& animation) {
    // Identify segments with idle, closed, and open mouth shapes
    JoiningContinuousTimeline<MouthState> segments(animation.getRange(), MouthState::Idle);
    for (const auto& timedShape : animation) {
        const Shape shape = timedShape.getValue();
        MouthState mouthState;
        if (shape == Shape::X) {
            mouthState = MouthState::Idle;
        } else if (shape == Shape::A) {
            mouthState = MouthState::Closed;
        } else {
            mouthState = MouthState::Open;
        }
        segments.set(timedShape.getTimeRange(), mouthState);
    }

    // The minimum duration a segment of open or closed mouth shapes must have to visually register
    const centiseconds minSegmentDuration = 8_cs;
    // The maximum amount by which the start of a shape can be brought forward
    const centiseconds maxExtensionDuration = 6_cs;

    // Make sure all open and closed segments are long enough to register visually.
    JoiningContinuousTimeline<Shape> result(animation.getRange(), Shape::X);
    // The result timeline is filled from right to left; `writtenStart` is the earliest position
    // already written.
    centiseconds writtenStart = result.getRange().getEnd();
    for (auto segmentIt = segments.rbegin(); segmentIt != segments.rend(); ++segmentIt) {
        // Idle segments need no adjustment.
        if (segmentIt->getValue() == MouthState::Idle) continue;

        writtenStart = std::min(segmentIt->getEnd(), writtenStart);
        if (writtenStart - segmentIt->getStart() >= minSegmentDuration) {
            // The segment is long enough; retime it into place without extending it to the left.
            const TimeRange targetRange(segmentIt->getStart(), writtenStart);
            const auto retimedSegment = retime(animation, segmentIt->getTimeRange(), targetRange);
            for (const auto& timedShape : retimedSegment) {
                result.set(timedShape);
            }
            writtenStart = targetRange.getStart();
        } else {
            // The segment is too short. Gather the maximal run of adjacent short, non-idle
            // segments to its left, then distribute the available time evenly among them.
            const auto runBegin = segmentIt;
            auto runEnd = std::next(runBegin);
            while (
                runEnd != segments.rend()
                && runEnd->getValue() != MouthState::Idle
                && runEnd->getDuration() < minSegmentDuration
            ) {
                ++runEnd;
            }

            // Determine how much the entire run of short segments may be extended to the left
            const size_t shortSegmentCount = std::distance(runBegin, runEnd);
            const centiseconds desiredDuration = minSegmentDuration * shortSegmentCount;
            const centiseconds currentDuration = runBegin->getEnd() - std::prev(runEnd)->getStart();
            const centiseconds desiredExtension = desiredDuration - currentDuration;
            // Time can only be borrowed from the segment preceding the run, if any
            const centiseconds availableExtension = runEnd != segments.rend()
                ? runEnd->getDuration() - 1_cs
                : 0_cs;
            const centiseconds extension = std::min({
                desiredExtension, availableExtension, maxExtensionDuration
            });

            // Distribute the resulting time range evenly among all short segments
            const centiseconds runTargetStart = std::prev(runEnd)->getStart() - extension;
            for (auto shortIt = runBegin; shortIt != runEnd; ++shortIt) {
                const size_t remainingCount = std::distance(shortIt, runEnd);
                const centiseconds segmentDuration =
                    (writtenStart - runTargetStart) / remainingCount;
                const TimeRange segmentTargetRange(writtenStart - segmentDuration, writtenStart);
                const auto retimedSegment =
                    retime(animation, shortIt->getTimeRange(), segmentTargetRange);
                for (const auto& timedShape : retimedSegment) {
                    result.set(timedShape);
                }
                writtenStart = segmentTargetRange.getStart();
            }
            segmentIt = std::prev(runEnd);
        }
    }
    return result;
}

View File

@ -2,53 +2,53 @@
#include "animationRules.h" #include "animationRules.h"
// Inserts intermediate ("tween") shapes between adjacent animation shapes wherever the animation
// rules (getTween) call for one, skipping tweens too short to register.
JoiningContinuousTimeline<Shape> insertTweens(const JoiningContinuousTimeline<Shape>& animation) {
    const centiseconds minTweenDuration = 4_cs;
    const centiseconds maxTweenDuration = 8_cs;

    JoiningContinuousTimeline<Shape> result(animation);
    for_each_adjacent(animation.begin(), animation.end(), [&](const auto& first, const auto& second) {
        // Ask the animation rules whether this pair of shapes gets a tween
        auto pair = getTween(first.getValue(), second.getValue());
        if (!pair) return;

        Shape tweenShape;
        TweenTiming tweenTiming;
        std::tie(tweenShape, tweenTiming) = *pair;

        const TimeRange firstTimeRange = first.getTimeRange();
        const TimeRange secondTimeRange = second.getTimeRange();
        centiseconds tweenStart, tweenDuration;
        if (tweenTiming == TweenTiming::Early) {
            // Fit the tween into the end of the first shape
            tweenDuration = std::min(firstTimeRange.getDuration() / 3, maxTweenDuration);
            tweenStart = firstTimeRange.getEnd() - tweenDuration;
        } else if (tweenTiming == TweenTiming::Centered) {
            // Center the tween on the boundary between the two shapes
            tweenDuration = std::min({
                firstTimeRange.getDuration() / 4, secondTimeRange.getDuration() / 4, maxTweenDuration
            });
            tweenStart = firstTimeRange.getEnd() - tweenDuration / 2;
        } else if (tweenTiming == TweenTiming::Late) {
            // Fit the tween into the start of the second shape
            tweenDuration = std::min(secondTimeRange.getDuration() / 3, maxTweenDuration);
            tweenStart = secondTimeRange.getStart();
        } else {
            throw std::runtime_error("Unexpected tween timing.");
        }

        // Tweens too short to register are dropped
        if (tweenDuration < minTweenDuration) return;
        result.set(tweenStart, tweenStart + tweenDuration, tweenShape);
    });
    return result;
}

View File

@ -4,66 +4,66 @@
using std::invalid_argument; using std::invalid_argument;
// Returns the clip's duration as a time range starting at zero, truncated to whole centiseconds.
TimeRange AudioClip::getTruncatedRange() const {
    const centiseconds duration(100 * size() / getSampleRate());
    return TimeRange(0_cs, duration);
}
class SafeSampleReader { class SafeSampleReader {
public: public:
SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size); SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size);
AudioClip::value_type operator()(AudioClip::size_type index); AudioClip::value_type operator()(AudioClip::size_type index);
private: private:
SampleReader unsafeRead; SampleReader unsafeRead;
AudioClip::size_type size; AudioClip::size_type size;
AudioClip::size_type lastIndex = -1; AudioClip::size_type lastIndex = -1;
AudioClip::value_type lastSample = 0; AudioClip::value_type lastSample = 0;
}; };
// Takes ownership of the reader. The std::function parameter is taken by value and moved into
// place rather than copied, avoiding a redundant copy of the callable.
SafeSampleReader::SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size) :
    unsafeRead(std::move(unsafeRead)),
    size(size)
{}
// Reads the sample at `index`, validating bounds and serving repeated reads of the same index
// from the one-sample cache.
inline AudioClip::value_type SafeSampleReader::operator()(AudioClip::size_type index) {
    if (index < 0) {
        throw invalid_argument(fmt::format("Cannot read from sample index {}. Index < 0.", index));
    }
    if (index >= size) {
        throw invalid_argument(fmt::format(
            "Cannot read from sample index {}. Clip size is {}.",
            index,
            size
        ));
    }

    // Only hit the underlying reader when the index actually changed
    if (index != lastIndex) {
        lastIndex = index;
        lastSample = unsafeRead(index);
    }
    return lastSample;
}
// Returns a bounds-checked sample reader for this clip.
SampleReader AudioClip::createSampleReader() const {
    return SafeSampleReader(createUnsafeSampleReader(), size());
}
// Returns an iterator positioned at the first sample of the clip.
AudioClip::iterator AudioClip::begin() const {
    return SampleIterator(*this, 0);
}

// Returns an iterator positioned one past the last sample of the clip.
AudioClip::iterator AudioClip::end() const {
    return SampleIterator(*this, size());
}
// Applies an audio effect to a clip, enabling pipe-style chaining of effects.
std::unique_ptr<AudioClip> operator|(std::unique_ptr<AudioClip> clip, const AudioEffect& effect) {
    return effect(std::move(clip));
}
// Default constructor: an iterator at index 0 with no associated clip.
SampleIterator::SampleIterator() :
    sampleIndex(0)
{}

// Constructs an iterator over the given clip at the given sample index. The sample reader is
// created lazily, on first dereference.
SampleIterator::SampleIterator(const AudioClip& audioClip, size_type sampleIndex) :
    sampleReader([&audioClip] { return audioClip.createSampleReader(); }),
    sampleIndex(sampleIndex)
{}

View File

@ -10,22 +10,22 @@ class SampleIterator;
class AudioClip { class AudioClip {
public: public:
using value_type = float; using value_type = float;
using size_type = int64_t; using size_type = int64_t;
using difference_type = int64_t; using difference_type = int64_t;
using iterator = SampleIterator; using iterator = SampleIterator;
using SampleReader = std::function<value_type(size_type)>; using SampleReader = std::function<value_type(size_type)>;
virtual ~AudioClip() {} virtual ~AudioClip() {}
virtual std::unique_ptr<AudioClip> clone() const = 0; virtual std::unique_ptr<AudioClip> clone() const = 0;
virtual int getSampleRate() const = 0; virtual int getSampleRate() const = 0;
virtual size_type size() const = 0; virtual size_type size() const = 0;
TimeRange getTruncatedRange() const; TimeRange getTruncatedRange() const;
SampleReader createSampleReader() const; SampleReader createSampleReader() const;
iterator begin() const; iterator begin() const;
iterator end() const; iterator end() const;
private: private:
virtual SampleReader createUnsafeSampleReader() const = 0; virtual SampleReader createUnsafeSampleReader() const = 0;
}; };
using AudioEffect = std::function<std::unique_ptr<AudioClip>(std::unique_ptr<AudioClip>)>; using AudioEffect = std::function<std::unique_ptr<AudioClip>(std::unique_ptr<AudioClip>)>;
@ -36,107 +36,107 @@ using SampleReader = AudioClip::SampleReader;
class SampleIterator { class SampleIterator {
public: public:
using value_type = AudioClip::value_type; using value_type = AudioClip::value_type;
using size_type = AudioClip::size_type; using size_type = AudioClip::size_type;
using difference_type = AudioClip::difference_type; using difference_type = AudioClip::difference_type;
SampleIterator(); SampleIterator();
size_type getSampleIndex() const; size_type getSampleIndex() const;
void seek(size_type sampleIndex); void seek(size_type sampleIndex);
value_type operator*() const; value_type operator*() const;
value_type operator[](difference_type n) const; value_type operator[](difference_type n) const;
private: private:
friend AudioClip; friend AudioClip;
SampleIterator(const AudioClip& audioClip, size_type sampleIndex); SampleIterator(const AudioClip& audioClip, size_type sampleIndex);
Lazy<SampleReader> sampleReader; Lazy<SampleReader> sampleReader;
size_type sampleIndex; size_type sampleIndex;
}; };
// Returns the iterator's current sample index.
inline SampleIterator::size_type SampleIterator::getSampleIndex() const {
    return sampleIndex;
}

// Repositions the iterator at the given sample index.
inline void SampleIterator::seek(size_type sampleIndex) {
    this->sampleIndex = sampleIndex;
}

// Reads the sample at the current position.
inline SampleIterator::value_type SampleIterator::operator*() const {
    return (*sampleReader)(sampleIndex);
}

// Reads the sample `n` positions away from the current position.
inline SampleIterator::value_type SampleIterator::operator[](difference_type n) const {
    return (*sampleReader)(sampleIndex + n);
}
// Comparison operators: iterators are ordered by sample index alone; the underlying clip is not
// taken into account.
inline bool operator==(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() == rhs.getSampleIndex();
}

inline bool operator!=(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() != rhs.getSampleIndex();
}

inline bool operator<(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() < rhs.getSampleIndex();
}

inline bool operator>(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() > rhs.getSampleIndex();
}

inline bool operator<=(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() <= rhs.getSampleIndex();
}

inline bool operator>=(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() >= rhs.getSampleIndex();
}
// Advances the iterator by n samples.
inline SampleIterator& operator+=(SampleIterator& it, SampleIterator::difference_type n) {
    it.seek(it.getSampleIndex() + n);
    return it;
}

// Moves the iterator back by n samples.
inline SampleIterator& operator-=(SampleIterator& it, SampleIterator::difference_type n) {
    it.seek(it.getSampleIndex() - n);
    return it;
}
// Pre-increment: advances by one sample.
inline SampleIterator& operator++(SampleIterator& it) {
    return operator+=(it, 1);
}

// Post-increment: advances by one sample, returning the previous position.
inline SampleIterator operator++(SampleIterator& it, int) {
    SampleIterator previous(it);
    operator++(it);
    return previous;
}

// Pre-decrement: moves back by one sample.
inline SampleIterator& operator--(SampleIterator& it) {
    return operator-=(it, 1);
}

// Post-decrement: moves back by one sample, returning the previous position.
inline SampleIterator operator--(SampleIterator& it, int) {
    SampleIterator previous(it);
    operator--(it);
    return previous;
}
// Returns a copy of the iterator advanced by n samples.
inline SampleIterator operator+(const SampleIterator& it, SampleIterator::difference_type n) {
    SampleIterator copy(it);
    copy += n;
    return copy;
}

// Returns a copy of the iterator moved back by n samples.
inline SampleIterator operator-(const SampleIterator& it, SampleIterator::difference_type n) {
    SampleIterator copy(it);
    copy -= n;
    return copy;
}

// Returns the distance in samples between two iterators.
inline SampleIterator::difference_type operator-(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() - rhs.getSampleIndex();
}

View File

@ -4,27 +4,27 @@ using std::unique_ptr;
using std::make_unique; using std::make_unique;
// Creates a view on the given time range of the input clip.
// Throws std::invalid_argument if the range extends beyond the input clip.
AudioSegment::AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range) :
    inputClip(std::move(inputClip)),
    sampleOffset(static_cast<int64_t>(range.getStart().count()) * this->inputClip->getSampleRate() / 100),
    sampleCount(static_cast<int64_t>(range.getDuration().count()) * this->inputClip->getSampleRate() / 100)
{
    const bool withinClip =
        sampleOffset >= 0 && sampleOffset + sampleCount <= this->inputClip->size();
    if (!withinClip) {
        throw std::invalid_argument("Segment extends beyond input clip.");
    }
}
// Returns a copy of this segment (the input clip is shared, not copied).
unique_ptr<AudioClip> AudioSegment::clone() const {
    return make_unique<AudioSegment>(*this);
}
// Returns a reader that translates segment-relative indices into input-clip indices.
SampleReader AudioSegment::createUnsafeSampleReader() const {
    return [read = inputClip->createSampleReader(), offset = sampleOffset](size_type index) {
        return read(index + offset);
    };
}
// Returns an effect that cuts a clip down to the given time range.
AudioEffect segment(const TimeRange& range) {
    return [range](unique_ptr<AudioClip> inputClip) {
        return make_unique<AudioSegment>(std::move(inputClip), range);
    };
}

View File

@ -4,24 +4,24 @@
class AudioSegment : public AudioClip { class AudioSegment : public AudioClip {
public: public:
AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range); AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range);
std::unique_ptr<AudioClip> clone() const override; std::unique_ptr<AudioClip> clone() const override;
int getSampleRate() const override; int getSampleRate() const override;
size_type size() const override; size_type size() const override;
private: private:
SampleReader createUnsafeSampleReader() const override; SampleReader createUnsafeSampleReader() const override;
std::shared_ptr<AudioClip> inputClip; std::shared_ptr<AudioClip> inputClip;
size_type sampleOffset, sampleCount; size_type sampleOffset, sampleCount;
}; };
inline int AudioSegment::getSampleRate() const { inline int AudioSegment::getSampleRate() const {
return inputClip->getSampleRate(); return inputClip->getSampleRate();
} }
inline AudioClip::size_type AudioSegment::size() const { inline AudioClip::size_type AudioSegment::size() const {
return sampleCount; return sampleCount;
} }
AudioEffect segment(const TimeRange& range); AudioEffect segment(const TimeRange& range);

View File

@ -5,65 +5,65 @@ using std::unique_ptr;
using std::make_unique; using std::make_unique;
// Creates a clip that adds `offset` to every sample of the input clip, scaling samples by
// 1 / (1 + |offset|) so the result cannot clip.
DcOffset::DcOffset(unique_ptr<AudioClip> inputClip, float offset) :
    inputClip(std::move(inputClip)),
    offset(offset),
    factor(1 / (1 + std::abs(offset)))
{}
// Returns a copy of this effect clip (the input clip is shared, not copied).
unique_ptr<AudioClip> DcOffset::clone() const {
    return make_unique<DcOffset>(*this);
}
// Returns a reader that applies the scale factor and offset to every sample of the input clip.
SampleReader DcOffset::createUnsafeSampleReader() const {
    return [
        read = inputClip->createSampleReader(),
        factor = factor,
        offset = offset
    ](size_type index) {
        return read(index) * factor + offset;
    };
}
float getDcOffset(const AudioClip& audioClip) { float getDcOffset(const AudioClip& audioClip) {
int flatMeanSampleCount, fadingMeanSampleCount; int flatMeanSampleCount, fadingMeanSampleCount;
const int sampleRate = audioClip.getSampleRate(); const int sampleRate = audioClip.getSampleRate();
if (audioClip.size() > 4 * sampleRate) { if (audioClip.size() > 4 * sampleRate) {
// Long audio file. Average over the first 3 seconds, then fade out over the 4th. // Long audio file. Average over the first 3 seconds, then fade out over the 4th.
flatMeanSampleCount = 3 * sampleRate; flatMeanSampleCount = 3 * sampleRate;
fadingMeanSampleCount = 1 * sampleRate; fadingMeanSampleCount = 1 * sampleRate;
} else { } else {
// Short audio file. Average over the entire duration. // Short audio file. Average over the entire duration.
flatMeanSampleCount = static_cast<int>(audioClip.size()); flatMeanSampleCount = static_cast<int>(audioClip.size());
fadingMeanSampleCount = 0; fadingMeanSampleCount = 0;
} }
const auto read = audioClip.createSampleReader(); const auto read = audioClip.createSampleReader();
double sum = 0; double sum = 0;
for (int i = 0; i < flatMeanSampleCount; ++i) { for (int i = 0; i < flatMeanSampleCount; ++i) {
sum += read(i); sum += read(i);
} }
for (int i = 0; i < fadingMeanSampleCount; ++i) { for (int i = 0; i < fadingMeanSampleCount; ++i) {
const double weight = const double weight =
static_cast<double>(fadingMeanSampleCount - i) / fadingMeanSampleCount; static_cast<double>(fadingMeanSampleCount - i) / fadingMeanSampleCount;
sum += read(flatMeanSampleCount + i) * weight; sum += read(flatMeanSampleCount + i) * weight;
} }
const double totalWeight = flatMeanSampleCount + (fadingMeanSampleCount + 1) / 2.0; const double totalWeight = flatMeanSampleCount + (fadingMeanSampleCount + 1) / 2.0;
const double offset = sum / totalWeight; const double offset = sum / totalWeight;
return static_cast<float>(offset); return static_cast<float>(offset);
} }
// Returns an effect that adds the given DC offset to a clip.
// Offsets smaller than epsilon in magnitude leave the clip unchanged.
AudioEffect addDcOffset(float offset, float epsilon) {
    return [offset, epsilon](unique_ptr<AudioClip> inputClip) -> unique_ptr<AudioClip> {
        if (std::abs(offset) < epsilon) return inputClip;
        return make_unique<DcOffset>(std::move(inputClip), offset);
    };
}
// Returns an effect that measures a clip's DC offset and compensates for it.
AudioEffect removeDcOffset(float epsilon) {
    return [epsilon](unique_ptr<AudioClip> inputClip) {
        const float measuredOffset = getDcOffset(*inputClip);
        return std::move(inputClip) | addDcOffset(-measuredOffset, epsilon);
    };
}

View File

@ -6,24 +6,24 @@
// to prevent clipping // to prevent clipping
class DcOffset : public AudioClip { class DcOffset : public AudioClip {
public: public:
DcOffset(std::unique_ptr<AudioClip> inputClip, float offset); DcOffset(std::unique_ptr<AudioClip> inputClip, float offset);
std::unique_ptr<AudioClip> clone() const override; std::unique_ptr<AudioClip> clone() const override;
int getSampleRate() const override; int getSampleRate() const override;
size_type size() const override; size_type size() const override;
private: private:
SampleReader createUnsafeSampleReader() const override; SampleReader createUnsafeSampleReader() const override;
std::shared_ptr<AudioClip> inputClip; std::shared_ptr<AudioClip> inputClip;
float offset; float offset;
float factor; float factor;
}; };
inline int DcOffset::getSampleRate() const { inline int DcOffset::getSampleRate() const {
return inputClip->getSampleRate(); return inputClip->getSampleRate();
} }
inline AudioClip::size_type DcOffset::size() const { inline AudioClip::size_type DcOffset::size() const {
return inputClip->size(); return inputClip->size();
} }
float getDcOffset(const AudioClip& audioClip); float getDcOffset(const AudioClip& audioClip);

View File

@ -13,154 +13,154 @@ using std::ifstream;
using std::ios_base; using std::ios_base;
std::string vorbisErrorToString(int64_t errorCode) { std::string vorbisErrorToString(int64_t errorCode) {
switch (errorCode) { switch (errorCode) {
case OV_EREAD: case OV_EREAD:
return "Read error while fetching compressed data for decode."; return "Read error while fetching compressed data for decode.";
case OV_EFAULT: case OV_EFAULT:
return "Internal logic fault; indicates a bug or heap/stack corruption."; return "Internal logic fault; indicates a bug or heap/stack corruption.";
case OV_EIMPL: case OV_EIMPL:
return "Feature not implemented"; return "Feature not implemented";
case OV_EINVAL: case OV_EINVAL:
return "Either an invalid argument, or incompletely initialized argument passed to a call."; return "Either an invalid argument, or incompletely initialized argument passed to a call.";
case OV_ENOTVORBIS: case OV_ENOTVORBIS:
return "The given file/data was not recognized as Ogg Vorbis data."; return "The given file/data was not recognized as Ogg Vorbis data.";
case OV_EBADHEADER: case OV_EBADHEADER:
return "The file/data is apparently an Ogg Vorbis stream, but contains a corrupted or undecipherable header."; return "The file/data is apparently an Ogg Vorbis stream, but contains a corrupted or undecipherable header.";
case OV_EVERSION: case OV_EVERSION:
return "The bitstream format revision of the given Vorbis stream is not supported."; return "The bitstream format revision of the given Vorbis stream is not supported.";
case OV_ENOTAUDIO: case OV_ENOTAUDIO:
return "Packet is not an audio packet."; return "Packet is not an audio packet.";
case OV_EBADPACKET: case OV_EBADPACKET:
return "Error in packet."; return "Error in packet.";
case OV_EBADLINK: case OV_EBADLINK:
return "The given link exists in the Vorbis data stream, but is not decipherable due to garbage or corruption."; return "The given link exists in the Vorbis data stream, but is not decipherable due to garbage or corruption.";
case OV_ENOSEEK: case OV_ENOSEEK:
return "The given stream is not seekable."; return "The given stream is not seekable.";
default: default:
return "An unexpected Vorbis error occurred."; return "An unexpected Vorbis error occurred.";
} }
} }
template<typename T> template<typename T>
T throwOnError(T code) { T throwOnError(T code) {
// OV_HOLE, though technically an error code, is only informational // OV_HOLE, though technically an error code, is only informational
const bool error = code < 0 && code != OV_HOLE; const bool error = code < 0 && code != OV_HOLE;
if (error) { if (error) {
const std::string message = const std::string message =
fmt::format("{} (Vorbis error {})", vorbisErrorToString(code), code); fmt::format("{} (Vorbis error {})", vorbisErrorToString(code), code);
throw std::runtime_error(message); throw std::runtime_error(message);
} }
return code; return code;
} }
// ov_callbacks-compatible read function backed by a std::ifstream.
// Ogg Vorbis always reads byte-wise, so elementSize is expected to be 1.
// Returns the number of bytes actually read (may be short at EOF).
size_t readCallback(void* buffer, size_t elementSize, size_t elementCount, void* dataSource) {
    assert(elementSize == 1);

    auto& stream = *static_cast<std::ifstream*>(dataSource);
    stream.read(static_cast<char*>(buffer), elementCount);
    const auto bytesRead = stream.gcount();
    // Reading past EOF sets failbit; clear it so later calls keep working.
    stream.clear();
    return static_cast<size_t>(bytesRead);
}
int seekCallback(void* dataSource, ogg_int64_t offset, int origin) { int seekCallback(void* dataSource, ogg_int64_t offset, int origin) {
static const vector<ios_base::seekdir> seekDirections { static const vector<ios_base::seekdir> seekDirections {
ios_base::beg, ios_base::cur, ios_base::end ios_base::beg, ios_base::cur, ios_base::end
}; };
ifstream& stream = *static_cast<ifstream*>(dataSource); ifstream& stream = *static_cast<ifstream*>(dataSource);
stream.seekg(offset, seekDirections.at(origin)); stream.seekg(offset, seekDirections.at(origin));
stream.clear(); // In case we sought to EOF stream.clear(); // In case we sought to EOF
return 0; return 0;
} }
// ov_callbacks-compatible tell function backed by a std::ifstream.
// Returns the current read position as a byte offset from the file start.
long tellCallback(void* dataSource) {
    auto& stream = *static_cast<std::ifstream*>(dataSource);
    const auto position = stream.tellg();
    assert(position >= 0);
    return static_cast<long>(position);
}
// RAII wrapper around OggVorbis_File: opens the handle in the constructor
// and guarantees ov_clear on destruction.
class OggVorbisFile final {
public:
    OggVorbisFile(const path& filePath);

    // Non-copyable: the handle references this object's stream.
    OggVorbisFile(const OggVorbisFile&) = delete;
    OggVorbisFile& operator=(const OggVorbisFile&) = delete;

    // Raw handle for use with the ov_* API; ownership stays with this object.
    OggVorbis_File* get() {
        return &oggVorbisHandle;
    }

    ~OggVorbisFile() {
        ov_clear(&oggVorbisHandle);
    }

private:
    OggVorbis_File oggVorbisHandle;
    ifstream stream; // backing stream read via callbacks; must outlive the handle
};
// Opens the given file and initializes the Ogg Vorbis handle on top of it.
// Throws (via throwOnError) if the file is not a valid Ogg Vorbis stream.
OggVorbisFile::OggVorbisFile(const path& filePath) :
    oggVorbisHandle(),
    stream(openFile(filePath))
{
    // Throw only on badbit, not on failbit.
    // Ogg Vorbis expects read operations past the end of the file to
    // succeed, not to throw.
    stream.exceptions(ifstream::badbit);

    // Ogg Vorbis normally uses the `FILE` API from the C standard library.
    // This doesn't handle Unicode paths on Windows.
    // Use wrapper functions around `ifstream` instead.
    // The close callback is nullptr because this object owns the stream.
    const ov_callbacks callbacks { readCallback, seekCallback, nullptr, tellCallback };
    throwOnError(ov_open_callbacks(&stream, &oggVorbisHandle, nullptr, 0, callbacks));
}
// Opens the file once to read stream metadata (sample rate, channel count,
// total length). Sample data itself is decoded lazily by sample readers.
OggVorbisFileReader::OggVorbisFileReader(const path& filePath) :
    filePath(filePath)
{
    OggVorbisFile file(filePath);

    vorbis_info* vorbisInfo = ov_info(file.get(), -1);
    sampleRate = vorbisInfo->rate;
    channelCount = vorbisInfo->channels;

    // -1: total across all logical bitstreams
    sampleCount = throwOnError(ov_pcm_total(file.get(), -1));
}
std::unique_ptr<AudioClip> OggVorbisFileReader::clone() const { std::unique_ptr<AudioClip> OggVorbisFileReader::clone() const {
return std::make_unique<OggVorbisFileReader>(*this); return std::make_unique<OggVorbisFileReader>(*this);
} }
// Returns a stateful reader mapping a sample index to a mono sample value.
// The lambda keeps its own decoder handle plus a small decode cache
// (buffer/bufferStart/bufferSize), so sequential reads decode each block
// only once. "Unsafe" presumably means not safe for concurrent use of one
// reader instance -- confirm against AudioClip's contract.
SampleReader OggVorbisFileReader::createUnsafeSampleReader() const {
    return [
        channelCount = channelCount,
        file = make_shared<OggVorbisFile>(filePath),
        buffer = static_cast<value_type**>(nullptr), // owned by libvorbis, valid until next read
        bufferStart = size_type(0),
        bufferSize = size_type(0)
    ](size_type index) mutable {
        if (index < bufferStart || index >= bufferStart + bufferSize) {
            // Seek
            throwOnError(ov_pcm_seek(file->get(), index));

            // Read a block of samples
            constexpr int maxSize = 1024;
            bufferStart = index;
            bufferSize = throwOnError(ov_read_float(file->get(), &buffer, maxSize, nullptr));
            if (bufferSize == 0) {
                throw std::runtime_error("Unexpected end of file.");
            }
        }

        // Downmix channels by averaging
        const size_type bufferIndex = index - bufferStart;
        value_type sum = 0.0f;
        for (int channel = 0; channel < channelCount; ++channel) {
            sum += buffer[channel][bufferIndex];
        }
        return sum / channelCount;
    };
}

View File

@ -5,16 +5,16 @@
// AudioClip implementation that reads (and downmixes to mono) an
// Ogg Vorbis file.
class OggVorbisFileReader : public AudioClip {
public:
    // Throws if the file cannot be opened or is not valid Ogg Vorbis data.
    OggVorbisFileReader(const std::filesystem::path& filePath);
    std::unique_ptr<AudioClip> clone() const override;
    int getSampleRate() const override { return sampleRate; }
    size_type size() const override { return sampleCount; }

private:
    SampleReader createUnsafeSampleReader() const override;

    std::filesystem::path filePath;
    int sampleRate;        // in Hz, read from the stream header
    int channelCount;
    size_type sampleCount; // total number of PCM frames
};

View File

@ -8,63 +8,63 @@ using std::unique_ptr;
using std::make_unique; using std::make_unique;
// Wraps inputClip so that it appears to have the (lower) outputSampleRate.
// Throws invalid_argument for non-positive rates or attempted upsampling.
// Note: members reference this->inputClip because the parameter has been
// moved from; validation runs after member init, so for an invalid rate
// the members are computed but never used before the throw.
SampleRateConverter::SampleRateConverter(unique_ptr<AudioClip> inputClip, int outputSampleRate) :
    inputClip(std::move(inputClip)),
    downscalingFactor(static_cast<double>(this->inputClip->getSampleRate()) / outputSampleRate),
    outputSampleRate(outputSampleRate),
    outputSampleCount(std::lround(this->inputClip->size() / downscalingFactor))
{
    if (outputSampleRate <= 0) {
        throw invalid_argument("Sample rate must be positive.");
    }
    if (this->inputClip->getSampleRate() < outputSampleRate) {
        throw invalid_argument(fmt::format(
            "Upsampling not supported. Input sample rate must not be below {}Hz.",
            outputSampleRate
        ));
    }
}
// Creates an independent copy of this converter.
unique_ptr<AudioClip> SampleRateConverter::clone() const {
    auto copy = make_unique<SampleRateConverter>(*this);
    return copy;
}
float mean(double inputStart, double inputEnd, const SampleReader& read) { float mean(double inputStart, double inputEnd, const SampleReader& read) {
// Calculate weighted sum... // Calculate weighted sum...
double sum = 0; double sum = 0;
// ... first sample (weight <= 1) // ... first sample (weight <= 1)
const int64_t startIndex = static_cast<int64_t>(inputStart); const int64_t startIndex = static_cast<int64_t>(inputStart);
sum += read(startIndex) * ((startIndex + 1) - inputStart); sum += read(startIndex) * ((startIndex + 1) - inputStart);
// ... middle samples (weight 1 each) // ... middle samples (weight 1 each)
const int64_t endIndex = static_cast<int64_t>(inputEnd); const int64_t endIndex = static_cast<int64_t>(inputEnd);
for (int64_t index = startIndex + 1; index < endIndex; ++index) { for (int64_t index = startIndex + 1; index < endIndex; ++index) {
sum += read(index); sum += read(index);
} }
// ... last sample (weight < 1) // ... last sample (weight < 1)
if (endIndex < inputEnd) { if (endIndex < inputEnd) {
sum += read(endIndex) * (inputEnd - endIndex); sum += read(endIndex) * (inputEnd - endIndex);
} }
return static_cast<float>(sum / (inputEnd - inputStart)); return static_cast<float>(sum / (inputEnd - inputStart));
} }
// Returns a reader that produces each output sample as the mean of the
// input samples it covers. "Unsafe" presumably means not safe for
// concurrent use of one reader -- the captured input reader is stateful;
// confirm against AudioClip's contract.
SampleReader SampleRateConverter::createUnsafeSampleReader() const {
    return [
        read = inputClip->createSampleReader(),
        downscalingFactor = downscalingFactor,
        size = inputClip->size()
    ](size_type index) {
        const double inputStart = index * downscalingFactor;
        // Clamp to the clip length so the last window never reads past the end
        const double inputEnd =
            std::min((index + 1) * downscalingFactor, static_cast<double>(size));
        return mean(inputStart, inputEnd, read);
    };
}
// Creates an effect that downsamples a clip to the given sample rate.
AudioEffect resample(int sampleRate) {
    return [sampleRate](unique_ptr<AudioClip> clip) {
        return make_unique<SampleRateConverter>(std::move(clip), sampleRate);
    };
}

View File

@ -5,25 +5,25 @@
// AudioClip decorator that downsamples its input clip to a lower sample
// rate by averaging (box filter). Upsampling is not supported.
class SampleRateConverter : public AudioClip {
public:
    // Throws invalid_argument if outputSampleRate is not positive or
    // exceeds the input clip's sample rate.
    SampleRateConverter(std::unique_ptr<AudioClip> inputClip, int outputSampleRate);
    std::unique_ptr<AudioClip> clone() const override;
    int getSampleRate() const override;
    size_type size() const override;

private:
    SampleReader createUnsafeSampleReader() const override;

    // shared_ptr so that clones can share the (immutable) input clip
    std::shared_ptr<AudioClip> inputClip;
    double downscalingFactor; // input sample rate / output sample rate
    int outputSampleRate;
    int64_t outputSampleCount;
};

// Effect factory wrapping SampleRateConverter.
AudioEffect resample(int sampleRate);

inline int SampleRateConverter::getSampleRate() const {
    return outputSampleRate;
}

inline AudioClip::size_type SampleRateConverter::size() const {
    return outputSampleCount;
}

View File

@ -20,478 +20,478 @@ using std::streamoff;
// Converts an int in the range min..max to a float in the range -1..1 // Converts an int in the range min..max to a float in the range -1..1
float toNormalizedFloat(int value, int min, int max) { float toNormalizedFloat(int value, int min, int max) {
const float fMin = static_cast<float>(min); const float fMin = static_cast<float>(min);
const float fMax = static_cast<float>(max); const float fMax = static_cast<float>(max);
const float fValue = static_cast<float>(value); const float fValue = static_cast<float>(value);
return ((fValue - fMin) / (fMax - fMin) * 2) - 1; return ((fValue - fMin) / (fMax - fMin) * 2) - 1;
} }
// Rounds i up to the nearest even number; even values (including
// negatives) are returned unchanged.
std::streamoff roundUpToEven(std::streamoff i) {
    return (i % 2 == 0) ? i : i + 1;
}
// WAVE codec tags (wFormatTag values) understood by this reader.
namespace Codec {
    constexpr int Pcm = 0x01;
    constexpr int Float = 0x03;
    // Marker value: the actual codec is given in the fmt chunk's extension
    constexpr int Extensible = 0xFFFE;
};

// Returns a human-readable name for the given WAVE codec tag.
string codecToString(int codec);
// Parses the RIFF/WAVE headers of the given file and returns its format
// description (codec, sample format, channel count, frame rate/count, and
// the byte offset of the sample data).
// Throws runtime_error if the file is not a valid, uncompressed WAVE file.
WaveFormatInfo getWaveFormatInfo(const path& filePath) {
    WaveFormatInfo formatInfo {};

    auto file = openFile(filePath);

    // Determine the file size so that chunk bounds can be validated
    file.seekg(0, std::ios_base::end);
    const streamoff fileSize = file.tellg();
    file.seekg(0);

    // True if at least byteCount more bytes can be read
    auto remaining = [&](int byteCount) {
        const streamoff filePosition = file.tellg();
        return byteCount <= fileSize - filePosition;
    };

    // Read header
    // NOTE(review): the full RIFF/WAVE header is 12 bytes; this checks for
    // only 10 -- confirm whether that is intentional.
    if (!remaining(10)) {
        throw runtime_error("WAVE file is corrupt. Header not found.");
    }
    const auto rootChunkId = read<uint32_t>(file);
    if (rootChunkId != fourcc('R', 'I', 'F', 'F')) {
        throw runtime_error("Unknown file format. Only WAVE files are supported.");
    }
    read<uint32_t>(file); // Chunk size
    const uint32_t waveId = read<uint32_t>(file);
    if (waveId != fourcc('W', 'A', 'V', 'E')) {
        throw runtime_error(format("File format is not WAVE, but {}.", fourccToString(waveId)));
    }

    // Read chunks until we have processed both the format and the data chunk
    bool processedFormatChunk = false;
    bool processedDataChunk = false;
    while ((!processedFormatChunk || !processedDataChunk) && remaining(8)) {
        const uint32_t chunkId = read<uint32_t>(file);
        const streamoff chunkSize = read<int32_t>(file);
        // Chunks are word-aligned: an odd-sized chunk carries one padding byte
        const streamoff chunkEnd = roundUpToEven(file.tellg() + chunkSize);
        switch (chunkId) {
            case fourcc('f', 'm', 't', ' '):
            {
                // Read relevant data
                uint16_t codec = read<uint16_t>(file);
                formatInfo.channelCount = read<uint16_t>(file);
                formatInfo.frameRate = read<int32_t>(file);
                read<uint32_t>(file); // Bytes per second
                const int bytesPerFrame = read<uint16_t>(file);
                const int bitsPerSampleOnDisk = read<uint16_t>(file);
                int bitsPerSample = bitsPerSampleOnDisk;
                if (chunkSize > 16) {
                    const int extensionSize = read<uint16_t>(file);
                    if (extensionSize >= 22) {
                        // Read extension fields (WAVEFORMATEXTENSIBLE)
                        bitsPerSample = read<uint16_t>(file);
                        read<uint32_t>(file); // Skip channel mask
                        const uint16_t codecOverride = read<uint16_t>(file);
                        if (codec == Codec::Extensible) {
                            codec = codecOverride;
                        }
                    }
                }

                // Guard against division by zero below for corrupt files
                if (formatInfo.channelCount == 0) {
                    throw runtime_error("WAVE file is corrupt. Channel count must not be zero.");
                }

                // Determine sample format
                int bytesPerSample;
                switch (codec) {
                    case Codec::Pcm:
                        // Determine sample size.
                        // According to the WAVE standard, sample sizes that are not multiples of 8
                        // bits (e.g. 12 bits) can be treated like the next-larger byte size.
                        if (bitsPerSample == 8) {
                            formatInfo.sampleFormat = SampleFormat::UInt8;
                            bytesPerSample = 1;
                        } else if (bitsPerSample <= 16) {
                            formatInfo.sampleFormat = SampleFormat::Int16;
                            bytesPerSample = 2;
                        } else if (bitsPerSample <= 24) {
                            formatInfo.sampleFormat = SampleFormat::Int24;
                            bytesPerSample = 3;
                        } else if (bitsPerSample <= 32) {
                            formatInfo.sampleFormat = SampleFormat::Int32;
                            bytesPerSample = 4;
                        } else {
                            throw runtime_error(
                                format("Unsupported sample format: {}-bit PCM.", bitsPerSample));
                        }
                        if (bytesPerSample != bytesPerFrame / formatInfo.channelCount) {
                            throw runtime_error("Unsupported sample organization.");
                        }
                        break;
                    case Codec::Float:
                        if (bitsPerSample == 32) {
                            formatInfo.sampleFormat = SampleFormat::Float32;
                            bytesPerSample = 4;
                        } else if (bitsPerSample == 64) {
                            formatInfo.sampleFormat = SampleFormat::Float64;
                            bytesPerSample = 8;
                        } else {
                            throw runtime_error(
                                format("Unsupported sample format: {}-bit IEEE Float.", bitsPerSample)
                            );
                        }
                        break;
                    default:
                        throw runtime_error(format(
                            "Unsupported audio codec: '{}'. Only uncompressed codecs ('{}' and '{}') are supported.",
                            codecToString(codec), codecToString(Codec::Pcm), codecToString(Codec::Float)
                        ));
                }
                formatInfo.bytesPerFrame = bytesPerSample * formatInfo.channelCount;
                processedFormatChunk = true;
                break;
            }
            case fourcc('d', 'a', 't', 'a'):
            {
                // The frame count depends on the frame size from the format
                // chunk. Without that chunk, bytesPerFrame would still be
                // zero, causing a division by zero on corrupt files.
                if (!processedFormatChunk) {
                    throw runtime_error("WAVE file is corrupt. Data chunk precedes format chunk.");
                }
                formatInfo.dataOffset = file.tellg();
                formatInfo.frameCount = chunkSize / formatInfo.bytesPerFrame;
                processedDataChunk = true;
                break;
            }
            default:
            {
                // Ignore unknown chunk
                break;
            }
        }

        // Seek to the start of the next chunk
        file.seekg(chunkEnd, std::ios_base::beg);
    }
    if (!processedFormatChunk) throw runtime_error("Missing format chunk.");
    if (!processedDataChunk) throw runtime_error("Missing data chunk.");

    return formatInfo;
}
// Reads the file's format information up front (throws on invalid files);
// sample data itself is read lazily by the sample readers.
WaveFileReader::WaveFileReader(const path& filePath) :
    filePath(filePath),
    formatInfo(getWaveFormatInfo(filePath)) {}
// Creates an independent copy of this reader (same file, fresh state).
unique_ptr<AudioClip> WaveFileReader::clone() const {
    auto copy = make_unique<WaveFileReader>(*this);
    return copy;
}
// Reads one frame (one sample per channel) from the file's current
// position and returns the channels downmixed to mono by averaging.
// Integer formats are normalized to the range -1..1.
inline AudioClip::value_type readSample(
    std::ifstream& file,
    SampleFormat sampleFormat,
    int channelCount
) {
    float sum = 0;
    for (int channelIndex = 0; channelIndex < channelCount; channelIndex++) {
        switch (sampleFormat) {
            case SampleFormat::UInt8:
            {
                const uint8_t raw = read<uint8_t>(file);
                sum += toNormalizedFloat(raw, 0, UINT8_MAX);
                break;
            }
            case SampleFormat::Int16:
            {
                const int16_t raw = read<int16_t>(file);
                sum += toNormalizedFloat(raw, INT16_MIN, INT16_MAX);
                break;
            }
            case SampleFormat::Int24:
            {
                // 24-bit samples are read into a plain int, so the sign bit
                // must be extended manually.
                int raw = read<int, 24>(file);
                if (raw & 0x800000) raw |= 0xFF000000; // Fix two's complement
                sum += toNormalizedFloat(raw, INT24_MIN, INT24_MAX);
                break;
            }
            case SampleFormat::Int32:
            {
                const int32_t raw = read<int32_t>(file);
                sum += toNormalizedFloat(raw, INT32_MIN, INT32_MAX);
                break;
            }
            case SampleFormat::Float32:
            {
                // Float formats are already normalized on disk
                sum += read<float>(file);
                break;
            }
            case SampleFormat::Float64:
            {
                sum += static_cast<float>(read<double>(file));
                break;
            }
        }
    }
    return sum / channelCount;
}
// Returns a stateful reader that reads one frame per call directly from
// the file. The last file position is cached so purely sequential reads
// skip redundant seeks. "Unsafe" presumably means a single reader must not
// be shared across threads -- confirm against AudioClip's contract.
SampleReader WaveFileReader::createUnsafeSampleReader() const {
    return
        [
            formatInfo = formatInfo,
            file = std::make_shared<std::ifstream>(openFile(filePath)),
            filePos = std::streampos(0)
        ](size_type index) mutable {
            // Absolute byte offset of the requested frame
            const std::streampos newFilePos = formatInfo.dataOffset
                + static_cast<streamoff>(index * formatInfo.bytesPerFrame);
            // Seek only when reading non-sequentially
            if (newFilePos != filePos) {
                file->seekg(newFilePos);
            }
            const value_type result =
                readSample(*file, formatInfo.sampleFormat, formatInfo.channelCount);
            filePos = newFilePos + static_cast<streamoff>(formatInfo.bytesPerFrame);
            return result;
        };
}
// Returns a human-readable name for a WAVE codec tag (wFormatTag).
// Unknown tags are rendered as a hex literal, e.g. "0x1234".
string codecToString(int codec) {
    switch (codec) {
        case 0x0001: return "PCM";
        case 0x0002: return "Microsoft ADPCM";
        case 0x0003: return "IEEE Float";
        case 0x0004: return "Compaq VSELP";
        case 0x0005: return "IBM CVSD";
        case 0x0006: return "Microsoft a-Law";
        case 0x0007: return "Microsoft u-Law";
        case 0x0008: return "Microsoft DTS";
        case 0x0009: return "DRM";
        case 0x000a: return "WMA 9 Speech";
        case 0x000b: return "Microsoft Windows Media RT Voice";
        case 0x0010: return "OKI-ADPCM";
        case 0x0011: return "Intel IMA/DVI-ADPCM";
        case 0x0012: return "Videologic Mediaspace ADPCM";
        case 0x0013: return "Sierra ADPCM";
        case 0x0014: return "Antex G.723 ADPCM";
        case 0x0015: return "DSP Solutions DIGISTD";
        case 0x0016: return "DSP Solutions DIGIFIX";
        case 0x0017: return "Dialoic OKI ADPCM";
        case 0x0018: return "Media Vision ADPCM";
        case 0x0019: return "HP CU";
        case 0x001a: return "HP Dynamic Voice";
        case 0x0020: return "Yamaha ADPCM";
        case 0x0021: return "SONARC Speech Compression";
        case 0x0022: return "DSP Group True Speech";
        case 0x0023: return "Echo Speech Corp.";
        case 0x0024: return "Virtual Music Audiofile AF36";
        case 0x0025: return "Audio Processing Tech.";
        case 0x0026: return "Virtual Music Audiofile AF10";
        case 0x0027: return "Aculab Prosody 1612";
        case 0x0028: return "Merging Tech. LRC";
        case 0x0030: return "Dolby AC2";
        case 0x0031: return "Microsoft GSM610";
        case 0x0032: return "MSN Audio";
        case 0x0033: return "Antex ADPCME";
        case 0x0034: return "Control Resources VQLPC";
        case 0x0035: return "DSP Solutions DIGIREAL";
        case 0x0036: return "DSP Solutions DIGIADPCM";
        case 0x0037: return "Control Resources CR10";
        case 0x0038: return "Natural MicroSystems VBX ADPCM";
        case 0x0039: return "Crystal Semiconductor IMA ADPCM";
        case 0x003a: return "Echo Speech ECHOSC3";
        case 0x003b: return "Rockwell ADPCM";
        case 0x003c: return "Rockwell DIGITALK";
        case 0x003d: return "Xebec Multimedia";
        case 0x0040: return "Antex G.721 ADPCM";
        case 0x0041: return "Antex G.728 CELP";
        case 0x0042: return "Microsoft MSG723";
        case 0x0043: return "IBM AVC ADPCM";
        case 0x0045: return "ITU-T G.726";
        case 0x0050: return "Microsoft MPEG";
        case 0x0051: return "RT23 or PAC";
        case 0x0052: return "InSoft RT24";
        case 0x0053: return "InSoft PAC";
        case 0x0055: return "MP3";
        case 0x0059: return "Cirrus";
        case 0x0060: return "Cirrus Logic";
        case 0x0061: return "ESS Tech. PCM";
        case 0x0062: return "Voxware Inc.";
        case 0x0063: return "Canopus ATRAC";
        case 0x0064: return "APICOM G.726 ADPCM";
        case 0x0065: return "APICOM G.722 ADPCM";
        case 0x0066: return "Microsoft DSAT";
        case 0x0067: return "Micorsoft DSAT DISPLAY";
        case 0x0069: return "Voxware Byte Aligned";
        case 0x0070: return "Voxware AC8";
        case 0x0071: return "Voxware AC10";
        case 0x0072: return "Voxware AC16";
        case 0x0073: return "Voxware AC20";
        case 0x0074: return "Voxware MetaVoice";
        case 0x0075: return "Voxware MetaSound";
        case 0x0076: return "Voxware RT29HW";
        case 0x0077: return "Voxware VR12";
        case 0x0078: return "Voxware VR18";
        case 0x0079: return "Voxware TQ40";
        case 0x007a: return "Voxware SC3";
        case 0x007b: return "Voxware SC3";
        case 0x0080: return "Soundsoft";
        case 0x0081: return "Voxware TQ60";
        case 0x0082: return "Microsoft MSRT24";
        case 0x0083: return "AT&T G.729A";
        case 0x0084: return "Motion Pixels MVI MV12";
        case 0x0085: return "DataFusion G.726";
        case 0x0086: return "DataFusion GSM610";
        case 0x0088: return "Iterated Systems Audio";
        case 0x0089: return "Onlive";
        case 0x008a: return "Multitude, Inc. FT SX20";
        case 0x008b: return "Infocom ITS A/S G.721 ADPCM";
        case 0x008c: return "Convedia G729";
        case 0x008d: return "Not specified congruency, Inc.";
        case 0x0091: return "Siemens SBC24";
        case 0x0092: return "Sonic Foundry Dolby AC3 APDIF";
        case 0x0093: return "MediaSonic G.723";
        case 0x0094: return "Aculab Prosody 8kbps";
        case 0x0097: return "ZyXEL ADPCM";
        case 0x0098: return "Philips LPCBB";
        case 0x0099: return "Studer Professional Audio Packed";
        case 0x00a0: return "Malden PhonyTalk";
        case 0x00a1: return "Racal Recorder GSM";
        case 0x00a2: return "Racal Recorder G720.a";
        case 0x00a3: return "Racal G723.1";
        case 0x00a4: return "Racal Tetra ACELP";
        case 0x00b0: return "NEC AAC NEC Corporation";
        case 0x00ff: return "AAC";
        case 0x0100: return "Rhetorex ADPCM";
        case 0x0101: return "IBM u-Law";
        case 0x0102: return "IBM a-Law";
        case 0x0103: return "IBM ADPCM";
        case 0x0111: return "Vivo G.723";
        case 0x0112: return "Vivo Siren";
        case 0x0120: return "Philips Speech Processing CELP";
        case 0x0121: return "Philips Speech Processing GRUNDIG";
        case 0x0123: return "Digital G.723";
        case 0x0125: return "Sanyo LD ADPCM";
        case 0x0130: return "Sipro Lab ACEPLNET";
        case 0x0131: return "Sipro Lab ACELP4800";
        case 0x0132: return "Sipro Lab ACELP8V3";
        case 0x0133: return "Sipro Lab G.729";
        case 0x0134: return "Sipro Lab G.729A";
        case 0x0135: return "Sipro Lab Kelvin";
        case 0x0136: return "VoiceAge AMR";
        case 0x0140: return "Dictaphone G.726 ADPCM";
        case 0x0150: return "Qualcomm PureVoice";
        case 0x0151: return "Qualcomm HalfRate";
        case 0x0155: return "Ring Zero Systems TUBGSM";
        case 0x0160: return "Microsoft Audio1";
        case 0x0161: return "Windows Media Audio V2 V7 V8 V9 / DivX audio (WMA) / Alex AC3 Audio";
        case 0x0162: return "Windows Media Audio Professional V9";
        case 0x0163: return "Windows Media Audio Lossless V9";
        case 0x0164: return "WMA Pro over S/PDIF";
        case 0x0170: return "UNISYS NAP ADPCM";
        case 0x0171: return "UNISYS NAP ULAW";
        case 0x0172: return "UNISYS NAP ALAW";
        case 0x0173: return "UNISYS NAP 16K";
        case 0x0174: return "MM SYCOM ACM SYC008 SyCom Technologies";
        case 0x0175: return "MM SYCOM ACM SYC701 G726L SyCom Technologies";
        case 0x0176: return "MM SYCOM ACM SYC701 CELP54 SyCom Technologies";
        case 0x0177: return "MM SYCOM ACM SYC701 CELP68 SyCom Technologies";
        case 0x0178: return "Knowledge Adventure ADPCM";
        case 0x0180: return "Fraunhofer IIS MPEG2AAC";
        case 0x0190: return "Digital Theater Systems DTS DS";
        case 0x0200: return "Creative Labs ADPCM";
        case 0x0202: return "Creative Labs FASTSPEECH8";
        case 0x0203: return "Creative Labs FASTSPEECH10";
        case 0x0210: return "UHER ADPCM";
        case 0x0215: return "Ulead DV ACM";
        case 0x0216: return "Ulead DV ACM";
        case 0x0220: return "Quarterdeck Corp.";
        case 0x0230: return "I-Link VC";
        case 0x0240: return "Aureal Semiconductor Raw Sport";
        case 0x0241: return "ESST AC3";
        case 0x0250: return "Interactive Products HSX";
        case 0x0251: return "Interactive Products RPELP";
        case 0x0260: return "Consistent CS2";
        case 0x0270: return "Sony SCX";
        case 0x0271: return "Sony SCY";
        case 0x0272: return "Sony ATRAC3";
        case 0x0273: return "Sony SPC";
        case 0x0280: return "TELUM Telum Inc.";
        case 0x0281: return "TELUMIA Telum Inc.";
        case 0x0285: return "Norcom Voice Systems ADPCM";
        case 0x0300: return "Fujitsu FM TOWNS SND";
        // 0x0301..0x0308 are all unspecified Fujitsu codecs
        case 0x0301:
        case 0x0302:
        case 0x0303:
        case 0x0304:
        case 0x0305:
        case 0x0306:
        case 0x0307:
        case 0x0308: return "Fujitsu (not specified)";
        case 0x0350: return "Micronas Semiconductors, Inc. Development";
        case 0x0351: return "Micronas Semiconductors, Inc. CELP833";
        case 0x0400: return "Brooktree Digital";
        case 0x0401: return "Intel Music Coder (IMC)";
        case 0x0402: return "Ligos Indeo Audio";
        case 0x0450: return "QDesign Music";
        case 0x0500: return "On2 VP7 On2 Technologies";
        case 0x0501: return "On2 VP6 On2 Technologies";
        case 0x0680: return "AT&T VME VMPCM";
        case 0x0681: return "AT&T TCP";
        case 0x0700: return "YMPEG Alpha (dummy for MPEG-2 compressor)";
        case 0x08ae: return "ClearJump LiteWave (lossless)";
        case 0x1000: return "Olivetti GSM";
        case 0x1001: return "Olivetti ADPCM";
        case 0x1002: return "Olivetti CELP";
        case 0x1003: return "Olivetti SBC";
        case 0x1004: return "Olivetti OPR";
        case 0x1100: return "Lernout & Hauspie";
        case 0x1101: return "Lernout & Hauspie CELP codec";
        // 0x1102..0x1104 share one description
        case 0x1102:
        case 0x1103:
        case 0x1104: return "Lernout & Hauspie SBC codec";
        case 0x1400: return "Norris Comm. Inc.";
        case 0x1401: return "ISIAudio";
        case 0x1500: return "AT&T Soundspace Music Compression";
        case 0x181c: return "VoxWare RT24 speech codec";
        case 0x181e: return "Lucent elemedia AX24000P Music codec";
        case 0x1971: return "Sonic Foundry LOSSLESS";
        case 0x1979: return "Innings Telecom Inc. ADPCM";
        case 0x1c07: return "Lucent SX8300P speech codec";
        case 0x1c0c: return "Lucent SX5363S G.723 compliant codec";
        case 0x1f03: return "CUseeMe DigiTalk (ex-Rocwell)";
        case 0x1fc4: return "NCT Soft ALF2CD ACM";
        case 0x2000: return "FAST Multimedia DVM";
        case 0x2001: return "Dolby DTS (Digital Theater System)";
        case 0x2002: return "RealAudio 1 / 2 14.4";
        case 0x2003: return "RealAudio 1 / 2 28.8";
        case 0x2004: return "RealAudio G2 / 8 Cook (low bitrate)";
        case 0x2005: return "RealAudio 3 / 4 / 5 Music (DNET)";
        case 0x2006: return "RealAudio 10 AAC (RAAC)";
        case 0x2007: return "RealAudio 10 AAC+ (RACP)";
        case 0x2500: return "Reserved range to 0x2600 Microsoft";
        case 0x3313: return "makeAVIS (ffvfw fake AVI sound from AviSynth scripts)";
        case 0x4143: return "Divio MPEG-4 AAC audio";
        case 0x4201: return "Nokia adaptive multirate";
        case 0x4243: return "Divio G726 Divio, Inc.";
        case 0x434c: return "LEAD Speech";
        case 0x564c: return "LEAD Vorbis";
        case 0x5756: return "WavPack Audio";
        case 0x674f: return "Ogg Vorbis (mode 1)";
        case 0x6750: return "Ogg Vorbis (mode 2)";
        case 0x6751: return "Ogg Vorbis (mode 3)";
        case 0x676f: return "Ogg Vorbis (mode 1+)";
        case 0x6770: return "Ogg Vorbis (mode 2+)";
        case 0x6771: return "Ogg Vorbis (mode 3+)";
        case 0x7000: return "3COM NBX 3Com Corporation";
        case 0x706d: return "FAAD AAC";
        case 0x7a21: return "GSM-AMR (CBR, no SID)";
        case 0x7a22: return "GSM-AMR (VBR, including SID)";
        case 0xa100: return "Comverse Infosys Ltd. G723 1";
        case 0xa101: return "Comverse Infosys Ltd. AVQSBC";
        case 0xa102: return "Comverse Infosys Ltd. OLDSBC";
        case 0xa103: return "Symbol Technologies G729A";
        case 0xa104: return "VoiceAge AMR WB VoiceAge Corporation";
        case 0xa105: return "Ingenient Technologies Inc. G726";
        case 0xa106: return "ISO/MPEG-4 advanced audio Coding";
        case 0xa107: return "Encore Software Ltd G726";
        case 0xa109: return "Speex ACM Codec xiph.org";
        case 0xdfac: return "DebugMode SonicFoundry Vegas FrameServer ACM Codec";
        case 0xf1ac: return "Free Lossless Audio Codec FLAC";
        case 0xfffe: return "Extensible";
        case 0xffff: return "Development";
        default:
            // Unknown codec: show the raw tag in hex.
            return format("{0:#x}", codec);
    }
}

View File

@ -4,43 +4,43 @@
#include "AudioClip.h" #include "AudioClip.h"
// Supported on-disk sample encodings for WAVE files.
enum class SampleFormat {
    UInt8,
    Int16,
    Int24,
    Int32,
    Float32,
    Float64
};

// Format information extracted from a WAVE file header.
struct WaveFormatInfo {
    int bytesPerFrame;          // Size of one frame (all channels together), in bytes
    SampleFormat sampleFormat;  // Encoding of the individual samples
    int frameRate;              // Frames per second
    int64_t frameCount;         // Total number of frames in the data chunk
    int channelCount;           // Number of interleaved channels
    std::streampos dataOffset;  // File offset of the first sample in the data chunk
};

// Parses the header of the given WAVE file.
WaveFormatInfo getWaveFormatInfo(const std::filesystem::path& filePath);
class WaveFileReader : public AudioClip { class WaveFileReader : public AudioClip {
public: public:
WaveFileReader(const std::filesystem::path& filePath); WaveFileReader(const std::filesystem::path& filePath);
std::unique_ptr<AudioClip> clone() const override; std::unique_ptr<AudioClip> clone() const override;
int getSampleRate() const override; int getSampleRate() const override;
size_type size() const override; size_type size() const override;
private: private:
SampleReader createUnsafeSampleReader() const override; SampleReader createUnsafeSampleReader() const override;
std::filesystem::path filePath; std::filesystem::path filePath;
WaveFormatInfo formatInfo; WaveFormatInfo formatInfo;
}; };
inline int WaveFileReader::getSampleRate() const { inline int WaveFileReader::getSampleRate() const {
return formatInfo.frameRate; return formatInfo.frameRate;
} }
inline AudioClip::size_type WaveFileReader::size() const { inline AudioClip::size_type WaveFileReader::size() const {
return formatInfo.frameCount; return formatInfo.frameCount;
} }

View File

@ -10,20 +10,20 @@ using std::runtime_error;
using fmt::format; using fmt::format;
// Creates an AudioClip for the given sound file, selecting the reader by
// (case-insensitive) file extension. Supported: .wav and .ogg.
// On any failure, throws a runtime_error with the original error nested inside.
std::unique_ptr<AudioClip> createAudioFileClip(path filePath) {
    try {
        const string extension =
            boost::algorithm::to_lower_copy(filePath.extension().u8string());
        if (extension == ".wav") {
            return std::make_unique<WaveFileReader>(filePath);
        }
        if (extension == ".ogg") {
            return std::make_unique<OggVorbisFileReader>(filePath);
        }
        throw runtime_error(format(
            "Unsupported file extension '{}'. Supported extensions are '.wav' and '.ogg'.",
            extension
        ));
    } catch (...) {
        // Wrap whatever went wrong so the caller sees which file was affected.
        std::throw_with_nested(
            runtime_error(format("Could not open sound file {}.", filePath.u8string()))
        );
    }
}

View File

@ -4,43 +4,43 @@
// Helpers for reading and writing little-endian binary data.
// NOTE(review): read/write/fourccToString copy bytes straight into the object
// representation, so they assume a little-endian host.
namespace little_endian {

    // Reads a little-endian value from the stream. bitsToRead allows reading
    // values stored with fewer bytes than the in-memory type (e.g. 24-bit
    // samples into an int); the remaining high bytes stay zero.
    template<typename Type, int bitsToRead = 8 * sizeof(Type)>
    Type read(std::istream& stream) {
        static_assert(bitsToRead % 8 == 0, "Cannot read fractional bytes.");
        static_assert(bitsToRead <= sizeof(Type) * 8, "Bits to read exceed target type size.");

        Type result = 0;
        char* const bytes = reinterpret_cast<char*>(&result);
        constexpr int byteCount = bitsToRead / 8;
        for (int i = 0; i < byteCount; ++i) {
            bytes[i] = static_cast<char>(stream.get());
        }
        return result;
    }

    // Writes the low bitsToWrite bits of value to the stream, least
    // significant byte first.
    template<typename Type, int bitsToWrite = 8 * sizeof(Type)>
    void write(Type value, std::ostream& stream) {
        static_assert(bitsToWrite % 8 == 0, "Cannot write fractional bytes.");
        static_assert(bitsToWrite <= sizeof(Type) * 8, "Bits to write exceed target type size.");

        const char* const bytes = reinterpret_cast<char*>(&value);
        constexpr int byteCount = bitsToWrite / 8;
        for (int i = 0; i < byteCount; ++i) {
            stream.put(bytes[i]);
        }
    }

    // Packs four characters into a RIFF-style FOURCC code
    // (first character in the least significant byte).
    constexpr uint32_t fourcc(
        unsigned char c0,
        unsigned char c1,
        unsigned char c2,
        unsigned char c3
    ) {
        return c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
    }

    // Converts a FOURCC code back to its four-character string form.
    inline std::string fourccToString(uint32_t fourcc) {
        return std::string(reinterpret_cast<char*>(&fourcc), 4);
    }

}

View File

@ -6,53 +6,53 @@ using std::vector;
// Converts a float in the range -1..1 to a signed 16-bit int // Converts a float in the range -1..1 to a signed 16-bit int
inline int16_t floatSampleToInt16(float sample) { inline int16_t floatSampleToInt16(float sample) {
sample = std::max(sample, -1.0f); sample = std::max(sample, -1.0f);
sample = std::min(sample, 1.0f); sample = std::min(sample, 1.0f);
return static_cast<int16_t>(((sample + 1) / 2) * (INT16_MAX - INT16_MIN) + INT16_MIN); return static_cast<int16_t>(((sample + 1) / 2) * (INT16_MAX - INT16_MIN) + INT16_MIN);
} }
void process16bitAudioClip( void process16bitAudioClip(
const AudioClip& audioClip, const AudioClip& audioClip,
const function<void(const vector<int16_t>&)>& processBuffer, const function<void(const vector<int16_t>&)>& processBuffer,
size_t bufferCapacity, size_t bufferCapacity,
ProgressSink& progressSink ProgressSink& progressSink
) { ) {
// Process entire sound stream // Process entire sound stream
vector<int16_t> buffer; vector<int16_t> buffer;
buffer.reserve(bufferCapacity); buffer.reserve(bufferCapacity);
size_t sampleCount = 0; size_t sampleCount = 0;
auto it = audioClip.begin(); auto it = audioClip.begin();
const auto end = audioClip.end(); const auto end = audioClip.end();
do { do {
// Read to buffer // Read to buffer
buffer.clear(); buffer.clear();
for (; buffer.size() < bufferCapacity && it != end; ++it) { for (; buffer.size() < bufferCapacity && it != end; ++it) {
// Read sample to buffer // Read sample to buffer
buffer.push_back(floatSampleToInt16(*it)); buffer.push_back(floatSampleToInt16(*it));
} }
// Process buffer // Process buffer
processBuffer(buffer); processBuffer(buffer);
sampleCount += buffer.size(); sampleCount += buffer.size();
progressSink.reportProgress(static_cast<double>(sampleCount) / static_cast<double>(audioClip.size())); progressSink.reportProgress(static_cast<double>(sampleCount) / static_cast<double>(audioClip.size()));
} while (!buffer.empty()); } while (!buffer.empty());
} }
void process16bitAudioClip( void process16bitAudioClip(
const AudioClip& audioClip, const AudioClip& audioClip,
const function<void(const vector<int16_t>&)>& processBuffer, const function<void(const vector<int16_t>&)>& processBuffer,
ProgressSink& progressSink ProgressSink& progressSink
) { ) {
const size_t capacity = 1600; // 0.1 second capacity const size_t capacity = 1600; // 0.1 second capacity
process16bitAudioClip(audioClip, processBuffer, capacity, progressSink); process16bitAudioClip(audioClip, processBuffer, capacity, progressSink);
} }
// Converts the entire clip into a newly allocated buffer of 16-bit samples.
vector<int16_t> copyTo16bitBuffer(const AudioClip& audioClip) {
    vector<int16_t> result(static_cast<size_t>(audioClip.size()));
    // Use size_t rather than int for the write index: an int would overflow
    // for clips with more than INT_MAX samples.
    size_t index = 0;
    for (const float sample : audioClip) {
        result[index++] = floatSampleToInt16(sample);
    }
    return result;
}

View File

@ -6,16 +6,16 @@
#include "tools/progress.h" #include "tools/progress.h"
// Streams audioClip as 16-bit samples, calling processBuffer with chunks of
// at most bufferCapacity samples and reporting progress to progressSink.
void process16bitAudioClip(
    const AudioClip& audioClip,
    const std::function<void(const std::vector<int16_t>&)>& processBuffer,
    size_t bufferCapacity,
    ProgressSink& progressSink
);

// Overload using a default chunk size.
void process16bitAudioClip(
    const AudioClip& audioClip,
    const std::function<void(const std::vector<int16_t>&)>& processBuffer,
    ProgressSink& progressSink
);

// Converts the entire clip into a buffer of 16-bit samples.
std::vector<int16_t> copyTo16bitBuffer(const AudioClip& audioClip);

View File

@ -17,79 +17,79 @@ using std::runtime_error;
using std::unique_ptr; using std::unique_ptr;
JoiningBoundedTimeline<void> detectVoiceActivity( JoiningBoundedTimeline<void> detectVoiceActivity(
const AudioClip& inputAudioClip, const AudioClip& inputAudioClip,
ProgressSink& progressSink ProgressSink& progressSink
) { ) {
// Prepare audio for VAD // Prepare audio for VAD
constexpr int webRtcSamplingRate = 8000; constexpr int webRtcSamplingRate = 8000;
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() const unique_ptr<AudioClip> audioClip = inputAudioClip.clone()
| resample(webRtcSamplingRate) | resample(webRtcSamplingRate)
| removeDcOffset(); | removeDcOffset();
VadInst* vadHandle = WebRtcVad_Create(); VadInst* vadHandle = WebRtcVad_Create();
if (!vadHandle) throw runtime_error("Error creating WebRTC VAD handle."); if (!vadHandle) throw runtime_error("Error creating WebRTC VAD handle.");
auto freeHandle = gsl::finally([&]() { WebRtcVad_Free(vadHandle); }); auto freeHandle = gsl::finally([&]() { WebRtcVad_Free(vadHandle); });
int error = WebRtcVad_Init(vadHandle); int error = WebRtcVad_Init(vadHandle);
if (error) throw runtime_error("Error initializing WebRTC VAD."); if (error) throw runtime_error("Error initializing WebRTC VAD.");
const int aggressiveness = 2; // 0..3. The higher, the more is cut off. const int aggressiveness = 2; // 0..3. The higher, the more is cut off.
error = WebRtcVad_set_mode(vadHandle, aggressiveness); error = WebRtcVad_set_mode(vadHandle, aggressiveness);
if (error) throw runtime_error("Error setting WebRTC VAD aggressiveness."); if (error) throw runtime_error("Error setting WebRTC VAD aggressiveness.");
// Detect activity // Detect activity
JoiningBoundedTimeline<void> activity(audioClip->getTruncatedRange()); JoiningBoundedTimeline<void> activity(audioClip->getTruncatedRange());
centiseconds time = 0_cs; centiseconds time = 0_cs;
const size_t frameSize = webRtcSamplingRate / 100; const size_t frameSize = webRtcSamplingRate / 100;
const auto processBuffer = [&](const vector<int16_t>& buffer) { const auto processBuffer = [&](const vector<int16_t>& buffer) {
// WebRTC is picky regarding buffer size // WebRTC is picky regarding buffer size
if (buffer.size() < frameSize) return; if (buffer.size() < frameSize) return;
const int result = WebRtcVad_Process( const int result = WebRtcVad_Process(
vadHandle, vadHandle,
webRtcSamplingRate, webRtcSamplingRate,
buffer.data(), buffer.data(),
buffer.size() buffer.size()
); );
if (result == -1) throw runtime_error("Error processing audio buffer using WebRTC VAD."); if (result == -1) throw runtime_error("Error processing audio buffer using WebRTC VAD.");
// Ignore the result of WebRtcVad_Process, instead directly interpret the internal VAD flag. // Ignore the result of WebRtcVad_Process, instead directly interpret the internal VAD flag.
// The result of WebRtcVad_Process stays 1 for a number of frames after the last detected // The result of WebRtcVad_Process stays 1 for a number of frames after the last detected
// activity. // activity.
const bool isActive = reinterpret_cast<VadInstT*>(vadHandle)->vad == 1; const bool isActive = reinterpret_cast<VadInstT*>(vadHandle)->vad == 1;
if (isActive) { if (isActive) {
activity.set(time, time + 1_cs); activity.set(time, time + 1_cs);
} }
time += 1_cs; time += 1_cs;
}; };
process16bitAudioClip(*audioClip, processBuffer, frameSize, progressSink); process16bitAudioClip(*audioClip, processBuffer, frameSize, progressSink);
// Fill small gaps in activity // Fill small gaps in activity
const centiseconds maxGap(10); const centiseconds maxGap(10);
for (const auto& pair : getPairs(activity)) { for (const auto& pair : getPairs(activity)) {
if (pair.second.getStart() - pair.first.getEnd() <= maxGap) { if (pair.second.getStart() - pair.first.getEnd() <= maxGap) {
activity.set(pair.first.getEnd(), pair.second.getStart()); activity.set(pair.first.getEnd(), pair.second.getStart());
} }
} }
// Discard very short segments of activity // Discard very short segments of activity
const centiseconds minSegmentLength(5); const centiseconds minSegmentLength(5);
for (const auto& segment : Timeline<void>(activity)) { for (const auto& segment : Timeline<void>(activity)) {
if (segment.getDuration() < minSegmentLength) { if (segment.getDuration() < minSegmentLength) {
activity.clear(segment.getTimeRange()); activity.clear(segment.getTimeRange());
} }
} }
logging::debugFormat( logging::debugFormat(
"Found {} sections of voice activity: {}", "Found {} sections of voice activity: {}",
activity.size(), activity.size(),
join(activity | transformed([](const Timed<void>& t) { join(activity | transformed([](const Timed<void>& t) {
return format("{0}-{1}", t.getStart(), t.getEnd()); return format("{0}-{1}", t.getStart(), t.getEnd());
}), ", ") }), ", ")
); );
return activity; return activity;
} }

View File

@ -4,6 +4,6 @@
#include "tools/progress.h" #include "tools/progress.h"
JoiningBoundedTimeline<void> detectVoiceActivity( JoiningBoundedTimeline<void> detectVoiceActivity(
const AudioClip& audioClip, const AudioClip& audioClip,
ProgressSink& progressSink ProgressSink& progressSink
); );

View File

@ -5,39 +5,39 @@
using namespace little_endian; using namespace little_endian;
void createWaveFile(const AudioClip& audioClip, std::string fileName) { void createWaveFile(const AudioClip& audioClip, std::string fileName) {
// Open file // Open file
std::ofstream file; std::ofstream file;
file.exceptions(std::ofstream::failbit | std::ofstream::badbit); file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
file.open(fileName, std::ios::out | std::ios::binary); file.open(fileName, std::ios::out | std::ios::binary);
// Write RIFF chunk // Write RIFF chunk
write<uint32_t>(fourcc('R', 'I', 'F', 'F'), file); write<uint32_t>(fourcc('R', 'I', 'F', 'F'), file);
const uint32_t formatChunkSize = 16; const uint32_t formatChunkSize = 16;
const uint16_t channelCount = 1; const uint16_t channelCount = 1;
const uint16_t frameSize = static_cast<uint16_t>(channelCount * sizeof(float)); const uint16_t frameSize = static_cast<uint16_t>(channelCount * sizeof(float));
const uint32_t dataChunkSize = static_cast<uint32_t>(audioClip.size() * frameSize); const uint32_t dataChunkSize = static_cast<uint32_t>(audioClip.size() * frameSize);
const uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize); const uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize);
write<uint32_t>(riffChunkSize, file); write<uint32_t>(riffChunkSize, file);
write<uint32_t>(fourcc('W', 'A', 'V', 'E'), file); write<uint32_t>(fourcc('W', 'A', 'V', 'E'), file);
// Write format chunk // Write format chunk
write<uint32_t>(fourcc('f', 'm', 't', ' '), file); write<uint32_t>(fourcc('f', 'm', 't', ' '), file);
write<uint32_t>(formatChunkSize, file); write<uint32_t>(formatChunkSize, file);
const uint16_t codec = 0x03; // 32-bit float const uint16_t codec = 0x03; // 32-bit float
write<uint16_t>(codec, file); write<uint16_t>(codec, file);
write<uint16_t>(channelCount, file); write<uint16_t>(channelCount, file);
const uint32_t frameRate = static_cast<uint16_t>(audioClip.getSampleRate()); const uint32_t frameRate = static_cast<uint16_t>(audioClip.getSampleRate());
write<uint32_t>(frameRate, file); write<uint32_t>(frameRate, file);
const uint32_t bytesPerSecond = frameRate * frameSize; const uint32_t bytesPerSecond = frameRate * frameSize;
write<uint32_t>(bytesPerSecond, file); write<uint32_t>(bytesPerSecond, file);
write<uint16_t>(frameSize, file); write<uint16_t>(frameSize, file);
const uint16_t bitsPerSample = 8 * sizeof(float); const uint16_t bitsPerSample = 8 * sizeof(float);
write<uint16_t>(bitsPerSample, file); write<uint16_t>(bitsPerSample, file);
// Write data chunk // Write data chunk
write<uint32_t>(fourcc('d', 'a', 't', 'a'), file); write<uint32_t>(fourcc('d', 'a', 't', 'a'), file);
write<uint32_t>(dataChunkSize, file); write<uint32_t>(dataChunkSize, file);
for (float sample : audioClip) { for (float sample : audioClip) {
write<float>(sample, file); write<float>(sample, file);
} }
} }

View File

@ -4,89 +4,89 @@ using std::string;
using boost::optional; using boost::optional;
PhoneConverter& PhoneConverter::get() { PhoneConverter& PhoneConverter::get() {
static PhoneConverter converter; static PhoneConverter converter;
return converter; return converter;
} }
string PhoneConverter::getTypeName() { string PhoneConverter::getTypeName() {
return "Phone"; return "Phone";
} }
EnumConverter<Phone>::member_data PhoneConverter::getMemberData() { EnumConverter<Phone>::member_data PhoneConverter::getMemberData() {
return member_data { return member_data {
{ Phone::AO, "AO" }, { Phone::AO, "AO" },
{ Phone::AA, "AA" }, { Phone::AA, "AA" },
{ Phone::IY, "IY" }, { Phone::IY, "IY" },
{ Phone::UW, "UW" }, { Phone::UW, "UW" },
{ Phone::EH, "EH" }, { Phone::EH, "EH" },
{ Phone::IH, "IH" }, { Phone::IH, "IH" },
{ Phone::UH, "UH" }, { Phone::UH, "UH" },
{ Phone::AH, "AH" }, { Phone::AH, "AH" },
{ Phone::Schwa, "Schwa" }, { Phone::Schwa, "Schwa" },
{ Phone::AE, "AE" }, { Phone::AE, "AE" },
{ Phone::EY, "EY" }, { Phone::EY, "EY" },
{ Phone::AY, "AY" }, { Phone::AY, "AY" },
{ Phone::OW, "OW" }, { Phone::OW, "OW" },
{ Phone::AW, "AW" }, { Phone::AW, "AW" },
{ Phone::OY, "OY" }, { Phone::OY, "OY" },
{ Phone::ER, "ER" }, { Phone::ER, "ER" },
{ Phone::P, "P" }, { Phone::P, "P" },
{ Phone::B, "B" }, { Phone::B, "B" },
{ Phone::T, "T" }, { Phone::T, "T" },
{ Phone::D, "D" }, { Phone::D, "D" },
{ Phone::K, "K" }, { Phone::K, "K" },
{ Phone::G, "G" }, { Phone::G, "G" },
{ Phone::CH, "CH" }, { Phone::CH, "CH" },
{ Phone::JH, "JH" }, { Phone::JH, "JH" },
{ Phone::F, "F" }, { Phone::F, "F" },
{ Phone::V, "V" }, { Phone::V, "V" },
{ Phone::TH, "TH" }, { Phone::TH, "TH" },
{ Phone::DH, "DH" }, { Phone::DH, "DH" },
{ Phone::S, "S" }, { Phone::S, "S" },
{ Phone::Z, "Z" }, { Phone::Z, "Z" },
{ Phone::SH, "SH" }, { Phone::SH, "SH" },
{ Phone::ZH, "ZH" }, { Phone::ZH, "ZH" },
{ Phone::HH, "HH" }, { Phone::HH, "HH" },
{ Phone::M, "M" }, { Phone::M, "M" },
{ Phone::N, "N" }, { Phone::N, "N" },
{ Phone::NG, "NG" }, { Phone::NG, "NG" },
{ Phone::L, "L" }, { Phone::L, "L" },
{ Phone::R, "R" }, { Phone::R, "R" },
{ Phone::Y, "Y" }, { Phone::Y, "Y" },
{ Phone::W, "W" }, { Phone::W, "W" },
{ Phone::Breath, "Breath" }, { Phone::Breath, "Breath" },
{ Phone::Cough, "Cough" }, { Phone::Cough, "Cough" },
{ Phone::Smack, "Smack" }, { Phone::Smack, "Smack" },
{ Phone::Noise, "Noise" } { Phone::Noise, "Noise" }
}; };
} }
optional<Phone> PhoneConverter::tryParse(const string& s) { optional<Phone> PhoneConverter::tryParse(const string& s) {
auto result = EnumConverter<Phone>::tryParse(s); auto result = EnumConverter<Phone>::tryParse(s);
if (result) return result; if (result) return result;
if (s == "+BREATH+") { if (s == "+BREATH+") {
return Phone::Breath; return Phone::Breath;
} }
if (s == "+COUGH+") { if (s == "+COUGH+") {
return Phone::Cough; return Phone::Cough;
} }
if (s == "+SMACK+") { if (s == "+SMACK+") {
return Phone::Smack; return Phone::Smack;
} }
return Phone::Noise; return Phone::Noise;
} }
std::ostream& operator<<(std::ostream& stream, Phone value) { std::ostream& operator<<(std::ostream& stream, Phone value) {
return PhoneConverter::get().write(stream, value); return PhoneConverter::get().write(stream, value);
} }
std::istream& operator>>(std::istream& stream, Phone& value) { std::istream& operator>>(std::istream& stream, Phone& value) {
return PhoneConverter::get().read(stream, value); return PhoneConverter::get().read(stream, value);
} }
bool isVowel(Phone phone) { bool isVowel(Phone phone) {
return phone <= Phone::LastVowel; return phone <= Phone::LastVowel;
} }

View File

@ -4,88 +4,88 @@
// Defines a subset of the Arpabet // Defines a subset of the Arpabet
enum class Phone { enum class Phone {
///////// /////////
// Vowels // Vowels
// ... monophthongs // ... monophthongs
AO, // [ɔ] as in [o]ff, f[a]ll, fr[o]st AO, // [ɔ] as in [o]ff, f[a]ll, fr[o]st
AA, // [ɑ] as in f[a]ther AA, // [ɑ] as in f[a]ther
IY, // [i] as in b[ee], sh[e] IY, // [i] as in b[ee], sh[e]
UW, // [u] as in y[ou], n[ew], f[oo]d UW, // [u] as in y[ou], n[ew], f[oo]d
EH, // [ɛ] as in r[e]d, m[e]n EH, // [ɛ] as in r[e]d, m[e]n
IH, // [ɪ] as in b[i]g, w[i]n IH, // [ɪ] as in b[i]g, w[i]n
UH, // [ʊ] as in sh[ou]ld, c[ou]ld UH, // [ʊ] as in sh[ou]ld, c[ou]ld
AH, // [ʌ] as in b[u]t, s[u]n AH, // [ʌ] as in b[u]t, s[u]n
Schwa, // [ə] as in [a]lone, disc[u]s Schwa, // [ə] as in [a]lone, disc[u]s
AE, // [æ] as in [a]t, b[a]t AE, // [æ] as in [a]t, b[a]t
// ... diphthongs // ... diphthongs
EY, // [eɪ] as in s[ay], [ei]ght EY, // [eɪ] as in s[ay], [ei]ght
AY, // [aɪ] as in m[y], wh[y], r[i]de AY, // [aɪ] as in m[y], wh[y], r[i]de
OW, // [oʊ] as in sh[ow], c[oa]t OW, // [oʊ] as in sh[ow], c[oa]t
AW, // [aʊ] as in h[ow], n[ow] AW, // [aʊ] as in h[ow], n[ow]
OY, // [ɔɪ] as in b[oy], t[oy] OY, // [ɔɪ] as in b[oy], t[oy]
// ... r-colored // ... r-colored
ER, // [ɝ] as in h[er], b[ir]d, h[ur]t ER, // [ɝ] as in h[er], b[ir]d, h[ur]t
LastVowel = ER, LastVowel = ER,
///////////// /////////////
// Consonants // Consonants
// ... stops // ... stops
P, // [p] as in [p]ay P, // [p] as in [p]ay
B, // [b] as in [b]uy B, // [b] as in [b]uy
T, // [t] as in [t]ake T, // [t] as in [t]ake
D, // [d] as in [d]ay D, // [d] as in [d]ay
K, // [k] as in [k]ey K, // [k] as in [k]ey
G, // [g] as in [g]o G, // [g] as in [g]o
// ... affricates // ... affricates
CH, // [tʃ] as in [ch]air CH, // [tʃ] as in [ch]air
JH, // [dʒ] as in [j]ust JH, // [dʒ] as in [j]ust
// ... fricatives // ... fricatives
F, // [f] as in [f]or F, // [f] as in [f]or
V, // [v] as in [v]ery V, // [v] as in [v]ery
TH, // [θ] as in [th]anks TH, // [θ] as in [th]anks
DH, // [ð] as in [th]at DH, // [ð] as in [th]at
S, // [s] as in [s]ay S, // [s] as in [s]ay
Z, // [z] as in [z]oo Z, // [z] as in [z]oo
SH, // [ʃ] as in [sh]ow SH, // [ʃ] as in [sh]ow
ZH, // [ʒ] as in mea[s]ure, plea[s]ure ZH, // [ʒ] as in mea[s]ure, plea[s]ure
HH, // [h] as in [h]ouse HH, // [h] as in [h]ouse
// ... nasals // ... nasals
M, // [m] as in [m]an M, // [m] as in [m]an
N, // [n] as in [no] N, // [n] as in [no]
NG, // [ŋ] as in si[ng] NG, // [ŋ] as in si[ng]
// ... liquids // ... liquids
L, // [ɫ] as in [l]ate L, // [ɫ] as in [l]ate
R, // [r, ɹ] as in [r]un R, // [r, ɹ] as in [r]un
// ... semivowels // ... semivowels
Y, // [j] as in [y]es Y, // [j] as in [y]es
W, // [w] as in [w]ay W, // [w] as in [w]ay
///////////// /////////////
// Misc. // Misc.
Breath, Breath,
Cough, Cough,
Smack, Smack,
Noise Noise
}; };
class PhoneConverter : public EnumConverter<Phone> { class PhoneConverter : public EnumConverter<Phone> {
public: public:
static PhoneConverter& get(); static PhoneConverter& get();
protected: protected:
std::string getTypeName() override; std::string getTypeName() override;
member_data getMemberData() override; member_data getMemberData() override;
public: public:
boost::optional<Phone> tryParse(const std::string& s) override; boost::optional<Phone> tryParse(const std::string& s) override;
}; };
std::ostream& operator<<(std::ostream& stream, Phone value); std::ostream& operator<<(std::ostream& stream, Phone value);

View File

@ -4,54 +4,54 @@ using std::string;
using std::set; using std::set;
ShapeConverter& ShapeConverter::get() { ShapeConverter& ShapeConverter::get() {
static ShapeConverter converter; static ShapeConverter converter;
return converter; return converter;
} }
set<Shape> ShapeConverter::getBasicShapes() { set<Shape> ShapeConverter::getBasicShapes() {
static const set<Shape> result = [] { static const set<Shape> result = [] {
set<Shape> result; set<Shape> result;
for (int i = 0; i <= static_cast<int>(Shape::LastBasicShape); ++i) { for (int i = 0; i <= static_cast<int>(Shape::LastBasicShape); ++i) {
result.insert(static_cast<Shape>(i)); result.insert(static_cast<Shape>(i));
} }
return result; return result;
}(); }();
return result; return result;
} }
set<Shape> ShapeConverter::getExtendedShapes() { set<Shape> ShapeConverter::getExtendedShapes() {
static const set<Shape> result = [] { static const set<Shape> result = [] {
set<Shape> result; set<Shape> result;
for (int i = static_cast<int>(Shape::LastBasicShape) + 1; i < static_cast<int>(Shape::EndSentinel); ++i) { for (int i = static_cast<int>(Shape::LastBasicShape) + 1; i < static_cast<int>(Shape::EndSentinel); ++i) {
result.insert(static_cast<Shape>(i)); result.insert(static_cast<Shape>(i));
} }
return result; return result;
}(); }();
return result; return result;
} }
string ShapeConverter::getTypeName() { string ShapeConverter::getTypeName() {
return "Shape"; return "Shape";
} }
EnumConverter<Shape>::member_data ShapeConverter::getMemberData() { EnumConverter<Shape>::member_data ShapeConverter::getMemberData() {
return member_data { return member_data {
{ Shape::A, "A" }, { Shape::A, "A" },
{ Shape::B, "B" }, { Shape::B, "B" },
{ Shape::C, "C" }, { Shape::C, "C" },
{ Shape::D, "D" }, { Shape::D, "D" },
{ Shape::E, "E" }, { Shape::E, "E" },
{ Shape::F, "F" }, { Shape::F, "F" },
{ Shape::G, "G" }, { Shape::G, "G" },
{ Shape::H, "H" }, { Shape::H, "H" },
{ Shape::X, "X" } { Shape::X, "X" }
}; };
} }
std::ostream& operator<<(std::ostream& stream, Shape value) { std::ostream& operator<<(std::ostream& stream, Shape value) {
return ShapeConverter::get().write(stream, value); return ShapeConverter::get().write(stream, value);
} }
std::istream& operator>>(std::istream& stream, Shape& value) { std::istream& operator>>(std::istream& stream, Shape& value) {
return ShapeConverter::get().read(stream, value); return ShapeConverter::get().read(stream, value);
} }

View File

@ -7,33 +7,33 @@
// For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php // For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
// For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H. // For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
enum class Shape { enum class Shape {
// Basic shapes // Basic shapes
A, // Closed mouth (M, B, P) A, // Closed mouth (M, B, P)
B, // Clenched teeth (most consonants, some vowels like EE as in b[ee]) B, // Clenched teeth (most consonants, some vowels like EE as in b[ee])
C, // Open mouth (vowels like m[e]n, s[u]n, s[a]y) C, // Open mouth (vowels like m[e]n, s[u]n, s[a]y)
D, // Mouth wide open (vowels like f[a]ther, b[a]t, wh[y]) D, // Mouth wide open (vowels like f[a]ther, b[a]t, wh[y])
E, // Rounded mouth (vowels like [o]ff) E, // Rounded mouth (vowels like [o]ff)
F, // Puckered lips (y[ou], b[o]y, [w]ay) F, // Puckered lips (y[ou], b[o]y, [w]ay)
LastBasicShape = F, LastBasicShape = F,
// Extended shapes // Extended shapes
G, // "F", "V" G, // "F", "V"
H, // "L" H, // "L"
X, // Idle X, // Idle
EndSentinel EndSentinel
}; };
class ShapeConverter : public EnumConverter<Shape> { class ShapeConverter : public EnumConverter<Shape> {
public: public:
static ShapeConverter& get(); static ShapeConverter& get();
static std::set<Shape> getBasicShapes(); static std::set<Shape> getBasicShapes();
static std::set<Shape> getExtendedShapes(); static std::set<Shape> getExtendedShapes();
protected: protected:
std::string getTypeName() override; std::string getTypeName() override;
member_data getMemberData() override; member_data getMemberData() override;
}; };
std::ostream& operator<<(std::ostream& stream, Shape value); std::ostream& operator<<(std::ostream& stream, Shape value);
@ -41,7 +41,7 @@ std::ostream& operator<<(std::ostream& stream, Shape value);
std::istream& operator>>(std::istream& stream, Shape& value); std::istream& operator>>(std::istream& stream, Shape& value);
inline bool isClosed(Shape shape) { inline bool isClosed(Shape shape) {
return shape == Shape::A || shape == Shape::X; return shape == Shape::A || shape == Shape::X;
} }
// A set of mouth shapes. // A set of mouth shapes.

View File

@ -7,66 +7,66 @@ using std::chrono::duration_cast;
using std::string; using std::string;
DatExporter::DatExporter(const ShapeSet& targetShapeSet, double frameRate, bool convertToPrestonBlair) : DatExporter::DatExporter(const ShapeSet& targetShapeSet, double frameRate, bool convertToPrestonBlair) :
frameRate(frameRate), frameRate(frameRate),
convertToPrestonBlair(convertToPrestonBlair), convertToPrestonBlair(convertToPrestonBlair),
prestonBlairShapeNames { prestonBlairShapeNames {
{ Shape::A, "MBP" }, { Shape::A, "MBP" },
{ Shape::B, "etc" }, { Shape::B, "etc" },
{ Shape::C, "E" }, { Shape::C, "E" },
{ Shape::D, "AI" }, { Shape::D, "AI" },
{ Shape::E, "O" }, { Shape::E, "O" },
{ Shape::F, "U" }, { Shape::F, "U" },
{ Shape::G, "FV" }, { Shape::G, "FV" },
{ Shape::H, "L" }, { Shape::H, "L" },
{ Shape::X, "rest" }, { Shape::X, "rest" },
} }
{ {
// Animation works with a fixed frame rate of 100. // Animation works with a fixed frame rate of 100.
// Downsampling to much less than 25 fps may result in dropped frames. // Downsampling to much less than 25 fps may result in dropped frames.
// Upsampling to more than 100 fps doesn't make sense. // Upsampling to more than 100 fps doesn't make sense.
const double minFrameRate = 24.0; const double minFrameRate = 24.0;
const double maxFrameRate = 100.0; const double maxFrameRate = 100.0;
if (frameRate < minFrameRate || frameRate > maxFrameRate) { if (frameRate < minFrameRate || frameRate > maxFrameRate) {
throw std::runtime_error(fmt::format("Frame rate must be between {} and {} fps.", minFrameRate, maxFrameRate)); throw std::runtime_error(fmt::format("Frame rate must be between {} and {} fps.", minFrameRate, maxFrameRate));
} }
if (convertToPrestonBlair) { if (convertToPrestonBlair) {
for (Shape shape : targetShapeSet) { for (Shape shape : targetShapeSet) {
if (prestonBlairShapeNames.find(shape) == prestonBlairShapeNames.end()) { if (prestonBlairShapeNames.find(shape) == prestonBlairShapeNames.end()) {
throw std::runtime_error(fmt::format("Mouth shape {} cannot be converted to Preston Blair shape names.")); throw std::runtime_error(fmt::format("Mouth shape {} cannot be converted to Preston Blair shape names."));
} }
} }
} }
} }
void DatExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) { void DatExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
outputStream << "MohoSwitch1" << "\n"; outputStream << "MohoSwitch1" << "\n";
// Output shapes with start times // Output shapes with start times
int lastFrameNumber = 0; int lastFrameNumber = 0;
for (auto& timedShape : input.animation) { for (auto& timedShape : input.animation) {
const int frameNumber = toFrameNumber(timedShape.getStart()); const int frameNumber = toFrameNumber(timedShape.getStart());
if (frameNumber == lastFrameNumber) continue; if (frameNumber == lastFrameNumber) continue;
const string shapeName = toString(timedShape.getValue()); const string shapeName = toString(timedShape.getValue());
outputStream << frameNumber << " " << shapeName << "\n"; outputStream << frameNumber << " " << shapeName << "\n";
lastFrameNumber = frameNumber; lastFrameNumber = frameNumber;
} }
// Output closed mouth with end time // Output closed mouth with end time
int frameNumber = toFrameNumber(input.animation.getRange().getEnd()); int frameNumber = toFrameNumber(input.animation.getRange().getEnd());
if (frameNumber == lastFrameNumber) ++frameNumber; if (frameNumber == lastFrameNumber) ++frameNumber;
const string shapeName = toString(convertToTargetShapeSet(Shape::X, input.targetShapeSet)); const string shapeName = toString(convertToTargetShapeSet(Shape::X, input.targetShapeSet));
outputStream << frameNumber << " " << shapeName << "\n"; outputStream << frameNumber << " " << shapeName << "\n";
} }
string DatExporter::toString(Shape shape) const { string DatExporter::toString(Shape shape) const {
return convertToPrestonBlair return convertToPrestonBlair
? prestonBlairShapeNames.at(shape) ? prestonBlairShapeNames.at(shape)
: boost::lexical_cast<std::string>(shape); : boost::lexical_cast<std::string>(shape);
} }
int DatExporter::toFrameNumber(centiseconds time) const { int DatExporter::toFrameNumber(centiseconds time) const {
return 1 + static_cast<int>(frameRate * duration_cast<duration<double>>(time).count()); return 1 + static_cast<int>(frameRate * duration_cast<duration<double>>(time).count());
} }

View File

@ -8,14 +8,14 @@
// Exporter for Moho's switch data file format // Exporter for Moho's switch data file format
class DatExporter : public Exporter { class DatExporter : public Exporter {
public: public:
DatExporter(const ShapeSet& targetShapeSet, double frameRate, bool convertToPrestonBlair); DatExporter(const ShapeSet& targetShapeSet, double frameRate, bool convertToPrestonBlair);
void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override; void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
private: private:
int toFrameNumber(centiseconds time) const; int toFrameNumber(centiseconds time) const;
std::string toString(Shape shape) const; std::string toString(Shape shape) const;
double frameRate; double frameRate;
bool convertToPrestonBlair; bool convertToPrestonBlair;
std::map<Shape, std::string> prestonBlairShapeNames; std::map<Shape, std::string> prestonBlairShapeNames;
}; };

View File

@ -6,21 +6,21 @@
class ExporterInput { class ExporterInput {
public: public:
ExporterInput( ExporterInput(
const std::filesystem::path& inputFilePath, const std::filesystem::path& inputFilePath,
const JoiningContinuousTimeline<Shape>& animation, const JoiningContinuousTimeline<Shape>& animation,
const ShapeSet& targetShapeSet) : const ShapeSet& targetShapeSet) :
inputFilePath(inputFilePath), inputFilePath(inputFilePath),
animation(animation), animation(animation),
targetShapeSet(targetShapeSet) {} targetShapeSet(targetShapeSet) {}
std::filesystem::path inputFilePath; std::filesystem::path inputFilePath;
JoiningContinuousTimeline<Shape> animation; JoiningContinuousTimeline<Shape> animation;
ShapeSet targetShapeSet; ShapeSet targetShapeSet;
}; };
class Exporter { class Exporter {
public: public:
virtual ~Exporter() {} virtual ~Exporter() {}
virtual void exportAnimation(const ExporterInput& input, std::ostream& outputStream) = 0; virtual void exportAnimation(const ExporterInput& input, std::ostream& outputStream) = 0;
}; };

View File

@ -5,24 +5,24 @@
using std::string; using std::string;
void JsonExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) { void JsonExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
// Export as JSON. // Export as JSON.
// I'm not using a library because the code is short enough without one and it lets me control // I'm not using a library because the code is short enough without one and it lets me control
// the formatting. // the formatting.
outputStream << "{\n"; outputStream << "{\n";
outputStream << " \"metadata\": {\n"; outputStream << " \"metadata\": {\n";
outputStream << " \"soundFile\": \"" << escapeJsonString(absolute(input.inputFilePath).u8string()) << "\",\n"; outputStream << " \"soundFile\": \"" << escapeJsonString(absolute(input.inputFilePath).u8string()) << "\",\n";
outputStream << " \"duration\": " << formatDuration(input.animation.getRange().getDuration()) << "\n"; outputStream << " \"duration\": " << formatDuration(input.animation.getRange().getDuration()) << "\n";
outputStream << " },\n"; outputStream << " },\n";
outputStream << " \"mouthCues\": [\n"; outputStream << " \"mouthCues\": [\n";
bool isFirst = true; bool isFirst = true;
for (auto& timedShape : dummyShapeIfEmpty(input.animation, input.targetShapeSet)) { for (auto& timedShape : dummyShapeIfEmpty(input.animation, input.targetShapeSet)) {
if (!isFirst) outputStream << ",\n"; if (!isFirst) outputStream << ",\n";
isFirst = false; isFirst = false;
outputStream << " { \"start\": " << formatDuration(timedShape.getStart()) outputStream << " { \"start\": " << formatDuration(timedShape.getStart())
<< ", \"end\": " << formatDuration(timedShape.getEnd()) << ", \"end\": " << formatDuration(timedShape.getEnd())
<< ", \"value\": \"" << timedShape.getValue() << "\" }"; << ", \"value\": \"" << timedShape.getValue() << "\" }";
} }
outputStream << "\n"; outputStream << "\n";
outputStream << " ]\n"; outputStream << " ]\n";
outputStream << "}\n"; outputStream << "}\n";
} }

View File

@ -4,5 +4,5 @@
class JsonExporter : public Exporter { class JsonExporter : public Exporter {
public: public:
void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override; void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
}; };

View File

@ -2,19 +2,19 @@
#include "animation/targetShapeSet.h" #include "animation/targetShapeSet.h"
void TsvExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) { void TsvExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
// Output shapes with start times // Output shapes with start times
for (auto& timedShape : input.animation) { for (auto& timedShape : input.animation) {
outputStream outputStream
<< formatDuration(timedShape.getStart()) << formatDuration(timedShape.getStart())
<< "\t" << "\t"
<< timedShape.getValue() << timedShape.getValue()
<< "\n"; << "\n";
} }
// Output closed mouth with end time // Output closed mouth with end time
outputStream outputStream
<< formatDuration(input.animation.getRange().getEnd()) << formatDuration(input.animation.getRange().getEnd())
<< "\t" << "\t"
<< convertToTargetShapeSet(Shape::X, input.targetShapeSet) << convertToTargetShapeSet(Shape::X, input.targetShapeSet)
<< "\n"; << "\n";
} }

View File

@ -4,6 +4,6 @@
class TsvExporter : public Exporter { class TsvExporter : public Exporter {
public: public:
void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override; void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
}; };

View File

@ -8,33 +8,33 @@ using std::string;
using boost::property_tree::ptree; using boost::property_tree::ptree;
void XmlExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) { void XmlExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
ptree tree; ptree tree;
// Add metadata // Add metadata
tree.put("rhubarbResult.metadata.soundFile", absolute(input.inputFilePath).u8string()); tree.put("rhubarbResult.metadata.soundFile", absolute(input.inputFilePath).u8string());
tree.put( tree.put(
"rhubarbResult.metadata.duration", "rhubarbResult.metadata.duration",
formatDuration(input.animation.getRange().getDuration()) formatDuration(input.animation.getRange().getDuration())
); );
// Add mouth cues // Add mouth cues
for (auto& timedShape : dummyShapeIfEmpty(input.animation, input.targetShapeSet)) { for (auto& timedShape : dummyShapeIfEmpty(input.animation, input.targetShapeSet)) {
ptree& mouthCueElement = tree.add( ptree& mouthCueElement = tree.add(
"rhubarbResult.mouthCues.mouthCue", "rhubarbResult.mouthCues.mouthCue",
timedShape.getValue() timedShape.getValue()
); );
mouthCueElement.put("<xmlattr>.start", formatDuration(timedShape.getStart())); mouthCueElement.put("<xmlattr>.start", formatDuration(timedShape.getStart()));
mouthCueElement.put("<xmlattr>.end", formatDuration(timedShape.getEnd())); mouthCueElement.put("<xmlattr>.end", formatDuration(timedShape.getEnd()));
} }
#ifndef BOOST_VERSION //present in version.hpp #ifndef BOOST_VERSION //present in version.hpp
#error "Could not detect Boost version." #error "Could not detect Boost version."
#endif #endif
#if BOOST_VERSION < 105600 // Support legacy syntax #if BOOST_VERSION < 105600 // Support legacy syntax
using writer_setting = boost::property_tree::xml_writer_settings<char>; using writer_setting = boost::property_tree::xml_writer_settings<char>;
#else #else
using writer_setting = boost::property_tree::xml_writer_settings<string>; using writer_setting = boost::property_tree::xml_writer_settings<string>;
#endif #endif
write_xml(outputStream, tree, writer_setting(' ', 2)); write_xml(outputStream, tree, writer_setting(' ', 2));
} }

View File

@ -4,5 +4,5 @@
class XmlExporter : public Exporter { class XmlExporter : public Exporter {
public: public:
void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override; void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
}; };

View File

@ -3,14 +3,14 @@
// Makes sure there is at least one mouth shape // Makes sure there is at least one mouth shape
std::vector<Timed<Shape>> dummyShapeIfEmpty( std::vector<Timed<Shape>> dummyShapeIfEmpty(
const JoiningTimeline<Shape>& animation, const JoiningTimeline<Shape>& animation,
const ShapeSet& targetShapeSet const ShapeSet& targetShapeSet
) { ) {
std::vector<Timed<Shape>> result; std::vector<Timed<Shape>> result;
std::copy(animation.begin(), animation.end(), std::back_inserter(result)); std::copy(animation.begin(), animation.end(), std::back_inserter(result));
if (result.empty()) { if (result.empty()) {
// Add zero-length empty mouth // Add zero-length empty mouth
result.emplace_back(0_cs, 0_cs, convertToTargetShapeSet(Shape::X, targetShapeSet)); result.emplace_back(0_cs, 0_cs, convertToTargetShapeSet(Shape::X, targetShapeSet));
} }
return result; return result;
} }

View File

@ -5,6 +5,6 @@
// Makes sure there is at least one mouth shape // Makes sure there is at least one mouth shape
std::vector<Timed<Shape>> dummyShapeIfEmpty( std::vector<Timed<Shape>> dummyShapeIfEmpty(
const JoiningTimeline<Shape>& animation, const JoiningTimeline<Shape>& animation,
const ShapeSet& targetShapeSet const ShapeSet& targetShapeSet
); );

View File

@ -9,27 +9,27 @@ using std::string;
using std::filesystem::path; using std::filesystem::path;
JoiningContinuousTimeline<Shape> animateAudioClip( JoiningContinuousTimeline<Shape> animateAudioClip(
const AudioClip& audioClip, const AudioClip& audioClip,
const optional<string>& dialog, const optional<string>& dialog,
const Recognizer& recognizer, const Recognizer& recognizer,
const ShapeSet& targetShapeSet, const ShapeSet& targetShapeSet,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink) ProgressSink& progressSink)
{ {
const BoundedTimeline<Phone> phones = const BoundedTimeline<Phone> phones =
recognizer.recognizePhones(audioClip, dialog, maxThreadCount, progressSink); recognizer.recognizePhones(audioClip, dialog, maxThreadCount, progressSink);
JoiningContinuousTimeline<Shape> result = animate(phones, targetShapeSet); JoiningContinuousTimeline<Shape> result = animate(phones, targetShapeSet);
return result; return result;
} }
JoiningContinuousTimeline<Shape> animateWaveFile( JoiningContinuousTimeline<Shape> animateWaveFile(
path filePath, path filePath,
const optional<string>& dialog, const optional<string>& dialog,
const Recognizer& recognizer, const Recognizer& recognizer,
const ShapeSet& targetShapeSet, const ShapeSet& targetShapeSet,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink) ProgressSink& progressSink)
{ {
const auto audioClip = createAudioFileClip(filePath); const auto audioClip = createAudioFileClip(filePath);
return animateAudioClip(*audioClip, dialog, recognizer, targetShapeSet, maxThreadCount, progressSink); return animateAudioClip(*audioClip, dialog, recognizer, targetShapeSet, maxThreadCount, progressSink);
} }

View File

@ -9,17 +9,17 @@
#include "recognition/Recognizer.h" #include "recognition/Recognizer.h"
JoiningContinuousTimeline<Shape> animateAudioClip( JoiningContinuousTimeline<Shape> animateAudioClip(
const AudioClip& audioClip, const AudioClip& audioClip,
const boost::optional<std::string>& dialog, const boost::optional<std::string>& dialog,
const Recognizer& recognizer, const Recognizer& recognizer,
const ShapeSet& targetShapeSet, const ShapeSet& targetShapeSet,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink); ProgressSink& progressSink);
JoiningContinuousTimeline<Shape> animateWaveFile( JoiningContinuousTimeline<Shape> animateWaveFile(
std::filesystem::path filePath, std::filesystem::path filePath,
const boost::optional<std::string>& dialog, const boost::optional<std::string>& dialog,
const Recognizer& recognizer, const Recognizer& recognizer,
const ShapeSet& targetShapeSet, const ShapeSet& targetShapeSet,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink); ProgressSink& progressSink);

View File

@ -10,30 +10,30 @@ using std::string;
namespace logging { namespace logging {
// Returns an int representing the current thread. // Returns an int representing the current thread.
// This used to be a simple thread_local variable, but Xcode doesn't support that yet // This used to be a simple thread_local variable, but Xcode doesn't support that yet
int getThreadCounter() { int getThreadCounter() {
using thread_id = std::thread::id; using thread_id = std::thread::id;
static std::mutex counterMutex; static std::mutex counterMutex;
lock_guard<std::mutex> lock(counterMutex); lock_guard<std::mutex> lock(counterMutex);
static unordered_map<thread_id, int> threadCounters; static unordered_map<thread_id, int> threadCounters;
static int lastThreadId = 0; static int lastThreadId = 0;
thread_id threadId = std::this_thread::get_id(); thread_id threadId = std::this_thread::get_id();
if (threadCounters.find(threadId) == threadCounters.end()) { if (threadCounters.find(threadId) == threadCounters.end()) {
threadCounters.insert({ threadId, ++lastThreadId }); threadCounters.insert({ threadId, ++lastThreadId });
} }
return threadCounters.find(threadId)->second; return threadCounters.find(threadId)->second;
} }
Entry::Entry(Level level, const string& message) : Entry::Entry(Level level, const string& message) :
timestamp(), timestamp(),
level(level), level(level),
message(message) message(message)
{ {
time(&timestamp); time(&timestamp);
this->threadCounter = getThreadCounter(); this->threadCounter = getThreadCounter();
} }
} }

View File

@ -3,15 +3,15 @@
#include "Level.h" #include "Level.h"
namespace logging { namespace logging {
struct Entry { struct Entry {
Entry(Level level, const std::string& message); Entry(Level level, const std::string& message);
virtual ~Entry() = default; virtual ~Entry() = default;
time_t timestamp; time_t timestamp;
int threadCounter; int threadCounter;
Level level; Level level;
std::string message; std::string message;
}; };
} }

View File

@ -5,10 +5,10 @@
namespace logging { namespace logging {
class Formatter { class Formatter {
public: public:
virtual ~Formatter() = default; virtual ~Formatter() = default;
virtual std::string format(const Entry& entry) = 0; virtual std::string format(const Entry& entry) = 0;
}; };
} }

View File

@ -4,32 +4,32 @@ using std::string;
namespace logging { namespace logging {
LevelConverter& LevelConverter::get() { LevelConverter& LevelConverter::get() {
static LevelConverter converter; static LevelConverter converter;
return converter; return converter;
} }
string LevelConverter::getTypeName() { string LevelConverter::getTypeName() {
return "Level"; return "Level";
} }
EnumConverter<Level>::member_data LevelConverter::getMemberData() { EnumConverter<Level>::member_data LevelConverter::getMemberData() {
return member_data { return member_data {
{ Level::Trace, "Trace" }, { Level::Trace, "Trace" },
{ Level::Debug, "Debug" }, { Level::Debug, "Debug" },
{ Level::Info, "Info" }, { Level::Info, "Info" },
{ Level::Warn, "Warn" }, { Level::Warn, "Warn" },
{ Level::Error, "Error" }, { Level::Error, "Error" },
{ Level::Fatal, "Fatal" } { Level::Fatal, "Fatal" }
}; };
} }
std::ostream& operator<<(std::ostream& stream, Level value) { std::ostream& operator<<(std::ostream& stream, Level value) {
return LevelConverter::get().write(stream, value); return LevelConverter::get().write(stream, value);
} }
std::istream& operator >>(std::istream& stream, Level& value) { std::istream& operator >>(std::istream& stream, Level& value) {
return LevelConverter::get().read(stream, value); return LevelConverter::get().read(stream, value);
} }
} }

View File

@ -4,26 +4,26 @@
namespace logging { namespace logging {
enum class Level { enum class Level {
Trace, Trace,
Debug, Debug,
Info, Info,
Warn, Warn,
Error, Error,
Fatal, Fatal,
EndSentinel EndSentinel
}; };
class LevelConverter : public EnumConverter<Level> { class LevelConverter : public EnumConverter<Level> {
public: public:
static LevelConverter& get(); static LevelConverter& get();
protected: protected:
std::string getTypeName() override; std::string getTypeName() override;
member_data getMemberData() override; member_data getMemberData() override;
}; };
std::ostream& operator<<(std::ostream& stream, Level value); std::ostream& operator<<(std::ostream& stream, Level value);
std::istream& operator >>(std::istream& stream, Level& value); std::istream& operator >>(std::istream& stream, Level& value);
} }

View File

@ -4,10 +4,10 @@
namespace logging { namespace logging {
class Sink { class Sink {
public: public:
virtual ~Sink() = default; virtual ~Sink() = default;
virtual void receive(const Entry& entry) = 0; virtual void receive(const Entry& entry) = 0;
}; };
} }

View File

@ -7,17 +7,17 @@ using std::string;
namespace logging { namespace logging {
string SimpleConsoleFormatter::format(const Entry& entry) { string SimpleConsoleFormatter::format(const Entry& entry) {
return fmt::format("[{0}] {1}", entry.level, entry.message); return fmt::format("[{0}] {1}", entry.level, entry.message);
} }
string SimpleFileFormatter::format(const Entry& entry) { string SimpleFileFormatter::format(const Entry& entry) {
return fmt::format( return fmt::format(
"[{0}] {1} {2}", "[{0}] {1} {2}",
formatTime(entry.timestamp, "%F %H:%M:%S"), formatTime(entry.timestamp, "%F %H:%M:%S"),
entry.threadCounter, entry.threadCounter,
consoleFormatter.format(entry) consoleFormatter.format(entry)
); );
} }
} }

View File

@ -4,16 +4,16 @@
namespace logging { namespace logging {
class SimpleConsoleFormatter : public Formatter { class SimpleConsoleFormatter : public Formatter {
public: public:
std::string format(const Entry& entry) override; std::string format(const Entry& entry) override;
}; };
class SimpleFileFormatter : public Formatter { class SimpleFileFormatter : public Formatter {
public: public:
std::string format(const Entry& entry) override; std::string format(const Entry& entry) override;
private: private:
SimpleConsoleFormatter consoleFormatter; SimpleConsoleFormatter consoleFormatter;
}; };
} }

View File

@ -10,46 +10,46 @@ using std::shared_ptr;
using std::lock_guard; using std::lock_guard;
std::mutex& getLogMutex() { std::mutex& getLogMutex() {
static std::mutex mutex; static std::mutex mutex;
return mutex; return mutex;
} }
vector<shared_ptr<Sink>>& getSinks() { vector<shared_ptr<Sink>>& getSinks() {
static vector<shared_ptr<Sink>> sinks; static vector<shared_ptr<Sink>> sinks;
return sinks; return sinks;
} }
bool logging::addSink(shared_ptr<Sink> sink) { bool logging::addSink(shared_ptr<Sink> sink) {
lock_guard<std::mutex> lock(getLogMutex()); lock_guard<std::mutex> lock(getLogMutex());
auto& sinks = getSinks(); auto& sinks = getSinks();
if (std::find(sinks.begin(), sinks.end(), sink) == sinks.end()) { if (std::find(sinks.begin(), sinks.end(), sink) == sinks.end()) {
sinks.push_back(sink); sinks.push_back(sink);
return true; return true;
} }
return false; return false;
} }
bool logging::removeSink(std::shared_ptr<Sink> sink) { bool logging::removeSink(std::shared_ptr<Sink> sink) {
lock_guard<std::mutex> lock(getLogMutex()); lock_guard<std::mutex> lock(getLogMutex());
auto& sinks = getSinks(); auto& sinks = getSinks();
const auto it = std::find(sinks.begin(), sinks.end(), sink); const auto it = std::find(sinks.begin(), sinks.end(), sink);
if (it != sinks.end()) { if (it != sinks.end()) {
sinks.erase(it); sinks.erase(it);
return true; return true;
} }
return false; return false;
} }
void logging::log(const Entry& entry) { void logging::log(const Entry& entry) {
lock_guard<std::mutex> lock(getLogMutex()); lock_guard<std::mutex> lock(getLogMutex());
for (auto& sink : getSinks()) { for (auto& sink : getSinks()) {
sink->receive(entry); sink->receive(entry);
} }
} }
void logging::log(Level level, const string& message) { void logging::log(Level level, const string& message) {
const Entry entry = Entry(level, message); const Entry entry = Entry(level, message);
log(entry); log(entry);
} }

View File

@ -6,32 +6,32 @@
namespace logging { namespace logging {
bool addSink(std::shared_ptr<Sink> sink); bool addSink(std::shared_ptr<Sink> sink);
bool removeSink(std::shared_ptr<Sink> sink); bool removeSink(std::shared_ptr<Sink> sink);
void log(const Entry& entry); void log(const Entry& entry);
void log(Level level, const std::string& message); void log(Level level, const std::string& message);
template<typename... Args> template<typename... Args>
void logFormat(Level level, fmt::CStringRef format, const Args&... args) { void logFormat(Level level, fmt::CStringRef format, const Args&... args) {
log(level, fmt::format(format, args...)); log(level, fmt::format(format, args...));
} }
#define LOG_WITH_LEVEL(levelName, levelEnum) \ #define LOG_WITH_LEVEL(levelName, levelEnum) \
inline void levelName(const std::string& message) { \ inline void levelName(const std::string& message) { \
log(Level::levelEnum, message); \ log(Level::levelEnum, message); \
} \ } \
template <typename... Args> \ template <typename... Args> \
void levelName ## Format(fmt::CStringRef format, const Args&... args) { \ void levelName ## Format(fmt::CStringRef format, const Args&... args) { \
logFormat(Level::levelEnum, format, args...); \ logFormat(Level::levelEnum, format, args...); \
} }
LOG_WITH_LEVEL(trace, Trace) LOG_WITH_LEVEL(trace, Trace)
LOG_WITH_LEVEL(debug, Debug) LOG_WITH_LEVEL(debug, Debug)
LOG_WITH_LEVEL(info, Info) LOG_WITH_LEVEL(info, Info)
LOG_WITH_LEVEL(warn, Warn) LOG_WITH_LEVEL(warn, Warn)
LOG_WITH_LEVEL(error, Error) LOG_WITH_LEVEL(error, Error)
LOG_WITH_LEVEL(fatal, Fatal) LOG_WITH_LEVEL(fatal, Fatal)
} }

View File

@ -7,29 +7,29 @@ using std::shared_ptr;
namespace logging { namespace logging {
LevelFilter::LevelFilter(shared_ptr<Sink> innerSink, Level minLevel) : LevelFilter::LevelFilter(shared_ptr<Sink> innerSink, Level minLevel) :
innerSink(innerSink), innerSink(innerSink),
minLevel(minLevel) minLevel(minLevel)
{} {}
void LevelFilter::receive(const Entry& entry) { void LevelFilter::receive(const Entry& entry) {
if (entry.level >= minLevel) { if (entry.level >= minLevel) {
innerSink->receive(entry); innerSink->receive(entry);
} }
} }
StreamSink::StreamSink(shared_ptr<std::ostream> stream, shared_ptr<Formatter> formatter) : StreamSink::StreamSink(shared_ptr<std::ostream> stream, shared_ptr<Formatter> formatter) :
stream(stream), stream(stream),
formatter(formatter) formatter(formatter)
{} {}
void StreamSink::receive(const Entry& entry) { void StreamSink::receive(const Entry& entry) {
const string line = formatter->format(entry); const string line = formatter->format(entry);
*stream << line << std::endl; *stream << line << std::endl;
} }
StdErrSink::StdErrSink(shared_ptr<Formatter> formatter) : StdErrSink::StdErrSink(shared_ptr<Formatter> formatter) :
StreamSink(std::shared_ptr<std::ostream>(&std::cerr, [](void*) {}), formatter) StreamSink(std::shared_ptr<std::ostream>(&std::cerr, [](void*) {}), formatter)
{} {}
} }

View File

@ -5,29 +5,29 @@
#include "Formatter.h" #include "Formatter.h"
namespace logging { namespace logging {
enum class Level; enum class Level;
class LevelFilter : public Sink { class LevelFilter : public Sink {
public: public:
LevelFilter(std::shared_ptr<Sink> innerSink, Level minLevel); LevelFilter(std::shared_ptr<Sink> innerSink, Level minLevel);
void receive(const Entry& entry) override; void receive(const Entry& entry) override;
private: private:
std::shared_ptr<Sink> innerSink; std::shared_ptr<Sink> innerSink;
Level minLevel; Level minLevel;
}; };
class StreamSink : public Sink { class StreamSink : public Sink {
public: public:
StreamSink(std::shared_ptr<std::ostream> stream, std::shared_ptr<Formatter> formatter); StreamSink(std::shared_ptr<std::ostream> stream, std::shared_ptr<Formatter> formatter);
void receive(const Entry& entry) override; void receive(const Entry& entry) override;
private: private:
std::shared_ptr<std::ostream> stream; std::shared_ptr<std::ostream> stream;
std::shared_ptr<Formatter> formatter; std::shared_ptr<Formatter> formatter;
}; };
class StdErrSink : public StreamSink { class StdErrSink : public StreamSink {
public: public:
explicit StdErrSink(std::shared_ptr<Formatter> formatter); explicit StdErrSink(std::shared_ptr<Formatter> formatter);
}; };
} }

View File

@ -11,103 +11,103 @@ using std::string;
using boost::optional; using boost::optional;
static lambda_unique_ptr<ps_decoder_t> createDecoder(optional<std::string> dialog) { static lambda_unique_ptr<ps_decoder_t> createDecoder(optional<std::string> dialog) {
UNUSED(dialog); UNUSED(dialog);
lambda_unique_ptr<cmd_ln_t> config( lambda_unique_ptr<cmd_ln_t> config(
cmd_ln_init( cmd_ln_init(
nullptr, ps_args(), true, nullptr, ps_args(), true,
// Set acoustic model // Set acoustic model
"-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(), "-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(),
// Set phonetic language model // Set phonetic language model
"-allphone", (getSphinxModelDirectory() / "en-us-phone.lm.bin").u8string().c_str(), "-allphone", (getSphinxModelDirectory() / "en-us-phone.lm.bin").u8string().c_str(),
"-allphone_ci", "yes", "-allphone_ci", "yes",
// Set language model probability weight. // Set language model probability weight.
// Low values (<= 0.4) can lead to fluttering animation. // Low values (<= 0.4) can lead to fluttering animation.
// High values (>= 1.0) can lead to imprecise or freezing animation. // High values (>= 1.0) can lead to imprecise or freezing animation.
"-lw", "0.8", "-lw", "0.8",
// Add noise against zero silence // Add noise against zero silence
// (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor) // (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor)
"-dither", "yes", "-dither", "yes",
// Disable VAD -- we're doing that ourselves // Disable VAD -- we're doing that ourselves
"-remove_silence", "no", "-remove_silence", "no",
// Perform per-utterance cepstral mean normalization // Perform per-utterance cepstral mean normalization
"-cmn", "batch", "-cmn", "batch",
// The following settings are recommended at // The following settings are recommended at
// http://cmusphinx.sourceforge.net/wiki/phonemerecognition // http://cmusphinx.sourceforge.net/wiki/phonemerecognition
// Set beam width applied to every frame in Viterbi search // Set beam width applied to every frame in Viterbi search
"-beam", "1e-20", "-beam", "1e-20",
// Set beam width applied to phone transitions // Set beam width applied to phone transitions
"-pbeam", "1e-20", "-pbeam", "1e-20",
nullptr), nullptr),
[](cmd_ln_t* config) { cmd_ln_free_r(config); }); [](cmd_ln_t* config) { cmd_ln_free_r(config); });
if (!config) throw runtime_error("Error creating configuration."); if (!config) throw runtime_error("Error creating configuration.");
lambda_unique_ptr<ps_decoder_t> decoder( lambda_unique_ptr<ps_decoder_t> decoder(
ps_init(config.get()), ps_init(config.get()),
[](ps_decoder_t* recognizer) { ps_free(recognizer); }); [](ps_decoder_t* recognizer) { ps_free(recognizer); });
if (!decoder) throw runtime_error("Error creating speech decoder."); if (!decoder) throw runtime_error("Error creating speech decoder.");
return decoder; return decoder;
} }
static Timeline<Phone> utteranceToPhones( static Timeline<Phone> utteranceToPhones(
const AudioClip& audioClip, const AudioClip& audioClip,
TimeRange utteranceTimeRange, TimeRange utteranceTimeRange,
ps_decoder_t& decoder, ps_decoder_t& decoder,
ProgressSink& utteranceProgressSink ProgressSink& utteranceProgressSink
) { ) {
// Pad time range to give PocketSphinx some breathing room // Pad time range to give PocketSphinx some breathing room
TimeRange paddedTimeRange = utteranceTimeRange; TimeRange paddedTimeRange = utteranceTimeRange;
const centiseconds padding(3); const centiseconds padding(3);
paddedTimeRange.grow(padding); paddedTimeRange.grow(padding);
paddedTimeRange.trim(audioClip.getTruncatedRange()); paddedTimeRange.trim(audioClip.getTruncatedRange());
const unique_ptr<AudioClip> clipSegment = audioClip.clone() const unique_ptr<AudioClip> clipSegment = audioClip.clone()
| segment(paddedTimeRange) | segment(paddedTimeRange)
| resample(sphinxSampleRate); | resample(sphinxSampleRate);
const auto audioBuffer = copyTo16bitBuffer(*clipSegment); const auto audioBuffer = copyTo16bitBuffer(*clipSegment);
// Detect phones (returned as words) // Detect phones (returned as words)
BoundedTimeline<string> phoneStrings = recognizeWords(audioBuffer, decoder); BoundedTimeline<string> phoneStrings = recognizeWords(audioBuffer, decoder);
phoneStrings.shift(paddedTimeRange.getStart()); phoneStrings.shift(paddedTimeRange.getStart());
Timeline<Phone> utterancePhones; Timeline<Phone> utterancePhones;
for (const auto& timedPhoneString : phoneStrings) { for (const auto& timedPhoneString : phoneStrings) {
Phone phone = PhoneConverter::get().parse(timedPhoneString.getValue()); Phone phone = PhoneConverter::get().parse(timedPhoneString.getValue());
if (phone == Phone::AH && timedPhoneString.getDuration() < 6_cs) { if (phone == Phone::AH && timedPhoneString.getDuration() < 6_cs) {
// Heuristic: < 6_cs is schwa. PocketSphinx doesn't differentiate. // Heuristic: < 6_cs is schwa. PocketSphinx doesn't differentiate.
phone = Phone::Schwa; phone = Phone::Schwa;
} }
utterancePhones.set(timedPhoneString.getTimeRange(), phone); utterancePhones.set(timedPhoneString.getTimeRange(), phone);
} }
// Log raw phones // Log raw phones
for (const auto& timedPhone : utterancePhones) { for (const auto& timedPhone : utterancePhones) {
logTimedEvent("rawPhone", timedPhone); logTimedEvent("rawPhone", timedPhone);
} }
// Guess positions of noise sounds // Guess positions of noise sounds
JoiningTimeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones); JoiningTimeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones);
for (const auto& noiseSound : noiseSounds) { for (const auto& noiseSound : noiseSounds) {
utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise); utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise);
} }
// Log phones // Log phones
for (const auto& timedPhone : utterancePhones) { for (const auto& timedPhone : utterancePhones) {
logTimedEvent("phone", timedPhone); logTimedEvent("phone", timedPhone);
} }
utteranceProgressSink.reportProgress(1.0); utteranceProgressSink.reportProgress(1.0);
return utterancePhones; return utterancePhones;
} }
BoundedTimeline<Phone> PhoneticRecognizer::recognizePhones( BoundedTimeline<Phone> PhoneticRecognizer::recognizePhones(
const AudioClip& inputAudioClip, const AudioClip& inputAudioClip,
optional<std::string> dialog, optional<std::string> dialog,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink ProgressSink& progressSink
) const { ) const {
return ::recognizePhones(inputAudioClip, dialog, &createDecoder, &utteranceToPhones, maxThreadCount, progressSink); return ::recognizePhones(inputAudioClip, dialog, &createDecoder, &utteranceToPhones, maxThreadCount, progressSink);
} }

View File

@ -5,10 +5,10 @@
class PhoneticRecognizer : public Recognizer { class PhoneticRecognizer : public Recognizer {
public: public:
BoundedTimeline<Phone> recognizePhones( BoundedTimeline<Phone> recognizePhones(
const AudioClip& inputAudioClip, const AudioClip& inputAudioClip,
boost::optional<std::string> dialog, boost::optional<std::string> dialog,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink ProgressSink& progressSink
) const override; ) const override;
}; };

View File

@ -27,316 +27,316 @@ using boost::optional;
using std::array; using std::array;
bool dictionaryContains(dict_t& dictionary, const string& word) { bool dictionaryContains(dict_t& dictionary, const string& word) {
return dict_wordid(&dictionary, word.c_str()) != BAD_S3WID; return dict_wordid(&dictionary, word.c_str()) != BAD_S3WID;
} }
s3wid_t getWordId(const string& word, dict_t& dictionary) { s3wid_t getWordId(const string& word, dict_t& dictionary) {
const s3wid_t wordId = dict_wordid(&dictionary, word.c_str()); const s3wid_t wordId = dict_wordid(&dictionary, word.c_str());
if (wordId == BAD_S3WID) throw invalid_argument(fmt::format("Unknown word '{}'.", word)); if (wordId == BAD_S3WID) throw invalid_argument(fmt::format("Unknown word '{}'.", word));
return wordId; return wordId;
} }
void addMissingDictionaryWords(const vector<string>& words, ps_decoder_t& decoder) { void addMissingDictionaryWords(const vector<string>& words, ps_decoder_t& decoder) {
map<string, string> missingPronunciations; map<string, string> missingPronunciations;
for (const string& word : words) { for (const string& word : words) {
if (!dictionaryContains(*decoder.dict, word)) { if (!dictionaryContains(*decoder.dict, word)) {
string pronunciation; string pronunciation;
for (Phone phone : wordToPhones(word)) { for (Phone phone : wordToPhones(word)) {
if (pronunciation.length() > 0) pronunciation += " "; if (pronunciation.length() > 0) pronunciation += " ";
pronunciation += PhoneConverter::get().toString(phone); pronunciation += PhoneConverter::get().toString(phone);
} }
missingPronunciations[word] = pronunciation; missingPronunciations[word] = pronunciation;
} }
} }
for (auto it = missingPronunciations.begin(); it != missingPronunciations.end(); ++it) { for (auto it = missingPronunciations.begin(); it != missingPronunciations.end(); ++it) {
const bool isLast = it == --missingPronunciations.end(); const bool isLast = it == --missingPronunciations.end();
logging::infoFormat("Unknown word '{}'. Guessing pronunciation '{}'.", it->first, it->second); logging::infoFormat("Unknown word '{}'. Guessing pronunciation '{}'.", it->first, it->second);
ps_add_word(&decoder, it->first.c_str(), it->second.c_str(), isLast); ps_add_word(&decoder, it->first.c_str(), it->second.c_str(), isLast);
} }
} }
lambda_unique_ptr<ngram_model_t> createDefaultLanguageModel(ps_decoder_t& decoder) { lambda_unique_ptr<ngram_model_t> createDefaultLanguageModel(ps_decoder_t& decoder) {
path modelPath = getSphinxModelDirectory() / "en-us.lm.bin"; path modelPath = getSphinxModelDirectory() / "en-us.lm.bin";
lambda_unique_ptr<ngram_model_t> result( lambda_unique_ptr<ngram_model_t> result(
ngram_model_read(decoder.config, modelPath.u8string().c_str(), NGRAM_AUTO, decoder.lmath), ngram_model_read(decoder.config, modelPath.u8string().c_str(), NGRAM_AUTO, decoder.lmath),
[](ngram_model_t* lm) { ngram_model_free(lm); }); [](ngram_model_t* lm) { ngram_model_free(lm); });
if (!result) { if (!result) {
throw runtime_error(fmt::format("Error reading language model from {}.", modelPath.u8string())); throw runtime_error(fmt::format("Error reading language model from {}.", modelPath.u8string()));
} }
return result; return result;
} }
lambda_unique_ptr<ngram_model_t> createDialogLanguageModel( lambda_unique_ptr<ngram_model_t> createDialogLanguageModel(
ps_decoder_t& decoder, ps_decoder_t& decoder,
const string& dialog const string& dialog
) { ) {
// Split dialog into normalized words // Split dialog into normalized words
vector<string> words = tokenizeText( vector<string> words = tokenizeText(
dialog, dialog,
[&](const string& word) { return dictionaryContains(*decoder.dict, word); } [&](const string& word) { return dictionaryContains(*decoder.dict, word); }
); );
// Add dialog-specific words to the dictionary // Add dialog-specific words to the dictionary
addMissingDictionaryWords(words, decoder); addMissingDictionaryWords(words, decoder);
// Create dialog-specific language model // Create dialog-specific language model
words.insert(words.begin(), "<s>"); words.insert(words.begin(), "<s>");
words.emplace_back("</s>"); words.emplace_back("</s>");
return createLanguageModel(words, decoder); return createLanguageModel(words, decoder);
} }
lambda_unique_ptr<ngram_model_t> createBiasedLanguageModel( lambda_unique_ptr<ngram_model_t> createBiasedLanguageModel(
ps_decoder_t& decoder, ps_decoder_t& decoder,
const string& dialog const string& dialog
) { ) {
auto defaultLanguageModel = createDefaultLanguageModel(decoder); auto defaultLanguageModel = createDefaultLanguageModel(decoder);
auto dialogLanguageModel = createDialogLanguageModel(decoder, dialog); auto dialogLanguageModel = createDialogLanguageModel(decoder, dialog);
constexpr int modelCount = 2; constexpr int modelCount = 2;
array<ngram_model_t*, modelCount> languageModels { array<ngram_model_t*, modelCount> languageModels {
defaultLanguageModel.get(), defaultLanguageModel.get(),
dialogLanguageModel.get() dialogLanguageModel.get()
}; };
array<const char*, modelCount> modelNames { "defaultLM", "dialogLM" }; array<const char*, modelCount> modelNames { "defaultLM", "dialogLM" };
array<float, modelCount> modelWeights { 0.1f, 0.9f }; array<float, modelCount> modelWeights { 0.1f, 0.9f };
lambda_unique_ptr<ngram_model_t> result( lambda_unique_ptr<ngram_model_t> result(
ngram_model_set_init( ngram_model_set_init(
nullptr, nullptr,
languageModels.data(), languageModels.data(),
const_cast<char**>(modelNames.data()), const_cast<char**>(modelNames.data()),
modelWeights.data(), modelWeights.data(),
modelCount modelCount
), ),
[](ngram_model_t* lm) { ngram_model_free(lm); }); [](ngram_model_t* lm) { ngram_model_free(lm); });
if (!result) { if (!result) {
throw runtime_error("Error creating biased language model."); throw runtime_error("Error creating biased language model.");
} }
return result; return result;
} }
static lambda_unique_ptr<ps_decoder_t> createDecoder(optional<std::string> dialog) { static lambda_unique_ptr<ps_decoder_t> createDecoder(optional<std::string> dialog) {
lambda_unique_ptr<cmd_ln_t> config( lambda_unique_ptr<cmd_ln_t> config(
cmd_ln_init( cmd_ln_init(
nullptr, ps_args(), true, nullptr, ps_args(), true,
// Set acoustic model // Set acoustic model
"-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(), "-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(),
// Set pronunciation dictionary // Set pronunciation dictionary
"-dict", (getSphinxModelDirectory() / "cmudict-en-us.dict").u8string().c_str(), "-dict", (getSphinxModelDirectory() / "cmudict-en-us.dict").u8string().c_str(),
// Add noise against zero silence // Add noise against zero silence
// (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor) // (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor)
"-dither", "yes", "-dither", "yes",
// Disable VAD -- we're doing that ourselves // Disable VAD -- we're doing that ourselves
"-remove_silence", "no", "-remove_silence", "no",
// Perform per-utterance cepstral mean normalization // Perform per-utterance cepstral mean normalization
"-cmn", "batch", "-cmn", "batch",
nullptr), nullptr),
[](cmd_ln_t* config) { cmd_ln_free_r(config); }); [](cmd_ln_t* config) { cmd_ln_free_r(config); });
if (!config) throw runtime_error("Error creating configuration."); if (!config) throw runtime_error("Error creating configuration.");
lambda_unique_ptr<ps_decoder_t> decoder( lambda_unique_ptr<ps_decoder_t> decoder(
ps_init(config.get()), ps_init(config.get()),
[](ps_decoder_t* recognizer) { ps_free(recognizer); }); [](ps_decoder_t* recognizer) { ps_free(recognizer); });
if (!decoder) throw runtime_error("Error creating speech decoder."); if (!decoder) throw runtime_error("Error creating speech decoder.");
// Set language model // Set language model
lambda_unique_ptr<ngram_model_t> languageModel(dialog lambda_unique_ptr<ngram_model_t> languageModel(dialog
? createBiasedLanguageModel(*decoder, *dialog) ? createBiasedLanguageModel(*decoder, *dialog)
: createDefaultLanguageModel(*decoder)); : createDefaultLanguageModel(*decoder));
ps_set_lm(decoder.get(), "lm", languageModel.get()); ps_set_lm(decoder.get(), "lm", languageModel.get());
ps_set_search(decoder.get(), "lm"); ps_set_search(decoder.get(), "lm");
return decoder; return decoder;
} }
// Performs forced alignment of the given words' phones against the audio,
// returning a timeline of timed phones.
// Returns boost::none if there are no words or if the alignment search fails.
optional<Timeline<Phone>> getPhoneAlignment(
    const vector<s3wid_t>& wordIds,
    const vector<int16_t>& audioBuffer,
    ps_decoder_t& decoder)
{
    if (wordIds.empty()) return boost::none;

    // Create alignment list
    lambda_unique_ptr<ps_alignment_t> alignment(
        ps_alignment_init(decoder.d2p),
        [](ps_alignment_t* a) { ps_alignment_free(a); });
    if (!alignment) throw runtime_error("Error creating alignment.");
    for (s3wid_t wordId : wordIds) {
        // Add word. Initial value for duration is ignored.
        ps_alignment_add_word(alignment.get(), wordId, 0);
    }
    int error = ps_alignment_populate(alignment.get());
    if (error) throw runtime_error("Error populating alignment struct.");

    // Create search structure
    acmod_t* acousticModel = decoder.acmod;
    lambda_unique_ptr<ps_search_t> search(
        state_align_search_init("state_align", decoder.config, acousticModel, alignment.get()),
        [](ps_search_t* s) { ps_search_free(s); });
    if (!search) throw runtime_error("Error creating search.");

    // Start recognition
    error = acmod_start_utt(acousticModel);
    if (error) throw runtime_error("Error starting utterance processing for alignment.");
    {
        // Eventually end recognition
        auto endRecognition = gsl::finally([&]() { acmod_end_utt(acousticModel); });

        // Start search
        ps_search_start(search.get());

        // Process entire audio clip
        const int16* nextSample = audioBuffer.data();
        size_t remainingSamples = audioBuffer.size();
        const bool fullUtterance = true;
        while (acmod_process_raw(acousticModel, &nextSample, &remainingSamples, fullUtterance) > 0) {
            while (acousticModel->n_feat_frame > 0) {
                ps_search_step(search.get(), acousticModel->output_frame);
                acmod_advance(acousticModel);
            }
        }

        // End search
        error = ps_search_finish(search.get());
        if (error) return boost::none;
    }

    // Extract phones with timestamps
    char** phoneNames = decoder.dict->mdef->ciname;
    Timeline<Phone> result;
    for (
        ps_alignment_iter_t* it = ps_alignment_phones(alignment.get());
        it;
        it = ps_alignment_iter_next(it)
    ) {
        // Get phone
        ps_alignment_entry_t* phoneEntry = ps_alignment_iter_get(it);
        const s3cipid_t phoneId = phoneEntry->id.pid.cipid;
        string phoneName = phoneNames[phoneId];
        if (phoneName == "SIL") continue;

        // Add entry
        centiseconds start(phoneEntry->start);
        centiseconds duration(phoneEntry->duration);
        Phone phone = PhoneConverter::get().parse(phoneName);
        if (phone == Phone::AH && duration < 6_cs) {
            // Heuristic: < 6_cs is schwa. PocketSphinx doesn't differentiate.
            phone = Phone::Schwa;
        }
        const Timed<Phone> timedPhone(start, start + duration, phone);
        result.set(timedPhone);
    }
    return result;
}
// Some words have multiple pronunciations, one of which results in better animation than the others.
// This function returns the optimal pronunciation for a select set of these words;
// any other word is returned unchanged.
std::string fixPronunciation(const std::string& word) {
    // Maps a dictionary pronunciation variant (e.g. "to(2)") to the preferred variant.
    static const std::map<std::string, std::string> preferredVariants {
        { "into(2)", "into" },
        { "to(2)", "to" },
        { "to(3)", "to" },
        { "today(2)", "today" },
        { "tomorrow(2)", "tomorrow" },
        { "tonight(2)", "tonight" }
    };

    const auto match = preferredVariants.find(word);
    if (match == preferredVariants.end()) return word;
    return match->second;
}
// Converts a single utterance (a contiguous stretch of detected speech) into a timeline
// of phones by running PocketSphinx word recognition followed by forced phone alignment.
static Timeline<Phone> utteranceToPhones(
    const AudioClip& audioClip,
    TimeRange utteranceTimeRange,
    ps_decoder_t& decoder,
    ProgressSink& utteranceProgressSink
) {
    ProgressMerger utteranceProgressMerger(utteranceProgressSink);
    ProgressSink& wordRecognitionProgressSink =
        utteranceProgressMerger.addSource("word recognition (PocketSphinx recognizer)", 1.0);
    ProgressSink& alignmentProgressSink =
        utteranceProgressMerger.addSource("alignment (PocketSphinx recognizer)", 0.5);

    // Pad time range to give PocketSphinx some breathing room
    TimeRange paddedTimeRange = utteranceTimeRange;
    const centiseconds padding(3);
    paddedTimeRange.grow(padding);
    paddedTimeRange.trim(audioClip.getTruncatedRange());

    const unique_ptr<AudioClip> clipSegment = audioClip.clone()
        | segment(paddedTimeRange)
        | resample(sphinxSampleRate);
    const auto audioBuffer = copyTo16bitBuffer(*clipSegment);

    // Get words
    BoundedTimeline<string> words = recognizeWords(audioBuffer, decoder);
    wordRecognitionProgressSink.reportProgress(1.0);

    // Log utterance text, skipping sentence/silence markers and pronunciation suffixes
    string text;
    for (auto& timedWord : words) {
        string word = timedWord.getValue();
        if (word == "<s>" || word == "</s>" || word == "<sil>") {
            continue;
        }
        word = regex_replace(word, regex("\\(\\d\\)"), "");
        if (!text.empty()) {
            text += " ";
        }
        text += word;
    }
    logTimedEvent("utterance", utteranceTimeRange, text);

    // Log words
    for (Timed<string> timedWord : words) {
        timedWord.getTimeRange().shift(paddedTimeRange.getStart());
        logTimedEvent("word", timedWord);
    }

    // Convert word strings to word IDs using dictionary
    vector<s3wid_t> wordIds;
    for (const auto& timedWord : words) {
        const string fixedWord = fixPronunciation(timedWord.getValue());
        wordIds.push_back(getWordId(fixedWord, *decoder.dict));
    }

    // Align the words' phones with speech
#if BOOST_VERSION < 105600 // Support legacy syntax
    #define value_or get_value_or
#endif
    Timeline<Phone> utterancePhones = getPhoneAlignment(wordIds, audioBuffer, decoder)
        .value_or(ContinuousTimeline<Phone>(clipSegment->getTruncatedRange(), Phone::Noise));
    alignmentProgressSink.reportProgress(1.0);
    utterancePhones.shift(paddedTimeRange.getStart());

    // Log raw phones
    for (const auto& timedPhone : utterancePhones) {
        logTimedEvent("rawPhone", timedPhone);
    }

    // Guess positions of noise sounds
    JoiningTimeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones);
    for (const auto& noiseSound : noiseSounds) {
        utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise);
    }

    // Log phones
    for (const auto& timedPhone : utterancePhones) {
        logTimedEvent("phone", timedPhone);
    }
    return utterancePhones;
}
// Recognizes timed phones in the audio clip by delegating to the shared recognition
// pipeline, supplying the PocketSphinx-specific decoder factory and utterance handler.
BoundedTimeline<Phone> PocketSphinxRecognizer::recognizePhones(
    const AudioClip& inputAudioClip,
    optional<std::string> dialog,
    int maxThreadCount,
    ProgressSink& progressSink
) const {
    return ::recognizePhones(
        inputAudioClip, dialog, &createDecoder, &utteranceToPhones, maxThreadCount, progressSink);
}

View File

@ -5,10 +5,10 @@
class PocketSphinxRecognizer : public Recognizer { class PocketSphinxRecognizer : public Recognizer {
public: public:
BoundedTimeline<Phone> recognizePhones( BoundedTimeline<Phone> recognizePhones(
const AudioClip& inputAudioClip, const AudioClip& inputAudioClip,
boost::optional<std::string> dialog, boost::optional<std::string> dialog,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink ProgressSink& progressSink
) const override; ) const override;
}; };

View File

@ -7,12 +7,12 @@
class Recognizer { class Recognizer {
public: public:
virtual ~Recognizer() = default; virtual ~Recognizer() = default;
virtual BoundedTimeline<Phone> recognizePhones( virtual BoundedTimeline<Phone> recognizePhones(
const AudioClip& audioClip, const AudioClip& audioClip,
boost::optional<std::string> dialog, boost::optional<std::string> dialog,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink ProgressSink& progressSink
) const = 0; ) const = 0;
}; };

View File

@ -11,101 +11,101 @@ using std::invalid_argument;
using std::pair; using std::pair;
// Returns the ordered regex replacement rules that rewrite an English word into its
// phonetic shorthand spelling. The list is built once and cached for reuse.
const vector<pair<wregex, wstring>>& getReplacementRules() {
    static vector<pair<wregex, wstring>> rules {
        #include "g2pRules.cpp"

        // Turn bigrams into unigrams for easier conversion
        { wregex(L"ôw"), L"Ω" },
        { wregex(L"öy"), L"ω" },
        { wregex(L"@r"), L"ɝ" }
    };
    return rules;
}
// Maps a single phonetic shorthand character to the corresponding phone.
// Returns Phone::Noise for characters that are not recognized shorthand.
// For reference, see http://www.zompist.com/spell.html
Phone charToPhone(wchar_t c) {
    switch (c) {
        case L'ä': return Phone::EY;
        case L'â': return Phone::AE;
        case L'ë': return Phone::IY;
        case L'ê': return Phone::EH;
        case L'ï': return Phone::AY;
        case L'î': return Phone::IH;
        case L'ö': return Phone::OW;
        case L'ô': return Phone::AA; // could also be AO/AH
        case L'ü': return Phone::UW; // really Y+UW
        case L'û': return Phone::AH; // [ʌ] as in b[u]t
        case L'u': return Phone::UW;
        case L'ò': return Phone::AO;
        case L'ù': return Phone::UH;
        case L'@': return Phone::AH; // [ə] as in [a]lone
        case L'Ω': return Phone::AW;
        case L'ω': return Phone::OY;
        case L'y': return Phone::Y;
        case L'w': return Phone::W;
        case L'ɝ': return Phone::ER;
        case L'p': return Phone::P;
        case L'b': return Phone::B;
        case L't': return Phone::T;
        case L'd': return Phone::D;
        case L'g': return Phone::G;
        case L'k': return Phone::K;
        case L'm': return Phone::M;
        case L'n': return Phone::N;
        case L'ñ': return Phone::NG;
        case L'f': return Phone::F;
        case L'v': return Phone::V;
        case L'+': return Phone::TH; // also covers DH
        case L's': return Phone::S;
        case L'z': return Phone::Z;
        case L'$': return Phone::SH; // also covers ZH
        case L'ç': return Phone::CH;
        case L'j': return Phone::JH;
        case L'r': return Phone::R;
        case L'l': return Phone::L;
        case L'h': return Phone::HH;
        default:
            return Phone::Noise;
    }
}
// Converts an English word to a sequence of phones via rule-based grapheme-to-phoneme
// conversion. Throws std::invalid_argument if the word contains characters other than
// lowercase a-z and apostrophes.
vector<Phone> wordToPhones(const std::string& word) {
    static regex validWord("^[a-z']*$");
    if (!regex_match(word, validWord)) {
        throw invalid_argument(fmt::format("Word '{}' contains illegal characters.", word));
    }

    wstring wideWord = latin1ToWide(word);
    for (const auto& rule : getReplacementRules()) {
        const wregex& regex = rule.first;
        const wstring& replacement = rule.second;

        // Repeatedly apply rule until there is no more change
        bool changed;
        do {
            wstring replaced = regex_replace(wideWord, regex, replacement);
            changed = replaced != wideWord;
            wideWord = replaced;
        } while (changed);
    }

    // Remove duplicate phones
    vector<Phone> result;
    Phone lastPhone = Phone::Noise;
    for (wchar_t c : wideWord) {
        Phone phone = charToPhone(c);
        if (phone == Phone::Noise) {
            logging::errorFormat(
                "G2P error determining pronunciation for '{}': Character '{}' is not a recognized phone shorthand.",
                word,
                static_cast<char>(c)
            );
        }

        if (phone != lastPhone) {
            result.push_back(phone);
        }
        lastPhone = phone;
    }
    return result;
}

View File

@ -24,178 +24,178 @@ using Bigram = tuple<string, string>;
using Trigram = tuple<string, string, string>; using Trigram = tuple<string, string, string>;
// Counts how often each word occurs in the given word sequence.
std::map<std::string, int> getUnigramCounts(const std::vector<std::string>& words) {
    std::map<std::string, int> counts;
    for (const std::string& word : words) {
        ++counts[word];
    }
    return counts;
}
// Counts how often each ordered pair of adjacent words occurs in the given word sequence.
// Returns an empty map for sequences with fewer than two words.
std::map<std::tuple<std::string, std::string>, int> getBigramCounts(const std::vector<std::string>& words) {
    std::map<std::tuple<std::string, std::string>, int> bigramCounts;
    // Fix: the previous iterator-based loop computed `words.end() - 1`, which is
    // undefined behavior for an empty vector. An index bound of `i + 1 < size`
    // is safe for all sizes (size_t arithmetic cannot underflow here).
    for (size_t i = 0; i + 1 < words.size(); ++i) {
        ++bigramCounts[std::tuple<std::string, std::string>(words[i], words[i + 1])];
    }
    return bigramCounts;
}
// Counts how often each ordered triple of adjacent words occurs in the given word sequence.
// Returns an empty map for sequences with fewer than three words.
std::map<std::tuple<std::string, std::string, std::string>, int> getTrigramCounts(
    const std::vector<std::string>& words
) {
    std::map<std::tuple<std::string, std::string, std::string>, int> trigramCounts;
    if (words.size() >= 3) {
        for (size_t i = 0; i + 2 < words.size(); ++i) {
            ++trigramCounts[std::make_tuple(words[i], words[i + 1], words[i + 2])];
        }
    }
    return trigramCounts;
}
// Computes the deflated maximum-likelihood probability of each unigram:
// count / total word count, scaled by `deflator` to reserve mass for backoff.
std::map<std::string, double> getUnigramProbabilities(
    const std::vector<std::string>& words,
    const std::map<std::string, int>& unigramCounts,
    const double deflator
) {
    std::map<std::string, double> probabilities;
    for (const auto& [unigram, count] : unigramCounts) {
        probabilities[unigram] = static_cast<double>(count) / words.size() * deflator;
    }
    return probabilities;
}
// Computes the deflated conditional probability of each bigram:
// bigram count / count of its first word, scaled by `deflator`.
std::map<std::tuple<std::string, std::string>, double> getBigramProbabilities(
    const std::map<std::string, int>& unigramCounts,
    const std::map<std::tuple<std::string, std::string>, int>& bigramCounts,
    const double deflator
) {
    std::map<std::tuple<std::string, std::string>, double> probabilities;
    for (const auto& [bigram, count] : bigramCounts) {
        const int prefixCount = unigramCounts.at(std::get<0>(bigram));
        probabilities[bigram] = static_cast<double>(count) / prefixCount * deflator;
    }
    return probabilities;
}
// Computes the deflated conditional probability of each trigram:
// trigram count / count of its leading bigram, scaled by `deflator`.
std::map<std::tuple<std::string, std::string, std::string>, double> getTrigramProbabilities(
    const std::map<std::tuple<std::string, std::string>, int>& bigramCounts,
    const std::map<std::tuple<std::string, std::string, std::string>, int>& trigramCounts,
    const double deflator
) {
    std::map<std::tuple<std::string, std::string, std::string>, double> probabilities;
    for (const auto& [trigram, count] : trigramCounts) {
        const int prefixCount =
            bigramCounts.at(std::make_tuple(std::get<0>(trigram), std::get<1>(trigram)));
        probabilities[trigram] = static_cast<double>(count) / prefixCount * deflator;
    }
    return probabilities;
}
// Computes the backoff weight for each unigram: the discount mass divided by one minus
// the summed unigram probability of every word that follows it in some observed bigram.
std::map<std::string, double> getUnigramBackoffWeights(
    const std::map<std::string, int>& unigramCounts,
    const std::map<std::string, double>& unigramProbabilities,
    const std::map<std::tuple<std::string, std::string>, int>& bigramCounts,
    const double discountMass)
{
    std::map<std::string, double> backoffWeights;
    for (const auto& unigramEntry : unigramCounts) {
        const std::string& unigram = unigramEntry.first;
        double denominator = 1;
        for (const auto& bigramEntry : bigramCounts) {
            const std::tuple<std::string, std::string>& bigram = bigramEntry.first;
            if (std::get<0>(bigram) == unigram) {
                denominator -= unigramProbabilities.at(std::get<1>(bigram));
            }
        }
        backoffWeights[unigram] = discountMass / denominator;
    }
    return backoffWeights;
}
// Computes the backoff weight for each bigram: the discount mass divided by one minus
// the summed bigram probability of every bigram continuing it in some observed trigram.
std::map<std::tuple<std::string, std::string>, double> getBigramBackoffWeights(
    const std::map<std::tuple<std::string, std::string>, int>& bigramCounts,
    const std::map<std::tuple<std::string, std::string>, double>& bigramProbabilities,
    const std::map<std::tuple<std::string, std::string, std::string>, int>& trigramCounts,
    const double discountMass)
{
    std::map<std::tuple<std::string, std::string>, double> backoffWeights;
    for (const auto& bigramEntry : bigramCounts) {
        const std::tuple<std::string, std::string>& bigram = bigramEntry.first;
        double denominator = 1;
        for (const auto& trigramEntry : trigramCounts) {
            const std::tuple<std::string, std::string, std::string>& trigram = trigramEntry.first;
            if (std::make_tuple(std::get<0>(trigram), std::get<1>(trigram)) == bigram) {
                denominator -= bigramProbabilities.at(
                    std::make_tuple(std::get<1>(trigram), std::get<2>(trigram)));
            }
        }
        backoffWeights[bigram] = discountMass / denominator;
    }
    return backoffWeights;
}
void createLanguageModelFile(const vector<string>& words, const path& filePath) { void createLanguageModelFile(const vector<string>& words, const path& filePath) {
const double discountMass = 0.5; const double discountMass = 0.5;
const double deflator = 1.0 - discountMass; const double deflator = 1.0 - discountMass;
map<Unigram, int> unigramCounts = getUnigramCounts(words); map<Unigram, int> unigramCounts = getUnigramCounts(words);
map<Bigram, int> bigramCounts = getBigramCounts(words); map<Bigram, int> bigramCounts = getBigramCounts(words);
map<Trigram, int> trigramCounts = getTrigramCounts(words); map<Trigram, int> trigramCounts = getTrigramCounts(words);
map<Unigram, double> unigramProbabilities = map<Unigram, double> unigramProbabilities =
getUnigramProbabilities(words, unigramCounts, deflator); getUnigramProbabilities(words, unigramCounts, deflator);
map<Bigram, double> bigramProbabilities = map<Bigram, double> bigramProbabilities =
getBigramProbabilities(unigramCounts, bigramCounts, deflator); getBigramProbabilities(unigramCounts, bigramCounts, deflator);
map<Trigram, double> trigramProbabilities = map<Trigram, double> trigramProbabilities =
getTrigramProbabilities(bigramCounts, trigramCounts, deflator); getTrigramProbabilities(bigramCounts, trigramCounts, deflator);
map<Unigram, double> unigramBackoffWeights = map<Unigram, double> unigramBackoffWeights =
getUnigramBackoffWeights(unigramCounts, unigramProbabilities, bigramCounts, discountMass); getUnigramBackoffWeights(unigramCounts, unigramProbabilities, bigramCounts, discountMass);
map<Bigram, double> bigramBackoffWeights = map<Bigram, double> bigramBackoffWeights =
getBigramBackoffWeights(bigramCounts, bigramProbabilities, trigramCounts, discountMass); getBigramBackoffWeights(bigramCounts, bigramProbabilities, trigramCounts, discountMass);
std::ofstream file(filePath); std::ofstream file(filePath);
file << "Generated by " << appName << " " << appVersion << endl << endl; file << "Generated by " << appName << " " << appVersion << endl << endl;
file << "\\data\\" << endl; file << "\\data\\" << endl;
file << "ngram 1=" << unigramCounts.size() << endl; file << "ngram 1=" << unigramCounts.size() << endl;
file << "ngram 2=" << bigramCounts.size() << endl; file << "ngram 2=" << bigramCounts.size() << endl;
file << "ngram 3=" << trigramCounts.size() << endl << endl; file << "ngram 3=" << trigramCounts.size() << endl << endl;
file.setf(std::ios::fixed, std::ios::floatfield); file.setf(std::ios::fixed, std::ios::floatfield);
file.precision(4); file.precision(4);
file << "\\1-grams:" << endl; file << "\\1-grams:" << endl;
for (const Unigram& unigram : unigramCounts | boost::adaptors::map_keys) { for (const Unigram& unigram : unigramCounts | boost::adaptors::map_keys) {
file << log10(unigramProbabilities.at(unigram)) file << log10(unigramProbabilities.at(unigram))
<< " " << unigram << " " << unigram
<< " " << log10(unigramBackoffWeights.at(unigram)) << endl; << " " << log10(unigramBackoffWeights.at(unigram)) << endl;
} }
file << endl; file << endl;
file << "\\2-grams:" << endl; file << "\\2-grams:" << endl;
for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) { for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) {
file << log10(bigramProbabilities.at(bigram)) file << log10(bigramProbabilities.at(bigram))
<< " " << get<0>(bigram) << " " << get<1>(bigram) << " " << get<0>(bigram) << " " << get<1>(bigram)
<< " " << log10(bigramBackoffWeights.at(bigram)) << endl; << " " << log10(bigramBackoffWeights.at(bigram)) << endl;
} }
file << endl; file << endl;
file << "\\3-grams:" << endl; file << "\\3-grams:" << endl;
for (const Trigram& trigram : trigramCounts | boost::adaptors::map_keys) { for (const Trigram& trigram : trigramCounts | boost::adaptors::map_keys) {
file << log10(trigramProbabilities.at(trigram)) file << log10(trigramProbabilities.at(trigram))
<< " " << get<0>(trigram) << " " << get<1>(trigram) << " " << get<2>(trigram) << endl; << " " << get<0>(trigram) << " " << get<1>(trigram) << " " << get<2>(trigram) << endl;
} }
file << endl; file << endl;
file << "\\end\\" << endl; file << "\\end\\" << endl;
} }
// Builds a PocketSphinx n-gram language model from the given word sequence.
// The model is written to a temporary ARPA file, loaded into memory, and the
// temporary file is deleted before returning.
lambda_unique_ptr<ngram_model_t> createLanguageModel(
    const vector<string>& words,
    ps_decoder_t& decoder
) {
    const path tempFilePath = getTempFilePath();
    createLanguageModelFile(words, tempFilePath);
    const auto deleteTempFile = gsl::finally([&]() { std::filesystem::remove(tempFilePath); });

    return lambda_unique_ptr<ngram_model_t>(
        ngram_model_read(decoder.config, tempFilePath.u8string().c_str(), NGRAM_ARPA, decoder.lmath),
        [](ngram_model_t* lm) { ngram_model_free(lm); });
}

View File

@ -9,6 +9,6 @@ extern "C" {
} }
// Builds a PocketSphinx n-gram language model from the given word sequence.
lambda_unique_ptr<ngram_model_t> createLanguageModel(
    const std::vector<std::string>& words,
    ps_decoder_t& decoder
);

View File

@ -23,223 +23,223 @@ using std::filesystem::path;
using std::regex; using std::regex;
using boost::optional; using boost::optional;
using std::chrono::duration_cast; using std::chrono::duration_cast;
// Maps a PocketSphinx error level to the corresponding application log level.
// Throws std::invalid_argument for unknown levels.
logging::Level convertSphinxErrorLevel(err_lvl_t errorLevel) {
    switch (errorLevel) {
        // All informational PocketSphinx output is only relevant for tracing
        case ERR_DEBUG:
        case ERR_INFO:
        case ERR_INFOCONT:
            return logging::Level::Trace;
        case ERR_WARN:
            return logging::Level::Warn;
        case ERR_ERROR:
            return logging::Level::Error;
        case ERR_FATAL:
            return logging::Level::Fatal;
        default:
            throw invalid_argument("Unknown log level.");
    }
}
void sphinxLogCallback(void* user_data, err_lvl_t errorLevel, const char* format, ...) { void sphinxLogCallback(void* user_data, err_lvl_t errorLevel, const char* format, ...) {
UNUSED(user_data); UNUSED(user_data);
// Create varArgs list // Create varArgs list
va_list args; va_list args;
va_start(args, format); va_start(args, format);
auto _ = gsl::finally([&args]() { va_end(args); }); auto _ = gsl::finally([&args]() { va_end(args); });
// Format message // Format message
const int initialSize = 256; const int initialSize = 256;
vector<char> chars(initialSize); vector<char> chars(initialSize);
bool success = false; bool success = false;
while (!success) { while (!success) {
const int charsWritten = vsnprintf(chars.data(), chars.size(), format, args); const int charsWritten = vsnprintf(chars.data(), chars.size(), format, args);
if (charsWritten < 0) throw runtime_error("Error formatting PocketSphinx log message."); if (charsWritten < 0) throw runtime_error("Error formatting PocketSphinx log message.");
success = charsWritten < static_cast<int>(chars.size()); success = charsWritten < static_cast<int>(chars.size());
if (!success) chars.resize(chars.size() * 2); if (!success) chars.resize(chars.size() * 2);
} }
const regex waste("^(DEBUG|INFO|INFOCONT|WARN|ERROR|FATAL): "); const regex waste("^(DEBUG|INFO|INFOCONT|WARN|ERROR|FATAL): ");
string message = string message =
std::regex_replace(chars.data(), waste, "", std::regex_constants::format_first_only); std::regex_replace(chars.data(), waste, "", std::regex_constants::format_first_only);
boost::algorithm::trim(message); boost::algorithm::trim(message);
const logging::Level logLevel = convertSphinxErrorLevel(errorLevel); const logging::Level logLevel = convertSphinxErrorLevel(errorLevel);
logging::log(logLevel, message); logging::log(logLevel, message);
} }
void redirectPocketSphinxOutput() { void redirectPocketSphinxOutput() {
static bool redirected = false; static bool redirected = false;
if (redirected) return; if (redirected) return;
// Discard PocketSphinx output // Discard PocketSphinx output
err_set_logfp(nullptr); err_set_logfp(nullptr);
// Redirect PocketSphinx output to log // Redirect PocketSphinx output to log
err_set_callback(sphinxLogCallback, nullptr); err_set_callback(sphinxLogCallback, nullptr);
redirected = true; redirected = true;
} }
// Performs phonetic speech recognition on an audio clip.
//
// The clip is first cleaned of any DC offset, then segmented into utterances
// via voice activity detection. Each utterance is decoded into phones on a
// pool of worker threads, each owning its own PocketSphinx decoder created by
// createDecoder. The per-utterance results are merged into a single timeline.
//
// inputAudioClip:    the audio to analyze
// dialog:            optional dialog text used to prime the decoders
// createDecoder:     factory producing one decoder per worker thread
// utteranceToPhones: converts a single utterance into a phone timeline
// maxThreadCount:    upper bound on worker threads
// progressSink:      receives overall progress
BoundedTimeline<Phone> recognizePhones(
    const AudioClip& inputAudioClip,
    optional<std::string> dialog,
    decoderFactory createDecoder,
    utteranceToPhonesFunction utteranceToPhones,
    int maxThreadCount,
    ProgressSink& progressSink
) {
    ProgressMerger totalProgressMerger(progressSink);
    ProgressSink& voiceActivationProgressSink =
        totalProgressMerger.addSource("VAD (PocketSphinx tools)", 1.0);
    ProgressSink& dialogProgressSink =
        totalProgressMerger.addSource("recognition (PocketSphinx tools)", 15.0);

    // Make sure audio stream has no DC offset
    const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | removeDcOffset();

    // Split audio into utterances
    JoiningBoundedTimeline<void> utterances;
    try {
        utterances = detectVoiceActivity(*audioClip, voiceActivationProgressSink);
    } catch (...) {
        std::throw_with_nested(runtime_error("Error detecting segments of speech."));
    }

    redirectPocketSphinxOutput();

    // Prepare pool of decoders; each worker thread acquires one on demand
    ObjectPool<ps_decoder_t, lambda_unique_ptr<ps_decoder_t>> decoderPool(
        [&] { return createDecoder(dialog); });

    BoundedTimeline<Phone> phones(audioClip->getTruncatedRange());
    std::mutex resultMutex;
    const auto recognizeUtterance = [&](Timed<void> timedUtterance, ProgressSink& utteranceProgressSink) {
        // Detect phones for utterance
        const auto decoder = decoderPool.acquire();
        Timeline<Phone> utterancePhones = utteranceToPhones(
            *audioClip,
            timedUtterance.getTimeRange(),
            *decoder,
            utteranceProgressSink
        );

        // Copy phones to result timeline (guarded: workers run concurrently)
        std::lock_guard<std::mutex> lock(resultMutex);
        for (const auto& timedPhone : utterancePhones) {
            phones.set(timedPhone);
        }
    };

    // Weight each utterance's progress contribution by its duration
    const auto utteranceProgressWeight = [](const Timed<void> timedUtterance) {
        return timedUtterance.getDuration().count();
    };

    // Perform speech recognition
    try {
        // Determine how many parallel threads to use:
        // - no more threads than utterances to process
        // - don't waste time creating threads (and decoders!) for short recordings
        int threadCount = std::min({
            maxThreadCount,
            static_cast<int>(utterances.size()),
            static_cast<int>(
                duration_cast<std::chrono::seconds>(audioClip->getTruncatedRange().getDuration()).count() / 5
            )
        });
        if (threadCount < 1) {
            threadCount = 1;
        }
        logging::debugFormat("Speech recognition using {} threads -- start", threadCount);
        runParallel(
            "speech recognition (PocketSphinx tools)",
            recognizeUtterance,
            utterances,
            threadCount,
            dialogProgressSink,
            utteranceProgressWeight
        );
        logging::debug("Speech recognition -- end");
    } catch (...) {
        std::throw_with_nested(runtime_error("Error performing speech recognition via PocketSphinx tools."));
    }

    return phones;
}
// Returns the directory containing the bundled PocketSphinx model files,
// located relative to the application binary. Computed once, then cached.
const path& getSphinxModelDirectory() {
    static path sphinxModelDirectory(getBinDirectory() / "res" / "sphinx");
    return sphinxModelDirectory;
}
// Determines the segments of an utterance that contain sound but no
// recognized phones ("noise"). Segments shorter than a minimum duration, or
// starting at time zero, are discarded.
JoiningTimeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone>& phones) {
    JoiningTimeline<void> noiseSounds;

    // Start with the full utterance, then punch out every recognized phone
    noiseSounds.set(utteranceTimeRange);
    for (const auto& timedPhone : phones) {
        noiseSounds.clear(timedPhone.getTimeRange());
    }

    // Remove undesired elements. Iterate over a copy since we mutate the original.
    const centiseconds minSoundDuration = 12_cs;
    for (const auto& unknownSound : JoiningTimeline<void>(noiseSounds)) {
        const bool startsAtZero = unknownSound.getStart() == 0_cs;
        const bool tooShort = unknownSound.getDuration() < minSoundDuration;
        if (startsAtZero || tooShort) {
            noiseSounds.clear(unknownSound.getTimeRange());
        }
    }

    return noiseSounds;
}
// Runs PocketSphinx word recognition over an entire audio buffer and returns
// the recognized words as a timeline, with times in centiseconds from the
// start of the buffer. The buffer must be 16-bit mono at sphinxSampleRate.
BoundedTimeline<string> recognizeWords(const vector<int16_t>& audioBuffer, ps_decoder_t& decoder) {
    // Restart timing at 0
    ps_start_stream(&decoder);

    // Start recognition
    int error = ps_start_utt(&decoder);
    if (error) throw runtime_error("Error starting utterance processing for word recognition.");

    // Process entire audio clip in one call
    const bool noRecognition = false;
    const bool fullUtterance = true;
    const int searchedFrameCount =
        ps_process_raw(&decoder, audioBuffer.data(), audioBuffer.size(), noRecognition, fullUtterance);
    if (searchedFrameCount < 0) {
        throw runtime_error("Error analyzing raw audio data for word recognition.");
    }

    // End recognition
    error = ps_end_utt(&decoder);
    if (error) throw runtime_error("Error ending utterance processing for word recognition.");

    BoundedTimeline<string> result(
        TimeRange(0_cs, centiseconds(100 * audioBuffer.size() / sphinxSampleRate))
    );
    const bool phonetic = cmd_ln_boolean_r(decoder.config, "-allphone_ci");
    if (!phonetic) {
        // If the decoder is in word mode (as opposed to phonetic recognition), it expects each
        // utterance to contain speech. If it doesn't, ps_seg_word() logs the annoying error
        // "Couldn't find <s> in first frame".
        // Not every utterance does contain speech, however. In this case, we exit early to prevent
        // the log output.
        // We *don't* do that in phonetic mode because here, the same code would omit valid phones.
        const bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
        if (noWordsRecognized) {
            return result;
        }
    }

    // Collect words from the segment iterator; frames are centisecond-sized
    for (ps_seg_t* it = ps_seg_iter(&decoder); it; it = ps_seg_next(it)) {
        const char* word = ps_seg_word(it);
        int firstFrame, lastFrame;
        ps_seg_frames(it, &firstFrame, &lastFrame);
        result.set(centiseconds(firstFrame), centiseconds(lastFrame + 1), word);
    }

    return result;
}

View File

@ -11,23 +11,23 @@ extern "C" {
} }
typedef std::function<lambda_unique_ptr<ps_decoder_t>( typedef std::function<lambda_unique_ptr<ps_decoder_t>(
boost::optional<std::string> dialog boost::optional<std::string> dialog
)> decoderFactory; )> decoderFactory;
typedef std::function<Timeline<Phone>( typedef std::function<Timeline<Phone>(
const AudioClip& audioClip, const AudioClip& audioClip,
TimeRange utteranceTimeRange, TimeRange utteranceTimeRange,
ps_decoder_t& decoder, ps_decoder_t& decoder,
ProgressSink& utteranceProgressSink ProgressSink& utteranceProgressSink
)> utteranceToPhonesFunction; )> utteranceToPhonesFunction;
BoundedTimeline<Phone> recognizePhones( BoundedTimeline<Phone> recognizePhones(
const AudioClip& inputAudioClip, const AudioClip& inputAudioClip,
boost::optional<std::string> dialog, boost::optional<std::string> dialog,
decoderFactory createDecoder, decoderFactory createDecoder,
utteranceToPhonesFunction utteranceToPhones, utteranceToPhonesFunction utteranceToPhones,
int maxThreadCount, int maxThreadCount,
ProgressSink& progressSink ProgressSink& progressSink
); );
constexpr int sphinxSampleRate = 16000; constexpr int sphinxSampleRate = 16000;
@ -37,6 +37,6 @@ const std::filesystem::path& getSphinxModelDirectory();
// Determines the segments of an utterance that contain sound but no recognized phones.
JoiningTimeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone>& phones);

// Runs word recognition over a complete 16-bit audio buffer using the given decoder.
BoundedTimeline<std::string> recognizeWords(
    const std::vector<int16_t>& audioBuffer,
    ps_decoder_t& decoder
);

View File

@ -19,117 +19,117 @@ using boost::optional;
using std::function;

// Creates a minimal US-English Flite voice. The voice cannot synthesize
// audio; it only supplies the lexicon and text-analysis features required
// for tokenization and text normalization.
lambda_unique_ptr<cst_voice> createDummyVoice() {
    lambda_unique_ptr<cst_voice> voice(new_voice(), [](cst_voice* voice) { delete_voice(voice); });
    voice->name = "dummy_voice";
    usenglish_init(voice.get());
    cst_lexicon* lexicon = cmu_lex_init();
    feat_set(voice->features, "lexicon", lexicon_val(lexicon));
    return voice;
}
// Truncated Flite synthesis pipeline: tokenization and text analysis only,
// no audio synthesis.
static const cst_synth_module synth_method_normalize[] = {
    { "tokenizer_func", default_tokenization },    // split text into tokens
    { "textanalysis_func", default_textanalysis }, // transform tokens into words
    { nullptr, nullptr }
};
// Uses Flite to split text into normalized word tokens.
// Throws std::runtime_error if Flite fails to normalize the text.
vector<string> tokenizeViaFlite(const string& text) {
    // Flite works on ASCII only
    const string asciiText = utf8ToAscii(text);

    // Create utterance object with text
    lambda_unique_ptr<cst_utterance> utterance(
        new_utterance(),
        [](cst_utterance* utterance) { delete_utterance(utterance); }
    );
    utt_set_input_text(utterance.get(), asciiText.c_str());
    lambda_unique_ptr<cst_voice> voice = createDummyVoice();
    utt_init(utterance.get(), voice.get());

    // Perform tokenization and text normalization
    if (!apply_synth_method(utterance.get(), synth_method_normalize)) {
        throw runtime_error("Error normalizing text using Flite.");
    }

    // Walk the resulting "Word" relation and collect the token names
    vector<string> words;
    for (
        cst_item* item = relation_head(utt_relation(utterance.get(), "Word"));
        item;
        item = item_next(item)
    ) {
        const char* word = item_feat_string(item, "name");
        words.emplace_back(word);
    }
    return words;
}
// Attempts to turn a word that is missing from the dictionary into one the
// dictionary contains, by inserting an apostrophe at every possible position
// and/or appending a period. Returns the first matching variant, or none.
optional<string> findSimilarDictionaryWord(
    const string& word,
    const function<bool(const string&)>& dictionaryContains
) {
    for (bool addPeriod : { false, true }) {
        // apostropheIndex == -1 means "no apostrophe inserted"
        for (int apostropheIndex = -1; apostropheIndex <= static_cast<int>(word.size()); ++apostropheIndex) {
            string candidate = word;
            if (apostropheIndex != -1) {
                candidate.insert(apostropheIndex, "'");
            }
            if (addPeriod) {
                candidate += ".";
            }
            if (dictionaryContains(candidate)) {
                return candidate;
            }
        }
    }

    return boost::none;
}
// Splits text into lowercase word tokens suitable for dictionary lookup.
// Tokenization is delegated to Flite; afterwards, apostrophe fragments are
// re-joined, selected symbols are spelled out, non-letter characters are
// stripped, and out-of-dictionary words are replaced with similar in-
// dictionary variants where possible.
vector<string> tokenizeText(
    const string& text,
    const function<bool(const string&)>& dictionaryContains
) {
    vector<string> tokens = tokenizeViaFlite(text);

    // Re-join words that Flite split at apostrophes (iterate backwards so
    // erasing doesn't shift unvisited elements)
    for (int i = static_cast<int>(tokens.size()) - 1; i > 0; --i) {
        if (!tokens[i].empty() && tokens[i][0] == '\'') {
            tokens[i - 1].append(tokens[i]);
            tokens.erase(tokens.begin() + i);
        }
    }

    // Turn some symbols into words, remove the rest
    const static vector<pair<regex, string>> replacements {
        { regex("&"), "and" },
        { regex("\\*"), "times" },
        { regex("\\+"), "plus" },
        { regex("="), "equals" },
        { regex("@"), "at" },
        { regex("[^a-z']"), "" }
    };
    for (auto& token : tokens) {
        for (const auto& rule : replacements) {
            token = regex_replace(token, rule.first, rule.second);
        }
    }

    // Drop tokens that became empty after symbol removal
    tokens.erase(
        std::remove_if(tokens.begin(), tokens.end(), [](const string& s) { return s.empty(); }),
        tokens.end()
    );

    // Try to replace words that are not in the dictionary with similar ones that are
    for (auto& token : tokens) {
        if (!dictionaryContains(token)) {
            optional<string> replacement = findSimilarDictionaryWord(token, dictionaryContains);
            if (replacement) {
                token = *replacement;
            }
        }
    }

    return tokens;
}

View File

@ -5,6 +5,6 @@
#include <string>

// Splits text into lowercase word tokens suitable for dictionary lookup,
// substituting out-of-dictionary words with similar known words when possible.
std::vector<std::string> tokenizeText(
    const std::string& text,
    const std::function<bool(const std::string&)>& dictionaryContains
);

View File

@ -3,27 +3,27 @@
using std::string;

// Singleton accessor for the ExportFormat <-> string converter.
ExportFormatConverter& ExportFormatConverter::get() {
    static ExportFormatConverter converter;
    return converter;
}

// Human-readable type name used in error messages.
string ExportFormatConverter::getTypeName() {
    return "ExportFormat";
}

// Mapping between enum members and their string representations.
EnumConverter<ExportFormat>::member_data ExportFormatConverter::getMemberData() {
    return member_data {
        { ExportFormat::Dat, "dat" },
        { ExportFormat::Tsv, "tsv" },
        { ExportFormat::Xml, "xml" },
        { ExportFormat::Json, "json" }
    };
}

// Stream insertion: writes the string form of the value.
std::ostream& operator<<(std::ostream& stream, ExportFormat value) {
    return ExportFormatConverter::get().write(stream, value);
}

// Stream extraction: parses the string form into the value.
std::istream& operator>>(std::istream& stream, ExportFormat& value) {
    return ExportFormatConverter::get().read(stream, value);
}

View File

@ -3,18 +3,18 @@
#include "tools/EnumConverter.h"

// Supported output formats for animation export.
enum class ExportFormat {
    Dat,
    Tsv,
    Xml,
    Json
};

// Converts ExportFormat values to and from their string representations.
class ExportFormatConverter : public EnumConverter<ExportFormat> {
public:
    static ExportFormatConverter& get();
protected:
    std::string getTypeName() override;
    member_data getMemberData() override;
};

std::ostream& operator<<(std::ostream& stream, ExportFormat value);

View File

@ -3,25 +3,25 @@
using std::string;

// Singleton accessor for the RecognizerType <-> string converter.
RecognizerTypeConverter& RecognizerTypeConverter::get() {
    static RecognizerTypeConverter converter;
    return converter;
}

// Human-readable type name used in error messages.
string RecognizerTypeConverter::getTypeName() {
    return "RecognizerType";
}

// Mapping between enum members and their string representations.
EnumConverter<RecognizerType>::member_data RecognizerTypeConverter::getMemberData() {
    return member_data {
        { RecognizerType::PocketSphinx, "pocketSphinx" },
        { RecognizerType::Phonetic, "phonetic" }
    };
}

// Stream insertion: writes the string form of the value.
std::ostream& operator<<(std::ostream& stream, RecognizerType value) {
    return RecognizerTypeConverter::get().write(stream, value);
}

// Stream extraction: parses the string form into the value.
std::istream& operator>>(std::istream& stream, RecognizerType& value) {
    return RecognizerTypeConverter::get().read(stream, value);
}

View File

@ -3,16 +3,16 @@
#include "tools/EnumConverter.h"

// Available speech-recognition strategies.
enum class RecognizerType {
    PocketSphinx,
    Phonetic
};

// Converts RecognizerType values to and from their string representations.
class RecognizerTypeConverter : public EnumConverter<RecognizerType> {
public:
    static RecognizerTypeConverter& get();
protected:
    std::string getTypeName() override;
    member_data getMemberData() override;
};

std::ostream& operator<<(std::ostream& stream, RecognizerType value);

Some files were not shown because too many files have changed in this diff Show More