Indent code files with spaces rather than tabs

Daniel Wolf 2024-12-09 08:31:59 +01:00
parent 71259421a9
commit b365c4c1d5
147 changed files with 8098 additions and 8096 deletions
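
The near-equal addition and deletion counts reflect a mechanical re-indentation: each changed line is removed with tabs and re-added with spaces. As a minimal sketch of that kind of conversion — assuming four spaces per tab, and not the tool actually used for this commit — the whole change could be produced by something like:

    import java.nio.file.Files
    import java.nio.file.Paths

    fun main(args: Array<String>) {
        // Hypothetical helper: replace each leading tab with four spaces,
        // leaving the rest of the line untouched.
        val path = Paths.get(args[0])
        val converted = Files.readAllLines(path).map { line ->
            val tabs = line.takeWhile { it == '\t' }.length
            " ".repeat(4 * tabs) + line.substring(tabs)
        }
        Files.write(path, converted)
    }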

View File

@@ -14,17 +14,17 @@ add_subdirectory("extras/EsotericSoftwareSpine")
# Install misc. files
install(
    FILES README.adoc LICENSE.md CHANGELOG.md
    DESTINATION .
)

# Configure CPack
function(get_short_system_name variable)
    if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
        set(${variable} "macOS" PARENT_SCOPE)
    else()
        set(${variable} "${CMAKE_SYSTEM_NAME}" PARENT_SCOPE)
    endif()
endfunction()

set(CPACK_PACKAGE_NAME ${appName})

View File

@@ -1,11 +1,11 @@
cmake_minimum_required(VERSION 3.2)

set(afterEffectsFiles
    "Rhubarb Lip Sync.jsx"
    "README.adoc"
)

install(
    FILES ${afterEffectsFiles}
    DESTINATION "extras/AdobeAfterEffects"
)

File diff suppressed because it is too large.

View File

@@ -1,18 +1,18 @@
cmake_minimum_required(VERSION 3.2)

add_custom_target(
    rhubarbForSpine ALL
    "./gradlew" "build"
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    COMMENT "Building Rhubarb for Spine through Gradle."
)

install(
    DIRECTORY "build/libs/"
    DESTINATION "extras/EsotericSoftwareSpine"
)

install(
    FILES README.adoc
    DESTINATION "extras/EsotericSoftwareSpine"
)

View File

@@ -14,112 +14,112 @@ import tornadofx.setValue
import java.util.concurrent.ExecutorService

class AnimationFileModel(val parentModel: MainModel, animationFilePath: Path, private val executor: ExecutorService) {
    val spineJson = SpineJson(animationFilePath)

    val slotsProperty = SimpleObjectProperty<ObservableList<String>>()
    private var slots: ObservableList<String> by slotsProperty

    val mouthSlotProperty: SimpleStringProperty = SimpleStringProperty().alsoListen {
        val mouthSlot = this.mouthSlot
        val mouthNaming = if (mouthSlot != null)
            MouthNaming.guess(spineJson.getSlotAttachmentNames(mouthSlot))
        else null
        this.mouthNaming = mouthNaming

        mouthShapes = if (mouthSlot != null && mouthNaming != null) {
            val mouthNames = spineJson.getSlotAttachmentNames(mouthSlot)
            MouthShape.values().filter { mouthNames.contains(mouthNaming.getName(it)) }
        } else listOf()

        mouthSlotError = if (mouthSlot != null)
            null
        else
            "No slot with mouth drawings specified."
    }
    private var mouthSlot: String? by mouthSlotProperty

    val mouthSlotErrorProperty = SimpleStringProperty()
    private var mouthSlotError: String? by mouthSlotErrorProperty

    val mouthNamingProperty = SimpleObjectProperty<MouthNaming>()
    private var mouthNaming: MouthNaming? by mouthNamingProperty

    val mouthShapesProperty = SimpleObjectProperty<List<MouthShape>>().alsoListen {
        mouthShapesError = getMouthShapesErrorString()
    }
    var mouthShapes: List<MouthShape> by mouthShapesProperty
        private set

    val mouthShapesErrorProperty = SimpleStringProperty()
    private var mouthShapesError: String? by mouthShapesErrorProperty

    val audioFileModelsProperty = SimpleListProperty<AudioFileModel>(
        spineJson.audioEvents
            .map { event ->
                var audioFileModel: AudioFileModel? = null
                val reportResult: (List<MouthCue>) -> Unit =
                    { result -> saveAnimation(audioFileModel!!.animationName, event.name, result) }
                audioFileModel = AudioFileModel(event, this, executor, reportResult)
                return@map audioFileModel
            }
            .asObservable()
    )
    val audioFileModels: ObservableList<AudioFileModel> by audioFileModelsProperty

    val busyProperty = SimpleBooleanProperty().apply {
        bind(object : BooleanBinding() {
            init {
                for (audioFileModel in audioFileModels) {
                    super.bind(audioFileModel.busyProperty)
                }
            }
            override fun computeValue(): Boolean {
                return audioFileModels.any { it.busy }
            }
        })
    }
    val busy by busyProperty

    val validProperty = SimpleBooleanProperty().apply {
        val errorProperties = arrayOf(mouthSlotErrorProperty, mouthShapesErrorProperty)
        bind(object : BooleanBinding() {
            init {
                super.bind(*errorProperties)
            }
            override fun computeValue(): Boolean {
                return errorProperties.all { it.value == null }
            }
        })
    }

    private fun saveAnimation(animationName: String, audioEventName: String, mouthCues: List<MouthCue>) {
        spineJson.createOrUpdateAnimation(mouthCues, audioEventName, animationName, mouthSlot!!, mouthNaming!!)
        spineJson.save()
    }

    init {
        slots = spineJson.slots.asObservable()
        mouthSlot = spineJson.guessMouthSlot()
    }

    private fun getMouthShapesErrorString(): String? {
        val missingBasicShapes = MouthShape.basicShapes
            .filter { !mouthShapes.contains(it) }
        if (missingBasicShapes.isEmpty()) return null

        val result = StringBuilder()
        val missingShapesString = missingBasicShapes.joinToString()
        result.appendln(
            if (missingBasicShapes.count() > 1)
                "Mouth shapes $missingShapesString are missing."
            else
                "Mouth shape $missingShapesString is missing."
        )

        val first = MouthShape.basicShapes.first()
        val last = MouthShape.basicShapes.last()
        result.append("At least the basic mouth shapes $first-$last need corresponding image attachments.")
        return result.toString()
    }
}

View File

@@ -16,181 +16,181 @@ import java.util.concurrent.ExecutorService
import java.util.concurrent.Future

class AudioFileModel(
    audioEvent: SpineJson.AudioEvent,
    private val parentModel: AnimationFileModel,
    private val executor: ExecutorService,
    private val reportResult: (List<MouthCue>) -> Unit
) {
    private val spineJson = parentModel.spineJson

    private val audioFilePath: Path = spineJson.audioDirectoryPath.resolve(audioEvent.relativeAudioFilePath)

    val eventNameProperty = SimpleStringProperty(audioEvent.name)
    val eventName: String by eventNameProperty

    val displayFilePathProperty = SimpleStringProperty(audioEvent.relativeAudioFilePath)

    val animationNameProperty = SimpleStringProperty().apply {
        val mainModel = parentModel.parentModel
        bind(object : ObjectBinding<String>() {
            init {
                super.bind(
                    mainModel.animationPrefixProperty,
                    eventNameProperty,
                    mainModel.animationSuffixProperty
                )
            }
            override fun computeValue(): String {
                return mainModel.animationPrefix + eventName + mainModel.animationSuffix
            }
        })
    }
    val animationName: String by animationNameProperty

    val dialogProperty = SimpleStringProperty(audioEvent.dialog)
    private val dialog: String? by dialogProperty

    val animationProgressProperty = SimpleObjectProperty<Double?>(null)
    var animationProgress: Double? by animationProgressProperty
        private set

    private val animatedProperty = SimpleBooleanProperty().apply {
        bind(object : ObjectBinding<Boolean>() {
            init {
                super.bind(animationNameProperty, parentModel.spineJson.animationNames)
            }
            override fun computeValue(): Boolean {
                return parentModel.spineJson.animationNames.contains(animationName)
            }
        })
    }
    private var animated by animatedProperty

    private val futureProperty = SimpleObjectProperty<Future<*>?>()
    private var future by futureProperty

    val audioFileStateProperty = SimpleObjectProperty<AudioFileState>().apply {
        bind(object : ObjectBinding<AudioFileState>() {
            init {
                super.bind(animatedProperty, futureProperty, animationProgressProperty)
            }
            override fun computeValue(): AudioFileState {
                return if (future != null) {
                    if (animationProgress != null)
                        if (future!!.isCancelled)
                            AudioFileState(AudioFileStatus.Canceling)
                        else
                            AudioFileState(AudioFileStatus.Animating, animationProgress)
                    else
                        AudioFileState(AudioFileStatus.Pending)
                } else {
                    if (animated)
                        AudioFileState(AudioFileStatus.Done)
                    else
                        AudioFileState(AudioFileStatus.NotAnimated)
                }
            }
        })
    }

    val busyProperty = SimpleBooleanProperty().apply {
        bind(object : BooleanBinding() {
            init {
                super.bind(futureProperty)
            }
            override fun computeValue(): Boolean {
                return future != null
            }
        })
    }
    val busy by busyProperty

    val actionLabelProperty = SimpleStringProperty().apply {
        bind(object : StringBinding() {
            init {
                super.bind(futureProperty)
            }
            override fun computeValue(): String {
                return if (future != null)
                    "Cancel"
                else
                    "Animate"
            }
        })
    }

    fun performAction() {
        if (future == null) {
            if (animated) {
                Alert(Alert.AlertType.CONFIRMATION).apply {
                    headerText = "Animation '$animationName' already exists."
                    contentText = "Do you want to replace the existing animation?"
                    val result = showAndWait()
                    if (result.get() != ButtonType.OK) {
                        return
                    }
                }
            }

            startAnimation()
        } else {
            cancelAnimation()
        }
    }

    private fun startAnimation() {
        val wrapperTask = Runnable {
            val recognizer = parentModel.parentModel.recognizer.value
            val extendedMouthShapes = parentModel.mouthShapes.filter { it.isExtended }.toSet()
            val reportProgress: (Double?) -> Unit = { progress ->
                runAndWait { this@AudioFileModel.animationProgress = progress }
            }
            val rhubarbTask = RhubarbTask(audioFilePath, recognizer, dialog, extendedMouthShapes, reportProgress)
            try {
                try {
                    val result = rhubarbTask.call()
                    runAndWait {
                        reportResult(result)
                    }
                } finally {
                    runAndWait {
                        animationProgress = null
                        future = null
                    }
                }
            } catch (e: InterruptedException) {
            } catch (e: Exception) {
                e.printStackTrace(System.err)
                Platform.runLater {
                    Alert(Alert.AlertType.ERROR).apply {
                        headerText = "Error performing lip sync for event '$eventName'."
                        contentText = if (e is EndUserException)
                            e.message
                        else
                            ("An internal error occurred.\n"
                                + "Please report an issue, including the following information.\n"
                                + getStackTrace(e))
                        show()
                    }
                }
            }
        }
        future = executor.submit(wrapperTask)
    }

    private fun cancelAnimation() {
        future?.cancel(true)
    }
}

enum class AudioFileStatus {
    NotAnimated,
    Pending,
    Animating,
    Canceling,
    Done
}

data class AudioFileState(val status: AudioFileStatus, val progress: Double? = null)

View File

@@ -14,67 +14,67 @@ import tornadofx.rectangle
import tornadofx.removeFromParent

fun renderErrorIndicator(): Node {
    return Group().apply {
        isManaged = false
        circle {
            radius = 7.0
            fill = Color.ORANGERED
        }
        rectangle {
            x = -1.0
            y = -5.0
            width = 2.0
            height = 7.0
            fill = Color.WHITE
        }
        rectangle {
            x = -1.0
            y = 3.0
            width = 2.0
            height = 2.0
            fill = Color.WHITE
        }
    }
}

fun Parent.errorProperty(): StringProperty {
    return properties.getOrPut("rhubarb.errorProperty", {
        val errorIndicator: Node = renderErrorIndicator()
        val tooltip = Tooltip()
        val property = SimpleStringProperty()

        fun updateTooltipVisibility() {
            if (tooltip.text.isNotEmpty() && isFocused) {
                val bounds = localToScreen(boundsInLocal)
                tooltip.show(scene.window, bounds.minX + 5, bounds.maxY + 2)
            } else {
                tooltip.hide()
            }
        }

        focusedProperty().addListener({
            _: ObservableValue<out Boolean>, _: Boolean, _: Boolean ->
            updateTooltipVisibility()
        })

        property.addListener({
            _: ObservableValue<out String?>, _: String?, newValue: String? ->
            if (newValue != null) {
                this.addChildIfPossible(errorIndicator)

                tooltip.text = newValue
                Tooltip.install(this, tooltip)
                updateTooltipVisibility()
            } else {
                errorIndicator.removeFromParent()

                tooltip.text = ""
                tooltip.hide()
                Tooltip.uninstall(this, tooltip)
                updateTooltipVisibility()
            }
        })

        return@getOrPut property
    }) as StringProperty
}

View File

@@ -8,18 +8,18 @@ import java.lang.reflect.Method
import javax.swing.ImageIcon

class MainApp : App(MainView::class) {
    override fun start(stage: Stage) {
        super.start(stage)
        setIcon()
    }

    private fun setIcon() {
        // Set icon for windows
        for (iconSize in listOf(16, 32, 48, 256)) {
            addStageIcon(Image(this.javaClass.getResourceAsStream("/icon-$iconSize.png")))
        }

        // OS X requires the dock icon to be changed separately.
        // Not all JDKs contain the class com.apple.eawt.Application, so we have to use reflection.
        val classLoader = this.javaClass.classLoader
        try {
@@ -37,6 +37,6 @@ class MainApp : App(MainView::class) {
        } catch (e: Exception) {
            // Works only on OS X
        }
    }
}

View File

@@ -13,51 +13,51 @@ import java.nio.file.Paths
import java.util.concurrent.ExecutorService

class MainModel(private val executor: ExecutorService) {
    val filePathStringProperty = SimpleStringProperty(getDefaultPathString()).alsoListen { value ->
        filePathError = getExceptionMessage {
            animationFileModel = null
            if (value.isNullOrBlank()) {
                throw EndUserException("No input file specified.")
            }

            val path = try {
                val trimmed = value.removeSurrounding("\"")
                Paths.get(trimmed)
            } catch (e: InvalidPathException) {
                throw EndUserException("Not a valid file path.")
            }

            if (!Files.exists(path)) {
                throw EndUserException("File does not exist.")
            }

            animationFileModel = AnimationFileModel(this, path, executor)
        }
    }

    val filePathErrorProperty = SimpleStringProperty()
    private var filePathError: String? by filePathErrorProperty

    val animationFileModelProperty = SimpleObjectProperty<AnimationFileModel?>()
    var animationFileModel by animationFileModelProperty
        private set

    val recognizersProperty = SimpleObjectProperty<ObservableList<Recognizer>>(FXCollections.observableArrayList(
        Recognizer("pocketSphinx", "PocketSphinx (use for English recordings)"),
        Recognizer("phonetic", "Phonetic (use for non-English recordings)")
    ))
    private var recognizers: ObservableList<Recognizer> by recognizersProperty

    val recognizerProperty = SimpleObjectProperty<Recognizer>(recognizers[0])
    var recognizer: Recognizer by recognizerProperty

    val animationPrefixProperty = SimpleStringProperty("say_")
    var animationPrefix: String by animationPrefixProperty

    val animationSuffixProperty = SimpleStringProperty("")
    var animationSuffix: String by animationSuffixProperty

    private fun getDefaultPathString() = FX.application.parameters.raw.firstOrNull()
}

class Recognizer(val value: String, val description: String)

View File

@@ -23,235 +23,235 @@ import java.io.File
import java.util.concurrent.Executors

class MainView : View() {
    private val executor = Executors.newSingleThreadExecutor()
    private val mainModel = MainModel(executor)

    init {
        title = "Rhubarb Lip Sync for Spine"
    }

    override val root = form {
        var filePathTextField: TextField? = null
        var filePathButton: Button? = null

        val fileModelProperty = mainModel.animationFileModelProperty

        minWidth = 800.0
        prefWidth = 1000.0
        fieldset("Settings") {
            disableProperty().bind(fileModelProperty.select { it!!.busyProperty })
            field("Spine JSON file") {
                filePathTextField = textfield {
                    textProperty().bindBidirectional(mainModel.filePathStringProperty)
                    errorProperty().bind(mainModel.filePathErrorProperty)
                }
                filePathButton = button("...")
            }
            field("Mouth slot") {
                combobox<String> {
                    itemsProperty().bind(fileModelProperty.select { it!!.slotsProperty })
                    valueProperty().bindBidirectional(fileModelProperty.select { it!!.mouthSlotProperty })
                    errorProperty().bind(fileModelProperty.select { it!!.mouthSlotErrorProperty })
                }
            }
            field("Mouth naming") {
                label {
                    textProperty().bind(
                        fileModelProperty
                            .select { it!!.mouthNamingProperty }
                            .select { SimpleStringProperty(it.displayString) }
                    )
                }
            }
            field("Mouth shapes") {
                hbox {
                    errorProperty().bind(fileModelProperty.select { it!!.mouthShapesErrorProperty })
                    gridpane {
                        hgap = 10.0
                        vgap = 3.0
                        row {
                            label("Basic:")
                            for (shape in MouthShape.basicShapes) {
                                renderShapeCheckbox(shape, fileModelProperty, this)
                            }
                        }
                        row {
                            label("Extended:")
                            for (shape in MouthShape.extendedShapes) {
                                renderShapeCheckbox(shape, fileModelProperty, this)
                            }
                        }
                    }
                }
            }
            field("Dialog recognizer") {
                combobox<Recognizer> {
                    itemsProperty().bind(mainModel.recognizersProperty)
                    this.converter = object : StringConverter<Recognizer>() {
                        override fun toString(recognizer: Recognizer?): String {
                            return recognizer?.description ?: ""
                        }
                        override fun fromString(string: String?): Recognizer {
                            throw NotImplementedError()
                        }
                    }
                    valueProperty().bindBidirectional(mainModel.recognizerProperty)
                }
            }
            field("Animation naming") {
                textfield {
                    maxWidth = 100.0
                    textProperty().bindBidirectional(mainModel.animationPrefixProperty)
                }
                label("<audio event name>")
                textfield {
                    maxWidth = 100.0
                    textProperty().bindBidirectional(mainModel.animationSuffixProperty)
                }
            }
        }
        fieldset("Audio events") {
            tableview<AudioFileModel> {
                placeholder = Label("There are no events with associated audio files.")
                columnResizePolicy = SmartResize.POLICY
                column("Event", AudioFileModel::eventNameProperty)
                    .weightedWidth(1.0)
                column("Animation name", AudioFileModel::animationNameProperty)
                    .weightedWidth(1.0)
                column("Audio file", AudioFileModel::displayFilePathProperty)
                    .weightedWidth(1.0)
                column("Dialog", AudioFileModel::dialogProperty).apply {
                    weightedWidth(3.0)
                    // Make dialog column wrap
                    setCellFactory { tableColumn ->
                        return@setCellFactory TableCell<AudioFileModel, String>().also { cell ->
                            cell.graphic = Text().apply {
                                textProperty().bind(cell.itemProperty())
                                fillProperty().bind(cell.textFillProperty())
                                val widthProperty = tableColumn.widthProperty()
                                    .minus(cell.paddingLeftProperty)
                                    .minus(cell.paddingRightProperty)
                                wrappingWidthProperty().bind(widthProperty)
                            }
                            cell.prefHeight = Control.USE_COMPUTED_SIZE
                        }
                    }
                }
                column("Status", AudioFileModel::audioFileStateProperty).apply {
                    weightedWidth(1.0)
                    setCellFactory {
                        return@setCellFactory object : TableCell<AudioFileModel, AudioFileState>() {
                            override fun updateItem(state: AudioFileState?, empty: Boolean) {
                                super.updateItem(state, empty)
                                graphic = if (state != null) {
                                    when (state.status) {
                                        AudioFileStatus.NotAnimated -> Text("Not animated").apply {
                                            fill = Color.GRAY
                                        }
                                        AudioFileStatus.Pending,
                                        AudioFileStatus.Animating -> HBox().apply {
                                            val progress: Double? = state.progress
                                            val indeterminate = -1.0
                                            val bar = progressbar(progress ?: indeterminate) {
                                                maxWidth = Double.MAX_VALUE
                                            }
                                            HBox.setHgrow(bar, Priority.ALWAYS)
                                            hbox {
                                                minWidth = 30.0
                                                if (progress != null) {
                                                    text("${(progress * 100).toInt()}%") {
                                                        alignment = Pos.BASELINE_RIGHT
                                                    }
                                                }
                                            }
                                        }
                                        AudioFileStatus.Canceling -> Text("Canceling")
                                        AudioFileStatus.Done -> Text("Done").apply {
                                            font = Font.font(font.family, FontWeight.BOLD, font.size)
                                        }
                                    }
                                } else null
                            }
                        }
                    }
                }
                column("", AudioFileModel::actionLabelProperty).apply {
                    weightedWidth(1.0)
                    // Show button
                    setCellFactory {
                        return@setCellFactory object : TableCell<AudioFileModel, String>() {
                            override fun updateItem(item: String?, empty: Boolean) {
                                super.updateItem(item, empty)
                                graphic = if (!empty)
                                    Button(item).apply {
                                        this.maxWidth = Double.MAX_VALUE
                                        setOnAction {
                                            val audioFileModel = this@tableview.items[index]
                                            audioFileModel.performAction()
                                        }
                                        val invalidProperty: Property<Boolean> = fileModelProperty
                                            .select { it!!.validProperty }
                                            .select { SimpleBooleanProperty(!it) }
                                        disableProperty().bind(invalidProperty)
                                    }
                                else
                                    null
                            }
                        }
                    }
                }
                itemsProperty().bind(fileModelProperty.select { it!!.audioFileModelsProperty })
            }
        }

        onDragOver = EventHandler<DragEvent> { event ->
            if (event.dragboard.hasFiles() && mainModel.animationFileModel?.busy != true) {
                event.acceptTransferModes(TransferMode.COPY)
                event.consume()
            }
        }
        onDragDropped = EventHandler<DragEvent> { event ->
            if (event.dragboard.hasFiles() && mainModel.animationFileModel?.busy != true) {
                filePathTextField!!.text = event.dragboard.files.firstOrNull()?.path
                event.isDropCompleted = true
                event.consume()
            }
        }

        whenUndocked {
            executor.shutdownNow()
        }

        filePathButton!!.onAction = EventHandler<ActionEvent> {
            val fileChooser = FileChooser().apply {
                title = "Open Spine JSON file"
                extensionFilters.addAll(
                    FileChooser.ExtensionFilter("Spine JSON file (*.json)", "*.json"),
                    FileChooser.ExtensionFilter("All files (*.*)", "*.*")
                )
                val lastDirectory = filePathTextField!!.text?.let { File(it).parentFile }
                if (lastDirectory != null && lastDirectory.isDirectory) {
                    initialDirectory = lastDirectory
                }
            }
            val file = fileChooser.showOpenDialog(this@MainView.primaryStage)
            if (file != null) {
                filePathTextField!!.text = file.path
            }
        }
    }

    private fun renderShapeCheckbox(shape: MouthShape, fileModelProperty: SimpleObjectProperty<AnimationFileModel?>, parent: EventTarget) {
        parent.label {
            textProperty().bind(
                fileModelProperty
                    .select { it!!.mouthShapesProperty }
                    .select { mouthShapes ->
                        val hairSpace = "\u200A"
                        val result = shape.toString() + hairSpace + if (mouthShapes.contains(shape)) "☑" else "☐"
                        return@select SimpleStringProperty(result)
                    }
            )
        }
    }
}

View File

@@ -4,52 +4,52 @@ import java.util.*

class MouthNaming(private val prefix: String, private val suffix: String, private val mouthShapeCasing: MouthShapeCasing) {

    companion object {
        fun guess(mouthNames: List<String>): MouthNaming {
            if (mouthNames.isEmpty()) {
                return MouthNaming("", "", guessMouthShapeCasing(""))
            }

            val commonPrefix = mouthNames.commonPrefix
            val commonSuffix = mouthNames.commonSuffix
            val firstMouthName = mouthNames.first()
            if (commonPrefix.length + commonSuffix.length >= firstMouthName.length) {
                return MouthNaming(commonPrefix, "", guessMouthShapeCasing(""))
            }

            val shapeName = firstMouthName.substring(
                commonPrefix.length,
                firstMouthName.length - commonSuffix.length)
            val mouthShapeCasing = guessMouthShapeCasing(shapeName)
            return MouthNaming(commonPrefix, commonSuffix, mouthShapeCasing)
        }

        private fun guessMouthShapeCasing(shapeName: String): MouthShapeCasing {
            return if (shapeName.isBlank() || shapeName[0].isLowerCase())
                MouthShapeCasing.Lower
            else
                MouthShapeCasing.Upper
        }
    }

    fun getName(mouthShape: MouthShape): String {
        val name = if (mouthShapeCasing == MouthShapeCasing.Upper)
            mouthShape.toString()
        else
            mouthShape.toString().toLowerCase(Locale.ROOT)
        return "$prefix$name$suffix"
    }

    val displayString: String get() {
        val casing = if (mouthShapeCasing == MouthShapeCasing.Upper)
            "<UPPER-CASE SHAPE NAME>"
        else
            "<lower-case shape name>"
        return "\"$prefix$casing$suffix\""
    }
}

enum class MouthShapeCasing {
    Upper,
    Lower
}

View File

@@ -1,19 +1,19 @@
package com.rhubarb_lip_sync.rhubarb_for_spine

enum class MouthShape {
    A, B, C, D, E, F, G, H, X;

    val isBasic: Boolean
        get() = this.ordinal < basicShapeCount

    val isExtended: Boolean
        get() = !this.isBasic

    companion object {
        const val basicShapeCount = 6

        val basicShapes = MouthShape.values().take(basicShapeCount)

        val extendedShapes = MouthShape.values().drop(basicShapeCount)
    }
}

View File

@@ -10,157 +10,157 @@ import java.nio.file.Path
import java.util.concurrent.Callable

class RhubarbTask(
    val audioFilePath: Path,
    val recognizer: String,
    val dialog: String?,
    val extendedMouthShapes: Set<MouthShape>,
    val reportProgress: (Double?) -> Unit
) : Callable<List<MouthCue>> {

    override fun call(): List<MouthCue> {
        if (Thread.currentThread().isInterrupted) {
            throw InterruptedException()
        }
        if (!Files.exists(audioFilePath)) {
            throw EndUserException("File '$audioFilePath' does not exist.")
        }

        val dialogFile = if (dialog != null) TemporaryTextFile(dialog) else null
        val outputFile = TemporaryTextFile()
        dialogFile.use { outputFile.use {
            val processBuilder = ProcessBuilder(createProcessBuilderArgs(dialogFile?.filePath)).apply {
                // See http://java-monitor.com/forum/showthread.php?t=4067
                redirectOutput(outputFile.filePath.toFile())
            }
            val process: Process = processBuilder.start()
            val stderr = BufferedReader(InputStreamReader(process.errorStream, StandardCharsets.UTF_8))
            try {
                while (true) {
                    val line = stderr.interruptibleReadLine()
                    val message = parseJsonObject(line)
                    when (message.string("type")!!) {
                        "progress" -> {
                            reportProgress(message.double("value")!!)
                        }
                        "success" -> {
                            reportProgress(1.0)
                            val resultString = String(Files.readAllBytes(outputFile.filePath), StandardCharsets.UTF_8)
                            return parseRhubarbResult(resultString)
                        }
                        "failure" -> {
                            throw EndUserException(message.string("reason") ?: "Rhubarb failed without reason.")
                        }
                    }
                }
            } catch (e: InterruptedException) {
                process.destroyForcibly()
                throw e
            } catch (e: EOFException) {
                throw EndUserException("Rhubarb terminated unexpectedly.")
            } finally {
                process.waitFor()
            }
        }}

        throw EndUserException("Audio file processing terminated in an unexpected way.")
    }

    private fun parseRhubarbResult(jsonString: String): List<MouthCue> {
        val json = parseJsonObject(jsonString)
        val mouthCues = json.array<JsonObject>("mouthCues")!!
        return mouthCues.map { mouthCue ->
            val time = mouthCue.double("start")!!
            val mouthShape = MouthShape.valueOf(mouthCue.string("value")!!)
            return@map MouthCue(time, mouthShape)
        }
    }

    private val jsonParser = JsonParser.default()
    private fun parseJsonObject(jsonString: String): JsonObject {
        return jsonParser.parse(StringReader(jsonString)) as JsonObject
    }

    private fun createProcessBuilderArgs(dialogFilePath: Path?): List<String> {
        val extendedMouthShapesString =
            if (extendedMouthShapes.any()) extendedMouthShapes.joinToString(separator = "")
            else "\"\""
        return mutableListOf(
            rhubarbBinFilePath.toString(),
            "--machineReadable",
            "--recognizer", recognizer,
            "--exportFormat", "json",
            "--extendedShapes", extendedMouthShapesString
        ).apply {
            if (dialogFilePath != null) {
                addAll(listOf(
                    "--dialogFile", dialogFilePath.toString()
                ))
            }
        }.apply {
            add(audioFilePath.toString())
        }
    }

    private val guiBinDirectory: Path by lazy {
        val path = urlToPath(getLocation(RhubarbTask::class.java))
        return@lazy if (Files.isDirectory(path)) path.parent else path
    }

    private val rhubarbBinFilePath: Path by lazy {
        val rhubarbBinName = if (IS_OS_WINDOWS) "rhubarb.exe" else "rhubarb"
        var currentDirectory: Path? = guiBinDirectory
        while (currentDirectory != null) {
            val candidate: Path = currentDirectory.resolve(rhubarbBinName)
            if (Files.exists(candidate)) {
                return@lazy candidate
            }
            currentDirectory = currentDirectory.parent
        }
        throw EndUserException("Could not find Rhubarb Lip Sync executable '$rhubarbBinName'."
            + " Expected to find it in '$guiBinDirectory' or any directory above.")
    }

    private class TemporaryTextFile(text: String = "") : AutoCloseable {
        val filePath: Path = Files.createTempFile(null, null).also {
            Files.write(it, text.toByteArray(StandardCharsets.UTF_8))
        }

        override fun close() {
            Files.delete(filePath)
        }
    }

    // Same as readLine, but can be interrupted.
    // Note that this function handles linebreak characters differently from readLine.
    // It only consumes the first linebreak character before returning and swallows any leading
    // linebreak characters.
    // This behavior is much easier to implement and doesn't make any difference for our purposes.
    private fun BufferedReader.interruptibleReadLine(): String {
        val result = StringBuilder()
        while (true) {
            val char = interruptibleReadChar()
            if (char == '\r' || char == '\n') {
                if (result.isNotEmpty()) return result.toString()
            } else {
                result.append(char)
            }
        }
    }

    private fun BufferedReader.interruptibleReadChar(): Char {
        while (true) {
            if (Thread.currentThread().isInterrupted) {
                throw InterruptedException()
            }
            if (ready()) {
                val result: Int = read()
                if (result == -1) {
                    throw EOFException()
                }
                return result.toChar()
            }
            Thread.yield()
        }
    }
}

View File

@ -7,157 +7,157 @@ import java.nio.file.Files
import java.nio.file.Path
class SpineJson(private val filePath: Path) {
private val fileDirectoryPath: Path = filePath.parent
private val json: JsonObject
private val skeleton: JsonObject
private val fileDirectoryPath: Path = filePath.parent
private val json: JsonObject
private val skeleton: JsonObject
init {
if (!Files.exists(filePath)) {
throw EndUserException("File '$filePath' does not exist.")
}
try {
json = Parser.default().parse(filePath.toString()) as JsonObject
} catch (e: Exception) {
throw EndUserException("Wrong file format. This is not a valid JSON file.")
}
skeleton = json.obj("skeleton") ?: throw EndUserException("JSON file is corrupted.")
init {
if (!Files.exists(filePath)) {
throw EndUserException("File '$filePath' does not exist.")
}
try {
json = Parser.default().parse(filePath.toString()) as JsonObject
} catch (e: Exception) {
throw EndUserException("Wrong file format. This is not a valid JSON file.")
}
skeleton = json.obj("skeleton") ?: throw EndUserException("JSON file is corrupted.")
validateProperties()
}
validateProperties()
}
private fun validateProperties() {
imagesDirectoryPath
audioDirectoryPath
}
private fun validateProperties() {
imagesDirectoryPath
audioDirectoryPath
}
private val imagesDirectoryPath: Path get() {
val relativeImagesDirectory = skeleton.string("images")
?: throw EndUserException("JSON file is incomplete: Images path is missing."
+ " Make sure to check 'Nonessential data' when exporting.")
private val imagesDirectoryPath: Path get() {
val relativeImagesDirectory = skeleton.string("images")
?: throw EndUserException("JSON file is incomplete: Images path is missing."
+ " Make sure to check 'Nonessential data' when exporting.")
val imagesDirectoryPath = fileDirectoryPath.resolve(relativeImagesDirectory).normalize()
if (!Files.exists(imagesDirectoryPath)) {
throw EndUserException("Could not find images directory relative to the JSON file."
+ " Make sure the JSON file is in the same directory as the original Spine file.")
}
val imagesDirectoryPath = fileDirectoryPath.resolve(relativeImagesDirectory).normalize()
if (!Files.exists(imagesDirectoryPath)) {
throw EndUserException("Could not find images directory relative to the JSON file."
+ " Make sure the JSON file is in the same directory as the original Spine file.")
}
return imagesDirectoryPath
}
return imagesDirectoryPath
}
val audioDirectoryPath: Path get() {
val relativeAudioDirectory = skeleton.string("audio")
?: throw EndUserException("JSON file is incomplete: Audio path is missing."
+ " Make sure to check 'Nonessential data' when exporting.")
val audioDirectoryPath: Path get() {
val relativeAudioDirectory = skeleton.string("audio")
?: throw EndUserException("JSON file is incomplete: Audio path is missing."
+ " Make sure to check 'Nonessential data' when exporting.")
val audioDirectoryPath = fileDirectoryPath.resolve(relativeAudioDirectory).normalize()
if (!Files.exists(audioDirectoryPath)) {
throw EndUserException("Could not find audio directory relative to the JSON file."
+ " Make sure the JSON file is in the same directory as the original Spine file.")
}
val audioDirectoryPath = fileDirectoryPath.resolve(relativeAudioDirectory).normalize()
if (!Files.exists(audioDirectoryPath)) {
throw EndUserException("Could not find audio directory relative to the JSON file."
+ " Make sure the JSON file is in the same directory as the original Spine file.")
}
return audioDirectoryPath
}
return audioDirectoryPath
}
val frameRate: Double get() {
return skeleton.double("fps") ?: 30.0
}
val frameRate: Double get() {
return skeleton.double("fps") ?: 30.0
}
val slots: List<String> get() {
val slots = json.array("slots") ?: listOf<JsonObject>()
return slots.mapNotNull { it.string("name") }
}
val slots: List<String> get() {
val slots = json.array("slots") ?: listOf<JsonObject>()
return slots.mapNotNull { it.string("name") }
}
fun guessMouthSlot(): String? {
return slots.firstOrNull { it.contains("mouth", ignoreCase = true) }
?: slots.firstOrNull()
}
fun guessMouthSlot(): String? {
return slots.firstOrNull { it.contains("mouth", ignoreCase = true) }
?: slots.firstOrNull()
}
data class AudioEvent(val name: String, val relativeAudioFilePath: String, val dialog: String?)
data class AudioEvent(val name: String, val relativeAudioFilePath: String, val dialog: String?)
val audioEvents: List<AudioEvent> get() {
val events = json.obj("events") ?: JsonObject()
val result = mutableListOf<AudioEvent>()
for ((name, value) in events) {
if (value !is JsonObject) throw EndUserException("Invalid event found.")
val audioEvents: List<AudioEvent> get() {
val events = json.obj("events") ?: JsonObject()
val result = mutableListOf<AudioEvent>()
for ((name, value) in events) {
if (value !is JsonObject) throw EndUserException("Invalid event found.")
val relativeAudioFilePath = value.string("audio") ?: continue
val relativeAudioFilePath = value.string("audio") ?: continue
val dialog = value.string("string")
result.add(AudioEvent(name, relativeAudioFilePath, dialog))
}
return result
}
val dialog = value.string("string")
result.add(AudioEvent(name, relativeAudioFilePath, dialog))
}
return result
}
fun getSlotAttachmentNames(slotName: String): List<String> {
@Suppress("UNCHECKED_CAST")
val skins: Collection<JsonObject> = when (val skinsObject = json["skins"]) {
is JsonObject -> skinsObject.values as Collection<JsonObject>
is JsonArray<*> -> skinsObject as Collection<JsonObject>
else -> emptyList()
}
fun getSlotAttachmentNames(slotName: String): List<String> {
@Suppress("UNCHECKED_CAST")
val skins: Collection<JsonObject> = when (val skinsObject = json["skins"]) {
is JsonObject -> skinsObject.values as Collection<JsonObject>
is JsonArray<*> -> skinsObject as Collection<JsonObject>
else -> emptyList()
}
// Get attachment names for all skins
return skins
.flatMap { skin ->
skin.obj(slotName)?.keys?.toList()
?: skin.obj("attachments")?.obj(slotName)?.keys?.toList()
?: emptyList<String>()
}
.distinct()
}
// Get attachment names for all skins
return skins
.flatMap { skin ->
skin.obj(slotName)?.keys?.toList()
?: skin.obj("attachments")?.obj(slotName)?.keys?.toList()
?: emptyList<String>()
}
.distinct()
}
val animationNames = observableSet<String>(
json.obj("animations")?.map{ it.key }?.toMutableSet() ?: mutableSetOf()
)
val animationNames = observableSet<String>(
json.obj("animations")?.map{ it.key }?.toMutableSet() ?: mutableSetOf()
)
fun createOrUpdateAnimation(mouthCues: List<MouthCue>, eventName: String, animationName: String,
mouthSlot: String, mouthNaming: MouthNaming
) {
if (!json.containsKey("animations")) {
json["animations"] = JsonObject()
}
val animations: JsonObject = json.obj("animations")!!
fun createOrUpdateAnimation(mouthCues: List<MouthCue>, eventName: String, animationName: String,
mouthSlot: String, mouthNaming: MouthNaming
) {
if (!json.containsKey("animations")) {
json["animations"] = JsonObject()
}
val animations: JsonObject = json.obj("animations")!!
// Round times to full frames. Always round down.
// If events coincide, prefer the latest one.
val keyframes = mutableMapOf<Int, MouthShape>()
for (mouthCue in mouthCues) {
val frameNumber = (mouthCue.time * frameRate).toInt()
keyframes[frameNumber] = mouthCue.mouthShape
}
// Round times to full frames. Always round down.
// If events coincide, prefer the latest one.
val keyframes = mutableMapOf<Int, MouthShape>()
for (mouthCue in mouthCues) {
val frameNumber = (mouthCue.time * frameRate).toInt()
keyframes[frameNumber] = mouthCue.mouthShape
}
animations[animationName] = JsonObject().apply {
this["slots"] = JsonObject().apply {
this[mouthSlot] = JsonObject().apply {
this["attachment"] = JsonArray(
keyframes
.toSortedMap()
.map { (frameNumber, mouthShape) ->
JsonObject().apply {
this["time"] = frameNumber / frameRate
this["name"] = mouthNaming.getName(mouthShape)
}
}
)
}
}
this["events"] = JsonArray(
JsonObject().apply {
this["time"] = 0.0
this["name"] = eventName
this["string"] = ""
}
)
}
animationNames.add(animationName)
}
override fun toString(): String {
return json.toJsonString(prettyPrint = true)
}
fun save() {
Files.write(filePath, listOf(toString()), StandardCharsets.UTF_8)
}
}
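Taken together, SpineJson covers the whole round trip: load an exported .json file, enumerate its audio events, write one attachment animation per event, and save the result. A minimal usage sketch, assuming a MouthCue(time, mouthShape) constructor; the wrapper function, the hard-coded cues, and the "mouth" slot name are illustrative, and real cues would come from Rhubarb:

fun writeAnimations(spine: SpineJson, mouthNaming: MouthNaming) {
    for (event in spine.audioEvents) {
        // Hypothetical cues; the actual tool gets them from the Rhubarb recognizer.
        val cues = listOf(MouthCue(0.0, MouthShape.X), MouthCue(0.25, MouthShape.B))
        spine.createOrUpdateAnimation(cues, event.name, "say_" + event.name, "mouth", mouthNaming)
    }
    spine.save()
}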

View File

@ -24,37 +24,37 @@ import java.nio.file.Paths
* @param c The class whose location is desired.
*/
fun getLocation(c: Class<*>): URL {
// Try the easy way first
try {
val codeSourceLocation = c.protectionDomain.codeSource.location
if (codeSourceLocation != null) return codeSourceLocation
} catch (e: SecurityException) {
// Cannot access protection domain
} catch (e: NullPointerException) {
// Protection domain or code source is null
}
// The easy way failed, so we try the hard way. We ask for the class
// itself as a resource, then strip the class's path from the URL string,
// leaving the base path.
// Get the class's raw resource path
val classResource = c.getResource(c.simpleName + ".class")
?: throw Exception("Cannot find class resource.")
val url = classResource.toString()
val suffix = c.canonicalName.replace('.', '/') + ".class"
if (!url.endsWith(suffix)) throw Exception("Malformed URL.")
// strip the class's path from the URL string
val base = url.substring(0, url.length - suffix.length)
var path = base
// remove the "jar:" prefix and "!/" suffix, if present
if (path.startsWith("jar:")) path = path.substring(4, path.length - 2)
// remove the "jar:" prefix and "!/" suffix, if present
if (path.startsWith("jar:")) path = path.substring(4, path.length - 2)
return URL(path)
}
/**
@ -64,29 +64,29 @@ fun getLocation(c: Class<*>): URL {
* @return A file path suitable for use with e.g. [FileInputStream]
*/
fun urlToPath(url: URL): Path {
var pathString = url.toString()
if (pathString.startsWith("jar:")) {
// Remove "jar:" prefix and "!/" suffix
val index = pathString.indexOf("!/")
pathString = pathString.substring(4, index)
}
if (pathString.startsWith("jar:")) {
// Remove "jar:" prefix and "!/" suffix
val index = pathString.indexOf("!/")
pathString = pathString.substring(4, index)
}
try {
if (IS_OS_WINDOWS && pathString.matches("file:[A-Za-z]:.*".toRegex())) {
pathString = "file:/" + pathString.substring(5)
}
return Paths.get(URL(pathString).toURI())
} catch (e: MalformedURLException) {
// URL is not completely well-formed.
} catch (e: URISyntaxException) {
// URL is not completely well-formed.
}
if (pathString.startsWith("file:")) {
// Pass through the URL as-is, minus "file:" prefix
pathString = pathString.substring(5)
return Paths.get(pathString)
}
throw IllegalArgumentException("Invalid URL: $url")
if (pathString.startsWith("file:")) {
// Pass through the URL as-is, minus "file:" prefix
pathString = pathString.substring(5)
return Paths.get(pathString)
}
throw IllegalArgumentException("Invalid URL: $url")
}
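getLocation and urlToPath are meant to be chained: the first yields a usable URL even when the code runs from inside a JAR, and the second normalizes that URL into a plain file-system path. A sketch of the typical combination, using this project's MainApp class; the .parent step to get the containing directory is illustrative:

// Directory containing the running .class tree or JAR (sketch).
val installDirectory: Path = urlToPath(getLocation(MainApp::class.java)).parent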

View File

@ -3,5 +3,5 @@ package com.rhubarb_lip_sync.rhubarb_for_spine
import javafx.application.Application
fun main(args: Array<String>) {
Application.launch(MainApp::class.java, *args)
}

View File

@ -8,31 +8,31 @@ import java.io.PrintWriter
import java.io.StringWriter
val List<String>.commonPrefix: String get() {
return if (isEmpty()) "" else this.reduce { result, string -> result.commonPrefixWith(string) }
}
val List<String>.commonSuffix: String get() {
return if (isEmpty()) "" else this.reduce { result, string -> result.commonSuffixWith(string) }
}
fun <TValue, TProperty : Property<TValue>> TProperty.alsoListen(listener: (TValue) -> Unit) : TProperty {
// Notify the listener of the initial value.
// If we did this synchronously, the listener's state would have to be fully initialized the
// moment this function is called. So calling this function during object initialization might
// result in access to uninitialized state.
Platform.runLater { listener(this.value) }
addListener({ _, _, newValue -> listener(newValue)})
return this
}
fun getExceptionMessage(action: () -> Unit): String? {
try {
action()
} catch (e: Exception) {
return e.message
}
return null
}
/**
@ -44,32 +44,32 @@ fun getExceptionMessage(action: () -> Unit): String? {
* @throws Throwable An exception occurred in the run method of the Runnable
*/
fun runAndWait(action: () -> Unit) {
if (Platform.isFxApplicationThread()) {
action()
} else {
val lock = ReentrantLock()
lock.withLock {
val doneCondition = lock.newCondition()
var throwable: Throwable? = null
Platform.runLater {
lock.withLock {
try {
action()
} catch (e: Throwable) {
throwable = e
} finally {
doneCondition.signal()
}
}
}
doneCondition.await()
throwable?.let { throw it }
}
}
}
fun getStackTrace(e: Exception): String {
val stringWriter = StringWriter()
e.printStackTrace(PrintWriter(stringWriter))
return stringWriter.toString()
}
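A few illustrative calls for the helpers above (all values hypothetical). Note that alsoListen delivers the initial value asynchronously, so the first callback arrives through the JavaFX event queue:

listOf("mouth_a", "mouth_b").commonPrefix        // "mouth_"
listOf("walk_cycle", "run_cycle").commonSuffix   // "_cycle"

val name = SimpleStringProperty("initial").alsoListen { value ->
    println("name is now $value")  // fired once with "initial", then on every change
}

runAndWait {
    // Runs on the JavaFX application thread; the calling thread blocks until it finishes.
}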

View File

@ -7,63 +7,63 @@ import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.api.Assertions.catchThrowable
class SpineJsonTest {
@Nested
inner class `file format 3_7` {
@Test
fun `correctly reads valid file`() {
val path = Paths.get("src/test/data/jsonFiles/matt-3.7.json").toAbsolutePath()
val spine = SpineJson(path)
assertThat(spine.audioDirectoryPath)
.isEqualTo(Paths.get("src/test/data/jsonFiles/audio").toAbsolutePath())
assertThat(spine.frameRate).isEqualTo(30.0)
assertThat(spine.slots).containsExactly("legs", "torso", "head", "mouth")
assertThat(spine.guessMouthSlot()).isEqualTo("mouth")
assertThat(spine.audioEvents).containsExactly(
SpineJson.AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null),
SpineJson.AudioEvent("2-it's-a-tool", "2-it's-a-tool.wav", null),
SpineJson.AudioEvent("3-and-now-you-can", "3-and-now-you-can.wav", null)
)
assertThat(spine.getSlotAttachmentNames("mouth")).isEqualTo(('a'..'h').map{ "mouth_$it" })
assertThat(spine.animationNames).containsExactly("shake_head", "walk")
}
@Test
fun `throws on file without nonessential data`() {
val path = Paths.get("src/test/data/jsonFiles/matt-3.7-essential.json").toAbsolutePath()
val throwable = catchThrowable { SpineJson(path) }
assertThat(throwable)
.hasMessage("JSON file is incomplete: Images path is missing. Make sure to check 'Nonessential data' when exporting.")
}
}
@Nested
inner class `file format 3_8` {
@Test
fun `correctly reads valid file`() {
val path = Paths.get("src/test/data/jsonFiles/matt-3.8.json").toAbsolutePath()
val spine = SpineJson(path)
assertThat(spine.audioDirectoryPath)
.isEqualTo(Paths.get("src/test/data/jsonFiles/audio").toAbsolutePath())
assertThat(spine.frameRate).isEqualTo(30.0)
assertThat(spine.slots).containsExactly("legs", "torso", "head", "mouth")
assertThat(spine.guessMouthSlot()).isEqualTo("mouth")
assertThat(spine.audioEvents).containsExactly(
SpineJson.AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null),
SpineJson.AudioEvent("2-it's-a-tool", "2-it's-a-tool.wav", null),
SpineJson.AudioEvent("3-and-now-you-can", "3-and-now-you-can.wav", null)
)
assertThat(spine.getSlotAttachmentNames("mouth")).isEqualTo(('a'..'h').map{ "mouth_$it" })
assertThat(spine.animationNames).containsExactly("shake_head", "walk")
}
@Test
fun `throws on file without nonessential data`() {
val path = Paths.get("src/test/data/jsonFiles/matt-3.8-essential.json").toAbsolutePath()
val throwable = catchThrowable { SpineJson(path) }
assertThat(throwable)
.hasMessage("JSON file is incomplete: Images path is missing. Make sure to check 'Nonessential data' when exporting.")
}
}
}

View File

@ -1,14 +1,14 @@
cmake_minimum_required(VERSION 3.2)
set(vegasFiles
"Debug Rhubarb.cs"
"Debug Rhubarb.cs.config"
"Import Rhubarb.cs"
"Import Rhubarb.cs.config"
"README.adoc"
"Debug Rhubarb.cs"
"Debug Rhubarb.cs.config"
"Import Rhubarb.cs"
"Import Rhubarb.cs.config"
"README.adoc"
)
install(
FILES ${vegasFiles}
DESTINATION "extras/MagixVegas"
)

View File

@ -17,329 +17,329 @@ using ScriptPortal.Vegas; // For older versions, this should say Sony.Vegas
using Region = ScriptPortal.Vegas.Region; // For older versions, this should say Sony.Vegas.Region
public class EntryPoint {
public void FromVegas(Vegas vegas) {
Config config = Config.Load();
ImportDialog importDialog = new ImportDialog(config, delegate { Import(config, vegas); });
importDialog.ShowDialog();
config.Save();
}
private void Import(Config config, Vegas vegas) {
Project project = vegas.Project;
// Clear markers and regions
if (config.ClearMarkers) {
project.Markers.Clear();
}
if (config.ClearRegions) {
project.Regions.Clear();
}
// Load log file
if (!File.Exists(config.LogFile)) {
throw new Exception("Log file does not exist.");
}
Dictionary<EventType, List<TimedEvent>> timedEvents = ParseLogFile(config);
// Add markers/regions
foreach (EventType eventType in timedEvents.Keys) {
foreach (Visualization visualization in config.Visualizations) {
if (visualization.EventType != eventType) continue;
List<TimedEvent> filteredEvents = FilterEvents(timedEvents[eventType], visualization.Regex);
foreach (TimedEvent timedEvent in filteredEvents) {
Timecode start = Timecode.FromSeconds(timedEvent.Start);
Timecode end = Timecode.FromSeconds(timedEvent.End);
Timecode length = end - start;
if (config.LoopRegionOnly) {
Timecode loopRegionStart = vegas.Transport.LoopRegionStart;
Timecode loopRegionEnd = loopRegionStart + vegas.Transport.LoopRegionLength;
if (start < loopRegionStart || start > loopRegionEnd || end < loopRegionStart || end > loopRegionEnd) {
continue;
}
}
switch (visualization.VisualizationType) {
case VisualizationType.Marker:
project.Markers.Add(new Marker(start, timedEvent.Value));
break;
case VisualizationType.Region:
project.Regions.Add(new Region(start, length, timedEvent.Value));
break;
}
}
}
}
}
private List<TimedEvent> FilterEvents(List<TimedEvent> timedEvents, Regex filterRegex) {
if (filterRegex == null) return timedEvents;
StringBuilder stringBuilder = new StringBuilder();
Dictionary<int, TimedEvent> timedEventsByCharPosition = new Dictionary<int, TimedEvent>();
foreach (TimedEvent timedEvent in timedEvents) {
string inAngleBrackets = "<" + timedEvent.Value + ">";
for (int charPosition = stringBuilder.Length;
charPosition < stringBuilder.Length + inAngleBrackets.Length;
charPosition++) {
timedEventsByCharPosition[charPosition] = timedEvent;
}
stringBuilder.Append(inAngleBrackets);
}
MatchCollection matches = filterRegex.Matches(stringBuilder.ToString());
List<TimedEvent> result = new List<TimedEvent>();
foreach (Match match in matches) {
if (match.Length == 0) continue;
for (int charPosition = match.Index; charPosition < match.Index + match.Length; charPosition++) {
TimedEvent matchedEvent = timedEventsByCharPosition[charPosition];
if (!result.Contains(matchedEvent)) {
result.Add(matchedEvent);
}
}
}
return result;
}
private static Dictionary<EventType, List<TimedEvent>> ParseLogFile(Config config) {
string[] lines = File.ReadAllLines(config.LogFile);
Regex structuredLogLine = new Regex(@"##(\w+)\[(\d*\.\d*)-(\d*\.\d*)\]: (.*)");
Dictionary<EventType, List<TimedEvent>> timedEvents = new Dictionary<EventType, List<TimedEvent>>();
foreach (string line in lines) {
Match match = structuredLogLine.Match(line);
if (!match.Success) continue;
EventType eventType = (EventType) Enum.Parse(typeof(EventType), match.Groups[1].Value, true);
double start = double.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture);
double end = double.Parse(match.Groups[3].Value, CultureInfo.InvariantCulture);
string value = match.Groups[4].Value;
if (!timedEvents.ContainsKey(eventType)) {
timedEvents[eventType] = new List<TimedEvent>();
}
timedEvents[eventType].Add(new TimedEvent(eventType, start, end, value));
}
return timedEvents;
}
}
public class TimedEvent {
private readonly EventType eventType;
private readonly double start;
private readonly double end;
private readonly string value;
public TimedEvent(EventType eventType, double start, double end, string value) {
this.eventType = eventType;
this.start = start;
this.end = end;
this.value = value;
}
public EventType EventType {
get { return eventType; }
}
public double Start {
get { return start; }
}
public double End {
get { return end; }
}
public string Value {
get { return value; }
}
}
public class Config {
private string logFile;
private bool clearMarkers;
private bool clearRegions;
private bool loopRegionOnly;
private List<Visualization> visualizations = new List<Visualization>();
[DisplayName("Log File")]
[Description("A log file generated by Rhubarb Lip Sync.")]
[Editor(typeof(FileNameEditor), typeof(UITypeEditor))]
public string LogFile {
get { return logFile; }
set { logFile = value; }
}
[DisplayName("Log File")]
[Description("A log file generated by Rhubarb Lip Sync.")]
[Editor(typeof(FileNameEditor), typeof(UITypeEditor))]
public string LogFile {
get { return logFile; }
set { logFile = value; }
}
[DisplayName("Clear Markers")]
[Description("Clear all markers in the current project.")]
public bool ClearMarkers {
get { return clearMarkers; }
set { clearMarkers = value; }
}
[DisplayName("Clear Markers")]
[Description("Clear all markers in the current project.")]
public bool ClearMarkers {
get { return clearMarkers; }
set { clearMarkers = value; }
}
[DisplayName("Clear Regions")]
[Description("Clear all regions in the current project.")]
public bool ClearRegions {
get { return clearRegions; }
set { clearRegions = value; }
}
[DisplayName("Clear Regions")]
[Description("Clear all regions in the current project.")]
public bool ClearRegions {
get { return clearRegions; }
set { clearRegions = value; }
}
[DisplayName("Loop region only")]
[Description("Adds regions or markers to the loop region only.")]
public bool LoopRegionOnly {
get { return loopRegionOnly; }
set { loopRegionOnly = value; }
}
[DisplayName("Loop region only")]
[Description("Adds regions or markers to the loop region only.")]
public bool LoopRegionOnly {
get { return loopRegionOnly; }
set { loopRegionOnly = value; }
}
[DisplayName("Visualization rules")]
[Description("Specify how to visualize various log events.")]
[Editor(typeof(CollectionEditor), typeof(UITypeEditor))]
[XmlIgnore]
public List<Visualization> Visualizations {
get { return visualizations; }
set { visualizations = value; }
}
[DisplayName("Visualization rules")]
[Description("Specify how to visualize various log events.")]
[Editor(typeof(CollectionEditor), typeof(UITypeEditor))]
[XmlIgnore]
public List<Visualization> Visualizations {
get { return visualizations; }
set { visualizations = value; }
}
[Browsable(false)]
public Visualization[] VisualizationArray {
get { return visualizations.ToArray(); }
set { visualizations = new List<Visualization>(value); }
}
private static string ConfigFileName {
get {
string folder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
return Path.Combine(folder, "DebugRhubarbSettings.xml");
}
}
public static Config Load() {
try {
XmlSerializer serializer = new XmlSerializer(typeof(Config));
using (FileStream file = File.OpenRead(ConfigFileName)) {
return (Config) serializer.Deserialize(file);
}
} catch (Exception) {
return new Config();
}
}
public void Save() {
XmlSerializer serializer = new XmlSerializer(typeof(Config));
using (StreamWriter file = File.CreateText(ConfigFileName)) {
XmlWriterSettings settings = new XmlWriterSettings();
settings.Indent = true;
settings.IndentChars = "\t";
using (XmlWriter writer = XmlWriter.Create(file, settings)) {
serializer.Serialize(writer, this);
}
}
}
}
public class Visualization {
private EventType eventType;
private string regexString;
private VisualizationType visualizationType = VisualizationType.Marker;
[DisplayName("Event Type")]
[Description("The type of event to visualize.")]
public EventType EventType {
get { return eventType; }
set { eventType = value; }
}
[DisplayName("Event Type")]
[Description("The type of event to visualize.")]
public EventType EventType {
get { return eventType; }
set { eventType = value; }
}
[DisplayName("Regular Expression")]
[Description("A regular expression used to filter events. Leave empty to disable filtering.\nInput is a string of events in angle brackets. Example: '<AO>(?=<T>)' finds every AO phone followed by a T phone.")]
public string RegexString {
get { return regexString; }
set { regexString = value; }
}
[DisplayName("Regular Expression")]
[Description("A regular expression used to filter events. Leave empty to disable filtering.\nInput is a string of events in angle brackets. Example: '<AO>(?=<T>)' finds every AO phone followed by a T phone.")]
public string RegexString {
get { return regexString; }
set { regexString = value; }
}
[Browsable(false)]
public Regex Regex {
get { return string.IsNullOrEmpty(RegexString) ? null : new Regex(RegexString); }
}
[DisplayName("Visualization Type")]
[Description("Specify how to visualize events.")]
public VisualizationType VisualizationType {
get { return visualizationType; }
set { visualizationType = value; }
}
[DisplayName("Visualization Type")]
[Description("Specify how to visualize events.")]
public VisualizationType VisualizationType {
get { return visualizationType; }
set { visualizationType = value; }
}
public override string ToString() {
return string.Format("{0} -> {1}", EventType, VisualizationType);
}
}
public enum EventType {
Utterance,
Word,
RawPhone,
Phone,
Shape,
Segment
}
public enum VisualizationType {
None,
Marker,
Region
}
public delegate void ImportAction();
public class ImportDialog : Form {
private readonly Config config;
private readonly ImportAction import;
public ImportDialog(Config config, ImportAction import) {
this.config = config;
this.import = import;
SuspendLayout();
InitializeComponent();
ResumeLayout(false);
}
private void InitializeComponent() {
// Configure dialog
Text = "Debug Rhubarb";
Size = new Size(600, 400);
Font = new Font(Font.FontFamily, 10);
// Add property grid
PropertyGrid propertyGrid1 = new PropertyGrid();
propertyGrid1.SelectedObject = config;
Controls.Add(propertyGrid1);
propertyGrid1.Dock = DockStyle.Fill;
// Add button panel
FlowLayoutPanel buttonPanel = new FlowLayoutPanel();
buttonPanel.FlowDirection = FlowDirection.RightToLeft;
buttonPanel.AutoSize = true;
buttonPanel.Dock = DockStyle.Bottom;
Controls.Add(buttonPanel);
// Add Cancel button
Button cancelButton1 = new Button();
cancelButton1.Text = "Cancel";
cancelButton1.DialogResult = DialogResult.Cancel;
buttonPanel.Controls.Add(cancelButton1);
CancelButton = cancelButton1;
// Add OK button
Button okButton1 = new Button();
okButton1.Text = "OK";
okButton1.Click += OkButtonClickedHandler;
buttonPanel.Controls.Add(okButton1);
AcceptButton = okButton1;
}
private void OkButtonClickedHandler(object sender, EventArgs e) {
try {
import();
DialogResult = DialogResult.OK;
} catch (Exception exception) {
MessageBox.Show(exception.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
}
}
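One detail of FilterEvents above is worth spelling out: it serializes all events into a single string of <value> tokens, runs the user's regex over that string, and keeps every event whose characters are covered by a non-empty match. That is what makes lookahead filters such as '<AO>(?=<T>)' work across event boundaries. The same idea, sketched in Kotlin over plain string values (function and parameter names are illustrative):

fun filterEvents(values: List<String>, filter: Regex): List<String> {
    // Remember which event owns each character of the "<A><B>..." string.
    val ownerIndices = mutableListOf<Int>()
    val text = buildString {
        values.forEachIndexed { index, value ->
            val token = "<$value>"
            repeat(token.length) { ownerIndices.add(index) }
            append(token)
        }
    }
    // Keep every event that overlaps at least one non-empty match, in original order.
    return filter.findAll(text)
        .filter { it.value.isNotEmpty() }
        .flatMap { match -> match.range.asSequence().map { ownerIndices[it] } }
        .distinct()
        .sorted()
        .map { values[it] }
        .toList()
}

// filterEvents(listOf("AO", "T", "AO"), Regex("<AO>(?=<T>)")) == listOf("AO")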

View File

@ -14,161 +14,161 @@ using System.Xml.Serialization;
using ScriptPortal.Vegas; // For older versions, this should say Sony.Vegas
public class EntryPoint {
public void FromVegas(Vegas vegas) {
Config config = Config.Load();
ImportDialog importDialog = new ImportDialog(config, delegate { Import(config, vegas); });
importDialog.ShowDialog();
config.Save();
}
private void Import(Config config, Vegas vegas) {
// Load XML file
if (!File.Exists(config.XmlFile)) {
throw new Exception("XML file does not exist.");
}
XmlDocument xmlDocument = new XmlDocument();
xmlDocument.Load(config.XmlFile);
// Determine image file names
XmlNodeList mouthCueElements = xmlDocument.SelectNodes("//mouthCue");
List<string> shapeNames = new List<string>();
foreach (XmlElement mouthCueElement in mouthCueElements) {
if (!shapeNames.Contains(mouthCueElement.InnerText)) {
shapeNames.Add(mouthCueElement.InnerText);
}
}
Dictionary<string, string> imageFileNames = GetImageFileNames(config.OneImageFile, shapeNames.ToArray());
// Create new project
bool promptSave = !config.DiscardChanges;
bool showDialog = false;
Project project = new Project(promptSave, showDialog);
// Set frame size
Bitmap testImage = new Bitmap(config.OneImageFile);
project.Video.Width = testImage.Width;
project.Video.Height = testImage.Height;
// Set frame rate
if (config.FrameRate < 0.1 || config.FrameRate > 100) {
throw new Exception("Invalid frame rate.");
}
project.Video.FrameRate = config.FrameRate;
// Set other video settings
project.Video.FieldOrder = VideoFieldOrder.ProgressiveScan;
project.Video.PixelAspectRatio = 1;
// Add video track with images
VideoTrack videoTrack = vegas.Project.AddVideoTrack();
foreach (XmlElement mouthCueElement in mouthCueElements) {
Timecode start = GetTimecode(mouthCueElement.Attributes["start"]);
Timecode length = GetTimecode(mouthCueElement.Attributes["end"]) - start;
VideoEvent videoEvent = videoTrack.AddVideoEvent(start, length);
Media imageMedia = new Media(imageFileNames[mouthCueElement.InnerText]);
videoEvent.AddTake(imageMedia.GetVideoStreamByIndex(0));
}
// Add audio track with original sound file
AudioTrack audioTrack = vegas.Project.AddAudioTrack();
Media audioMedia = new Media(xmlDocument.SelectSingleNode("//soundFile").InnerText);
AudioEvent audioEvent = audioTrack.AddAudioEvent(new Timecode(0), audioMedia.Length);
audioEvent.AddTake(audioMedia.GetAudioStreamByIndex(0));
}
private static Timecode GetTimecode(XmlAttribute valueAttribute) {
double seconds = Double.Parse(valueAttribute.Value, CultureInfo.InvariantCulture);
return Timecode.FromSeconds(seconds);
}
private Dictionary<string, string> GetImageFileNames(string oneImageFile, string[] shapeNames) {
if (oneImageFile == null) {
throw new Exception("Image file name not set.");
}
Regex nameRegex = new Regex(@"(?<=-)([^-]*)(?=\.[^.]+$)");
if (!nameRegex.IsMatch(oneImageFile)) {
throw new Exception("Image file name doesn't have expected format.");
}
Dictionary<string, string> result = new Dictionary<string, string>();
foreach (string shapeName in shapeNames) {
string imageFileName = nameRegex.Replace(oneImageFile, shapeName);
if (!File.Exists(imageFileName)) {
throw new Exception(string.Format("Image file '{0}' not found.", imageFileName));
}
result[shapeName] = imageFileName;
}
return result;
}
}
public class Config {
private string xmlFile;
private string oneImageFile;
private double frameRate = 100;
private bool discardChanges = false;
[DisplayName("XML File")]
[Description("An XML file generated by Rhubarb Lip Sync.")]
[Editor(typeof(XmlFileEditor), typeof(UITypeEditor))]
public string XmlFile {
get { return xmlFile; }
set { xmlFile = value; }
}
[DisplayName("XML File")]
[Description("An XML file generated by Rhubarb Lip Sync.")]
[Editor(typeof(XmlFileEditor), typeof(UITypeEditor))]
public string XmlFile {
get { return xmlFile; }
set { xmlFile = value; }
}
[DisplayName("One image file")]
[Description("Any image file out of the set of image files representing the mouth chart.")]
[Editor(typeof(FileNameEditor), typeof(UITypeEditor))]
public string OneImageFile {
get { return oneImageFile; }
set { oneImageFile = value; }
}
[DisplayName("One image file")]
[Description("Any image file out of the set of image files representing the mouth chart.")]
[Editor(typeof(FileNameEditor), typeof(UITypeEditor))]
public string OneImageFile {
get { return oneImageFile; }
set { oneImageFile = value; }
}
[DisplayName("Frame rate")]
[Description("The frame rate for the new project.")]
public double FrameRate {
get { return frameRate; }
set { frameRate = value; }
}
[DisplayName("Frame rate")]
[Description("The frame rate for the new project.")]
public double FrameRate {
get { return frameRate; }
set { frameRate = value; }
}
[DisplayName("Discard Changes")]
[Description("Discard all changes to the current project without prompting to save.")]
public bool DiscardChanges {
get { return discardChanges; }
set { discardChanges = value; }
}
[DisplayName("Discard Changes")]
[Description("Discard all changes to the current project without prompting to save.")]
public bool DiscardChanges {
get { return discardChanges; }
set { discardChanges = value; }
}
private static string ConfigFileName {
get {
string folder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
return Path.Combine(folder, "ImportRhubarbSettings.xml");
}
}
public static Config Load() {
try {
XmlSerializer serializer = new XmlSerializer(typeof(Config));
using (FileStream file = File.OpenRead(ConfigFileName)) {
return (Config) serializer.Deserialize(file);
}
} catch (Exception) {
return new Config();
}
}
public void Save() {
XmlSerializer serializer = new XmlSerializer(typeof(Config));
using (StreamWriter file = File.CreateText(ConfigFileName)) {
XmlWriterSettings settings = new XmlWriterSettings();
settings.Indent = true;
settings.IndentChars = "\t";
using (XmlWriter writer = XmlWriter.Create(file, settings)) {
serializer.Serialize(writer, this);
}
}
}
}
@ -176,58 +176,58 @@ public delegate void ImportAction();
public class ImportDialog : Form {
private readonly Config config;
private readonly ImportAction import;
public ImportDialog(Config config, ImportAction import) {
this.config = config;
this.import = import;
SuspendLayout();
InitializeComponent();
ResumeLayout(false);
}
private void InitializeComponent() {
// Configure dialog
Text = "Import Rhubarb";
Size = new Size(600, 400);
Font = new Font(Font.FontFamily, 10);
// Add property grid
PropertyGrid propertyGrid1 = new PropertyGrid();
propertyGrid1.SelectedObject = config;
Controls.Add(propertyGrid1);
propertyGrid1.Dock = DockStyle.Fill;
// Add button panel
FlowLayoutPanel buttonPanel = new FlowLayoutPanel();
buttonPanel.FlowDirection = FlowDirection.RightToLeft;
buttonPanel.AutoSize = true;
buttonPanel.Dock = DockStyle.Bottom;
Controls.Add(buttonPanel);
// Add Cancel button
Button cancelButton1 = new Button();
cancelButton1.Text = "Cancel";
cancelButton1.DialogResult = DialogResult.Cancel;
buttonPanel.Controls.Add(cancelButton1);
CancelButton = cancelButton1;
// Add OK button
Button okButton1 = new Button();
okButton1.Text = "OK";
okButton1.Click += OkButtonClickedHandler;
buttonPanel.Controls.Add(okButton1);
AcceptButton = okButton1;
}
private void OkButtonClickedHandler(object sender, EventArgs e) {
try {
import();
DialogResult = DialogResult.OK;
} catch (Exception exception) {
MessageBox.Show(exception.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
}
}
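The regex in GetImageFileNames encodes the naming convention this script expects: all mouth images share one base name and differ only in the shape part between the last '-' and the file extension. A Kotlin sketch of the same substitution (file names are hypothetical):

val shapePart = Regex("""(?<=-)([^-]*)(?=\.[^.]+$)""")
// "mouth-A.png" becomes "mouth-B.png": only the shape name is swapped.
val imageForB = shapePart.replace("mouth-A.png", "B")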

View File

@ -11,37 +11,37 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Enable POSIX threads
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
endif()
# Use static run-time
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_compile_options(/MT$<$<CONFIG:Debug>:d>)
endif()
# Set global flags and define flags variables for later use
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(enableWarningsFlags "-Wall;-Wextra")
set(disableWarningsFlags "-w")
set(enableWarningsFlags "-Wall;-Wextra")
set(disableWarningsFlags "-w")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
set(enableWarningsFlags "/W4")
set(disableWarningsFlags "/W0")
set(enableWarningsFlags "/W4")
set(disableWarningsFlags "/W0")
# Disable warning C4458: declaration of '...' hides class member
# I'm doing that on purpose.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4458")
# Assume UTF-8 encoding for source files and encode string constants in UTF-8
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")
endif()
# Use UTF-8 throughout
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_compile_options("/utf-8")
add_compile_options("/utf-8")
endif()
if(${UNIX})
add_definitions(-DHAVE_UNISTD_H)
endif()
# Enable project folders
@ -69,9 +69,9 @@ set_target_properties(cppFormat PROPERTIES FOLDER lib)
FILE(GLOB_RECURSE sphinxbaseFiles "lib/sphinxbase-rev13216/src/libsphinxbase/*.c")
add_library(sphinxbase ${sphinxbaseFiles})
target_include_directories(sphinxbase SYSTEM PUBLIC
"lib/sphinxbase-rev13216/include"
"lib/sphinxbase-rev13216/src"
"lib/sphinx_config"
"lib/sphinxbase-rev13216/include"
"lib/sphinxbase-rev13216/src"
"lib/sphinx_config"
)
target_compile_options(sphinxbase PRIVATE ${disableWarningsFlags})
target_compile_definitions(sphinxbase PUBLIC __SPHINXBASE_EXPORT_H__=1 SPHINXBASE_EXPORT=) # Compile as static lib
@ -81,8 +81,8 @@ set_target_properties(sphinxbase PROPERTIES FOLDER lib)
FILE(GLOB pocketSphinxFiles "lib/pocketsphinx-rev13216/src/libpocketsphinx/*.c")
add_library(pocketSphinx ${pocketSphinxFiles})
target_include_directories(pocketSphinx SYSTEM PUBLIC
"lib/pocketsphinx-rev13216/include"
"lib/pocketsphinx-rev13216/src/libpocketsphinx"
"lib/pocketsphinx-rev13216/include"
"lib/pocketsphinx-rev13216/src/libpocketsphinx"
)
target_link_libraries(pocketSphinx sphinxbase)
target_compile_options(pocketSphinx PRIVATE ${disableWarningsFlags})
@ -108,23 +108,23 @@ include_directories(SYSTEM "lib/gsl/include")
# ... WebRTC
set(webRtcFiles
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/cross_correlation.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/division_operations.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/downsample_fast.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/energy.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/get_scaling_square.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/min_max_operations.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_48khz.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_by_2_internal.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_fractional.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/spl_init.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/spl_inl.c
lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/vector_scaling_operations.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_core.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_filterbank.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_gmm.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_sp.c
lib/webrtc-8d2248ff/webrtc/common_audio/vad/webrtc_vad.c
)
add_library(webRtc ${webRtcFiles})
target_include_directories(webRtc SYSTEM PUBLIC "lib/webrtc-8d2248ff")
@ -133,7 +133,7 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
target_compile_options(webRtc PRIVATE -pthread -lpthread)
endif()
if (NOT WIN32)
target_compile_definitions(webRtc PRIVATE WEBRTC_POSIX)
endif()
set_target_properties(webRtc PROPERTIES FOLDER lib)
@ -144,76 +144,76 @@ set_target_properties(whereami PROPERTIES FOLDER lib)
# ... Flite
set(fliteFiles
lib/flite-1.4/lang/cmulex/cmu_lex.c
lib/flite-1.4/lang/cmulex/cmu_lex_data.c
lib/flite-1.4/lang/cmulex/cmu_lex_entries.c
lib/flite-1.4/lang/cmulex/cmu_lts_model.c
lib/flite-1.4/lang/cmulex/cmu_lts_rules.c
lib/flite-1.4/lang/cmulex/cmu_postlex.c
lib/flite-1.4/lang/usenglish/us_aswd.c
lib/flite-1.4/lang/usenglish/us_dur_stats.c
lib/flite-1.4/lang/usenglish/us_durz_cart.c
lib/flite-1.4/lang/usenglish/us_expand.c
lib/flite-1.4/lang/usenglish/us_f0_model.c
lib/flite-1.4/lang/usenglish/us_f0lr.c
lib/flite-1.4/lang/usenglish/us_ffeatures.c
lib/flite-1.4/lang/usenglish/us_gpos.c
lib/flite-1.4/lang/usenglish/us_int_accent_cart.c
lib/flite-1.4/lang/usenglish/us_int_tone_cart.c
lib/flite-1.4/lang/usenglish/us_nums_cart.c
lib/flite-1.4/lang/usenglish/us_phoneset.c
lib/flite-1.4/lang/usenglish/us_phrasing_cart.c
lib/flite-1.4/lang/usenglish/us_pos_cart.c
lib/flite-1.4/lang/usenglish/us_text.c
lib/flite-1.4/lang/usenglish/usenglish.c
lib/flite-1.4/src/audio/au_none.c
lib/flite-1.4/src/audio/au_streaming.c
lib/flite-1.4/src/audio/audio.c
lib/flite-1.4/src/hrg/cst_ffeature.c
lib/flite-1.4/src/hrg/cst_item.c
lib/flite-1.4/src/hrg/cst_relation.c
lib/flite-1.4/src/hrg/cst_utterance.c
lib/flite-1.4/src/lexicon/cst_lexicon.c
lib/flite-1.4/src/lexicon/cst_lts.c
lib/flite-1.4/src/regex/cst_regex.c
lib/flite-1.4/src/regex/regexp.c
lib/flite-1.4/src/speech/cst_lpcres.c
lib/flite-1.4/src/speech/cst_track.c
lib/flite-1.4/src/speech/cst_wave.c
lib/flite-1.4/src/speech/cst_wave_io.c
lib/flite-1.4/src/speech/cst_wave_utils.c
lib/flite-1.4/src/speech/rateconv.c
lib/flite-1.4/src/stats/cst_cart.c
lib/flite-1.4/src/synth/cst_ffeatures.c
lib/flite-1.4/src/synth/cst_phoneset.c
lib/flite-1.4/src/synth/cst_synth.c
lib/flite-1.4/src/synth/cst_utt_utils.c
lib/flite-1.4/src/synth/cst_voice.c
lib/flite-1.4/src/synth/flite.c
lib/flite-1.4/src/utils/cst_alloc.c
lib/flite-1.4/src/utils/cst_endian.c
lib/flite-1.4/src/utils/cst_error.c
lib/flite-1.4/src/utils/cst_features.c
lib/flite-1.4/src/utils/cst_file_stdio.c
lib/flite-1.4/src/utils/cst_string.c
lib/flite-1.4/src/utils/cst_tokenstream.c
lib/flite-1.4/src/utils/cst_val.c
lib/flite-1.4/src/utils/cst_val_const.c
lib/flite-1.4/src/utils/cst_val_user.c
lib/flite-1.4/src/utils/cst_val_user.c
)
add_library(flite ${fliteFiles})
target_include_directories(flite SYSTEM PUBLIC
"lib/flite-1.4/include"
"lib/flite-1.4"
"lib/flite-1.4/include"
"lib/flite-1.4"
)
target_compile_options(flite PRIVATE ${disableWarningsFlags})
set_target_properties(flite PROPERTIES FOLDER lib)
# ... UTF8-CPP
add_library(utfcpp
lib/header-only.c
lib/utfcpp-2.3.5/source/utf8.h
)
target_include_directories(utfcpp SYSTEM PUBLIC "lib/utfcpp-2.3.5/source")
target_compile_options(utfcpp PRIVATE ${disableWarningsFlags})
@ -221,8 +221,8 @@ set_target_properties(utfcpp PROPERTIES FOLDER lib)
# ... utf8proc
add_library(utf8proc
lib/utf8proc-2.2.0/utf8proc.c
lib/utf8proc-2.2.0/utf8proc.h
)
target_include_directories(utf8proc SYSTEM PUBLIC "lib/utf8proc-2.2.0")
target_compile_options(utf8proc PRIVATE ${disableWarningsFlags})
@ -231,9 +231,9 @@ set_target_properties(utf8proc PROPERTIES FOLDER lib)
# ... Ogg
add_library(ogg
lib/ogg-1.3.3/include/ogg/ogg.h
lib/ogg-1.3.3/src/bitwise.c
lib/ogg-1.3.3/src/framing.c
)
target_include_directories(ogg SYSTEM PUBLIC "lib/ogg-1.3.3/include")
target_compile_options(ogg PRIVATE ${disableWarningsFlags})
@ -241,30 +241,30 @@ set_target_properties(ogg PROPERTIES FOLDER lib)
# ... Vorbis
add_library(vorbis
    lib/vorbis-1.3.6/include/vorbis/vorbisfile.h
    lib/vorbis-1.3.6/lib/bitrate.c
    lib/vorbis-1.3.6/lib/block.c
    lib/vorbis-1.3.6/lib/codebook.c
    lib/vorbis-1.3.6/lib/envelope.c
    lib/vorbis-1.3.6/lib/floor0.c
    lib/vorbis-1.3.6/lib/floor1.c
    lib/vorbis-1.3.6/lib/info.c
    lib/vorbis-1.3.6/lib/lpc.c
    lib/vorbis-1.3.6/lib/lsp.c
    lib/vorbis-1.3.6/lib/mapping0.c
    lib/vorbis-1.3.6/lib/mdct.c
    lib/vorbis-1.3.6/lib/psy.c
    lib/vorbis-1.3.6/lib/registry.c
    lib/vorbis-1.3.6/lib/res0.c
    lib/vorbis-1.3.6/lib/sharedbook.c
    lib/vorbis-1.3.6/lib/smallft.c
    lib/vorbis-1.3.6/lib/synthesis.c
    lib/vorbis-1.3.6/lib/vorbisfile.c
    lib/vorbis-1.3.6/lib/window.c
)
target_include_directories(vorbis SYSTEM PUBLIC "lib/vorbis-1.3.6/include")
target_link_libraries(vorbis
    ogg
)
target_compile_options(vorbis PRIVATE ${disableWarningsFlags})
set_target_properties(vorbis PROPERTIES FOLDER lib)
@@ -275,303 +275,303 @@ include_directories("src")
# ... rhubarb-animation
add_library(rhubarb-animation
    src/animation/animationRules.cpp
    src/animation/animationRules.h
    src/animation/mouthAnimation.cpp
    src/animation/mouthAnimation.h
    src/animation/pauseAnimation.cpp
    src/animation/pauseAnimation.h
    src/animation/roughAnimation.cpp
    src/animation/roughAnimation.h
    src/animation/ShapeRule.cpp
    src/animation/ShapeRule.h
    src/animation/shapeShorthands.h
    src/animation/staticSegments.cpp
    src/animation/staticSegments.h
    src/animation/targetShapeSet.cpp
    src/animation/targetShapeSet.h
    src/animation/timingOptimization.cpp
    src/animation/timingOptimization.h
    src/animation/tweening.cpp
    src/animation/tweening.h
)
target_include_directories(rhubarb-animation PRIVATE "src/animation")
target_link_libraries(rhubarb-animation
    rhubarb-core
    rhubarb-logging
    rhubarb-time
)
# ... rhubarb-audio
add_library(rhubarb-audio
    src/audio/AudioClip.cpp
    src/audio/AudioClip.h
    src/audio/audioFileReading.cpp
    src/audio/audioFileReading.h
    src/audio/AudioSegment.cpp
    src/audio/AudioSegment.h
    src/audio/DcOffset.cpp
    src/audio/DcOffset.h
    src/audio/ioTools.h
    src/audio/OggVorbisFileReader.cpp
    src/audio/OggVorbisFileReader.h
    src/audio/processing.cpp
    src/audio/processing.h
    src/audio/SampleRateConverter.cpp
    src/audio/SampleRateConverter.h
    src/audio/voiceActivityDetection.cpp
    src/audio/voiceActivityDetection.h
    src/audio/WaveFileReader.cpp
    src/audio/WaveFileReader.h
    src/audio/waveFileWriting.cpp
    src/audio/waveFileWriting.h
)
target_include_directories(rhubarb-audio PRIVATE "src/audio")
target_link_libraries(rhubarb-audio
    webRtc
    vorbis
    rhubarb-logging
    rhubarb-time
    rhubarb-tools
)
# ... rhubarb-core
configure_file(src/core/appInfo.cpp.in appInfo.cpp ESCAPE_QUOTES)
add_library(rhubarb-core
    ${CMAKE_CURRENT_BINARY_DIR}/appInfo.cpp
    src/core/appInfo.h
    src/core/Phone.cpp
    src/core/Phone.h
    src/core/Shape.cpp
    src/core/Shape.h
)
target_include_directories(rhubarb-core PRIVATE "src/core")
target_link_libraries(rhubarb-core
    rhubarb-tools
)
# ... rhubarb-exporters
add_library(rhubarb-exporters
    src/exporters/DatExporter.cpp
    src/exporters/DatExporter.h
    src/exporters/Exporter.h
    src/exporters/exporterTools.cpp
    src/exporters/exporterTools.h
    src/exporters/JsonExporter.cpp
    src/exporters/JsonExporter.h
    src/exporters/TsvExporter.cpp
    src/exporters/TsvExporter.h
    src/exporters/XmlExporter.cpp
    src/exporters/XmlExporter.h
)
target_include_directories(rhubarb-exporters PRIVATE "src/exporters")
target_link_libraries(rhubarb-exporters
    rhubarb-animation
    rhubarb-core
    rhubarb-time
)
# ... rhubarb-lib
add_library(rhubarb-lib
    src/lib/rhubarbLib.cpp
    src/lib/rhubarbLib.h
)
target_include_directories(rhubarb-lib PRIVATE "src/lib")
target_link_libraries(rhubarb-lib
    rhubarb-animation
    rhubarb-audio
    rhubarb-core
    rhubarb-recognition
    rhubarb-time
    rhubarb-tools
)
# ... rhubarb-logging
add_library(rhubarb-logging
    src/logging/Entry.cpp
    src/logging/Entry.h
    src/logging/Formatter.h
    src/logging/formatters.cpp
    src/logging/formatters.h
    src/logging/Level.cpp
    src/logging/Level.h
    src/logging/logging.cpp
    src/logging/logging.h
    src/logging/Sink.h
    src/logging/sinks.cpp
    src/logging/sinks.h
)
target_include_directories(rhubarb-logging PRIVATE "src/logging")
target_link_libraries(rhubarb-logging
    rhubarb-tools
)
# ... rhubarb-recognition
add_library(rhubarb-recognition
    src/recognition/g2p.cpp
    src/recognition/g2p.h
    src/recognition/languageModels.cpp
    src/recognition/languageModels.h
    src/recognition/PhoneticRecognizer.cpp
    src/recognition/PhoneticRecognizer.h
    src/recognition/PocketSphinxRecognizer.cpp
    src/recognition/PocketSphinxRecognizer.h
    src/recognition/pocketSphinxTools.cpp
    src/recognition/pocketSphinxTools.h
    src/recognition/Recognizer.h
    src/recognition/tokenization.cpp
    src/recognition/tokenization.h
)
target_include_directories(rhubarb-recognition PRIVATE "src/recognition")
target_link_libraries(rhubarb-recognition
    flite
    pocketSphinx
    rhubarb-audio
    rhubarb-core
    rhubarb-logging
)
# ... rhubarb-time
add_library(rhubarb-time
    src/time/BoundedTimeline.h
    src/time/centiseconds.cpp
    src/time/centiseconds.h
    src/time/ContinuousTimeline.h
    src/time/Timed.h
    src/time/timedLogging.h
    src/time/Timeline.h
    src/time/TimeRange.cpp
    src/time/TimeRange.h
)
target_include_directories(rhubarb-time PRIVATE "src/time")
target_link_libraries(rhubarb-time
    cppFormat
    rhubarb-logging
)
# ... rhubarb-tools
add_library(rhubarb-tools
    src/tools/array.h
    src/tools/EnumConverter.h
    src/tools/exceptions.cpp
    src/tools/exceptions.h
    src/tools/fileTools.cpp
    src/tools/fileTools.h
    src/tools/Lazy.h
    src/tools/nextCombination.h
    src/tools/NiceCmdLineOutput.cpp
    src/tools/NiceCmdLineOutput.h
    src/tools/ObjectPool.h
    src/tools/pairs.h
    src/tools/parallel.h
    src/tools/platformTools.cpp
    src/tools/platformTools.h
    src/tools/progress.cpp
    src/tools/progress.h
    src/tools/ProgressBar.cpp
    src/tools/ProgressBar.h
    src/tools/stringTools.cpp
    src/tools/stringTools.h
    src/tools/TablePrinter.cpp
    src/tools/TablePrinter.h
    src/tools/textFiles.cpp
    src/tools/textFiles.h
    src/tools/tools.cpp
    src/tools/tools.h
    src/tools/tupleHash.h
)
target_include_directories(rhubarb-tools PRIVATE "src/tools")
target_link_libraries(rhubarb-tools
    cppFormat
    whereami
    utfcpp
    utf8proc
)
# Define Rhubarb executable
add_executable(rhubarb
    src/rhubarb/main.cpp
    src/rhubarb/ExportFormat.cpp
    src/rhubarb/ExportFormat.h
    src/rhubarb/RecognizerType.cpp
    src/rhubarb/RecognizerType.h
    src/rhubarb/semanticEntries.cpp
    src/rhubarb/semanticEntries.h
    src/rhubarb/sinks.cpp
    src/rhubarb/sinks.h
)
target_include_directories(rhubarb PUBLIC "src/rhubarb")
target_link_libraries(rhubarb
    rhubarb-exporters
    rhubarb-lib
)
target_compile_options(rhubarb PUBLIC ${enableWarningsFlags})
# Define test project
#include_directories("${gtest_SOURCE_DIR}/include")
set(TEST_FILES
    tests/stringToolsTests.cpp
    tests/TimelineTests.cpp
    tests/BoundedTimelineTests.cpp
    tests/ContinuousTimelineTests.cpp
    tests/pairsTests.cpp
    tests/tokenizationTests.cpp
    tests/g2pTests.cpp
    tests/LazyTests.cpp
    tests/WaveFileReaderTests.cpp
)
add_executable(runTests ${TEST_FILES})
target_link_libraries(runTests
    gtest
    gmock
    gmock_main
    rhubarb-recognition
    rhubarb-time
    rhubarb-audio
)
# Copies the specified files in a post-build event, then installs them
function(copy_and_install sourceGlob relativeTargetDirectory)
    # Set `sourcePaths`
    file(GLOB sourcePaths "${sourceGlob}")

    foreach(sourcePath ${sourcePaths})
        if(NOT IS_DIRECTORY ${sourcePath})
            # Set `fileName`
            get_filename_component(fileName "${sourcePath}" NAME)

            # Copy file during build
            add_custom_command(TARGET rhubarb POST_BUILD
                COMMAND ${CMAKE_COMMAND} -E copy "${sourcePath}" "$<TARGET_FILE_DIR:rhubarb>/${relativeTargetDirectory}/${fileName}"
                COMMENT "Creating '${relativeTargetDirectory}/${fileName}'"
            )

            # Install file
            install(
                FILES "${sourcePath}"
                DESTINATION "${relativeTargetDirectory}"
            )
        endif()
    endforeach()
endfunction()
# Copies the specified files in a post-build event
function(copy sourceGlob relativeTargetDirectory)
    # Set `sourcePaths`
    file(GLOB sourcePaths "${sourceGlob}")

    foreach(sourcePath ${sourcePaths})
        if(NOT IS_DIRECTORY ${sourcePath})
            # Set `fileName`
            get_filename_component(fileName "${sourcePath}" NAME)

            # Copy file during build
            add_custom_command(TARGET rhubarb POST_BUILD
                COMMAND ${CMAKE_COMMAND} -E copy "${sourcePath}" "$<TARGET_FILE_DIR:rhubarb>/${relativeTargetDirectory}/${fileName}"
                COMMENT "Creating '${relativeTargetDirectory}/${fileName}'"
            )
        endif()
    endforeach()
endfunction()
copy_and_install("lib/pocketsphinx-rev13216/model/en-us/*" "res/sphinx")
@@ -580,7 +580,7 @@ copy_and_install("lib/cmusphinx-en-us-5.2/*" "res/sphinx/acoustic-model")
copy_and_install("tests/resources/*" "tests/resources")
install(
    TARGETS rhubarb
    RUNTIME
    DESTINATION .
)

View File

@@ -8,79 +8,79 @@ using boost::adaptors::transformed;
template<typename T, bool AutoJoin>
ContinuousTimeline<optional<T>, AutoJoin> boundedTimelinetoContinuousOptional(
    const BoundedTimeline<T, AutoJoin>& timeline
) {
    return {
        timeline.getRange(),
        boost::none,
        timeline | transformed([](const Timed<T>& timedValue) {
            return Timed<optional<T>>(timedValue.getTimeRange(), timedValue.getValue());
        })
    };
}
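In isolation, the helper above only wraps each timed value in an optional and makes the gaps explicit. A rough standalone analogue over plain vectors (char payloads and int timestamps are stand-ins for the project's Phone and centiseconds types):

#include <iostream>
#include <optional>
#include <vector>

// A timed value over [start, end); nullopt marks silence
struct Timed { int start, end; std::optional<char> value; };

int main() {
    // Bounded timeline over [0, 10) with gaps: values only at [2, 4) and [6, 7)
    const std::vector<Timed> bounded { { 2, 4, 'A' }, { 6, 7, 'B' } };

    // Continuous version: every gap becomes an explicit nullopt segment,
    // so iteration visits silences instead of skipping them
    std::vector<Timed> continuous;
    int cursor = 0;
    for (const Timed& t : bounded) {
        if (t.start > cursor) continuous.push_back({ cursor, t.start, std::nullopt });
        continuous.push_back(t);
        cursor = t.end;
    }
    if (cursor < 10) continuous.push_back({ cursor, 10, std::nullopt });

    for (const Timed& t : continuous)
        std::cout << '[' << t.start << ',' << t.end << ") "
            << (t.value ? *t.value : '-') << '\n';
}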
ShapeRule::ShapeRule(
    ShapeSet shapeSet,
    optional<Phone> phone,
    TimeRange phoneTiming
) :
    shapeSet(std::move(shapeSet)),
    phone(std::move(phone)),
    phoneTiming(phoneTiming)
{}
ShapeRule ShapeRule::getInvalid() {
    return { {}, boost::none, { 0_cs, 0_cs } };
}
bool ShapeRule::operator==(const ShapeRule& rhs) const {
    return shapeSet == rhs.shapeSet && phone == rhs.phone && phoneTiming == rhs.phoneTiming;
}
bool ShapeRule::operator!=(const ShapeRule& rhs) const {
    return !operator==(rhs);
}
bool ShapeRule::operator<(const ShapeRule& rhs) const {
    return shapeSet < rhs.shapeSet
        || phone < rhs.phone
        || phoneTiming.getStart() < rhs.phoneTiming.getStart()
        || phoneTiming.getEnd() < rhs.phoneTiming.getEnd();
}
ContinuousTimeline<ShapeRule> getShapeRules(const BoundedTimeline<Phone>& phones) {
    // Convert to continuous timeline so that silences aren't skipped when iterating
    auto continuousPhones = boundedTimelinetoContinuousOptional(phones);

    // Create timeline of shape rules
    ContinuousTimeline<ShapeRule> shapeRules(
        phones.getRange(),
        { { Shape::X }, boost::none, { 0_cs, 0_cs } }
    );
    centiseconds previousDuration = 0_cs;
    for (const auto& timedPhone : continuousPhones) {
        optional<Phone> phone = timedPhone.getValue();
        const centiseconds duration = timedPhone.getDuration();

        if (phone) {
            // Animate one phone
            Timeline<ShapeSet> phoneShapeSets = getShapeSets(*phone, duration, previousDuration);

            // Result timing is relative to phone. Make absolute.
            phoneShapeSets.shift(timedPhone.getStart());

            // Copy to timeline.
            // Later shape sets may overwrite earlier ones if overlapping.
            for (const auto& timedShapeSet : phoneShapeSets) {
                shapeRules.set(
                    timedShapeSet.getTimeRange(),
                    ShapeRule(timedShapeSet.getValue(), phone, timedPhone.getTimeRange())
                );
            }
        }

        previousDuration = duration;
    }

    return shapeRules;
}

View File

@@ -7,17 +7,17 @@
#include "time/TimeRange.h"
struct ShapeRule {
    ShapeSet shapeSet;
    boost::optional<Phone> phone;
    TimeRange phoneTiming;

    ShapeRule(ShapeSet shapeSet, boost::optional<Phone> phone, TimeRange phoneTiming);

    static ShapeRule getInvalid();

    bool operator==(const ShapeRule&) const;
    bool operator!=(const ShapeRule&) const;
    bool operator<(const ShapeRule&) const;
};
// Returns shape rules for an entire timeline of phones.

View File

@@ -14,153 +14,153 @@ using std::map;
constexpr size_t shapeValueCount = static_cast<size_t>(Shape::EndSentinel);
Shape getBasicShape(Shape shape) {
    static constexpr array<Shape, shapeValueCount> basicShapes =
        make_array(A, B, C, D, E, F, A, C, A);
    return basicShapes[static_cast<size_t>(shape)];
}
Shape relax(Shape shape) {
    static constexpr array<Shape, shapeValueCount> relaxedShapes =
        make_array(A, B, B, C, C, B, X, B, X);
    return relaxedShapes[static_cast<size_t>(shape)];
}
Shape getClosestShape(Shape reference, ShapeSet shapes) {
    if (shapes.empty()) {
        throw std::invalid_argument("Cannot select from empty set of shapes.");
    }

    // A matrix that for each shape contains all shapes in ascending order of effort required to
    // move to them
    constexpr static array<array<Shape, shapeValueCount>, shapeValueCount> effortMatrix = make_array(
        /* A */ make_array(A, X, G, B, C, H, E, D, F),
        /* B */ make_array(B, G, A, X, C, H, E, D, F),
        /* C */ make_array(C, H, B, G, D, A, X, E, F),
        /* D */ make_array(D, C, H, B, G, A, X, E, F),
        /* E */ make_array(E, C, H, B, G, A, X, D, F),
        /* F */ make_array(F, B, G, A, X, C, H, E, D),
        /* G */ make_array(G, A, B, C, H, X, E, D, F),
        /* H */ make_array(H, C, B, G, D, A, X, E, F), // Like C
        /* X */ make_array(X, A, G, B, C, H, E, D, F) // Like A
    );

    auto& closestShapes = effortMatrix.at(static_cast<size_t>(reference));
    for (Shape closestShape : closestShapes) {
        if (shapes.find(closestShape) != shapes.end()) {
            return closestShape;
        }
    }

    throw std::invalid_argument("Unable to find closest shape.");
}
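A minimal standalone sketch of the row-walk above, with chars standing in for the Shape enum (only the row for reference shape D is shown; values copied from the matrix):

#include <array>
#include <iostream>
#include <set>
#include <stdexcept>

int main() {
    // Shapes in ascending order of effort when starting from D (row D of the matrix above)
    const std::array<char, 9> effortRowForD { 'D', 'C', 'H', 'B', 'G', 'A', 'X', 'E', 'F' };
    const std::set<char> candidates { 'A', 'B' }; // the rule's shape set

    // Walk the row and take the first candidate we meet
    for (char shape : effortRowForD) {
        if (candidates.count(shape)) {
            std::cout << "closest shape: " << shape << '\n'; // prints B
            return 0;
        }
    }
    throw std::invalid_argument("Unable to find closest shape.");
}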
optional<pair<Shape, TweenTiming>> getTween(Shape first, Shape second) {
    // Note that most of the following rules work in one direction only.
    // That's because in animation, the mouth should usually "pop" open without inbetweens,
    // then close slowly.
    static const map<pair<Shape, Shape>, pair<Shape, TweenTiming>> lookup {
        { { D, A }, { C, TweenTiming::Early } },
        { { D, B }, { C, TweenTiming::Centered } },
        { { D, G }, { C, TweenTiming::Early } },
        { { D, X }, { C, TweenTiming::Late } },
        { { C, F }, { E, TweenTiming::Centered } }, { { F, C }, { E, TweenTiming::Centered } },
        { { D, F }, { E, TweenTiming::Centered } },
        { { H, F }, { E, TweenTiming::Late } }, { { F, H }, { E, TweenTiming::Early } }
    };
    const auto it = lookup.find({ first, second });
    return it != lookup.end() ? it->second : optional<pair<Shape, TweenTiming>>();
}
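Because the map is keyed on ordered pairs, the rules really are one-directional. A standalone sketch (chars as stand-ins for shapes): (D, A) yields a tween while the reverse pair does not:

#include <iostream>
#include <map>
#include <utility>

enum class Timing { Early, Centered, Late };

int main() {
    // A few of the rules above; symmetric pairs must be listed twice
    const std::map<std::pair<char, char>, std::pair<char, Timing>> lookup {
        { { 'D', 'A' }, { 'C', Timing::Early } },
        { { 'C', 'F' }, { 'E', Timing::Centered } }, { { 'F', 'C' }, { 'E', Timing::Centered } }
    };

    const auto it = lookup.find({ 'D', 'A' });
    if (it != lookup.end()) std::cout << "tween shape: " << it->second.first << '\n'; // C
    std::cout << std::boolalpha
        << "reverse direction has a tween: " << (lookup.count({ 'A', 'D' }) > 0) << '\n'; // false
}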
Timeline<ShapeSet> getShapeSets(Phone phone, centiseconds duration, centiseconds previousDuration) {
    // Returns a timeline with a single shape set
    const auto single = [duration](ShapeSet value) {
        return Timeline<ShapeSet> { { 0_cs, duration, value } };
    };

    // Returns a timeline with two shape sets, timed as a diphthong
    const auto diphthong = [duration](ShapeSet first, ShapeSet second) {
        const centiseconds firstDuration = duration_cast<centiseconds>(duration * 0.6);
        return Timeline<ShapeSet> {
            { 0_cs, firstDuration, first },
            { firstDuration, duration, second }
        };
    };

    // Returns a timeline with two shape sets, timed as a plosive
    const auto plosive = [duration, previousDuration](ShapeSet first, ShapeSet second) {
        const centiseconds minOcclusionDuration = 4_cs;
        const centiseconds maxOcclusionDuration = 12_cs;
        const centiseconds occlusionDuration =
            clamp(previousDuration / 2, minOcclusionDuration, maxOcclusionDuration);
        return Timeline<ShapeSet> {
            { -occlusionDuration, 0_cs, first },
            { 0_cs, duration, second }
        };
    };

    // Returns the result of `getShapeSets` when called with identical arguments
    // except for a different phone.
    const auto like = [duration, previousDuration](Phone referencePhone) {
        return getShapeSets(referencePhone, duration, previousDuration);
    };

    static const ShapeSet any { A, B, C, D, E, F, G, H, X };
    static const ShapeSet anyOpen { B, C, D, E, F, G, H };

    // Note:
    // The shapes {A, B, G, X} are very similar. You should avoid regular shape sets containing more
    // than one of these shapes.
    // Otherwise, the resulting shape may be more or less random and might not be a good fit.
    // As an exception, a very flexible rule may contain *all* these shapes.

    switch (phone) {
        case Phone::AO: return single({ E });
        case Phone::AA: return single({ D });
        case Phone::IY: return single({ B });
        case Phone::UW: return single({ F });
        case Phone::EH: return single({ C });
        case Phone::IH: return single({ B });
        case Phone::UH: return single({ F });
        case Phone::AH: return duration < 20_cs ? single({ C }) : single({ D });
        case Phone::Schwa: return single({ B, C });
        case Phone::AE: return single({ C });
        case Phone::EY: return diphthong({ C }, { B });
        case Phone::AY: return duration < 20_cs ? diphthong({ C }, { B }) : diphthong({ D }, { B });
        case Phone::OW: return diphthong({ E }, { F });
        case Phone::AW: return duration < 30_cs ? diphthong({ C }, { E }) : diphthong({ D }, { E });
        case Phone::OY: return diphthong({ E }, { B });
        case Phone::ER: return duration < 7_cs ? like(Phone::Schwa) : single({ E });

        case Phone::P:
        case Phone::B: return plosive({ A }, any);
        case Phone::T:
        case Phone::D: return plosive({ B, F }, anyOpen);
        case Phone::K:
        case Phone::G: return plosive({ B, C, E, F, H }, anyOpen);
        case Phone::CH:
        case Phone::JH: return single({ B, F });
        case Phone::F:
        case Phone::V: return single({ G });
        case Phone::TH:
        case Phone::DH:
        case Phone::S:
        case Phone::Z:
        case Phone::SH:
        case Phone::ZH: return single({ B, F });
        case Phone::HH: return single(any); // think "m-hm"
        case Phone::M: return single({ A });
        case Phone::N: return single({ B, C, F, H });
        case Phone::NG: return single({ B, C, E, F });
        case Phone::L: return duration < 20_cs ? single({ B, E, F, H }) : single({ H });
        case Phone::R: return single({ B, E, F });
        case Phone::Y: return single({ B, C, F });
        case Phone::W: return single({ F });

        case Phone::Breath:
        case Phone::Cough:
        case Phone::Smack: return single({ C });
        case Phone::Noise: return single({ B });

        default: throw std::invalid_argument("Unexpected phone.");
    }
}
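The timing math above, run in isolation with centiseconds as plain ints (values assumed for illustration): a 30 cs diphthong splits at 60% of its duration, and a plosive's occlusion covers half the previous phone's duration, clamped to [4, 12] cs and placed before time zero:

#include <algorithm>
#include <iostream>

int main() {
    // Diphthong: the first shape set gets 60% of the phone's duration
    const int duration = 30; // centiseconds
    const int firstDuration = static_cast<int>(duration * 0.6);
    std::cout << "diphthong: [0, " << firstDuration << ") then ["
        << firstDuration << ", " << duration << ")\n"; // [0, 18) then [18, 30)

    // Plosive: the occlusion precedes the phone
    const int previousDuration = 30;
    const int occlusion = std::clamp(previousDuration / 2, 4, 12);
    std::cout << "plosive: [-" << occlusion << ", 0) then [0, " << duration << ")\n"; // [-12, 0)
}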

View File

@@ -16,14 +16,14 @@ Shape getClosestShape(Shape reference, ShapeSet shapes);
// Indicates how to time a tween between two mouth shapes
enum class TweenTiming {
    // Tween should end at the original transition
    Early,

    // Tween should overlap both original mouth shapes equally
    Centered,

    // Tween should begin at the original transition
    Late
};
// Returns the tween shape and timing to use to transition between the specified two mouth shapes.

View File

@@ -9,33 +9,33 @@
#include "staticSegments.h"
JoiningContinuousTimeline<Shape> animate(
    const BoundedTimeline<Phone>& phones,
    const ShapeSet& targetShapeSet
) {
    // Create timeline of shape rules
    ContinuousTimeline<ShapeRule> shapeRules = getShapeRules(phones);

    // Modify shape rules to only contain allowed shapes -- plus X, which is needed for pauses and
    // will be replaced later
    ShapeSet targetShapeSetPlusX = targetShapeSet;
    targetShapeSetPlusX.insert(Shape::X);
    shapeRules = convertToTargetShapeSet(shapeRules, targetShapeSetPlusX);

    // Animate in multiple steps
    const auto performMainAnimationSteps = [&targetShapeSet](const auto& shapeRules) {
        JoiningContinuousTimeline<Shape> animation = animateRough(shapeRules);
        animation = optimizeTiming(animation);
        animation = animatePauses(animation);
        animation = insertTweens(animation);
        animation = convertToTargetShapeSet(animation, targetShapeSet);
        return animation;
    };
    const JoiningContinuousTimeline<Shape> result =
        avoidStaticSegments(shapeRules, performMainAnimationSteps);

    for (const auto& timedShape : result) {
        logTimedEvent("shape", timedShape);
    }

    return result;
}

View File

@@ -6,6 +6,6 @@
#include "targetShapeSet.h"
JoiningContinuousTimeline<Shape> animate(
    const BoundedTimeline<Phone>& phones,
    const ShapeSet& targetShapeSet
);

View File

@@ -2,47 +2,47 @@
#include "animationRules.h"
Shape getPauseShape(Shape previous, Shape next, centiseconds duration) {
    // For very short pauses: Just hold the previous shape
    if (duration < 12_cs) {
        return previous;
    }

    // For short pauses: Relax the mouth
    if (duration <= 35_cs) {
        // It looks odd if the pause shape is identical to the next shape.
        // Make sure we find a relaxed shape that's different from the next one.
        for (Shape currentRelaxedShape = previous;;) {
            const Shape nextRelaxedShape = relax(currentRelaxedShape);
            if (nextRelaxedShape != next) {
                return nextRelaxedShape;
            }
            if (nextRelaxedShape == currentRelaxedShape) {
                // We're going in circles
                break;
            }
            currentRelaxedShape = nextRelaxedShape;
        }
    }

    // For longer pauses: Close the mouth
    return Shape::X;
}
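A standalone walk-through of the relax loop above, with the relax table transcribed from animationRules.cpp (chars as stand-ins for shapes): starting from E with next shape C, the first relaxed shape collides with `next`, so the loop relaxes once more and returns B:

#include <iostream>
#include <map>

int main() {
    // relax() per animationRules.cpp: A->A, B->B, C->B, D->C, E->C, F->B, G->X, H->B, X->X
    const std::map<char, char> relax {
        { 'A', 'A' }, { 'B', 'B' }, { 'C', 'B' }, { 'D', 'C' }, { 'E', 'C' },
        { 'F', 'B' }, { 'G', 'X' }, { 'H', 'B' }, { 'X', 'X' }
    };
    const char next = 'C';
    for (char current = 'E';;) {
        const char relaxed = relax.at(current);
        if (relaxed != next) {
            std::cout << "pause shape: " << relaxed << '\n'; // prints B
            break;
        }
        if (relaxed == current) break; // we're going in circles
        current = relaxed;
    }
}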
JoiningContinuousTimeline<Shape> animatePauses(const JoiningContinuousTimeline<Shape>& animation) {
    JoiningContinuousTimeline<Shape> result(animation);

    for_each_adjacent(
        animation.begin(),
        animation.end(),
        [&](const Timed<Shape>& previous, const Timed<Shape>& pause, const Timed<Shape>& next) {
            if (pause.getValue() != Shape::X) return;

            result.set(
                pause.getTimeRange(),
                getPauseShape(previous.getValue(), next.getValue(), pause.getDuration())
            );
        }
    );

    return result;
}

View File

@@ -13,48 +13,48 @@
// So whenever we come across a one-shape vowel, we backtrack a little, spreading that shape to
// the left.
JoiningContinuousTimeline<Shape> animateRough(const ContinuousTimeline<ShapeRule>& shapeRules) {
    JoiningContinuousTimeline<Shape> animation(shapeRules.getRange(), Shape::X);

    Shape referenceShape = Shape::X;
    // Animate forwards
    centiseconds lastAnticipatedShapeStart = -1_cs;
    for (auto it = shapeRules.begin(); it != shapeRules.end(); ++it) {
        const ShapeRule shapeRule = it->getValue();
        const Shape shape = getClosestShape(referenceShape, shapeRule.shapeSet);
        animation.set(it->getTimeRange(), shape);
        const bool anticipateShape = shapeRule.phone
            && isVowel(*shapeRule.phone)
            && shapeRule.shapeSet.size() == 1;
        if (anticipateShape) {
            // Animate backwards a little
            const Shape anticipatedShape = shape;
            const centiseconds anticipatedShapeStart = it->getStart();
            referenceShape = anticipatedShape;
            for (auto reverseIt = it; reverseIt != shapeRules.begin();) {
                --reverseIt;

                // Make sure we haven't animated too far back
                centiseconds anticipatingShapeStart = reverseIt->getStart();
                if (anticipatingShapeStart == lastAnticipatedShapeStart) break;
                const centiseconds maxAnticipationDuration = 20_cs;
                const centiseconds anticipationDuration =
                    anticipatedShapeStart - anticipatingShapeStart;
                if (anticipationDuration > maxAnticipationDuration) break;

                // Overwrite forward-animated shape with backwards-animated, anticipating shape
                const Shape anticipatingShape =
                    getClosestShape(referenceShape, reverseIt->getValue().shapeSet);
                animation.set(reverseIt->getTimeRange(), anticipatingShape);

                // Make sure the new, backwards-animated shape still resembles the anticipated shape
                if (getBasicShape(anticipatingShape) != getBasicShape(anticipatedShape)) break;

                referenceShape = anticipatingShape;
            }
            lastAnticipatedShapeStart = anticipatedShapeStart;
        }
        referenceShape = anticipateShape ? shape : relax(shape);
    }

    return animation;
}

View File

@@ -6,71 +6,71 @@
using std::vector;
int getSyllableCount(const ContinuousTimeline<ShapeRule>& shapeRules, TimeRange timeRange) {
    if (timeRange.empty()) return 0;

    const auto begin = shapeRules.find(timeRange.getStart());
    const auto end = std::next(shapeRules.find(timeRange.getEnd(), FindMode::SampleLeft));

    // Treat every vowel as one syllable
    int syllableCount = 0;
    for (auto it = begin; it != end; ++it) {
        const ShapeRule shapeRule = it->getValue();

        // Disregard phones that are mostly outside the specified time range.
        const centiseconds phoneMiddle = shapeRule.phoneTiming.getMiddle();
        if (phoneMiddle < timeRange.getStart() || phoneMiddle >= timeRange.getEnd()) continue;

        auto phone = shapeRule.phone;
        if (phone && isVowel(*phone)) {
            ++syllableCount;
        }
    }

    return syllableCount;
}
// A static segment is a prolonged period during which the mouth shape doesn't change
vector<TimeRange> getStaticSegments(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const JoiningContinuousTimeline<Shape>& animation
) {
    // A static segment must contain a certain number of syllables to look distractingly static
    const int minSyllableCount = 3;
    // It must also have a minimum duration. The same number of syllables in fast speech usually
    // looks good.
    const centiseconds minDuration = 75_cs;

    vector<TimeRange> result;
    for (const auto& timedShape : animation) {
        const TimeRange timeRange = timedShape.getTimeRange();
        const bool isStatic = timeRange.getDuration() >= minDuration
            && getSyllableCount(shapeRules, timeRange) >= minSyllableCount;
        if (isStatic) {
            result.push_back(timeRange);
        }
    }

    return result;
}
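The two thresholds combine as a plain conjunction; a tiny sketch with assumed numbers makes the cutoffs concrete:

#include <iostream>

// Mirrors getStaticSegments' test: long *and* syllable-rich segments count as static
bool isStatic(int durationCs, int syllableCount) {
    const int minSyllableCount = 3;
    const int minDurationCs = 75;
    return durationCs >= minDurationCs && syllableCount >= minSyllableCount;
}

int main() {
    std::cout << std::boolalpha
        << isStatic(80, 3) << '\n'  // true: held long over several syllables
        << isStatic(60, 3) << '\n'  // false: same syllables in fast speech look fine
        << isStatic(80, 2) << '\n'; // false: too few syllables
}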
// Indicates whether this shape rule can potentially be replaced by a modified version that breaks
// up long static segments
bool canChange(const ShapeRule& rule) {
    return rule.phone && isVowel(*rule.phone) && rule.shapeSet.size() == 1;
}
// Returns a new shape rule that is identical to the specified one, except that it leads to a
// slightly different visualization
ShapeRule getChangedShapeRule(const ShapeRule& rule) {
    assert(canChange(rule));

    ShapeRule result(rule);
    // So far, I've only encountered B as a static shape.
    // If there is ever a problem with another static shape, this function can easily be extended.
    if (rule.shapeSet == ShapeSet { Shape::B }) {
        result.shapeSet = { Shape::C };
    }
    return result;
}
// Contains the start times of all rules to be changed
@@ -78,162 +78,162 @@ using RuleChanges = vector<centiseconds>;
// Replaces the indicated shape rules with slightly different ones, breaking up long static segments
ContinuousTimeline<ShapeRule> applyChanges(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const RuleChanges& changes
) {
    ContinuousTimeline<ShapeRule> result(shapeRules);
    for (centiseconds changedRuleStart : changes) {
        const Timed<ShapeRule> timedOriginalRule = *shapeRules.get(changedRuleStart);
        const ShapeRule changedRule = getChangedShapeRule(timedOriginalRule.getValue());
        result.set(timedOriginalRule.getTimeRange(), changedRule);
    }
    return result;
}
class RuleChangeScenario {
public:
    RuleChangeScenario(
        const ContinuousTimeline<ShapeRule>& originalRules,
        const RuleChanges& changes,
        const AnimationFunction& animate
    ) :
        changedRules(applyChanges(originalRules, changes)),
        animation(animate(changedRules)),
        staticSegments(getStaticSegments(changedRules, animation))
    {}

    bool isBetterThan(const RuleChangeScenario& rhs) const {
        // We want zero static segments
        if (staticSegments.empty() && !rhs.staticSegments.empty()) return true;

        // Short shapes are better than long ones. Minimize sum-of-squares.
        if (getSumOfShapeDurationSquares() < rhs.getSumOfShapeDurationSquares()) return true;

        return false;
    }

    int getStaticSegmentCount() const {
        return static_cast<int>(staticSegments.size());
    }

    ContinuousTimeline<ShapeRule> getChangedRules() const {
        return changedRules;
    }

private:
    ContinuousTimeline<ShapeRule> changedRules;
    JoiningContinuousTimeline<Shape> animation;
    vector<TimeRange> staticSegments;

    double getSumOfShapeDurationSquares() const {
        return std::accumulate(
            animation.begin(),
            animation.end(),
            0.0,
            [](const double sum, const Timed<Shape>& timedShape) {
                const double duration = std::chrono::duration_cast<std::chrono::duration<double>>(
                    timedShape.getDuration()
                ).count();
                return sum + duration * duration;
            }
        );
    }
};
RuleChanges getPossibleRuleChanges(const ContinuousTimeline<ShapeRule>& shapeRules) {
    RuleChanges result;
    for (auto it = shapeRules.begin(); it != shapeRules.end(); ++it) {
        const ShapeRule rule = it->getValue();
        if (canChange(rule)) {
            result.push_back(it->getStart());
        }
    }
    return result;
}
ContinuousTimeline<ShapeRule> fixStaticSegmentRules(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const AnimationFunction& animate
) {
    // The complexity of this function is exponential with the number of replacements.
    // So let's cap that value.
    const int maxReplacementCount = 3;

    // All potential changes
    const RuleChanges possibleRuleChanges = getPossibleRuleChanges(shapeRules);

    // Find best solution. Start with a single replacement, then increase as necessary.
    RuleChangeScenario bestScenario(shapeRules, {}, animate);
    for (
        int replacementCount = 1;
        bestScenario.getStaticSegmentCount() > 0 && replacementCount <= std::min(static_cast<int>(possibleRuleChanges.size()), maxReplacementCount);
        ++replacementCount
    ) {
        // Only the first <replacementCount> elements of `currentRuleChanges` count
        auto currentRuleChanges(possibleRuleChanges);
        do {
            RuleChangeScenario currentScenario(
                shapeRules,
                { currentRuleChanges.begin(), currentRuleChanges.begin() + replacementCount },
                animate
            );
            if (currentScenario.isBetterThan(bestScenario)) {
                bestScenario = currentScenario;
            }
        } while (next_combination(currentRuleChanges.begin(), currentRuleChanges.begin() + replacementCount, currentRuleChanges.end()));
    }

    return bestScenario.getChangedRules();
}
// Indicates whether the specified shape rule may result in different shapes depending on context
bool isFlexible(const ShapeRule& rule) {
    return rule.shapeSet.size() > 1;
}
// Extends the specified time range until it starts and ends with a non-flexible shape rule, if
// possible
TimeRange extendToFixedRules(
    const TimeRange& timeRange,
    const ContinuousTimeline<ShapeRule>& shapeRules
) {
    auto first = shapeRules.find(timeRange.getStart());
    while (first != shapeRules.begin() && isFlexible(first->getValue())) {
        --first;
    }
    auto last = shapeRules.find(timeRange.getEnd(), FindMode::SampleLeft);
    while (std::next(last) != shapeRules.end() && isFlexible(last->getValue())) {
        ++last;
    }
    return { first->getStart(), last->getEnd() };
}
JoiningContinuousTimeline<Shape> avoidStaticSegments(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const AnimationFunction& animate
) {
    const auto animation = animate(shapeRules);
    const vector<TimeRange> staticSegments = getStaticSegments(shapeRules, animation);
    if (staticSegments.empty()) {
        return animation;
    }

    // Modify shape rules to eliminate static segments
    ContinuousTimeline<ShapeRule> fixedShapeRules(shapeRules);
    for (const TimeRange& staticSegment : staticSegments) {
        // Extend time range to the left and right so we don't lose adjacent rules that might
        // influence the animation
        const TimeRange extendedStaticSegment = extendToFixedRules(staticSegment, shapeRules);

        // Fix shape rules within the static segment
        const auto fixedSegmentShapeRules = fixStaticSegmentRules(
            { extendedStaticSegment, ShapeRule::getInvalid(), fixedShapeRules },
            animate
        );
        for (const auto& timedShapeRule : fixedSegmentShapeRules) {
            fixedShapeRules.set(timedShapeRule);
        }
    }

    return animate(fixedShapeRules);
}

View File

@@ -13,6 +13,6 @@ using AnimationFunction = std::function<JoiningContinuousTimeline<Shape>(const C
// Static segments happen rather often.
// See http://animateducated.blogspot.de/2016/10/lip-sync-animation-2.html?showComment=1478861729702#c2940729096183546458.
JoiningContinuousTimeline<Shape> avoidStaticSegments(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const AnimationFunction& animate
);

View File

@@ -1,48 +1,48 @@
#include "targetShapeSet.h"
Shape convertToTargetShapeSet(Shape shape, const ShapeSet& targetShapeSet) {
    if (targetShapeSet.find(shape) != targetShapeSet.end()) {
        return shape;
    }
    const Shape basicShape = getBasicShape(shape);
    if (targetShapeSet.find(basicShape) == targetShapeSet.end()) {
        throw std::invalid_argument(
            fmt::format("Target shape set must contain basic shape {}.", basicShape));
    }
    return basicShape;
}
ShapeSet convertToTargetShapeSet(const ShapeSet& shapes, const ShapeSet& targetShapeSet) {
    ShapeSet result;
    for (Shape shape : shapes) {
        result.insert(convertToTargetShapeSet(shape, targetShapeSet));
    }
    return result;
}
ContinuousTimeline<ShapeRule> convertToTargetShapeSet(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const ShapeSet& targetShapeSet
) {
    ContinuousTimeline<ShapeRule> result(shapeRules);
    for (const auto& timedShapeRule : shapeRules) {
        ShapeRule rule = timedShapeRule.getValue();
        rule.shapeSet = convertToTargetShapeSet(rule.shapeSet, targetShapeSet);
        result.set(timedShapeRule.getTimeRange(), rule);
    }
    return result;
}
JoiningContinuousTimeline<Shape> convertToTargetShapeSet(
    const JoiningContinuousTimeline<Shape>& animation,
    const ShapeSet& targetShapeSet
) {
    JoiningContinuousTimeline<Shape> result(animation);
    for (const auto& timedShape : animation) {
        result.set(
            timedShape.getTimeRange(),
            convertToTargetShapeSet(timedShape.getValue(), targetShapeSet)
        );
    }
    return result;
}
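The single-shape overload's fallback can be traced in a standalone sketch, with the basic-shape table transcribed from animationRules.cpp (chars as stand-ins for the Shape enum): converting H against a target set of A-F yields its basic shape C:

#include <iostream>
#include <map>
#include <set>
#include <stdexcept>

int main() {
    // getBasicShape per animationRules.cpp: G->A, H->C, X->A; A-F map to themselves
    const std::map<char, char> basicShape {
        { 'A', 'A' }, { 'B', 'B' }, { 'C', 'C' }, { 'D', 'D' }, { 'E', 'E' },
        { 'F', 'F' }, { 'G', 'A' }, { 'H', 'C' }, { 'X', 'A' }
    };
    const std::set<char> targetShapeSet { 'A', 'B', 'C', 'D', 'E', 'F' };

    const char shape = 'H';
    if (targetShapeSet.count(shape)) {
        std::cout << shape << '\n'; // already allowed
    } else {
        const char basic = basicShape.at(shape);
        if (!targetShapeSet.count(basic))
            throw std::invalid_argument("Target shape set must contain basic shape.");
        std::cout << "converted to basic shape: " << basic << '\n'; // prints C
    }
}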

View File

@@ -12,13 +12,13 @@ ShapeSet convertToTargetShapeSet(const ShapeSet& shapes, const ShapeSet& targetS
// Replaces each shape in each rule with the closest shape that occurs in the target shape set.
ContinuousTimeline<ShapeRule> convertToTargetShapeSet(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const ShapeSet& targetShapeSet
);
// Replaces each shape in the specified animation with the closest shape that occurs in the target
// shape set.
JoiningContinuousTimeline<Shape> convertToTargetShapeSet(
    const JoiningContinuousTimeline<Shape>& animation,
    const ShapeSet& targetShapeSet
);

View File

@@ -9,255 +9,255 @@ using std::string;
using std::map;
string getShapesString(const JoiningContinuousTimeline<Shape>& shapes) {
    string result;
    for (const auto& timedShape : shapes) {
        if (!result.empty()) {
            result.append(" ");
        }
        result.append(boost::lexical_cast<std::string>(timedShape.getValue()));
    }
    return result;
}
Shape getRepresentativeShape(const JoiningTimeline<Shape>& timeline) {
    if (timeline.empty()) {
        throw std::invalid_argument("Cannot determine representative shape from empty timeline.");
    }

    // Collect candidate shapes with weights
    map<Shape, centiseconds> candidateShapeWeights;
    for (const auto& timedShape : timeline) {
        candidateShapeWeights[timedShape.getValue()] += timedShape.getDuration();
    }

    // Select shape with highest total duration within the candidate range
    const Shape bestShape = std::max_element(
        candidateShapeWeights.begin(), candidateShapeWeights.end(),
        [](auto a, auto b) { return a.second < b.second; }
    )->first;

    // Shapes C and D are similar, but D is more interesting.
    const bool substituteD = bestShape == Shape::C && candidateShapeWeights[Shape::D] > 0_cs;
    return substituteD ? Shape::D : bestShape;
}
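// Worked example (hypothetical timeline, assuming Timeline's three-argument
// set as used elsewhere in this codebase): B covers 3 cs + 2 cs = 5 cs total,
// C only 4 cs, so B is returned. Had C won with any D present, the rule above
// would substitute D.
Shape representativeShapeExample() {
    JoiningTimeline<Shape> timeline;
    timeline.set(0_cs, 3_cs, Shape::B);
    timeline.set(3_cs, 7_cs, Shape::C);
    timeline.set(7_cs, 9_cs, Shape::B);
    return getRepresentativeShape(timeline); // yields Shape::B
}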
struct ShapeReduction {
    ShapeReduction(const JoiningTimeline<Shape>& sourceShapes) :
        sourceShapes(sourceShapes),
        shape(getRepresentativeShape(sourceShapes)) {}
    ShapeReduction(const JoiningTimeline<Shape>& sourceShapes, TimeRange candidateRange) :
        ShapeReduction(JoiningBoundedTimeline<Shape>(candidateRange, sourceShapes)) {}
    JoiningTimeline<Shape> sourceShapes;
    Shape shape;
};
// Returns a time range of candidate shapes for the next shape to draw.
// Guaranteed to be non-empty.
TimeRange getNextMinimalCandidateRange(const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange, const centiseconds writePosition) {
    if (sourceShapes.empty()) {
        throw std::invalid_argument("Cannot determine candidate range for empty source timeline.");
    }
    // Too short, and we get flickering. Too long, and too many shapes are lost.
    // Good values turn out to be 5 to 7 cs, with 7 cs sometimes looking just marginally better.
    const centiseconds minShapeDuration = 7_cs;
    // If the remaining time can hold more than one shape, but not two: split it evenly
    const centiseconds remainingTargetDuration = writePosition - targetRange.getStart();
    const bool canFitOneOrLess = remainingTargetDuration <= minShapeDuration;
    const bool canFitTwo = remainingTargetDuration >= 2 * minShapeDuration;
    const centiseconds duration = canFitOneOrLess || canFitTwo
        ? minShapeDuration
        : remainingTargetDuration / 2;
    TimeRange candidateRange(writePosition - duration, writePosition);
    if (writePosition == targetRange.getEnd()) {
        // This is the first iteration.
        // Extend the candidate range to the right in order to consider all source shapes after the
        // target range.
        candidateRange.setEndIfLater(sourceShapes.getRange().getEnd());
    }
    if (candidateRange.getStart() >= sourceShapes.getRange().getEnd()) {
        // We haven't reached the source range yet.
        // Extend the candidate range to the left in order to encompass the right-most source shape.
        candidateRange.setStart(sourceShapes.rbegin()->getStart());
    }
    if (candidateRange.getEnd() <= sourceShapes.getRange().getStart()) {
        // We're past the source range. This can happen in corner cases.
        // Extend the candidate range to the right in order to encompass the left-most source shape.
        candidateRange.setEnd(sourceShapes.begin()->getEnd());
    }
    return candidateRange;
}
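// Worked example for the even split above (hypothetical values): with 10 cs
// remaining in the target range, one 7-cs shape would leave a 3-cs flicker
// and two 7-cs shapes don't fit, so the remainder is halved and duration
// becomes 5 cs, yielding two evenly sized shapes.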
ShapeReduction getNextShapeReduction(
    const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange,
    centiseconds writePosition
) {
    // Determine the next time range of candidate shapes. Consider two scenarios:
    // ... the shortest-possible candidate range
    const ShapeReduction minReduction(sourceShapes,
        getNextMinimalCandidateRange(sourceShapes, targetRange, writePosition));
    // ... a candidate range extended to the left to fully encompass its left-most shape
    const ShapeReduction extendedReduction(sourceShapes,
        {
            minReduction.sourceShapes.begin()->getStart(),
            minReduction.sourceShapes.getRange().getEnd()
        }
    );
    // Determine the shape that might be picked *next* if we choose the shortest-possible candidate
    // range now
    const ShapeReduction nextReduction(
        sourceShapes,
        getNextMinimalCandidateRange(sourceShapes, targetRange, minReduction.sourceShapes.getRange().getStart())
    );
    const bool minEqualsExtended = minReduction.shape == extendedReduction.shape;
    const bool extendedIsSpecial = extendedReduction.shape != minReduction.shape
        && extendedReduction.shape != nextReduction.shape;
    return minEqualsExtended || extendedIsSpecial ? extendedReduction : minReduction;
}
// Modifies the timing of the given animation to fit into the specified target time range without
// jitter.
JoiningContinuousTimeline<Shape> retime(const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange) {
    logTimedEvent("segment", targetRange, getShapesString(sourceShapes));
    JoiningContinuousTimeline<Shape> result(targetRange, Shape::X);
    if (sourceShapes.empty()) return result;
    // Animate backwards
    centiseconds writePosition = targetRange.getEnd();
    while (writePosition > targetRange.getStart()) {
        // Decide which shape to show next, possibly discarding short shapes
        const ShapeReduction shapeReduction =
            getNextShapeReduction(sourceShapes, targetRange, writePosition);
        // Determine how long to display the shape
        TimeRange targetShapeRange(shapeReduction.sourceShapes.getRange());
        if (targetShapeRange.getStart() <= sourceShapes.getRange().getStart()) {
            // We've used up the left-most source shape. Fill the entire remaining target range.
            targetShapeRange.setStartIfEarlier(targetRange.getStart());
        }
        targetShapeRange.trimRight(writePosition);
        // Draw shape
        result.set(targetShapeRange, shapeReduction.shape);
        writePosition = targetShapeRange.getStart();
    }
    return result;
}
JoiningContinuousTimeline<Shape> retime(
    const JoiningContinuousTimeline<Shape>& animation,
    TimeRange sourceRange,
    TimeRange targetRange
) {
    const auto sourceShapes = JoiningContinuousTimeline<Shape>(sourceRange, Shape::X, animation);
    return retime(sourceShapes, targetRange);
}
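// Usage sketch (hypothetical ranges): squeeze whatever shapes lie between
// 10 cs and 30 cs into a 12-cs window. The overload above cuts the segment
// out of the full animation and delegates to the jitter-free loop.
JoiningContinuousTimeline<Shape> squeezeExample(
    const JoiningContinuousTimeline<Shape>& animation
) {
    return retime(animation, TimeRange(10_cs, 30_cs), TimeRange(10_cs, 22_cs));
}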
enum class MouthState {
    Idle,
    Closed,
    Open
};
JoiningContinuousTimeline<Shape> optimizeTiming(const JoiningContinuousTimeline<Shape>& animation) {
    // Identify segments with idle, closed, and open mouth shapes
    JoiningContinuousTimeline<MouthState> segments(animation.getRange(), MouthState::Idle);
    for (const auto& timedShape : animation) {
        const Shape shape = timedShape.getValue();
        const MouthState mouthState =
            shape == Shape::X
            ? MouthState::Idle
            : shape == Shape::A
            ? MouthState::Closed
            : MouthState::Open;
        segments.set(timedShape.getTimeRange(), mouthState);
    }
    // The minimum duration a segment of open or closed mouth shapes must have to visually register
    const centiseconds minSegmentDuration = 8_cs;
    // The maximum amount by which the start of a shape can be brought forward
    const centiseconds maxExtensionDuration = 6_cs;
    // Make sure all open and closed segments are long enough to register visually.
    JoiningContinuousTimeline<Shape> result(animation.getRange(), Shape::X);
    // ... we're filling the result timeline from right to left, so `resultStart` points to the
    // earliest shape already written
    centiseconds resultStart = result.getRange().getEnd();
    for (auto segmentIt = segments.rbegin(); segmentIt != segments.rend(); ++segmentIt) {
        // We don't care about idle shapes at this point.
        if (segmentIt->getValue() == MouthState::Idle) continue;
        resultStart = std::min(segmentIt->getEnd(), resultStart);
        if (resultStart - segmentIt->getStart() >= minSegmentDuration) {
            // The segment is long enough; we don't have to extend it to the left.
            const TimeRange targetRange(segmentIt->getStart(), resultStart);
            const auto retimedSegment = retime(animation, segmentIt->getTimeRange(), targetRange);
            for (const auto& timedShape : retimedSegment) {
                result.set(timedShape);
            }
            resultStart = targetRange.getStart();
        } else {
            // The segment is too short; we have to extend it to the left.
            // Find all adjacent segments to our left that are also too short, then distribute them
            // evenly.
            const auto begin = segmentIt;
            auto end = std::next(begin);
            while (
                end != segments.rend()
                && end->getValue() != MouthState::Idle
                && end->getDuration() < minSegmentDuration
            ) {
                ++end;
            }
            // Determine how much we should extend the entire set of short segments to the left
            const size_t shortSegmentCount = std::distance(begin, end);
            const centiseconds desiredDuration = minSegmentDuration * shortSegmentCount;
            const centiseconds currentDuration = begin->getEnd() - std::prev(end)->getStart();
            const centiseconds desiredExtensionDuration = desiredDuration - currentDuration;
            const centiseconds availableExtensionDuration = end != segments.rend()
                ? end->getDuration() - 1_cs
                : 0_cs;
            const centiseconds extensionDuration = std::min({
                desiredExtensionDuration, availableExtensionDuration, maxExtensionDuration
            });
            // Distribute available time range evenly among all short segments
            const centiseconds shortSegmentsTargetStart =
                std::prev(end)->getStart() - extensionDuration;
            for (auto shortSegmentIt = begin; shortSegmentIt != end; ++shortSegmentIt) {
                size_t remainingShortSegmentCount = std::distance(shortSegmentIt, end);
                const centiseconds segmentDuration = (resultStart - shortSegmentsTargetStart) /
                    remainingShortSegmentCount;
                const TimeRange segmentTargetRange(resultStart - segmentDuration, resultStart);
                const auto retimedSegment =
                    retime(animation, shortSegmentIt->getTimeRange(), segmentTargetRange);
                for (const auto& timedShape : retimedSegment) {
                    result.set(timedShape);
                }
                resultStart = segmentTargetRange.getStart();
            }
            segmentIt = std::prev(end);
        }
    }
    return result;
}
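// Worked example for the redistribution above (hypothetical durations): two
// adjacent non-idle segments of 5 cs and 6 cs both fall short of
// minSegmentDuration (8 cs). Desired total: 16 cs; current total: 11 cs; so
// the desired extension is 5 cs. If the idle segment to their left is 4 cs
// long, only 3 cs (its duration minus 1 cs) are available, and
// maxExtensionDuration (6 cs) doesn't bind: extensionDuration = 3 cs, and the
// two segments share 14 cs, i.e. 7 cs each.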

View File

@ -2,53 +2,53 @@
#include "animationRules.h"
JoiningContinuousTimeline<Shape> insertTweens(const JoiningContinuousTimeline<Shape>& animation) {
    const centiseconds minTweenDuration = 4_cs;
    const centiseconds maxTweenDuration = 8_cs;
    JoiningContinuousTimeline<Shape> result(animation);
    for_each_adjacent(animation.begin(), animation.end(), [&](const auto& first, const auto& second) {
        auto pair = getTween(first.getValue(), second.getValue());
        if (!pair) return;
        Shape tweenShape;
        TweenTiming tweenTiming;
        std::tie(tweenShape, tweenTiming) = *pair;
        TimeRange firstTimeRange = first.getTimeRange();
        TimeRange secondTimeRange = second.getTimeRange();
        centiseconds tweenStart, tweenDuration;
        switch (tweenTiming) {
            case TweenTiming::Early:
            {
                tweenDuration = std::min(firstTimeRange.getDuration() / 3, maxTweenDuration);
                tweenStart = firstTimeRange.getEnd() - tweenDuration;
                break;
            }
            case TweenTiming::Centered:
            {
                tweenDuration = std::min({
                    firstTimeRange.getDuration() / 4, secondTimeRange.getDuration() / 4, maxTweenDuration
                });
                tweenStart = firstTimeRange.getEnd() - tweenDuration / 2;
                break;
            }
            case TweenTiming::Late:
            {
                tweenDuration = std::min(secondTimeRange.getDuration() / 3, maxTweenDuration);
                tweenStart = secondTimeRange.getStart();
                break;
            }
            default:
            {
                throw std::runtime_error("Unexpected tween timing.");
            }
        }
        if (tweenDuration < minTweenDuration) return;
        result.set(tweenStart, tweenStart + tweenDuration, tweenShape);
    });
    return result;
}
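// Usage sketch: how the two passes might be chained; optimizeTiming is the
// function shown in the file above, and the chaining order is illustrative.
JoiningContinuousTimeline<Shape> polishAnimation(const JoiningContinuousTimeline<Shape>& animation) {
    // Stabilize the timing first, then add in-between shapes for smoothness.
    return insertTweens(optimizeTiming(animation));
}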

View File

@ -4,66 +4,66 @@
using std::invalid_argument;
TimeRange AudioClip::getTruncatedRange() const {
    return TimeRange(0_cs, centiseconds(100 * size() / getSampleRate()));
}
class SafeSampleReader {
public:
    SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size);
    AudioClip::value_type operator()(AudioClip::size_type index);
private:
    SampleReader unsafeRead;
    AudioClip::size_type size;
    AudioClip::size_type lastIndex = -1;
    AudioClip::value_type lastSample = 0;
};
SafeSampleReader::SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size) :
    unsafeRead(unsafeRead),
    size(size)
{}
inline AudioClip::value_type SafeSampleReader::operator()(AudioClip::size_type index) {
    if (index < 0) {
        throw invalid_argument(fmt::format("Cannot read from sample index {}. Index < 0.", index));
    }
    if (index >= size) {
        throw invalid_argument(fmt::format(
            "Cannot read from sample index {}. Clip size is {}.",
            index,
            size
        ));
    }
    if (index == lastIndex) {
        return lastSample;
    }
    lastIndex = index;
    lastSample = unsafeRead(index);
    return lastSample;
}
SampleReader AudioClip::createSampleReader() const {
    return SafeSampleReader(createUnsafeSampleReader(), size());
}
AudioClip::iterator AudioClip::begin() const {
    return SampleIterator(*this, 0);
}
AudioClip::iterator AudioClip::end() const {
    return SampleIterator(*this, size());
}
std::unique_ptr<AudioClip> operator|(std::unique_ptr<AudioClip> clip, const AudioEffect& effect) {
    return effect(std::move(clip));
}
SampleIterator::SampleIterator() :
    sampleIndex(0)
{}
SampleIterator::SampleIterator(const AudioClip& audioClip, size_type sampleIndex) :
    sampleReader([&audioClip] { return audioClip.createSampleReader(); }),
    sampleIndex(sampleIndex)
{}
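// Usage sketch: thanks to begin()/end() above, a clip can be scanned with a
// range-based for loop while SafeSampleReader guards against out-of-range
// reads. (<algorithm> and <cmath> are assumed to be included.)
float getPeakAmplitude(const AudioClip& clip) {
    float peak = 0;
    for (const float sample : clip) {
        peak = std::max(peak, std::abs(sample));
    }
    return peak;
}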

View File

@ -10,22 +10,22 @@ class SampleIterator;
class AudioClip {
public:
    using value_type = float;
    using size_type = int64_t;
    using difference_type = int64_t;
    using iterator = SampleIterator;
    using SampleReader = std::function<value_type(size_type)>;
    virtual ~AudioClip() {}
    virtual std::unique_ptr<AudioClip> clone() const = 0;
    virtual int getSampleRate() const = 0;
    virtual size_type size() const = 0;
    TimeRange getTruncatedRange() const;
    SampleReader createSampleReader() const;
    iterator begin() const;
    iterator end() const;
private:
    virtual SampleReader createUnsafeSampleReader() const = 0;
};
using AudioEffect = std::function<std::unique_ptr<AudioClip>(std::unique_ptr<AudioClip>)>;
@ -36,107 +36,107 @@ using SampleReader = AudioClip::SampleReader;
class SampleIterator {
public:
    using value_type = AudioClip::value_type;
    using size_type = AudioClip::size_type;
    using difference_type = AudioClip::difference_type;
    SampleIterator();
    size_type getSampleIndex() const;
    void seek(size_type sampleIndex);
    value_type operator*() const;
    value_type operator[](difference_type n) const;
private:
    friend AudioClip;
    SampleIterator(const AudioClip& audioClip, size_type sampleIndex);
    Lazy<SampleReader> sampleReader;
    size_type sampleIndex;
};
inline SampleIterator::size_type SampleIterator::getSampleIndex() const {
    return sampleIndex;
}
inline void SampleIterator::seek(size_type sampleIndex) {
    this->sampleIndex = sampleIndex;
}
inline SampleIterator::value_type SampleIterator::operator*() const {
    return (*sampleReader)(sampleIndex);
}
inline SampleIterator::value_type SampleIterator::operator[](difference_type n) const {
    return (*sampleReader)(sampleIndex + n);
}
inline bool operator==(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() == rhs.getSampleIndex();
}
inline bool operator!=(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() != rhs.getSampleIndex();
}
inline bool operator<(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() < rhs.getSampleIndex();
}
inline bool operator>(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() > rhs.getSampleIndex();
}
inline bool operator<=(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() <= rhs.getSampleIndex();
}
inline bool operator>=(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() >= rhs.getSampleIndex();
}
inline SampleIterator& operator+=(SampleIterator& it, SampleIterator::difference_type n) {
    it.seek(it.getSampleIndex() + n);
    return it;
}
inline SampleIterator& operator-=(SampleIterator& it, SampleIterator::difference_type n) {
    it.seek(it.getSampleIndex() - n);
    return it;
}
inline SampleIterator& operator++(SampleIterator& it) {
    return operator+=(it, 1);
}
inline SampleIterator operator++(SampleIterator& it, int) {
    SampleIterator tmp(it);
    operator++(it);
    return tmp;
}
inline SampleIterator& operator--(SampleIterator& it) {
    return operator-=(it, 1);
}
inline SampleIterator operator--(SampleIterator& it, int) {
    SampleIterator tmp(it);
    operator--(it);
    return tmp;
}
inline SampleIterator operator+(const SampleIterator& it, SampleIterator::difference_type n) {
    SampleIterator result(it);
    result += n;
    return result;
}
inline SampleIterator operator-(const SampleIterator& it, SampleIterator::difference_type n) {
    SampleIterator result(it);
    result -= n;
    return result;
}
inline SampleIterator::difference_type operator-(const SampleIterator& lhs, const SampleIterator& rhs) {
    return lhs.getSampleIndex() - rhs.getSampleIndex();
}
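// Usage sketch: the operators above give SampleIterator random-access
// semantics, so seeking is simple arithmetic (the clip is hypothetical and
// assumed to be longer than one second).
inline AudioClip::value_type sampleOneSecondIn(const AudioClip& clip) {
    const AudioClip::iterator it = clip.begin() + clip.getSampleRate();
    return *it;
}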

View File

@ -4,27 +4,27 @@ using std::unique_ptr;
using std::make_unique;
AudioSegment::AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range) :
    inputClip(std::move(inputClip)),
    sampleOffset(static_cast<int64_t>(range.getStart().count()) * this->inputClip->getSampleRate() / 100),
    sampleCount(static_cast<int64_t>(range.getDuration().count()) * this->inputClip->getSampleRate() / 100)
{
    if (sampleOffset < 0 || sampleOffset + sampleCount > this->inputClip->size()) {
        throw std::invalid_argument("Segment extends beyond input clip.");
    }
}
unique_ptr<AudioClip> AudioSegment::clone() const {
    return make_unique<AudioSegment>(*this);
}
SampleReader AudioSegment::createUnsafeSampleReader() const {
    return [read = inputClip->createSampleReader(), sampleOffset = sampleOffset](size_type index) {
        return read(index + sampleOffset);
    };
}
AudioEffect segment(const TimeRange& range) {
    return [range](unique_ptr<AudioClip> inputClip) {
        return make_unique<AudioSegment>(std::move(inputClip), range);
    };
}
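// Usage sketch: cut out the first second of a clip using the pipe operator
// from AudioClip.cpp. Assumes the clip is at least 100 cs (1 s) long;
// otherwise the constructor above throws.
std::unique_ptr<AudioClip> getFirstSecond(std::unique_ptr<AudioClip> clip) {
    return std::move(clip) | segment(TimeRange(0_cs, 100_cs));
}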

View File

@ -4,24 +4,24 @@
class AudioSegment : public AudioClip {
public:
    AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range);
    std::unique_ptr<AudioClip> clone() const override;
    int getSampleRate() const override;
    size_type size() const override;
private:
    SampleReader createUnsafeSampleReader() const override;
    std::shared_ptr<AudioClip> inputClip;
    size_type sampleOffset, sampleCount;
};
inline int AudioSegment::getSampleRate() const {
    return inputClip->getSampleRate();
}
inline AudioClip::size_type AudioSegment::size() const {
    return sampleCount;
}
AudioEffect segment(const TimeRange& range);

View File

@ -5,65 +5,65 @@ using std::unique_ptr;
using std::make_unique;
DcOffset::DcOffset(unique_ptr<AudioClip> inputClip, float offset) :
    inputClip(std::move(inputClip)),
    offset(offset),
    factor(1 / (1 + std::abs(offset)))
{}
unique_ptr<AudioClip> DcOffset::clone() const {
    return make_unique<DcOffset>(*this);
}
SampleReader DcOffset::createUnsafeSampleReader() const {
    return [
        read = inputClip->createSampleReader(),
        factor = factor,
        offset = offset
    ](size_type index) {
        const float sample = read(index);
        return sample * factor + offset;
    };
}
float getDcOffset(const AudioClip& audioClip) {
    int flatMeanSampleCount, fadingMeanSampleCount;
    const int sampleRate = audioClip.getSampleRate();
    if (audioClip.size() > 4 * sampleRate) {
        // Long audio file. Average over the first 3 seconds, then fade out over the 4th.
        flatMeanSampleCount = 3 * sampleRate;
        fadingMeanSampleCount = 1 * sampleRate;
    } else {
        // Short audio file. Average over the entire duration.
        flatMeanSampleCount = static_cast<int>(audioClip.size());
        fadingMeanSampleCount = 0;
    }
    const auto read = audioClip.createSampleReader();
    double sum = 0;
    for (int i = 0; i < flatMeanSampleCount; ++i) {
        sum += read(i);
    }
    for (int i = 0; i < fadingMeanSampleCount; ++i) {
        const double weight =
            static_cast<double>(fadingMeanSampleCount - i) / fadingMeanSampleCount;
        sum += read(flatMeanSampleCount + i) * weight;
    }
    const double totalWeight = flatMeanSampleCount + (fadingMeanSampleCount + 1) / 2.0;
    const double offset = sum / totalWeight;
    return static_cast<float>(offset);
}
AudioEffect addDcOffset(float offset, float epsilon) {
    return [offset, epsilon](unique_ptr<AudioClip> inputClip) -> unique_ptr<AudioClip> {
        if (std::abs(offset) < epsilon) return inputClip;
        return make_unique<DcOffset>(std::move(inputClip), offset);
    };
}
AudioEffect removeDcOffset(float epsilon) {
    return [epsilon](unique_ptr<AudioClip> inputClip) {
        const float offset = getDcOffset(*inputClip);
        return std::move(inputClip) | addDcOffset(-offset, epsilon);
    };
}
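// Usage sketch: typical cleanup step before analysis; the epsilon below is a
// hypothetical threshold under which an offset is considered negligible and
// the clip is passed through unchanged.
std::unique_ptr<AudioClip> centerClip(std::unique_ptr<AudioClip> clip) {
    return std::move(clip) | removeDcOffset(0.001f);
}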

View File

@ -6,24 +6,24 @@
// to prevent clipping
class DcOffset : public AudioClip {
public:
    DcOffset(std::unique_ptr<AudioClip> inputClip, float offset);
    std::unique_ptr<AudioClip> clone() const override;
    int getSampleRate() const override;
    size_type size() const override;
private:
    SampleReader createUnsafeSampleReader() const override;
    std::shared_ptr<AudioClip> inputClip;
    float offset;
    float factor;
};
inline int DcOffset::getSampleRate() const {
    return inputClip->getSampleRate();
}
inline AudioClip::size_type DcOffset::size() const {
    return inputClip->size();
}
float getDcOffset(const AudioClip& audioClip);

View File

@ -13,154 +13,154 @@ using std::ifstream;
using std::ios_base;
std::string vorbisErrorToString(int64_t errorCode) {
    switch (errorCode) {
        case OV_EREAD:
            return "Read error while fetching compressed data for decode.";
        case OV_EFAULT:
            return "Internal logic fault; indicates a bug or heap/stack corruption.";
        case OV_EIMPL:
            return "Feature not implemented";
        case OV_EINVAL:
            return "Either an invalid argument, or incompletely initialized argument passed to a call.";
        case OV_ENOTVORBIS:
            return "The given file/data was not recognized as Ogg Vorbis data.";
        case OV_EBADHEADER:
            return "The file/data is apparently an Ogg Vorbis stream, but contains a corrupted or undecipherable header.";
        case OV_EVERSION:
            return "The bitstream format revision of the given Vorbis stream is not supported.";
        case OV_ENOTAUDIO:
            return "Packet is not an audio packet.";
        case OV_EBADPACKET:
            return "Error in packet.";
        case OV_EBADLINK:
            return "The given link exists in the Vorbis data stream, but is not decipherable due to garbage or corruption.";
        case OV_ENOSEEK:
            return "The given stream is not seekable.";
        default:
            return "An unexpected Vorbis error occurred.";
    }
}
template<typename T>
T throwOnError(T code) {
    // OV_HOLE, though technically an error code, is only informational
    const bool error = code < 0 && code != OV_HOLE;
    if (error) {
        const std::string message =
            fmt::format("{} (Vorbis error {})", vorbisErrorToString(code), code);
        throw std::runtime_error(message);
    }
    return code;
}
size_t readCallback(void* buffer, size_t elementSize, size_t elementCount, void* dataSource) {
    assert(elementSize == 1);
    ifstream& stream = *static_cast<ifstream*>(dataSource);
    stream.read(static_cast<char*>(buffer), elementCount);
    const std::streamsize bytesRead = stream.gcount();
    stream.clear(); // In case we read past EOF
    return static_cast<size_t>(bytesRead);
}
int seekCallback(void* dataSource, ogg_int64_t offset, int origin) {
    static const vector<ios_base::seekdir> seekDirections {
        ios_base::beg, ios_base::cur, ios_base::end
    };
    ifstream& stream = *static_cast<ifstream*>(dataSource);
    stream.seekg(offset, seekDirections.at(origin));
    stream.clear(); // In case we sought to EOF
    return 0;
}
long tellCallback(void* dataSource) {
    ifstream& stream = *static_cast<ifstream*>(dataSource);
    const auto position = stream.tellg();
    assert(position >= 0);
    return static_cast<long>(position);
}
// RAII wrapper around OggVorbis_File
class OggVorbisFile final {
public:
    OggVorbisFile(const path& filePath);
    OggVorbisFile(const OggVorbisFile&) = delete;
    OggVorbisFile& operator=(const OggVorbisFile&) = delete;
    OggVorbis_File* get() {
        return &oggVorbisHandle;
    }
    ~OggVorbisFile() {
        ov_clear(&oggVorbisHandle);
    }
private:
    OggVorbis_File oggVorbisHandle;
    ifstream stream;
};
OggVorbisFile::OggVorbisFile(const path& filePath) :
    oggVorbisHandle(),
    stream(openFile(filePath))
{
    // Throw only on badbit, not on failbit.
    // Ogg Vorbis expects read operations past the end of the file to
    // succeed, not to throw.
    stream.exceptions(ifstream::badbit);
    // Ogg Vorbis normally uses the `FILE` API from the C standard library.
    // This doesn't handle Unicode paths on Windows.
    // Use wrapper functions around `ifstream` instead.
    const ov_callbacks callbacks { readCallback, seekCallback, nullptr, tellCallback };
    throwOnError(ov_open_callbacks(&stream, &oggVorbisHandle, nullptr, 0, callbacks));
}
OggVorbisFileReader::OggVorbisFileReader(const path& filePath) :
    filePath(filePath)
{
    OggVorbisFile file(filePath);
    vorbis_info* vorbisInfo = ov_info(file.get(), -1);
    sampleRate = vorbisInfo->rate;
    channelCount = vorbisInfo->channels;
    sampleCount = throwOnError(ov_pcm_total(file.get(), -1));
}
std::unique_ptr<AudioClip> OggVorbisFileReader::clone() const {
    return std::make_unique<OggVorbisFileReader>(*this);
}
SampleReader OggVorbisFileReader::createUnsafeSampleReader() const {
    return [
        channelCount = channelCount,
        file = make_shared<OggVorbisFile>(filePath),
        buffer = static_cast<value_type**>(nullptr),
        bufferStart = size_type(0),
        bufferSize = size_type(0)
    ](size_type index) mutable {
        if (index < bufferStart || index >= bufferStart + bufferSize) {
            // Seek
            throwOnError(ov_pcm_seek(file->get(), index));
            // Read a block of samples
            constexpr int maxSize = 1024;
            bufferStart = index;
            bufferSize = throwOnError(ov_read_float(file->get(), &buffer, maxSize, nullptr));
            if (bufferSize == 0) {
                throw std::runtime_error("Unexpected end of file.");
            }
        }
        // Downmix channels
        const size_type bufferIndex = index - bufferStart;
        value_type sum = 0.0f;
        for (int channel = 0; channel < channelCount; ++channel) {
            sum += buffer[channel][bufferIndex];
        }
        return sum / channelCount;
    };
}
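// Usage sketch (hypothetical file name): decoding is lazy. Samples are only
// read once the clip is accessed, in 1024-sample blocks, and downmixed to
// mono on the fly by the reader above.
float readFirstOggSample() {
    const OggVorbisFileReader reader("speech.ogg");
    return *reader.begin();
}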

View File

@ -5,16 +5,16 @@
class OggVorbisFileReader : public AudioClip {
public:
    OggVorbisFileReader(const std::filesystem::path& filePath);
    std::unique_ptr<AudioClip> clone() const override;
    int getSampleRate() const override { return sampleRate; }
    size_type size() const override { return sampleCount; }
private:
    SampleReader createUnsafeSampleReader() const override;
    std::filesystem::path filePath;
    int sampleRate;
    int channelCount;
    size_type sampleCount;
};

View File

@ -8,63 +8,63 @@ using std::unique_ptr;
using std::make_unique;
SampleRateConverter::SampleRateConverter(unique_ptr<AudioClip> inputClip, int outputSampleRate) :
    inputClip(std::move(inputClip)),
    downscalingFactor(static_cast<double>(this->inputClip->getSampleRate()) / outputSampleRate),
    outputSampleRate(outputSampleRate),
    outputSampleCount(std::lround(this->inputClip->size() / downscalingFactor))
{
    if (outputSampleRate <= 0) {
        throw invalid_argument("Sample rate must be positive.");
    }
    if (this->inputClip->getSampleRate() < outputSampleRate) {
        throw invalid_argument(fmt::format(
            "Upsampling not supported. Input sample rate must not be below {}Hz.",
            outputSampleRate
        ));
    }
}
unique_ptr<AudioClip> SampleRateConverter::clone() const {
    return make_unique<SampleRateConverter>(*this);
}
float mean(double inputStart, double inputEnd, const SampleReader& read) {
    // Calculate weighted sum...
    double sum = 0;
    // ... first sample (weight <= 1)
    const int64_t startIndex = static_cast<int64_t>(inputStart);
    sum += read(startIndex) * ((startIndex + 1) - inputStart);
    // ... middle samples (weight 1 each)
    const int64_t endIndex = static_cast<int64_t>(inputEnd);
    for (int64_t index = startIndex + 1; index < endIndex; ++index) {
        sum += read(index);
    }
    // ... last sample (weight < 1)
    if (endIndex < inputEnd) {
        sum += read(endIndex) * (inputEnd - endIndex);
    }
    return static_cast<float>(sum / (inputEnd - inputStart));
}
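// Worked example for mean() above (hypothetical values): inputStart = 2.25,
// inputEnd = 4.5. Sample 2 contributes with weight 0.75, sample 3 with weight
// 1, and sample 4 with weight 0.5; the weighted sum is then divided by the
// total width of 2.25. This amounts to area-averaging, i.e. a box filter.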
SampleReader SampleRateConverter::createUnsafeSampleReader() const {
    return [
        read = inputClip->createSampleReader(),
        downscalingFactor = downscalingFactor,
        size = inputClip->size()
    ](size_type index) {
        const double inputStart = index * downscalingFactor;
        const double inputEnd =
            std::min((index + 1) * downscalingFactor, static_cast<double>(size));
        return mean(inputStart, inputEnd, read);
    };
}
AudioEffect resample(int sampleRate) {
    return [sampleRate](unique_ptr<AudioClip> inputClip) {
        return make_unique<SampleRateConverter>(std::move(inputClip), sampleRate);
    };
}
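// Usage sketch: downsampling a clip for analysis; the 16-kHz target rate here
// is hypothetical.
std::unique_ptr<AudioClip> downsampleForAnalysis(std::unique_ptr<AudioClip> clip) {
    return std::move(clip) | resample(16000);
}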

View File

@ -5,25 +5,25 @@
class SampleRateConverter : public AudioClip {
public:
    SampleRateConverter(std::unique_ptr<AudioClip> inputClip, int outputSampleRate);
    std::unique_ptr<AudioClip> clone() const override;
    int getSampleRate() const override;
    size_type size() const override;
private:
    SampleReader createUnsafeSampleReader() const override;
    std::shared_ptr<AudioClip> inputClip;
    double downscalingFactor; // input sample rate / output sample rate
    int outputSampleRate;
    int64_t outputSampleCount;
};
AudioEffect resample(int sampleRate);
inline int SampleRateConverter::getSampleRate() const {
    return outputSampleRate;
}
inline AudioClip::size_type SampleRateConverter::size() const {
    return outputSampleCount;
}

View File

@ -20,478 +20,478 @@ using std::streamoff;
// Converts an int in the range min..max to a float in the range -1..1
float toNormalizedFloat(int value, int min, int max) {
    const float fMin = static_cast<float>(min);
    const float fMax = static_cast<float>(max);
    const float fValue = static_cast<float>(value);
    return ((fValue - fMin) / (fMax - fMin) * 2) - 1;
}
streamoff roundUpToEven(streamoff i) {
    return (i + 1) & (~1);
}
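// Worked examples for the helpers above: toNormalizedFloat(0, 0, 255) yields
// -1.0f and toNormalizedFloat(255, 0, 255) yields 1.0f; roundUpToEven(5) == 6
// and roundUpToEven(6) == 6, matching RIFF's word-aligned chunk layout.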
namespace Codec {
    constexpr int Pcm = 0x01;
    constexpr int Float = 0x03;
    constexpr int Extensible = 0xFFFE;
};
string codecToString(int codec);
WaveFormatInfo getWaveFormatInfo(const path& filePath) {
    WaveFormatInfo formatInfo {};
    auto file = openFile(filePath);
    file.seekg(0, std::ios_base::end);
    const streamoff fileSize = file.tellg();
    file.seekg(0);
    auto remaining = [&](int byteCount) {
        const streamoff filePosition = file.tellg();
        return byteCount <= fileSize - filePosition;
    };
    // Read header
    if (!remaining(10)) {
        throw runtime_error("WAVE file is corrupt. Header not found.");
    }
    const auto rootChunkId = read<uint32_t>(file);
    if (rootChunkId != fourcc('R', 'I', 'F', 'F')) {
        throw runtime_error("Unknown file format. Only WAVE files are supported.");
    }
    read<uint32_t>(file); // Chunk size
    const uint32_t waveId = read<uint32_t>(file);
    if (waveId != fourcc('W', 'A', 'V', 'E')) {
        throw runtime_error(format("File format is not WAVE, but {}.", fourccToString(waveId)));
    }
    // Read chunks until we reach the data chunk
    bool processedFormatChunk = false;
    bool processedDataChunk = false;
    while ((!processedFormatChunk || !processedDataChunk) && remaining(8)) {
        const uint32_t chunkId = read<uint32_t>(file);
        const streamoff chunkSize = read<int32_t>(file);
        const streamoff chunkEnd = roundUpToEven(file.tellg() + chunkSize);
        switch (chunkId) {
            case fourcc('f', 'm', 't', ' '):
            {
                // Read relevant data
                uint16_t codec = read<uint16_t>(file);
                formatInfo.channelCount = read<uint16_t>(file);
                formatInfo.frameRate = read<int32_t>(file);
                read<uint32_t>(file); // Bytes per second
                const int bytesPerFrame = read<uint16_t>(file);
                const int bitsPerSampleOnDisk = read<uint16_t>(file);
                int bitsPerSample = bitsPerSampleOnDisk;
                if (chunkSize > 16) {
                    const int extensionSize = read<uint16_t>(file);
                    if (extensionSize >= 22) {
                        // Read extension fields
                        bitsPerSample = read<uint16_t>(file);
                        read<uint32_t>(file); // Skip channel mask
                        const uint16_t codecOverride = read<uint16_t>(file);
                        if (codec == Codec::Extensible) {
                            codec = codecOverride;
                        }
                    }
                }
                // Determine sample format
                int bytesPerSample;
                switch (codec) {
                    case Codec::Pcm:
                        // Determine sample size.
                        // According to the WAVE standard, sample sizes that are not multiples of 8
                        // bits (e.g. 12 bits) can be treated like the next-larger byte size.
                        if (bitsPerSample == 8) {
                            formatInfo.sampleFormat = SampleFormat::UInt8;
                            bytesPerSample = 1;
                        } else if (bitsPerSample <= 16) {
                            formatInfo.sampleFormat = SampleFormat::Int16;
                            bytesPerSample = 2;
                        } else if (bitsPerSample <= 24) {
                            formatInfo.sampleFormat = SampleFormat::Int24;
                            bytesPerSample = 3;
                        } else if (bitsPerSample <= 32) {
                            formatInfo.sampleFormat = SampleFormat::Int32;
                            bytesPerSample = 4;
                        } else {
                            throw runtime_error(
                                format("Unsupported sample format: {}-bit PCM.", bitsPerSample));
                        }
                        if (bytesPerSample != bytesPerFrame / formatInfo.channelCount) {
                            throw runtime_error("Unsupported sample organization.");
                        }
                        break;
                    case Codec::Float:
                        if (bitsPerSample == 32) {
                            formatInfo.sampleFormat = SampleFormat::Float32;
                            bytesPerSample = 4;
                        } else if (bitsPerSample == 64) {
                            formatInfo.sampleFormat = SampleFormat::Float64;
                            bytesPerSample = 8;
                        } else {
                            throw runtime_error(
                                format("Unsupported sample format: {}-bit IEEE Float.", bitsPerSample)
                            );
                        }
                        break;
                    default:
                        throw runtime_error(format(
                            "Unsupported audio codec: '{}'. Only uncompressed codecs ('{}' and '{}') are supported.",
                            codecToString(codec), codecToString(Codec::Pcm), codecToString(Codec::Float)
                        ));
                }
                formatInfo.bytesPerFrame = bytesPerSample * formatInfo.channelCount;
                processedFormatChunk = true;
                break;
            }
            case fourcc('d', 'a', 't', 'a'):
            {
                formatInfo.dataOffset = file.tellg();
                formatInfo.frameCount = chunkSize / formatInfo.bytesPerFrame;
                processedDataChunk = true;
                break;
            }
            default:
            {
                // Ignore unknown chunk
                break;
            }
        }
// Seek to end of chunk
file.seekg(chunkEnd, std::ios_base::beg);
}
if (!processedFormatChunk) throw runtime_error("Missing format chunk.");
if (!processedDataChunk) throw runtime_error("Missing data chunk.");
return formatInfo;
}
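// Usage sketch (illustrative only; "speech.wav" is a made-up path):
//
//     const WaveFormatInfo info = getWaveFormatInfo("speech.wav");
//     std::cout << info.frameRate << " Hz, " << info.channelCount
//         << " channel(s), " << info.frameCount << " frames\n";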
WaveFileReader::WaveFileReader(const path& filePath) :
filePath(filePath),
formatInfo(getWaveFormatInfo(filePath)) {}
unique_ptr<AudioClip> WaveFileReader::clone() const {
return make_unique<WaveFileReader>(*this);
}
inline AudioClip::value_type readSample(
std::ifstream& file,
SampleFormat sampleFormat,
int channelCount
) {
float sum = 0;
for (int channelIndex = 0; channelIndex < channelCount; channelIndex++) {
switch (sampleFormat) {
case SampleFormat::UInt8:
{
const uint8_t raw = read<uint8_t>(file);
sum += toNormalizedFloat(raw, 0, UINT8_MAX);
break;
}
case SampleFormat::Int16:
{
const int16_t raw = read<int16_t>(file);
sum += toNormalizedFloat(raw, INT16_MIN, INT16_MAX);
break;
}
case SampleFormat::Int24:
{
int raw = read<int, 24>(file);
if (raw & 0x800000) raw |= 0xFF000000; // Sign-extend the 24-bit two's complement value to 32 bits
sum += toNormalizedFloat(raw, INT24_MIN, INT24_MAX);
break;
}
case SampleFormat::Int32:
{
const int32_t raw = read<int32_t>(file);
sum += toNormalizedFloat(raw, INT32_MIN, INT32_MAX);
break;
}
case SampleFormat::Float32:
{
sum += read<float>(file);
break;
}
case SampleFormat::Float64:
{
sum += static_cast<float>(read<double>(file));
break;
}
}
}
return sum / channelCount;
}
SampleReader WaveFileReader::createUnsafeSampleReader() const {
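// Design note: the lambda below owns its own file handle and remembers the last
// read position, so strictly sequential access never pays for a redundant seek;
// only true random access triggers a seekg call.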
return
[
formatInfo = formatInfo,
file = std::make_shared<std::ifstream>(openFile(filePath)),
filePos = std::streampos(0)
](size_type index) mutable {
const std::streampos newFilePos = formatInfo.dataOffset
+ static_cast<streamoff>(index * formatInfo.bytesPerFrame);
if (newFilePos != filePos) {
file->seekg(newFilePos);
}
const value_type result =
readSample(*file, formatInfo.sampleFormat, formatInfo.channelCount);
filePos = newFilePos + static_cast<streamoff>(formatInfo.bytesPerFrame);
return result;
};
}
string codecToString(int codec) {
switch (codec) {
case 0x0001: return "PCM";
case 0x0002: return "Microsoft ADPCM";
case 0x0003: return "IEEE Float";
case 0x0004: return "Compaq VSELP";
case 0x0005: return "IBM CVSD";
case 0x0006: return "Microsoft a-Law";
case 0x0007: return "Microsoft u-Law";
case 0x0008: return "Microsoft DTS";
case 0x0009: return "DRM";
case 0x000a: return "WMA 9 Speech";
case 0x000b: return "Microsoft Windows Media RT Voice";
case 0x0010: return "OKI-ADPCM";
case 0x0011: return "Intel IMA/DVI-ADPCM";
case 0x0012: return "Videologic Mediaspace ADPCM";
case 0x0013: return "Sierra ADPCM";
case 0x0014: return "Antex G.723 ADPCM";
case 0x0015: return "DSP Solutions DIGISTD";
case 0x0016: return "DSP Solutions DIGIFIX";
case 0x0017: return "Dialoic OKI ADPCM";
case 0x0018: return "Media Vision ADPCM";
case 0x0019: return "HP CU";
case 0x001a: return "HP Dynamic Voice";
case 0x0020: return "Yamaha ADPCM";
case 0x0021: return "SONARC Speech Compression";
case 0x0022: return "DSP Group True Speech";
case 0x0023: return "Echo Speech Corp.";
case 0x0024: return "Virtual Music Audiofile AF36";
case 0x0025: return "Audio Processing Tech.";
case 0x0026: return "Virtual Music Audiofile AF10";
case 0x0027: return "Aculab Prosody 1612";
case 0x0028: return "Merging Tech. LRC";
case 0x0030: return "Dolby AC2";
case 0x0031: return "Microsoft GSM610";
case 0x0032: return "MSN Audio";
case 0x0033: return "Antex ADPCME";
case 0x0034: return "Control Resources VQLPC";
case 0x0035: return "DSP Solutions DIGIREAL";
case 0x0036: return "DSP Solutions DIGIADPCM";
case 0x0037: return "Control Resources CR10";
case 0x0038: return "Natural MicroSystems VBX ADPCM";
case 0x0039: return "Crystal Semiconductor IMA ADPCM";
case 0x003a: return "Echo Speech ECHOSC3";
case 0x003b: return "Rockwell ADPCM";
case 0x003c: return "Rockwell DIGITALK";
case 0x003d: return "Xebec Multimedia";
case 0x0040: return "Antex G.721 ADPCM";
case 0x0041: return "Antex G.728 CELP";
case 0x0042: return "Microsoft MSG723";
case 0x0043: return "IBM AVC ADPCM";
case 0x0045: return "ITU-T G.726";
case 0x0050: return "Microsoft MPEG";
case 0x0051: return "RT23 or PAC";
case 0x0052: return "InSoft RT24";
case 0x0053: return "InSoft PAC";
case 0x0055: return "MP3";
case 0x0059: return "Cirrus";
case 0x0060: return "Cirrus Logic";
case 0x0061: return "ESS Tech. PCM";
case 0x0062: return "Voxware Inc.";
case 0x0063: return "Canopus ATRAC";
case 0x0064: return "APICOM G.726 ADPCM";
case 0x0065: return "APICOM G.722 ADPCM";
case 0x0066: return "Microsoft DSAT";
case 0x0067: return "Micorsoft DSAT DISPLAY";
case 0x0069: return "Voxware Byte Aligned";
case 0x0070: return "Voxware AC8";
case 0x0071: return "Voxware AC10";
case 0x0072: return "Voxware AC16";
case 0x0073: return "Voxware AC20";
case 0x0074: return "Voxware MetaVoice";
case 0x0075: return "Voxware MetaSound";
case 0x0076: return "Voxware RT29HW";
case 0x0077: return "Voxware VR12";
case 0x0078: return "Voxware VR18";
case 0x0079: return "Voxware TQ40";
case 0x007a: return "Voxware SC3";
case 0x007b: return "Voxware SC3";
case 0x0080: return "Soundsoft";
case 0x0081: return "Voxware TQ60";
case 0x0082: return "Microsoft MSRT24";
case 0x0083: return "AT&T G.729A";
case 0x0084: return "Motion Pixels MVI MV12";
case 0x0085: return "DataFusion G.726";
case 0x0086: return "DataFusion GSM610";
case 0x0088: return "Iterated Systems Audio";
case 0x0089: return "Onlive";
case 0x008a: return "Multitude, Inc. FT SX20";
case 0x008b: return "Infocom ITS A/S G.721 ADPCM";
case 0x008c: return "Convedia G729";
case 0x008d: return "Not specified congruency, Inc.";
case 0x0091: return "Siemens SBC24";
case 0x0092: return "Sonic Foundry Dolby AC3 APDIF";
case 0x0093: return "MediaSonic G.723";
case 0x0094: return "Aculab Prosody 8kbps";
case 0x0097: return "ZyXEL ADPCM";
case 0x0098: return "Philips LPCBB";
case 0x0099: return "Studer Professional Audio Packed";
case 0x00a0: return "Malden PhonyTalk";
case 0x00a1: return "Racal Recorder GSM";
case 0x00a2: return "Racal Recorder G720.a";
case 0x00a3: return "Racal G723.1";
case 0x00a4: return "Racal Tetra ACELP";
case 0x00b0: return "NEC AAC NEC Corporation";
case 0x00ff: return "AAC";
case 0x0100: return "Rhetorex ADPCM";
case 0x0101: return "IBM u-Law";
case 0x0102: return "IBM a-Law";
case 0x0103: return "IBM ADPCM";
case 0x0111: return "Vivo G.723";
case 0x0112: return "Vivo Siren";
case 0x0120: return "Philips Speech Processing CELP";
case 0x0121: return "Philips Speech Processing GRUNDIG";
case 0x0123: return "Digital G.723";
case 0x0125: return "Sanyo LD ADPCM";
case 0x0130: return "Sipro Lab ACEPLNET";
case 0x0131: return "Sipro Lab ACELP4800";
case 0x0132: return "Sipro Lab ACELP8V3";
case 0x0133: return "Sipro Lab G.729";
case 0x0134: return "Sipro Lab G.729A";
case 0x0135: return "Sipro Lab Kelvin";
case 0x0136: return "VoiceAge AMR";
case 0x0140: return "Dictaphone G.726 ADPCM";
case 0x0150: return "Qualcomm PureVoice";
case 0x0151: return "Qualcomm HalfRate";
case 0x0155: return "Ring Zero Systems TUBGSM";
case 0x0160: return "Microsoft Audio1";
case 0x0161: return "Windows Media Audio V2 V7 V8 V9 / DivX audio (WMA) / Alex AC3 Audio";
case 0x0162: return "Windows Media Audio Professional V9";
case 0x0163: return "Windows Media Audio Lossless V9";
case 0x0164: return "WMA Pro over S/PDIF";
case 0x0170: return "UNISYS NAP ADPCM";
case 0x0171: return "UNISYS NAP ULAW";
case 0x0172: return "UNISYS NAP ALAW";
case 0x0173: return "UNISYS NAP 16K";
case 0x0174: return "MM SYCOM ACM SYC008 SyCom Technologies";
case 0x0175: return "MM SYCOM ACM SYC701 G726L SyCom Technologies";
case 0x0176: return "MM SYCOM ACM SYC701 CELP54 SyCom Technologies";
case 0x0177: return "MM SYCOM ACM SYC701 CELP68 SyCom Technologies";
case 0x0178: return "Knowledge Adventure ADPCM";
case 0x0180: return "Fraunhofer IIS MPEG2AAC";
case 0x0190: return "Digital Theater Systems DTS DS";
case 0x0200: return "Creative Labs ADPCM";
case 0x0202: return "Creative Labs FASTSPEECH8";
case 0x0203: return "Creative Labs FASTSPEECH10";
case 0x0210: return "UHER ADPCM";
case 0x0215: return "Ulead DV ACM";
case 0x0216: return "Ulead DV ACM";
case 0x0220: return "Quarterdeck Corp.";
case 0x0230: return "I-Link VC";
case 0x0240: return "Aureal Semiconductor Raw Sport";
case 0x0241: return "ESST AC3";
case 0x0250: return "Interactive Products HSX";
case 0x0251: return "Interactive Products RPELP";
case 0x0260: return "Consistent CS2";
case 0x0270: return "Sony SCX";
case 0x0271: return "Sony SCY";
case 0x0272: return "Sony ATRAC3";
case 0x0273: return "Sony SPC";
case 0x0280: return "TELUM Telum Inc.";
case 0x0281: return "TELUMIA Telum Inc.";
case 0x0285: return "Norcom Voice Systems ADPCM";
case 0x0300: return "Fujitsu FM TOWNS SND";
case 0x0301:
case 0x0302:
case 0x0303:
case 0x0304:
case 0x0305:
case 0x0306:
case 0x0307:
case 0x0308: return "Fujitsu (not specified)";
case 0x0350: return "Micronas Semiconductors, Inc. Development";
case 0x0351: return "Micronas Semiconductors, Inc. CELP833";
case 0x0400: return "Brooktree Digital";
case 0x0401: return "Intel Music Coder (IMC)";
case 0x0402: return "Ligos Indeo Audio";
case 0x0450: return "QDesign Music";
case 0x0500: return "On2 VP7 On2 Technologies";
case 0x0501: return "On2 VP6 On2 Technologies";
case 0x0680: return "AT&T VME VMPCM";
case 0x0681: return "AT&T TCP";
case 0x0700: return "YMPEG Alpha (dummy for MPEG-2 compressor)";
case 0x08ae: return "ClearJump LiteWave (lossless)";
case 0x1000: return "Olivetti GSM";
case 0x1001: return "Olivetti ADPCM";
case 0x1002: return "Olivetti CELP";
case 0x1003: return "Olivetti SBC";
case 0x1004: return "Olivetti OPR";
case 0x1100: return "Lernout & Hauspie";
case 0x1101: return "Lernout & Hauspie CELP codec";
case 0x1102:
case 0x1103:
case 0x1104: return "Lernout & Hauspie SBC codec";
case 0x1400: return "Norris Comm. Inc.";
case 0x1401: return "ISIAudio";
case 0x1500: return "AT&T Soundspace Music Compression";
case 0x181c: return "VoxWare RT24 speech codec";
case 0x181e: return "Lucent elemedia AX24000P Music codec";
case 0x1971: return "Sonic Foundry LOSSLESS";
case 0x1979: return "Innings Telecom Inc. ADPCM";
case 0x1c07: return "Lucent SX8300P speech codec";
case 0x1c0c: return "Lucent SX5363S G.723 compliant codec";
case 0x1f03: return "CUseeMe DigiTalk (ex-Rocwell)";
case 0x1fc4: return "NCT Soft ALF2CD ACM";
case 0x2000: return "FAST Multimedia DVM";
case 0x2001: return "Dolby DTS (Digital Theater System)";
case 0x2002: return "RealAudio 1 / 2 14.4";
case 0x2003: return "RealAudio 1 / 2 28.8";
case 0x2004: return "RealAudio G2 / 8 Cook (low bitrate)";
case 0x2005: return "RealAudio 3 / 4 / 5 Music (DNET)";
case 0x2006: return "RealAudio 10 AAC (RAAC)";
case 0x2007: return "RealAudio 10 AAC+ (RACP)";
case 0x2500: return "Reserved range to 0x2600 Microsoft";
case 0x3313: return "makeAVIS (ffvfw fake AVI sound from AviSynth scripts)";
case 0x4143: return "Divio MPEG-4 AAC audio";
case 0x4201: return "Nokia adaptive multirate";
case 0x4243: return "Divio G726 Divio, Inc.";
case 0x434c: return "LEAD Speech";
case 0x564c: return "LEAD Vorbis";
case 0x5756: return "WavPack Audio";
case 0x674f: return "Ogg Vorbis (mode 1)";
case 0x6750: return "Ogg Vorbis (mode 2)";
case 0x6751: return "Ogg Vorbis (mode 3)";
case 0x676f: return "Ogg Vorbis (mode 1+)";
case 0x6770: return "Ogg Vorbis (mode 2+)";
case 0x6771: return "Ogg Vorbis (mode 3+)";
case 0x7000: return "3COM NBX 3Com Corporation";
case 0x706d: return "FAAD AAC";
case 0x7a21: return "GSM-AMR (CBR, no SID)";
case 0x7a22: return "GSM-AMR (VBR, including SID)";
case 0xa100: return "Comverse Infosys Ltd. G723 1";
case 0xa101: return "Comverse Infosys Ltd. AVQSBC";
case 0xa102: return "Comverse Infosys Ltd. OLDSBC";
case 0xa103: return "Symbol Technologies G729A";
case 0xa104: return "VoiceAge AMR WB VoiceAge Corporation";
case 0xa105: return "Ingenient Technologies Inc. G726";
case 0xa106: return "ISO/MPEG-4 advanced audio Coding";
case 0xa107: return "Encore Software Ltd G726";
case 0xa109: return "Speex ACM Codec xiph.org";
case 0xdfac: return "DebugMode SonicFoundry Vegas FrameServer ACM Codec";
case 0xf1ac: return "Free Lossless Audio Codec FLAC";
case 0xfffe: return "Extensible";
case 0xffff: return "Development";
default:
return format("{0:#x}", codec);
}
}


@@ -4,43 +4,43 @@
#include "AudioClip.h"
enum class SampleFormat {
UInt8,
Int16,
Int24,
Int32,
Float32,
Float64
};
struct WaveFormatInfo {
int bytesPerFrame;
SampleFormat sampleFormat;
int frameRate;
int64_t frameCount;
int channelCount;
std::streampos dataOffset;
};
WaveFormatInfo getWaveFormatInfo(const std::filesystem::path& filePath);
class WaveFileReader : public AudioClip {
public:
WaveFileReader(const std::filesystem::path& filePath);
std::unique_ptr<AudioClip> clone() const override;
int getSampleRate() const override;
size_type size() const override;
private:
SampleReader createUnsafeSampleReader() const override;
std::filesystem::path filePath;
WaveFormatInfo formatInfo;
};
inline int WaveFileReader::getSampleRate() const {
return formatInfo.frameRate;
}
inline AudioClip::size_type WaveFileReader::size() const {
return formatInfo.frameCount;
}


@@ -10,20 +10,20 @@ using std::runtime_error;
using fmt::format;
std::unique_ptr<AudioClip> createAudioFileClip(path filePath) {
try {
const string extension =
boost::algorithm::to_lower_copy(filePath.extension().u8string());
if (extension == ".wav") {
return std::make_unique<WaveFileReader>(filePath);
}
if (extension == ".ogg") {
return std::make_unique<OggVorbisFileReader>(filePath);
}
throw runtime_error(format(
"Unsupported file extension '{}'. Supported extensions are '.wav' and '.ogg'.",
extension
));
} catch (...) {
std::throw_with_nested(runtime_error(format("Could not open sound file {}.", filePath.u8string())));
}
}
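// Usage sketch (illustrative; "dialog.ogg" is a made-up path):
//
//     const std::unique_ptr<AudioClip> clip = createAudioFileClip("dialog.ogg");
//     // The clip can then be passed on, e.g. to detectVoiceActivity.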


@@ -4,43 +4,43 @@
namespace little_endian {
template<typename Type, int bitsToRead = 8 * sizeof(Type)>
Type read(std::istream& stream) {
static_assert(bitsToRead % 8 == 0, "Cannot read fractional bytes.");
static_assert(bitsToRead <= sizeof(Type) * 8, "Bits to read exceed target type size.");
template<typename Type, int bitsToRead = 8 * sizeof(Type)>
Type read(std::istream& stream) {
static_assert(bitsToRead % 8 == 0, "Cannot read fractional bytes.");
static_assert(bitsToRead <= sizeof(Type) * 8, "Bits to read exceed target type size.");
Type result = 0;
char* p = reinterpret_cast<char*>(&result);
const int bytesToRead = bitsToRead / 8;
for (int byteIndex = 0; byteIndex < bytesToRead; byteIndex++) {
*(p + byteIndex) = static_cast<char>(stream.get());
}
return result;
}
template<typename Type, int bitsToWrite = 8 * sizeof(Type)>
void write(Type value, std::ostream& stream) {
static_assert(bitsToWrite % 8 == 0, "Cannot write fractional bytes.");
static_assert(bitsToWrite <= sizeof(Type) * 8, "Bits to write exceed target type size.");
template<typename Type, int bitsToWrite = 8 * sizeof(Type)>
void write(Type value, std::ostream& stream) {
static_assert(bitsToWrite % 8 == 0, "Cannot write fractional bytes.");
static_assert(bitsToWrite <= sizeof(Type) * 8, "Bits to write exceed target type size.");
char* p = reinterpret_cast<char*>(&value);
const int bytesToWrite = bitsToWrite / 8;
for (int byteIndex = 0; byteIndex < bytesToWrite; byteIndex++) {
stream.put(*(p + byteIndex));
}
}
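// fourcc packs four characters into a little-endian uint32, the same byte order
// in which read<uint32_t> returns chunk IDs, so fourcc('d', 'a', 't', 'a') can
// be compared directly against an ID read from a RIFF file.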
constexpr uint32_t fourcc(
unsigned char c0,
unsigned char c1,
unsigned char c2,
unsigned char c3
) {
return c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
}
inline std::string fourccToString(uint32_t fourcc) {
return std::string(reinterpret_cast<char*>(&fourcc), 4);
}
}


@@ -6,53 +6,53 @@ using std::vector;
// Converts a float in the range -1..1 to a signed 16-bit int
inline int16_t floatSampleToInt16(float sample) {
sample = std::max(sample, -1.0f);
sample = std::min(sample, 1.0f);
return static_cast<int16_t>(((sample + 1) / 2) * (INT16_MAX - INT16_MIN) + INT16_MIN);
}
void process16bitAudioClip(
const AudioClip& audioClip,
const function<void(const vector<int16_t>&)>& processBuffer,
size_t bufferCapacity,
ProgressSink& progressSink
const AudioClip& audioClip,
const function<void(const vector<int16_t>&)>& processBuffer,
size_t bufferCapacity,
ProgressSink& progressSink
) {
// Process entire sound stream
vector<int16_t> buffer;
buffer.reserve(bufferCapacity);
size_t sampleCount = 0;
auto it = audioClip.begin();
const auto end = audioClip.end();
do {
// Read to buffer
buffer.clear();
for (; buffer.size() < bufferCapacity && it != end; ++it) {
// Read sample to buffer
buffer.push_back(floatSampleToInt16(*it));
}
// Process buffer
processBuffer(buffer);
sampleCount += buffer.size();
progressSink.reportProgress(static_cast<double>(sampleCount) / static_cast<double>(audioClip.size()));
} while (!buffer.empty());
}
void process16bitAudioClip(
const AudioClip& audioClip,
const function<void(const vector<int16_t>&)>& processBuffer,
ProgressSink& progressSink
const AudioClip& audioClip,
const function<void(const vector<int16_t>&)>& processBuffer,
ProgressSink& progressSink
) {
const size_t capacity = 1600; // 0.1 second capacity
process16bitAudioClip(audioClip, processBuffer, capacity, progressSink);
}
vector<int16_t> copyTo16bitBuffer(const AudioClip& audioClip) {
vector<int16_t> result(static_cast<size_t>(audioClip.size()));
int index = 0;
for (float sample : audioClip) {
result[index++] = floatSampleToInt16(sample);
}
return result;
vector<int16_t> result(static_cast<size_t>(audioClip.size()));
int index = 0;
for (float sample : audioClip) {
result[index++] = floatSampleToInt16(sample);
}
return result;
}


@@ -6,16 +6,16 @@
#include "tools/progress.h"
void process16bitAudioClip(
const AudioClip& audioClip,
const std::function<void(const std::vector<int16_t>&)>& processBuffer,
size_t bufferCapacity,
ProgressSink& progressSink
const AudioClip& audioClip,
const std::function<void(const std::vector<int16_t>&)>& processBuffer,
size_t bufferCapacity,
ProgressSink& progressSink
);
void process16bitAudioClip(
const AudioClip& audioClip,
const std::function<void(const std::vector<int16_t>&)>& processBuffer,
ProgressSink& progressSink
const AudioClip& audioClip,
const std::function<void(const std::vector<int16_t>&)>& processBuffer,
ProgressSink& progressSink
);
std::vector<int16_t> copyTo16bitBuffer(const AudioClip& audioClip);


@@ -17,79 +17,79 @@ using std::runtime_error;
using std::unique_ptr;
JoiningBoundedTimeline<void> detectVoiceActivity(
const AudioClip& inputAudioClip,
ProgressSink& progressSink
const AudioClip& inputAudioClip,
ProgressSink& progressSink
) {
// Prepare audio for VAD
constexpr int webRtcSamplingRate = 8000;
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone()
| resample(webRtcSamplingRate)
| removeDcOffset();
VadInst* vadHandle = WebRtcVad_Create();
if (!vadHandle) throw runtime_error("Error creating WebRTC VAD handle.");
auto freeHandle = gsl::finally([&]() { WebRtcVad_Free(vadHandle); });
int error = WebRtcVad_Init(vadHandle);
if (error) throw runtime_error("Error initializing WebRTC VAD.");
const int aggressiveness = 2; // 0..3. The higher, the more is cut off.
error = WebRtcVad_set_mode(vadHandle, aggressiveness);
if (error) throw runtime_error("Error setting WebRTC VAD aggressiveness.");
// Detect activity
JoiningBoundedTimeline<void> activity(audioClip->getTruncatedRange());
centiseconds time = 0_cs;
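// WebRTC VAD only accepts frames of 10, 20, or 30 ms. At 8000 Hz, a 10-ms
// frame is 80 samples, which conveniently equals one centisecond of audio.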
const size_t frameSize = webRtcSamplingRate / 100;
const auto processBuffer = [&](const vector<int16_t>& buffer) {
// WebRTC is picky regarding buffer size
if (buffer.size() < frameSize) return;
const int result = WebRtcVad_Process(
vadHandle,
webRtcSamplingRate,
buffer.data(),
buffer.size()
);
if (result == -1) throw runtime_error("Error processing audio buffer using WebRTC VAD.");
// Ignore the result of WebRtcVad_Process, instead directly interpret the internal VAD flag.
// The result of WebRtcVad_Process stays 1 for a number of frames after the last detected
// activity.
const bool isActive = reinterpret_cast<VadInstT*>(vadHandle)->vad == 1;
if (isActive) {
activity.set(time, time + 1_cs);
}
time += 1_cs;
};
process16bitAudioClip(*audioClip, processBuffer, frameSize, progressSink);
// Fill small gaps in activity
const centiseconds maxGap(10);
for (const auto& pair : getPairs(activity)) {
if (pair.second.getStart() - pair.first.getEnd() <= maxGap) {
activity.set(pair.first.getEnd(), pair.second.getStart());
}
}
// Discard very short segments of activity
const centiseconds minSegmentLength(5);
for (const auto& segment : Timeline<void>(activity)) {
if (segment.getDuration() < minSegmentLength) {
activity.clear(segment.getTimeRange());
}
}
logging::debugFormat(
"Found {} sections of voice activity: {}",
activity.size(),
join(activity | transformed([](const Timed<void>& t) {
return format("{0}-{1}", t.getStart(), t.getEnd());
}), ", ")
);
return activity;
}


@@ -4,6 +4,6 @@
#include "tools/progress.h"
JoiningBoundedTimeline<void> detectVoiceActivity(
const AudioClip& audioClip,
ProgressSink& progressSink
const AudioClip& audioClip,
ProgressSink& progressSink
);


@@ -5,39 +5,39 @@
using namespace little_endian;
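// Writes the clip as a minimal canonical WAVE file: a RIFF header, a 16-byte
// 'fmt ' chunk declaring mono 32-bit IEEE float samples, and a single 'data'
// chunk containing the raw samples.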
void createWaveFile(const AudioClip& audioClip, std::string fileName) {
// Open file
std::ofstream file;
file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
file.open(fileName, std::ios::out | std::ios::binary);
// Write RIFF chunk
write<uint32_t>(fourcc('R', 'I', 'F', 'F'), file);
const uint32_t formatChunkSize = 16;
const uint16_t channelCount = 1;
const uint16_t frameSize = static_cast<uint16_t>(channelCount * sizeof(float));
const uint32_t dataChunkSize = static_cast<uint32_t>(audioClip.size() * frameSize);
const uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize);
write<uint32_t>(riffChunkSize, file);
write<uint32_t>(fourcc('W', 'A', 'V', 'E'), file);
// Write format chunk
write<uint32_t>(fourcc('f', 'm', 't', ' '), file);
write<uint32_t>(formatChunkSize, file);
const uint16_t codec = 0x03; // 32-bit float
write<uint16_t>(codec, file);
write<uint16_t>(channelCount, file);
const uint32_t frameRate = static_cast<uint16_t>(audioClip.getSampleRate());
write<uint32_t>(frameRate, file);
const uint32_t bytesPerSecond = frameRate * frameSize;
write<uint32_t>(bytesPerSecond, file);
write<uint16_t>(frameSize, file);
const uint16_t bitsPerSample = 8 * sizeof(float);
write<uint16_t>(bitsPerSample, file);
// Write data chunk
write<uint32_t>(fourcc('d', 'a', 't', 'a'), file);
write<uint32_t>(dataChunkSize, file);
for (float sample : audioClip) {
write<float>(sample, file);
}
}


@@ -4,89 +4,89 @@ using std::string;
using boost::optional;
PhoneConverter& PhoneConverter::get() {
static PhoneConverter converter;
return converter;
}
string PhoneConverter::getTypeName() {
return "Phone";
return "Phone";
}
EnumConverter<Phone>::member_data PhoneConverter::getMemberData() {
return member_data {
{ Phone::AO, "AO" },
{ Phone::AA, "AA" },
{ Phone::IY, "IY" },
{ Phone::UW, "UW" },
{ Phone::EH, "EH" },
{ Phone::IH, "IH" },
{ Phone::UH, "UH" },
{ Phone::AH, "AH" },
{ Phone::Schwa, "Schwa" },
{ Phone::AE, "AE" },
{ Phone::EY, "EY" },
{ Phone::AY, "AY" },
{ Phone::OW, "OW" },
{ Phone::AW, "AW" },
{ Phone::OY, "OY" },
{ Phone::ER, "ER" },
{ Phone::P, "P" },
{ Phone::B, "B" },
{ Phone::T, "T" },
{ Phone::D, "D" },
{ Phone::K, "K" },
{ Phone::G, "G" },
{ Phone::CH, "CH" },
{ Phone::JH, "JH" },
{ Phone::F, "F" },
{ Phone::V, "V" },
{ Phone::TH, "TH" },
{ Phone::DH, "DH" },
{ Phone::S, "S" },
{ Phone::Z, "Z" },
{ Phone::SH, "SH" },
{ Phone::ZH, "ZH" },
{ Phone::HH, "HH" },
{ Phone::M, "M" },
{ Phone::N, "N" },
{ Phone::NG, "NG" },
{ Phone::L, "L" },
{ Phone::R, "R" },
{ Phone::Y, "Y" },
{ Phone::W, "W" },
{ Phone::P, "P" },
{ Phone::B, "B" },
{ Phone::T, "T" },
{ Phone::D, "D" },
{ Phone::K, "K" },
{ Phone::G, "G" },
{ Phone::CH, "CH" },
{ Phone::JH, "JH" },
{ Phone::F, "F" },
{ Phone::V, "V" },
{ Phone::TH, "TH" },
{ Phone::DH, "DH" },
{ Phone::S, "S" },
{ Phone::Z, "Z" },
{ Phone::SH, "SH" },
{ Phone::ZH, "ZH" },
{ Phone::HH, "HH" },
{ Phone::M, "M" },
{ Phone::N, "N" },
{ Phone::NG, "NG" },
{ Phone::L, "L" },
{ Phone::R, "R" },
{ Phone::Y, "Y" },
{ Phone::W, "W" },
{ Phone::Breath, "Breath" },
{ Phone::Cough, "Cough" },
{ Phone::Smack, "Smack" },
{ Phone::Noise, "Noise" }
};
{ Phone::Breath, "Breath" },
{ Phone::Cough, "Cough" },
{ Phone::Smack, "Smack" },
{ Phone::Noise, "Noise" }
};
}
optional<Phone> PhoneConverter::tryParse(const string& s) {
auto result = EnumConverter<Phone>::tryParse(s);
if (result) return result;
if (s == "+BREATH+") {
return Phone::Breath;
}
if (s == "+COUGH+") {
return Phone::Cough;
}
if (s == "+SMACK+") {
return Phone::Smack;
}
return Phone::Noise;
if (s == "+BREATH+") {
return Phone::Breath;
}
if (s == "+COUGH+") {
return Phone::Cough;
}
if (s == "+SMACK+") {
return Phone::Smack;
}
return Phone::Noise;
}
std::ostream& operator<<(std::ostream& stream, Phone value) {
return PhoneConverter::get().write(stream, value);
}
std::istream& operator>>(std::istream& stream, Phone& value) {
return PhoneConverter::get().read(stream, value);
}
bool isVowel(Phone phone) {
return phone <= Phone::LastVowel;
return phone <= Phone::LastVowel;
}


@@ -4,88 +4,88 @@
// Defines a subset of the Arpabet
enum class Phone {
/////////
// Vowels
// ... monophthongs
AO, // [ɔ] as in [o]ff, f[a]ll, fr[o]st
AA, // [ɑ] as in f[a]ther
IY, // [i] as in b[ee], sh[e]
UW, // [u] as in y[ou], n[ew], f[oo]d
EH, // [ɛ] as in r[e]d, m[e]n
IH, // [ɪ] as in b[i]g, w[i]n
UH, // [ʊ] as in sh[ou]ld, c[ou]ld
AH, // [ʌ] as in b[u]t, s[u]n
Schwa, // [ə] as in [a]lone, disc[u]s
AE, // [æ] as in [a]t, b[a]t
// ... diphthongs
EY, // [eɪ] as in s[ay], [ei]ght
AY, // [aɪ] as in m[y], wh[y], r[i]de
OW, // [oʊ] as in sh[ow], c[oa]t
AW, // [aʊ] as in h[ow], n[ow]
OY, // [ɔɪ] as in b[oy], t[oy]
// ... r-colored
ER, // [ɝ] as in h[er], b[ir]d, h[ur]t
LastVowel = ER,
/////////////
// Consonants
// ... stops
P, // [p] as in [p]ay
B, // [b] as in [b]uy
T, // [t] as in [t]ake
D, // [d] as in [d]ay
K, // [k] as in [k]ey
G, // [g] as in [g]o
// ... affricates
CH, // [tʃ] as in [ch]air
JH, // [dʒ] as in [j]ust
// ... fricatives
F, // [f] as in [f]or
V, // [v] as in [v]ery
TH, // [θ] as in [th]anks
DH, // [ð] as in [th]at
S, // [s] as in [s]ay
Z, // [z] as in [z]oo
SH, // [ʃ] as in [sh]ow
ZH, // [ʒ] as in mea[s]ure, plea[s]ure
HH, // [h] as in [h]ouse
// ... nasals
M, // [m] as in [m]an
N, // [n] as in [n]o
NG, // [ŋ] as in si[ng]
// ... liquids
L, // [ɫ] as in [l]ate
R, // [r, ɹ] as in [r]un
// ... semivowels
Y, // [j] as in [y]es
W, // [w] as in [w]ay
/////////////
// Misc.
Breath,
Cough,
Smack,
Noise
};
class PhoneConverter : public EnumConverter<Phone> {
public:
static PhoneConverter& get();
static PhoneConverter& get();
protected:
std::string getTypeName() override;
member_data getMemberData() override;
public:
boost::optional<Phone> tryParse(const std::string& s) override;
boost::optional<Phone> tryParse(const std::string& s) override;
};
std::ostream& operator<<(std::ostream& stream, Phone value);


@@ -4,54 +4,54 @@ using std::string;
using std::set;
ShapeConverter& ShapeConverter::get() {
static ShapeConverter converter;
return converter;
}
set<Shape> ShapeConverter::getBasicShapes() {
static const set<Shape> result = [] {
set<Shape> result;
for (int i = 0; i <= static_cast<int>(Shape::LastBasicShape); ++i) {
result.insert(static_cast<Shape>(i));
}
return result;
}();
return result;
static const set<Shape> result = [] {
set<Shape> result;
for (int i = 0; i <= static_cast<int>(Shape::LastBasicShape); ++i) {
result.insert(static_cast<Shape>(i));
}
return result;
}();
return result;
}
set<Shape> ShapeConverter::getExtendedShapes() {
static const set<Shape> result = [] {
set<Shape> result;
for (int i = static_cast<int>(Shape::LastBasicShape) + 1; i < static_cast<int>(Shape::EndSentinel); ++i) {
result.insert(static_cast<Shape>(i));
}
return result;
}();
return result;
static const set<Shape> result = [] {
set<Shape> result;
for (int i = static_cast<int>(Shape::LastBasicShape) + 1; i < static_cast<int>(Shape::EndSentinel); ++i) {
result.insert(static_cast<Shape>(i));
}
return result;
}();
return result;
}
string ShapeConverter::getTypeName() {
return "Shape";
return "Shape";
}
EnumConverter<Shape>::member_data ShapeConverter::getMemberData() {
return member_data {
{ Shape::A, "A" },
{ Shape::B, "B" },
{ Shape::C, "C" },
{ Shape::D, "D" },
{ Shape::E, "E" },
{ Shape::F, "F" },
{ Shape::G, "G" },
{ Shape::H, "H" },
{ Shape::X, "X" }
};
}
std::ostream& operator<<(std::ostream& stream, Shape value) {
return ShapeConverter::get().write(stream, value);
}
std::istream& operator>>(std::istream& stream, Shape& value) {
return ShapeConverter::get().read(stream, value);
}


@@ -7,33 +7,33 @@
// For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
// For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
enum class Shape {
// Basic shapes
A, // Closed mouth (M, B, P)
B, // Clenched teeth (most consonants, some vowels like EE as in b[ee])
C, // Open mouth (vowels like m[e]n, s[u]n, s[a]y)
D, // Mouth wide open (vowels like f[a]ther, b[a]t, wh[y])
E, // Rounded mouth (vowels like [o]ff)
F, // Puckered lips (y[ou], b[o]y, [w]ay)
LastBasicShape = F,
// Extended shapes
G, // "F", "V"
H, // "L"
X, // Idle
G, // "F", "V"
H, // "L"
X, // Idle
EndSentinel
};
class ShapeConverter : public EnumConverter<Shape> {
public:
static ShapeConverter& get();
static std::set<Shape> getBasicShapes();
static std::set<Shape> getExtendedShapes();
static ShapeConverter& get();
static std::set<Shape> getBasicShapes();
static std::set<Shape> getExtendedShapes();
protected:
std::string getTypeName() override;
member_data getMemberData() override;
};
std::ostream& operator<<(std::ostream& stream, Shape value);
@@ -41,7 +41,7 @@ std::ostream& operator<<(std::ostream& stream, Shape value);
std::istream& operator>>(std::istream& stream, Shape& value);
inline bool isClosed(Shape shape) {
return shape == Shape::A || shape == Shape::X;
}
// A set of mouth shapes.


@@ -7,66 +7,66 @@ using std::chrono::duration_cast;
using std::string;
DatExporter::DatExporter(const ShapeSet& targetShapeSet, double frameRate, bool convertToPrestonBlair) :
frameRate(frameRate),
convertToPrestonBlair(convertToPrestonBlair),
prestonBlairShapeNames {
{ Shape::A, "MBP" },
{ Shape::B, "etc" },
{ Shape::C, "E" },
{ Shape::D, "AI" },
{ Shape::E, "O" },
{ Shape::F, "U" },
{ Shape::G, "FV" },
{ Shape::H, "L" },
{ Shape::X, "rest" },
}
{
// Animation works with a fixed frame rate of 100.
// Downsampling to much less than 25 fps may result in dropped frames.
// Upsampling to more than 100 fps doesn't make sense.
const double minFrameRate = 24.0;
const double maxFrameRate = 100.0;
if (frameRate < minFrameRate || frameRate > maxFrameRate) {
throw std::runtime_error(fmt::format("Frame rate must be between {} and {} fps.", minFrameRate, maxFrameRate));
}
if (frameRate < minFrameRate || frameRate > maxFrameRate) {
throw std::runtime_error(fmt::format("Frame rate must be between {} and {} fps.", minFrameRate, maxFrameRate));
}
if (convertToPrestonBlair) {
for (Shape shape : targetShapeSet) {
if (prestonBlairShapeNames.find(shape) == prestonBlairShapeNames.end()) {
throw std::runtime_error(fmt::format("Mouth shape {} cannot be converted to Preston Blair shape names."));
}
}
}
}
void DatExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
outputStream << "MohoSwitch1" << "\n";
outputStream << "MohoSwitch1" << "\n";
// Output shapes with start times
int lastFrameNumber = 0;
for (auto& timedShape : input.animation) {
const int frameNumber = toFrameNumber(timedShape.getStart());
if (frameNumber == lastFrameNumber) continue;
const string shapeName = toString(timedShape.getValue());
outputStream << frameNumber << " " << shapeName << "\n";
lastFrameNumber = frameNumber;
}
// Output closed mouth with end time
int frameNumber = toFrameNumber(input.animation.getRange().getEnd());
if (frameNumber == lastFrameNumber) ++frameNumber;
const string shapeName = toString(convertToTargetShapeSet(Shape::X, input.targetShapeSet));
outputStream << frameNumber << " " << shapeName << "\n";
}
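// Illustrative output with convertToPrestonBlair enabled (frame numbers and
// shape sequence invented):
//
//     MohoSwitch1
//     1 rest
//     9 etc
//     14 AI
//     30 rest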
string DatExporter::toString(Shape shape) const {
return convertToPrestonBlair
? prestonBlairShapeNames.at(shape)
: boost::lexical_cast<std::string>(shape);
}
int DatExporter::toFrameNumber(centiseconds time) const {
return 1 + static_cast<int>(frameRate * duration_cast<duration<double>>(time).count());
return 1 + static_cast<int>(frameRate * duration_cast<duration<double>>(time).count());
}


@@ -8,14 +8,14 @@
// Exporter for Moho's switch data file format
class DatExporter : public Exporter {
public:
DatExporter(const ShapeSet& targetShapeSet, double frameRate, bool convertToPrestonBlair);
void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
private:
int toFrameNumber(centiseconds time) const;
std::string toString(Shape shape) const;
double frameRate;
bool convertToPrestonBlair;
std::map<Shape, std::string> prestonBlairShapeNames;
};

View File

@@ -6,21 +6,21 @@
class ExporterInput {
public:
ExporterInput(
const std::filesystem::path& inputFilePath,
const JoiningContinuousTimeline<Shape>& animation,
const ShapeSet& targetShapeSet) :
inputFilePath(inputFilePath),
animation(animation),
targetShapeSet(targetShapeSet) {}
std::filesystem::path inputFilePath;
JoiningContinuousTimeline<Shape> animation;
ShapeSet targetShapeSet;
};
class Exporter {
public:
virtual ~Exporter() {}
virtual void exportAnimation(const ExporterInput& input, std::ostream& outputStream) = 0;
};
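Any additional output format plugs in by subclassing Exporter. As a hypothetical illustration (not part of this commit), a minimal exporter that writes one shape name per line:

class LineExporter : public Exporter {
public:
    void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override {
        for (auto& timedShape : input.animation) {
            outputStream << timedShape.getValue() << "\n";
        }
    }
};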

View File

@@ -5,24 +5,24 @@
using std::string;
void JsonExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
// Export as JSON.
// I'm not using a library because the code is short enough without one and it lets me control
// the formatting.
outputStream << "{\n";
outputStream << " \"metadata\": {\n";
outputStream << " \"soundFile\": \"" << escapeJsonString(absolute(input.inputFilePath).u8string()) << "\",\n";
outputStream << " \"duration\": " << formatDuration(input.animation.getRange().getDuration()) << "\n";
outputStream << " },\n";
outputStream << " \"mouthCues\": [\n";
bool isFirst = true;
for (auto& timedShape : dummyShapeIfEmpty(input.animation, input.targetShapeSet)) {
if (!isFirst) outputStream << ",\n";
isFirst = false;
outputStream << " { \"start\": " << formatDuration(timedShape.getStart())
<< ", \"end\": " << formatDuration(timedShape.getEnd())
<< ", \"value\": \"" << timedShape.getValue() << "\" }";
}
outputStream << "\n";
outputStream << " ]\n";
outputStream << "}\n";
}
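For a two-cue animation, the emitted document looks roughly like this (assuming formatDuration prints seconds with two decimal places; path and values are made up):

{
  "metadata": {
    "soundFile": "/path/to/dialog.wav",
    "duration": 1.00
  },
  "mouthCues": [
    { "start": 0.00, "end": 0.25, "value": "X" },
    { "start": 0.25, "end": 1.00, "value": "B" }
  ]
}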

View File

@@ -4,5 +4,5 @@
class JsonExporter : public Exporter {
public:
void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
};

View File

@@ -2,19 +2,19 @@
#include "animation/targetShapeSet.h"
void TsvExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
// Output shapes with start times
for (auto& timedShape : input.animation) {
outputStream
<< formatDuration(timedShape.getStart())
<< "\t"
<< timedShape.getValue()
<< "\n";
}
// Output closed mouth with end time
outputStream
<< formatDuration(input.animation.getRange().getEnd())
<< "\t"
<< convertToTargetShapeSet(Shape::X, input.targetShapeSet)
<< "\n";
}
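The result is a plain two-column table: each line pairs a start time with the shape shown from that time on, and a final X row closes the mouth at the end. For the same hypothetical two-cue animation as above:

0.00	X
0.25	B
1.00	X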

View File

@@ -4,6 +4,6 @@
class TsvExporter : public Exporter {
public:
void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
};

View File

@@ -8,33 +8,33 @@ using std::string;
using boost::property_tree::ptree;
void XmlExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
ptree tree;
// Add metadata
tree.put("rhubarbResult.metadata.soundFile", absolute(input.inputFilePath).u8string());
tree.put(
"rhubarbResult.metadata.duration",
formatDuration(input.animation.getRange().getDuration())
);
// Add mouth cues
for (auto& timedShape : dummyShapeIfEmpty(input.animation, input.targetShapeSet)) {
ptree& mouthCueElement = tree.add(
"rhubarbResult.mouthCues.mouthCue",
timedShape.getValue()
);
mouthCueElement.put("<xmlattr>.start", formatDuration(timedShape.getStart()));
mouthCueElement.put("<xmlattr>.end", formatDuration(timedShape.getEnd()));
}
#ifndef BOOST_VERSION //present in version.hpp
#error "Could not detect Boost version."
#endif
#if BOOST_VERSION < 105600 // Support legacy syntax
using writer_setting = boost::property_tree::xml_writer_settings<char>;
#else
using writer_setting = boost::property_tree::xml_writer_settings<string>;
#endif
write_xml(outputStream, tree, writer_setting(' ', 2));
}
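The ptree above serializes to roughly the following document (element and attribute names are taken from the put/add calls; the XML declaration is emitted by write_xml, and the sample values are made up):

<?xml version="1.0" encoding="utf-8"?>
<rhubarbResult>
  <metadata>
    <soundFile>/path/to/dialog.wav</soundFile>
    <duration>1.00</duration>
  </metadata>
  <mouthCues>
    <mouthCue start="0.00" end="0.25">X</mouthCue>
    <mouthCue start="0.25" end="1.00">B</mouthCue>
  </mouthCues>
</rhubarbResult>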

View File

@@ -4,5 +4,5 @@
class XmlExporter : public Exporter {
public:
void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
};

View File

@@ -3,14 +3,14 @@
// Makes sure there is at least one mouth shape
std::vector<Timed<Shape>> dummyShapeIfEmpty(
const JoiningTimeline<Shape>& animation,
const ShapeSet& targetShapeSet
) {
std::vector<Timed<Shape>> result;
std::copy(animation.begin(), animation.end(), std::back_inserter(result));
if (result.empty()) {
// Add zero-length empty mouth
result.emplace_back(0_cs, 0_cs, convertToTargetShapeSet(Shape::X, targetShapeSet));
}
return result;
}

View File

@@ -5,6 +5,6 @@
// Makes sure there is at least one mouth shape
std::vector<Timed<Shape>> dummyShapeIfEmpty(
const JoiningTimeline<Shape>& animation,
const ShapeSet& targetShapeSet
);

View File

@@ -9,27 +9,27 @@ using std::filesystem::path;
using std::filesystem::path;
JoiningContinuousTimeline<Shape> animateAudioClip(
const AudioClip& audioClip,
const optional<string>& dialog,
const Recognizer& recognizer,
const ShapeSet& targetShapeSet,
int maxThreadCount,
ProgressSink& progressSink)
{
const BoundedTimeline<Phone> phones =
recognizer.recognizePhones(audioClip, dialog, maxThreadCount, progressSink);
JoiningContinuousTimeline<Shape> result = animate(phones, targetShapeSet);
return result;
}
JoiningContinuousTimeline<Shape> animateWaveFile(
path filePath,
const optional<string>& dialog,
const Recognizer& recognizer,
const ShapeSet& targetShapeSet,
int maxThreadCount,
ProgressSink& progressSink)
{
const auto audioClip = createAudioFileClip(filePath);
return animateAudioClip(*audioClip, dialog, recognizer, targetShapeSet, maxThreadCount, progressSink);
}

View File

@@ -9,17 +9,17 @@
#include "recognition/Recognizer.h"
JoiningContinuousTimeline<Shape> animateAudioClip(
const AudioClip& audioClip,
const boost::optional<std::string>& dialog,
const Recognizer& recognizer,
const ShapeSet& targetShapeSet,
int maxThreadCount,
ProgressSink& progressSink);
JoiningContinuousTimeline<Shape> animateWaveFile(
std::filesystem::path filePath,
const boost::optional<std::string>& dialog,
const Recognizer& recognizer,
const ShapeSet& targetShapeSet,
int maxThreadCount,
ProgressSink& progressSink);

View File

@@ -10,30 +10,30 @@ using std::string;
namespace logging {
// Returns an int representing the current thread.
// This used to be a simple thread_local variable, but Xcode doesn't support that yet
int getThreadCounter() {
using thread_id = std::thread::id;
static std::mutex counterMutex;
lock_guard<std::mutex> lock(counterMutex);
static unordered_map<thread_id, int> threadCounters;
static int lastThreadId = 0;
thread_id threadId = std::this_thread::get_id();
if (threadCounters.find(threadId) == threadCounters.end()) {
threadCounters.insert({ threadId, ++lastThreadId });
}
return threadCounters.find(threadId)->second;
}
Entry::Entry(Level level, const string& message) :
timestamp(),
level(level),
message(message)
{
time(&timestamp);
this->threadCounter = getThreadCounter();
}
}
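The thread-counter trick generalizes beyond this logger: map opaque std::thread::id values to small sequential integers under a mutex. A runnable sketch of the same technique:

#include <iostream>
#include <mutex>
#include <thread>
#include <unordered_map>

// Returns a small, stable integer (1, 2, 3, ...) identifying the calling thread.
int getThreadCounter() {
    static std::mutex counterMutex;
    std::lock_guard<std::mutex> lock(counterMutex);

    static std::unordered_map<std::thread::id, int> threadCounters;
    static int lastThreadId = 0;
    const auto threadId = std::this_thread::get_id();
    if (threadCounters.find(threadId) == threadCounters.end()) {
        threadCounters.insert({ threadId, ++lastThreadId });
    }
    return threadCounters.find(threadId)->second;
}

int main() {
    std::thread worker([] { std::cout << "worker: " << getThreadCounter() << "\n"; });
    std::cout << "main: " << getThreadCounter() << "\n";
    worker.join();
}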

View File

@@ -3,15 +3,15 @@
#include "Level.h"
namespace logging {
struct Entry {
Entry(Level level, const std::string& message);
virtual ~Entry() = default;
time_t timestamp;
int threadCounter;
Level level;
std::string message;
};
}

View File

@@ -5,10 +5,10 @@
namespace logging {
class Formatter {
public:
virtual ~Formatter() = default;
virtual std::string format(const Entry& entry) = 0;
};
}

View File

@@ -4,32 +4,32 @@ using std::string;
namespace logging {
LevelConverter& LevelConverter::get() {
static LevelConverter converter;
return converter;
}
string LevelConverter::getTypeName() {
return "Level";
}
EnumConverter<Level>::member_data LevelConverter::getMemberData() {
return member_data {
{ Level::Trace, "Trace" },
{ Level::Debug, "Debug" },
{ Level::Info, "Info" },
{ Level::Warn, "Warn" },
{ Level::Error, "Error" },
{ Level::Fatal, "Fatal" }
};
}
std::ostream& operator<<(std::ostream& stream, Level value) {
return LevelConverter::get().write(stream, value);
}
std::istream& operator >>(std::istream& stream, Level& value) {
return LevelConverter::get().read(stream, value);
}
}

View File

@@ -4,26 +4,26 @@
namespace logging {
enum class Level {
Trace,
Debug,
Info,
Warn,
Error,
Fatal,
EndSentinel
};
class LevelConverter : public EnumConverter<Level> {
public:
static LevelConverter& get();
protected:
std::string getTypeName() override;
member_data getMemberData() override;
};
std::ostream& operator<<(std::ostream& stream, Level value);
std::istream& operator >>(std::istream& stream, Level& value);
}

View File

@@ -4,10 +4,10 @@
namespace logging {
class Sink {
public:
virtual ~Sink() = default;
virtual void receive(const Entry& entry) = 0;
};
}

View File

@@ -7,17 +7,17 @@ using std::string;
namespace logging {
string SimpleConsoleFormatter::format(const Entry& entry) {
return fmt::format("[{0}] {1}", entry.level, entry.message);
}
string SimpleFileFormatter::format(const Entry& entry) {
return fmt::format(
"[{0}] {1} {2}",
formatTime(entry.timestamp, "%F %H:%M:%S"),
entry.threadCounter,
consoleFormatter.format(entry)
);
}
}

View File

@@ -4,16 +4,16 @@
namespace logging {
class SimpleConsoleFormatter : public Formatter {
public:
std::string format(const Entry& entry) override;
};
class SimpleFileFormatter : public Formatter {
public:
std::string format(const Entry& entry) override;
private:
SimpleConsoleFormatter consoleFormatter;
};
}

View File

@@ -10,46 +10,46 @@ using std::shared_ptr;
using std::lock_guard;
std::mutex& getLogMutex() {
static std::mutex mutex;
return mutex;
}
vector<shared_ptr<Sink>>& getSinks() {
static vector<shared_ptr<Sink>> sinks;
return sinks;
}
bool logging::addSink(shared_ptr<Sink> sink) {
lock_guard<std::mutex> lock(getLogMutex());
auto& sinks = getSinks();
if (std::find(sinks.begin(), sinks.end(), sink) == sinks.end()) {
sinks.push_back(sink);
return true;
}
return false;
}
bool logging::removeSink(std::shared_ptr<Sink> sink) {
lock_guard<std::mutex> lock(getLogMutex());
auto& sinks = getSinks();
const auto it = std::find(sinks.begin(), sinks.end(), sink);
if (it != sinks.end()) {
sinks.erase(it);
return true;
}
return false;
}
void logging::log(const Entry& entry) {
lock_guard<std::mutex> lock(getLogMutex());
for (auto& sink : getSinks()) {
sink->receive(entry);
}
}
void logging::log(Level level, const string& message) {
const Entry entry = Entry(level, message);
log(entry);
}

View File

@@ -6,32 +6,32 @@
namespace logging {
bool addSink(std::shared_ptr<Sink> sink);
bool removeSink(std::shared_ptr<Sink> sink);
void log(const Entry& entry);
void log(Level level, const std::string& message);
template<typename... Args>
void logFormat(Level level, fmt::CStringRef format, const Args&... args) {
log(level, fmt::format(format, args...));
}
#define LOG_WITH_LEVEL(levelName, levelEnum) \
inline void levelName(const std::string& message) { \
log(Level::levelEnum, message); \
} \
template <typename... Args> \
void levelName ## Format(fmt::CStringRef format, const Args&... args) { \
logFormat(Level::levelEnum, format, args...); \
}
LOG_WITH_LEVEL(trace, Trace)
LOG_WITH_LEVEL(debug, Debug)
LOG_WITH_LEVEL(info, Info)
LOG_WITH_LEVEL(warn, Warn)
LOG_WITH_LEVEL(error, Error)
LOG_WITH_LEVEL(fatal, Fatal)
}

View File

@@ -7,29 +7,29 @@ using std::shared_ptr;
namespace logging {
LevelFilter::LevelFilter(shared_ptr<Sink> innerSink, Level minLevel) :
innerSink(innerSink),
minLevel(minLevel)
{}
void LevelFilter::receive(const Entry& entry) {
if (entry.level >= minLevel) {
innerSink->receive(entry);
}
}
StreamSink::StreamSink(shared_ptr<std::ostream> stream, shared_ptr<Formatter> formatter) :
stream(stream),
formatter(formatter)
{}
void StreamSink::receive(const Entry& entry) {
const string line = formatter->format(entry);
*stream << line << std::endl;
}
StdErrSink::StdErrSink(shared_ptr<Formatter> formatter) :
StreamSink(std::shared_ptr<std::ostream>(&std::cerr, [](void*) {}), formatter)
{}
}

View File

@@ -5,29 +5,29 @@
#include "Formatter.h"
namespace logging {
enum class Level;
class LevelFilter : public Sink {
public:
LevelFilter(std::shared_ptr<Sink> innerSink, Level minLevel);
void receive(const Entry& entry) override;
private:
std::shared_ptr<Sink> innerSink;
Level minLevel;
};
class StreamSink : public Sink {
public:
StreamSink(std::shared_ptr<std::ostream> stream, std::shared_ptr<Formatter> formatter);
void receive(const Entry& entry) override;
private:
std::shared_ptr<std::ostream> stream;
std::shared_ptr<Formatter> formatter;
};
class StdErrSink : public StreamSink {
public:
explicit StdErrSink(std::shared_ptr<Formatter> formatter);
};
}
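Putting the pieces together, a plausible wiring of this API (all type and function names are from the headers above; the actual setup code lies outside this diff) logs Info and above to stderr:

#include <memory>

void setUpLogging() {
    auto formatter = std::make_shared<logging::SimpleConsoleFormatter>();
    auto stdErrSink = std::make_shared<logging::StdErrSink>(formatter);
    logging::addSink(std::make_shared<logging::LevelFilter>(stdErrSink, logging::Level::Info));
}

void example() {
    logging::info("Starting up.");
    logging::debugFormat("Processing file {} of {}.", 1, 3); // dropped by the LevelFilter
}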

View File

@@ -11,103 +11,103 @@ using boost::optional;
using boost::optional;
static lambda_unique_ptr<ps_decoder_t> createDecoder(optional<std::string> dialog) {
UNUSED(dialog);
lambda_unique_ptr<cmd_ln_t> config(
cmd_ln_init(
nullptr, ps_args(), true,
// Set acoustic model
"-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(),
// Set phonetic language model
"-allphone", (getSphinxModelDirectory() / "en-us-phone.lm.bin").u8string().c_str(),
"-allphone_ci", "yes",
// Set language model probability weight.
// Low values (<= 0.4) can lead to fluttering animation.
// High values (>= 1.0) can lead to imprecise or freezing animation.
"-lw", "0.8",
// Add noise against zero silence
// (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor)
"-dither", "yes",
// Disable VAD -- we're doing that ourselves
"-remove_silence", "no",
// Perform per-utterance cepstral mean normalization
"-cmn", "batch",
// The following settings are recommended at
// http://cmusphinx.sourceforge.net/wiki/phonemerecognition
// Set beam width applied to every frame in Viterbi search
"-beam", "1e-20",
// Set beam width applied to phone transitions
"-pbeam", "1e-20",
nullptr),
[](cmd_ln_t* config) { cmd_ln_free_r(config); });
if (!config) throw runtime_error("Error creating configuration.");
lambda_unique_ptr<ps_decoder_t> decoder(
ps_init(config.get()),
[](ps_decoder_t* recognizer) { ps_free(recognizer); });
if (!decoder) throw runtime_error("Error creating speech decoder.");
return decoder;
}
static Timeline<Phone> utteranceToPhones(
const AudioClip& audioClip,
TimeRange utteranceTimeRange,
ps_decoder_t& decoder,
ProgressSink& utteranceProgressSink
) {
// Pad time range to give PocketSphinx some breathing room
TimeRange paddedTimeRange = utteranceTimeRange;
const centiseconds padding(3);
paddedTimeRange.grow(padding);
paddedTimeRange.trim(audioClip.getTruncatedRange());
const unique_ptr<AudioClip> clipSegment = audioClip.clone()
| segment(paddedTimeRange)
| resample(sphinxSampleRate);
const auto audioBuffer = copyTo16bitBuffer(*clipSegment);
// Detect phones (returned as words)
BoundedTimeline<string> phoneStrings = recognizeWords(audioBuffer, decoder);
phoneStrings.shift(paddedTimeRange.getStart());
Timeline<Phone> utterancePhones;
for (const auto& timedPhoneString : phoneStrings) {
Phone phone = PhoneConverter::get().parse(timedPhoneString.getValue());
if (phone == Phone::AH && timedPhoneString.getDuration() < 6_cs) {
// Heuristic: < 6_cs is schwa. PocketSphinx doesn't differentiate.
phone = Phone::Schwa;
}
utterancePhones.set(timedPhoneString.getTimeRange(), phone);
}
// Log raw phones
for (const auto& timedPhone : utterancePhones) {
logTimedEvent("rawPhone", timedPhone);
}
// Guess positions of noise sounds
JoiningTimeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones);
for (const auto& noiseSound : noiseSounds) {
utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise);
}
// Log phones
for (const auto& timedPhone : utterancePhones) {
logTimedEvent("phone", timedPhone);
}
utteranceProgressSink.reportProgress(1.0);
return utterancePhones;
}
BoundedTimeline<Phone> PhoneticRecognizer::recognizePhones(
const AudioClip& inputAudioClip,
optional<std::string> dialog,
int maxThreadCount,
ProgressSink& progressSink
) const {
return ::recognizePhones(inputAudioClip, dialog, &createDecoder, &utteranceToPhones, maxThreadCount, progressSink);
}

View File

@@ -5,10 +5,10 @@
class PhoneticRecognizer : public Recognizer {
public:
BoundedTimeline<Phone> recognizePhones(
const AudioClip& inputAudioClip,
boost::optional<std::string> dialog,
int maxThreadCount,
ProgressSink& progressSink
) const override;
};

View File

@@ -27,316 +27,316 @@ using boost::optional;
using std::array;
bool dictionaryContains(dict_t& dictionary, const string& word) {
return dict_wordid(&dictionary, word.c_str()) != BAD_S3WID;
}
s3wid_t getWordId(const string& word, dict_t& dictionary) {
const s3wid_t wordId = dict_wordid(&dictionary, word.c_str());
if (wordId == BAD_S3WID) throw invalid_argument(fmt::format("Unknown word '{}'.", word));
return wordId;
}
void addMissingDictionaryWords(const vector<string>& words, ps_decoder_t& decoder) {
map<string, string> missingPronunciations;
for (const string& word : words) {
if (!dictionaryContains(*decoder.dict, word)) {
string pronunciation;
for (Phone phone : wordToPhones(word)) {
if (pronunciation.length() > 0) pronunciation += " ";
pronunciation += PhoneConverter::get().toString(phone);
}
missingPronunciations[word] = pronunciation;
}
}
for (auto it = missingPronunciations.begin(); it != missingPronunciations.end(); ++it) {
const bool isLast = it == --missingPronunciations.end();
logging::infoFormat("Unknown word '{}'. Guessing pronunciation '{}'.", it->first, it->second);
ps_add_word(&decoder, it->first.c_str(), it->second.c_str(), isLast);
}
}
lambda_unique_ptr<ngram_model_t> createDefaultLanguageModel(ps_decoder_t& decoder) {
path modelPath = getSphinxModelDirectory() / "en-us.lm.bin";
lambda_unique_ptr<ngram_model_t> result(
ngram_model_read(decoder.config, modelPath.u8string().c_str(), NGRAM_AUTO, decoder.lmath),
[](ngram_model_t* lm) { ngram_model_free(lm); });
if (!result) {
throw runtime_error(fmt::format("Error reading language model from {}.", modelPath.u8string()));
}
return result;
}
lambda_unique_ptr<ngram_model_t> createDialogLanguageModel(
ps_decoder_t& decoder,
const string& dialog
) {
// Split dialog into normalized words
vector<string> words = tokenizeText(
dialog,
[&](const string& word) { return dictionaryContains(*decoder.dict, word); }
);
// Add dialog-specific words to the dictionary
addMissingDictionaryWords(words, decoder);
// Create dialog-specific language model
words.insert(words.begin(), "<s>");
words.emplace_back("</s>");
return createLanguageModel(words, decoder);
}
lambda_unique_ptr<ngram_model_t> createBiasedLanguageModel(
ps_decoder_t& decoder,
const string& dialog
) {
auto defaultLanguageModel = createDefaultLanguageModel(decoder);
auto dialogLanguageModel = createDialogLanguageModel(decoder, dialog);
constexpr int modelCount = 2;
array<ngram_model_t*, modelCount> languageModels {
defaultLanguageModel.get(),
dialogLanguageModel.get()
};
array<const char*, modelCount> modelNames { "defaultLM", "dialogLM" };
array<float, modelCount> modelWeights { 0.1f, 0.9f };
lambda_unique_ptr<ngram_model_t> result(
ngram_model_set_init(
nullptr,
languageModels.data(),
const_cast<char**>(modelNames.data()),
modelWeights.data(),
modelCount
),
[](ngram_model_t* lm) { ngram_model_free(lm); });
if (!result) {
throw runtime_error("Error creating biased language model.");
}
return result;
}
static lambda_unique_ptr<ps_decoder_t> createDecoder(optional<std::string> dialog) {
lambda_unique_ptr<cmd_ln_t> config(
cmd_ln_init(
nullptr, ps_args(), true,
// Set acoustic model
"-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(),
// Set pronunciation dictionary
"-dict", (getSphinxModelDirectory() / "cmudict-en-us.dict").u8string().c_str(),
// Add noise against zero silence
// (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor)
"-dither", "yes",
// Disable VAD -- we're doing that ourselves
"-remove_silence", "no",
// Perform per-utterance cepstral mean normalization
"-cmn", "batch",
nullptr),
[](cmd_ln_t* config) { cmd_ln_free_r(config); });
if (!config) throw runtime_error("Error creating configuration.");
lambda_unique_ptr<ps_decoder_t> decoder(
ps_init(config.get()),
[](ps_decoder_t* recognizer) { ps_free(recognizer); });
if (!decoder) throw runtime_error("Error creating speech decoder.");
// Set language model
lambda_unique_ptr<ngram_model_t> languageModel(dialog
? createBiasedLanguageModel(*decoder, *dialog)
: createDefaultLanguageModel(*decoder));
ps_set_lm(decoder.get(), "lm", languageModel.get());
ps_set_search(decoder.get(), "lm");
return decoder;
}
optional<Timeline<Phone>> getPhoneAlignment(
const vector<s3wid_t>& wordIds,
const vector<int16_t>& audioBuffer,
ps_decoder_t& decoder)
{
if (wordIds.empty()) return boost::none;
// Create alignment list
lambda_unique_ptr<ps_alignment_t> alignment(
ps_alignment_init(decoder.d2p),
[](ps_alignment_t* alignment) { ps_alignment_free(alignment); });
if (!alignment) throw runtime_error("Error creating alignment.");
for (s3wid_t wordId : wordIds) {
// Add word. Initial value for duration is ignored.
ps_alignment_add_word(alignment.get(), wordId, 0);
}
int error = ps_alignment_populate(alignment.get());
if (error) throw runtime_error("Error populating alignment struct.");
// Create search structure
acmod_t* acousticModel = decoder.acmod;
lambda_unique_ptr<ps_search_t> search(
state_align_search_init("state_align", decoder.config, acousticModel, alignment.get()),
[](ps_search_t* search) { ps_search_free(search); });
if (!search) throw runtime_error("Error creating search.");
// Start recognition
error = acmod_start_utt(acousticModel);
if (error) throw runtime_error("Error starting utterance processing for alignment.");
{
// Eventually end recognition
auto endRecognition = gsl::finally([&]() { acmod_end_utt(acousticModel); });
// Start search
ps_search_start(search.get());
// Process entire audio clip
const int16* nextSample = audioBuffer.data();
size_t remainingSamples = audioBuffer.size();
const bool fullUtterance = true;
while (acmod_process_raw(acousticModel, &nextSample, &remainingSamples, fullUtterance) > 0) {
while (acousticModel->n_feat_frame > 0) {
ps_search_step(search.get(), acousticModel->output_frame);
acmod_advance(acousticModel);
}
}
// End search
error = ps_search_finish(search.get());
if (error) return boost::none;
}
// Extract phones with timestamps
char** phoneNames = decoder.dict->mdef->ciname;
Timeline<Phone> result;
for (
ps_alignment_iter_t* it = ps_alignment_phones(alignment.get());
it;
it = ps_alignment_iter_next(it)
) {
// Get phone
ps_alignment_entry_t* phoneEntry = ps_alignment_iter_get(it);
const s3cipid_t phoneId = phoneEntry->id.pid.cipid;
string phoneName = phoneNames[phoneId];
if (phoneName == "SIL") continue;
// Add entry
centiseconds start(phoneEntry->start);
centiseconds duration(phoneEntry->duration);
Phone phone = PhoneConverter::get().parse(phoneName);
if (phone == Phone::AH && duration < 6_cs) {
// Heuristic: < 6_cs is schwa. PocketSphinx doesn't differentiate.
phone = Phone::Schwa;
}
const Timed<Phone> timedPhone(start, start + duration, phone);
result.set(timedPhone);
}
return result;
}
// Some words have multiple pronunciations, one of which results in better animation than the others.
// This function returns the optimal pronunciation for a select set of these words.
string fixPronunciation(const string& word) {
const static map<string, string> replacements {
{ "into(2)", "into" },
{ "to(2)", "to" },
{ "to(3)", "to" },
{ "today(2)", "today" },
{ "tomorrow(2)", "tomorrow" },
{ "tonight(2)", "tonight" }
};
const auto pair = replacements.find(word);
return pair != replacements.end() ? pair->second : word;
}
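For example, with the table above:

fixPronunciation("to(3)");  // returns "to"
fixPronunciation("hello");  // not in the table; returned unchanged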
static Timeline<Phone> utteranceToPhones(
const AudioClip& audioClip,
TimeRange utteranceTimeRange,
ps_decoder_t& decoder,
ProgressSink& utteranceProgressSink
) {
ProgressMerger utteranceProgressMerger(utteranceProgressSink);
ProgressSink& wordRecognitionProgressSink =
utteranceProgressMerger.addSource("word recognition (PocketSphinx recognizer)", 1.0);
ProgressSink& alignmentProgressSink =
utteranceProgressMerger.addSource("alignment (PocketSphinx recognizer)", 0.5);
// Pad time range to give PocketSphinx some breathing room
TimeRange paddedTimeRange = utteranceTimeRange;
const centiseconds padding(3);
paddedTimeRange.grow(padding);
paddedTimeRange.trim(audioClip.getTruncatedRange());
const unique_ptr<AudioClip> clipSegment = audioClip.clone()
| segment(paddedTimeRange)
| resample(sphinxSampleRate);
const auto audioBuffer = copyTo16bitBuffer(*clipSegment);
// Get words
BoundedTimeline<string> words = recognizeWords(audioBuffer, decoder);
wordRecognitionProgressSink.reportProgress(1.0);
// Log utterance text
string text;
for (auto& timedWord : words) {
string word = timedWord.getValue();
// Skip details
if (word == "<s>" || word == "</s>" || word == "<sil>") {
continue;
}
word = regex_replace(word, regex("\\(\\d\\)"), "");
if (!text.empty()) {
text += " ";
}
text += word;
}
logTimedEvent("utterance", utteranceTimeRange, text);
// Log words
for (Timed<string> timedWord : words) {
timedWord.getTimeRange().shift(paddedTimeRange.getStart());
logTimedEvent("word", timedWord);
}
// Convert word strings to word IDs using dictionary
vector<s3wid_t> wordIds;
for (const auto& timedWord : words) {
const string fixedWord = fixPronunciation(timedWord.getValue());
wordIds.push_back(getWordId(fixedWord, *decoder.dict));
}
// Align the words' phones with speech
#if BOOST_VERSION < 105600 // Support legacy syntax
#define value_or get_value_or
#endif
Timeline<Phone> utterancePhones = getPhoneAlignment(wordIds, audioBuffer, decoder)
.value_or(ContinuousTimeline<Phone>(clipSegment->getTruncatedRange(), Phone::Noise));
alignmentProgressSink.reportProgress(1.0);
utterancePhones.shift(paddedTimeRange.getStart());
// Log raw phones
for (const auto& timedPhone : utterancePhones) {
logTimedEvent("rawPhone", timedPhone);
}
// Guess positions of noise sounds
JoiningTimeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones);
for (const auto& noiseSound : noiseSounds) {
utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise);
}
// Log phones
for (const auto& timedPhone : utterancePhones) {
logTimedEvent("phone", timedPhone);
}
return utterancePhones;
}
BoundedTimeline<Phone> PocketSphinxRecognizer::recognizePhones(
const AudioClip& inputAudioClip,
optional<std::string> dialog,
int maxThreadCount,
ProgressSink& progressSink
) const {
return ::recognizePhones(
inputAudioClip, dialog, &createDecoder, &utteranceToPhones, maxThreadCount, progressSink);
}

View File

@@ -5,10 +5,10 @@
class PocketSphinxRecognizer : public Recognizer {
public:
BoundedTimeline<Phone> recognizePhones(
const AudioClip& inputAudioClip,
boost::optional<std::string> dialog,
int maxThreadCount,
ProgressSink& progressSink
) const override;
};

View File

@@ -7,12 +7,12 @@
class Recognizer {
public:
virtual ~Recognizer() = default;
virtual BoundedTimeline<Phone> recognizePhones(
const AudioClip& audioClip,
boost::optional<std::string> dialog,
int maxThreadCount,
ProgressSink& progressSink
) const = 0;
};

View File

@@ -11,101 +11,101 @@ using std::invalid_argument;
using std::pair;
const vector<pair<wregex, wstring>>& getReplacementRules() {
static vector<pair<wregex, wstring>> rules {
#include "g2pRules.cpp"
// Turn bigrams into unigrams for easier conversion
{ wregex(L"ôw"), L"Ω" },
{ wregex(L"öy"), L"ω" },
{ wregex(L"@r"), L"ɝ" }
};
return rules;
}
Phone charToPhone(wchar_t c) {
// For reference, see http://www.zompist.com/spell.html
switch (c) {
case L'ä': return Phone::EY;
case L'â': return Phone::AE;
case L'ë': return Phone::IY;
case L'ê': return Phone::EH;
case L'ï': return Phone::AY;
case L'î': return Phone::IH;
case L'ö': return Phone::OW;
case L'ô': return Phone::AA; // could also be AO/AH
case L'ü': return Phone::UW; // really Y+UW
case L'û': return Phone::AH; // [ʌ] as in b[u]t
case L'u': return Phone::UW;
case L'ò': return Phone::AO;
case L'ù': return Phone::UH;
case L'@': return Phone::AH; // [ə] as in [a]lone
case L'Ω': return Phone::AW;
case L'ω': return Phone::OY;
case L'y': return Phone::Y;
case L'w': return Phone::W;
case L'ɝ': return Phone::ER;
case L'p': return Phone::P;
case L'b': return Phone::B;
case L't': return Phone::T;
case L'd': return Phone::D;
case L'g': return Phone::G;
case L'k': return Phone::K;
case L'm': return Phone::M;
case L'n': return Phone::N;
case L'ñ': return Phone::NG;
case L'f': return Phone::F;
case L'v': return Phone::V;
case L'+': return Phone::TH; // also covers DH
case L's': return Phone::S;
case L'z': return Phone::Z;
case L'$': return Phone::SH; // also covers ZH
case L'ç': return Phone::CH;
case L'j': return Phone::JH;
case L'r': return Phone::R;
case L'l': return Phone::L;
case L'h': return Phone::HH;
default:
return Phone::Noise;
}
}
vector<Phone> wordToPhones(const std::string& word) {
static regex validWord("^[a-z']*$");
if (!regex_match(word, validWord)) {
throw invalid_argument(fmt::format("Word '{}' contains illegal characters.", word));
}
wstring wideWord = latin1ToWide(word);
for (const auto& rule : getReplacementRules()) {
const wregex& regex = rule.first;
const wstring& replacement = rule.second;
// Repeatedly apply rule until there is no more change
bool changed;
do {
wstring tmp = regex_replace(wideWord, regex, replacement);
changed = tmp != wideWord;
wideWord = tmp;
} while (changed);
}
// Remove duplicate phones
vector<Phone> result;
Phone lastPhone = Phone::Noise;
for (wchar_t c : wideWord) {
Phone phone = charToPhone(c);
if (phone == Phone::Noise) {
logging::errorFormat(
"G2P error determining pronunciation for '{}': Character '{}' is not a recognized phone shorthand.",
word,
static_cast<char>(c)
);
}
if (phone != lastPhone) {
result.push_back(phone);
}
lastPhone = phone;
}
return result;
}
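The inner do/while above applies each rule repeatedly until the string stops changing, i.e. until a fixed point is reached. The same idea in a self-contained form (narrow strings and std::regex here, purely for illustration):

#include <iostream>
#include <regex>
#include <string>

// Applies one replacement rule until the result stops changing.
std::string applyUntilStable(std::string text, const std::regex& rule, const std::string& replacement) {
    bool changed;
    do {
        std::string tmp = std::regex_replace(text, rule, replacement);
        changed = tmp != text;
        text = tmp;
    } while (changed);
    return text;
}

int main() {
    // A single pass of "oo" -> "o" would leave "look"; iterating collapses the whole run: "lok".
    std::cout << applyUntilStable("looook", std::regex("oo"), "o") << "\n";
}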

View File

@@ -24,178 +24,178 @@ using Bigram = tuple<string, string>;
using Trigram = tuple<string, string, string>;
map<Unigram, int> getUnigramCounts(const vector<string>& words) {
map<Unigram, int> unigramCounts;
for (const Unigram& unigram : words) {
++unigramCounts[unigram];
}
return unigramCounts;
}
map<Bigram, int> getBigramCounts(const vector<string>& words) {
map<Bigram, int> bigramCounts;
for (auto it = words.begin(); it < words.end() - 1; ++it) {
++bigramCounts[Bigram(*it, *(it + 1))];
}
return bigramCounts;
}
map<Trigram, int> getTrigramCounts(const vector<string>& words) {
map<Trigram, int> trigramCounts;
if (words.size() >= 3) {
for (auto it = words.begin(); it < words.end() - 2; ++it) {
++trigramCounts[Trigram(*it, *(it + 1), *(it + 2))];
}
}
return trigramCounts;
}
map<Unigram, double> getUnigramProbabilities(
const vector<string>& words,
const map<Unigram, int>& unigramCounts,
const double deflator
) {
map<Unigram, double> unigramProbabilities;
for (const auto& pair : unigramCounts) {
const Unigram& unigram = get<0>(pair);
const int unigramCount = get<1>(pair);
unigramProbabilities[unigram] = double(unigramCount) / words.size() * deflator;
}
return unigramProbabilities;
}
map<Bigram, double> getBigramProbabilities(
const map<Unigram, int>& unigramCounts,
const map<Bigram, int>& bigramCounts,
const double deflator
) {
map<Bigram, double> bigramProbabilities;
for (const auto& pair : bigramCounts) {
Bigram bigram = get<0>(pair);
const int bigramCount = get<1>(pair);
const int unigramPrefixCount = unigramCounts.at(get<0>(bigram));
bigramProbabilities[bigram] = double(bigramCount) / unigramPrefixCount * deflator;
}
return bigramProbabilities;
}
map<Trigram, double> getTrigramProbabilities(
const map<Bigram, int>& bigramCounts,
const map<Trigram, int>& trigramCounts,
const double deflator
) {
map<Trigram, double> trigramProbabilities;
for (const auto& pair : trigramCounts) {
Trigram trigram = get<0>(pair);
const int trigramCount = get<1>(pair);
const int bigramPrefixCount = bigramCounts.at(Bigram(get<0>(trigram), get<1>(trigram)));
trigramProbabilities[trigram] = double(trigramCount) / bigramPrefixCount * deflator;
}
return trigramProbabilities;
}
map<Unigram, double> getUnigramBackoffWeights(
const map<Unigram, int>& unigramCounts,
const map<Unigram, double>& unigramProbabilities,
const map<Bigram, int>& bigramCounts,
const double discountMass)
{
map<Unigram, double> unigramBackoffWeights;
for (const Unigram& unigram : unigramCounts | boost::adaptors::map_keys) {
double denominator = 1;
for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) {
if (get<0>(bigram) == unigram) {
denominator -= unigramProbabilities.at(get<1>(bigram));
}
}
unigramBackoffWeights[unigram] = discountMass / denominator;
}
return unigramBackoffWeights;
}
map<Bigram, double> getBigramBackoffWeights(
const map<Bigram, int>& bigramCounts,
const map<Bigram, double>& bigramProbabilities,
const map<Trigram, int>& trigramCounts,
const double discountMass)
{
map<Bigram, double> bigramBackoffWeights;
for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) {
double denominator = 1;
for (const Trigram& trigram : trigramCounts | boost::adaptors::map_keys) {
if (Bigram(get<0>(trigram), get<1>(trigram)) == bigram) {
denominator -= bigramProbabilities.at(Bigram(get<1>(trigram), get<2>(trigram)));
}
}
bigramBackoffWeights[bigram] = discountMass / denominator;
}
return bigramBackoffWeights;
}
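// In formulas (a restatement of the code above, not taken from the source),
// with D = discountMass, N the number of dialog words, and c(.) the counts:
//
//   P(w)          = (1 - D) * c(w) / N
//   P(w2 | w1)    = (1 - D) * c(w1 w2) / c(w1)
//   P(w3 | w1 w2) = (1 - D) * c(w1 w2 w3) / c(w1 w2)
//
//   alpha(w1)    = D / (1 - sum over observed successors w2 of P(w2))
//   alpha(w1 w2) = D / (1 - sum over observed successors w3 of P(w3 | w2))
//
// Each estimate keeps (1 - D) of its maximum-likelihood mass; the discounted
// mass D is redistributed to unseen successors through the backoff weights.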
void createLanguageModelFile(const vector<string>& words, const path& filePath) {
const double discountMass = 0.5;
const double deflator = 1.0 - discountMass;
map<Unigram, int> unigramCounts = getUnigramCounts(words);
map<Bigram, int> bigramCounts = getBigramCounts(words);
map<Trigram, int> trigramCounts = getTrigramCounts(words);
map<Unigram, double> unigramProbabilities =
getUnigramProbabilities(words, unigramCounts, deflator);
map<Bigram, double> bigramProbabilities =
getBigramProbabilities(unigramCounts, bigramCounts, deflator);
map<Trigram, double> trigramProbabilities =
getTrigramProbabilities(bigramCounts, trigramCounts, deflator);
map<Unigram, double> unigramBackoffWeights =
getUnigramBackoffWeights(unigramCounts, unigramProbabilities, bigramCounts, discountMass);
map<Bigram, double> bigramBackoffWeights =
getBigramBackoffWeights(bigramCounts, bigramProbabilities, trigramCounts, discountMass);
std::ofstream file(filePath);
file << "Generated by " << appName << " " << appVersion << endl << endl;
file << "\\data\\" << endl;
file << "ngram 1=" << unigramCounts.size() << endl;
file << "ngram 2=" << bigramCounts.size() << endl;
file << "ngram 3=" << trigramCounts.size() << endl << endl;
file << "\\data\\" << endl;
file << "ngram 1=" << unigramCounts.size() << endl;
file << "ngram 2=" << bigramCounts.size() << endl;
file << "ngram 3=" << trigramCounts.size() << endl << endl;
file.setf(std::ios::fixed, std::ios::floatfield);
file.precision(4);
file << "\\1-grams:" << endl;
for (const Unigram& unigram : unigramCounts | boost::adaptors::map_keys) {
file << log10(unigramProbabilities.at(unigram))
<< " " << unigram
<< " " << log10(unigramBackoffWeights.at(unigram)) << endl;
}
file << endl;
file << "\\2-grams:" << endl;
for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) {
file << log10(bigramProbabilities.at(bigram))
<< " " << get<0>(bigram) << " " << get<1>(bigram)
<< " " << log10(bigramBackoffWeights.at(bigram)) << endl;
}
file << endl;
file << "\\2-grams:" << endl;
for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) {
file << log10(bigramProbabilities.at(bigram))
<< " " << get<0>(bigram) << " " << get<1>(bigram)
<< " " << log10(bigramBackoffWeights.at(bigram)) << endl;
}
file << endl;
file << "\\3-grams:" << endl;
for (const Trigram& trigram : trigramCounts | boost::adaptors::map_keys) {
file << log10(trigramProbabilities.at(trigram))
<< " " << get<0>(trigram) << " " << get<1>(trigram) << " " << get<2>(trigram) << endl;
}
file << endl;
file << "\\3-grams:" << endl;
for (const Trigram& trigram : trigramCounts | boost::adaptors::map_keys) {
file << log10(trigramProbabilities.at(trigram))
<< " " << get<0>(trigram) << " " << get<1>(trigram) << " " << get<2>(trigram) << endl;
}
file << endl;
file << "\\end\\" << endl;
file << "\\end\\" << endl;
}
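// Illustrative only: for the two-word dialog { "hello", "world" } and
// D = 0.5, the generated ARPA body (after the header lines) would read:
//
//   \data\
//   ngram 1=2
//   ngram 2=1
//   ngram 3=0
//
//   \1-grams:
//   -0.6021 hello -0.1761
//   -0.6021 world -0.3010
//
//   \2-grams:
//   -0.3010 hello world -0.3010
//
//   \3-grams:
//
//   \end\
//
// e.g. P(hello) = 0.5 * 1/2 = 0.25, log10(0.25) = -0.6021, and
// alpha(hello) = 0.5 / (1 - P(world)) = 0.6667, log10(0.6667) = -0.1761.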
lambda_unique_ptr<ngram_model_t> createLanguageModel(
const vector<string>& words,
ps_decoder_t& decoder
) {
path tempFilePath = getTempFilePath();
createLanguageModelFile(words, tempFilePath);
auto deleteTempFile = gsl::finally([&]() { std::filesystem::remove(tempFilePath); });
return lambda_unique_ptr<ngram_model_t>(
ngram_model_read(decoder.config, tempFilePath.u8string().c_str(), NGRAM_ARPA, decoder.lmath),
[](ngram_model_t* lm) { ngram_model_free(lm); });
}


@@ -9,6 +9,6 @@ extern "C" {
}
lambda_unique_ptr<ngram_model_t> createLanguageModel(
const std::vector<std::string>& words,
ps_decoder_t& decoder
);


@@ -23,223 +23,223 @@ using std::filesystem::path;
using std::regex;
using boost::optional;
using std::chrono::duration_cast;
logging::Level convertSphinxErrorLevel(err_lvl_t errorLevel) {
switch (errorLevel) {
case ERR_DEBUG:
case ERR_INFO:
case ERR_INFOCONT:
return logging::Level::Trace;
case ERR_WARN:
return logging::Level::Warn;
case ERR_ERROR:
return logging::Level::Error;
case ERR_FATAL:
return logging::Level::Fatal;
default:
throw invalid_argument("Unknown log level.");
}
}
void sphinxLogCallback(void* user_data, err_lvl_t errorLevel, const char* format, ...) {
UNUSED(user_data);
// Create varArgs list
va_list args;
va_start(args, format);
auto _ = gsl::finally([&args]() { va_end(args); });
// Format message
const int initialSize = 256;
vector<char> chars(initialSize);
bool success = false;
while (!success) {
const int charsWritten = vsnprintf(chars.data(), chars.size(), format, args);
if (charsWritten < 0) throw runtime_error("Error formatting PocketSphinx log message.");
success = charsWritten < static_cast<int>(chars.size());
if (!success) chars.resize(chars.size() * 2);
}
const regex waste("^(DEBUG|INFO|INFOCONT|WARN|ERROR|FATAL): ");
string message =
std::regex_replace(chars.data(), waste, "", std::regex_constants::format_first_only);
boost::algorithm::trim(message);
const logging::Level logLevel = convertSphinxErrorLevel(errorLevel);
logging::log(logLevel, message);
}
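// One caveat in the grow-and-retry loop above: vsnprintf consumes the va_list,
// so reusing the same args in a second iteration is formally undefined
// behavior. A defensive sketch (not the committed code) copies the list on
// every attempt:
#include <cstdarg>
#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

std::string formatVarArgs(const char* format, va_list args) {
    std::vector<char> chars(256);
    while (true) {
        va_list argsCopy;
        va_copy(argsCopy, args); // fresh, valid copy for this attempt
        const int charsWritten = vsnprintf(chars.data(), chars.size(), format, argsCopy);
        va_end(argsCopy);
        if (charsWritten < 0) throw std::runtime_error("Error formatting log message.");
        if (charsWritten < static_cast<int>(chars.size())) return chars.data();
        chars.resize(chars.size() * 2); // buffer too small: double it and retry
    }
}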
void redirectPocketSphinxOutput() {
static bool redirected = false;
if (redirected) return;
// Discard PocketSphinx output
err_set_logfp(nullptr);
// Redirect PocketSphinx output to log
err_set_callback(sphinxLogCallback, nullptr);
redirected = true;
}
BoundedTimeline<Phone> recognizePhones(
const AudioClip& inputAudioClip,
optional<std::string> dialog,
decoderFactory createDecoder,
utteranceToPhonesFunction utteranceToPhones,
int maxThreadCount,
ProgressSink& progressSink
) {
ProgressMerger totalProgressMerger(progressSink);
ProgressSink& voiceActivationProgressSink =
totalProgressMerger.addSource("VAD (PocketSphinx tools)", 1.0);
ProgressSink& dialogProgressSink =
totalProgressMerger.addSource("recognition (PocketSphinx tools)", 15.0);
// Make sure audio stream has no DC offset
const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | removeDcOffset();
// Split audio into utterances
JoiningBoundedTimeline<void> utterances;
try {
utterances = detectVoiceActivity(*audioClip, voiceActivationProgressSink);
} catch (...) {
std::throw_with_nested(runtime_error("Error detecting segments of speech."));
}
redirectPocketSphinxOutput();
// Prepare pool of decoders
ObjectPool<ps_decoder_t, lambda_unique_ptr<ps_decoder_t>> decoderPool(
[&] { return createDecoder(dialog); });
BoundedTimeline<Phone> phones(audioClip->getTruncatedRange());
std::mutex resultMutex;
const auto processUtterance = [&](Timed<void> timedUtterance, ProgressSink& utteranceProgressSink) {
// Detect phones for utterance
const auto decoder = decoderPool.acquire();
Timeline<Phone> utterancePhones = utteranceToPhones(
*audioClip,
timedUtterance.getTimeRange(),
*decoder,
utteranceProgressSink
);
// Copy phones to result timeline
std::lock_guard<std::mutex> lock(resultMutex);
for (const auto& timedPhone : utterancePhones) {
phones.set(timedPhone);
}
};
const auto getUtteranceProgressWeight = [](const Timed<void> timedUtterance) {
return timedUtterance.getDuration().count();
};
// Perform speech recognition
try {
// Determine how many parallel threads to use
int threadCount = std::min({
maxThreadCount,
// Don't use more threads than there are utterances to be processed
static_cast<int>(utterances.size()),
// Don't waste time creating additional threads (and decoders!) if the recording is short
static_cast<int>(
duration_cast<std::chrono::seconds>(audioClip->getTruncatedRange().getDuration()).count() / 5
)
});
if (threadCount < 1) {
threadCount = 1;
}
logging::debugFormat("Speech recognition using {} threads -- start", threadCount);
runParallel(
"speech recognition (PocketSphinx tools)",
processUtterance,
utterances,
threadCount,
dialogProgressSink,
getUtteranceProgressWeight
);
logging::debug("Speech recognition -- end");
} catch (...) {
std::throw_with_nested(runtime_error("Error performing speech recognition via PocketSphinx tools."));
}
return phones;
}
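// To make the thread-count heuristic above concrete (hypothetical standalone
// helper mirroring the std::min expression):
#include <algorithm>
#include <iostream>

int chooseThreadCount(int maxThreadCount, int utteranceCount, int clipSeconds) {
    const int threadCount = std::min({ maxThreadCount, utteranceCount, clipSeconds / 5 });
    return std::max(threadCount, 1); // never drop below one thread
}

int main() {
    std::cout << chooseThreadCount(8, 20, 60) << "\n"; // min(8, 20, 12) = 8
    std::cout << chooseThreadCount(8, 20, 10) << "\n"; // min(8, 20, 2)  = 2
    std::cout << chooseThreadCount(8, 0, 3) << "\n";   // clamped to 1
}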
const path& getSphinxModelDirectory() {
static path sphinxModelDirectory(getBinDirectory() / "res" / "sphinx");
return sphinxModelDirectory;
}
JoiningTimeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone>& phones) {
JoiningTimeline<void> noiseSounds;
// Find utterance parts without recognized phones
noiseSounds.set(utteranceTimeRange);
for (const auto& timedPhone : phones) {
noiseSounds.clear(timedPhone.getTimeRange());
}
// Remove undesired elements
const centiseconds minSoundDuration = 12_cs;
for (const auto& unknownSound : JoiningTimeline<void>(noiseSounds)) {
const bool startsAtZero = unknownSound.getStart() == 0_cs;
const bool tooShort = unknownSound.getDuration() < minSoundDuration;
if (startsAtZero || tooShort) {
noiseSounds.clear(unknownSound.getTimeRange());
}
}
return noiseSounds;
}
BoundedTimeline<string> recognizeWords(const vector<int16_t>& audioBuffer, ps_decoder_t& decoder) {
// Restart timing at 0
ps_start_stream(&decoder);
// Start recognition
int error = ps_start_utt(&decoder);
if (error) throw runtime_error("Error starting utterance processing for word recognition.");
// Process entire audio clip
const bool noRecognition = false;
const bool fullUtterance = true;
const int searchedFrameCount =
ps_process_raw(&decoder, audioBuffer.data(), audioBuffer.size(), noRecognition, fullUtterance);
if (searchedFrameCount < 0) {
throw runtime_error("Error analyzing raw audio data for word recognition.");
}
// End recognition
error = ps_end_utt(&decoder);
if (error) throw runtime_error("Error ending utterance processing for word recognition.");
BoundedTimeline<string> result(
TimeRange(0_cs, centiseconds(100 * audioBuffer.size() / sphinxSampleRate))
);
const bool phonetic = cmd_ln_boolean_r(decoder.config, "-allphone_ci");
if (!phonetic) {
// If the decoder is in word mode (as opposed to phonetic recognition), it expects each
// utterance to contain speech. If it doesn't, ps_seg_word() logs the annoying error
// "Couldn't find <s> in first frame".
// Not every utterance does contain speech, however. In this case, we exit early to prevent
// the log output.
// We *don't* do that in phonetic mode because there, the same code would omit valid phones.
const bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
if (noWordsRecognized) {
return result;
}
}
// Collect words
for (ps_seg_t* it = ps_seg_iter(&decoder); it; it = ps_seg_next(it)) {
const char* word = ps_seg_word(it);
int firstFrame, lastFrame;
ps_seg_frames(it, &firstFrame, &lastFrame);
result.set(centiseconds(firstFrame), centiseconds(lastFrame + 1), word);
}
return result;
}
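// The timing conversion above assumes PocketSphinx's default analysis rate of
// 100 frames per second, so frame indices map one-to-one to centiseconds and
// `lastFrame + 1` makes each word's range end exclusive. The result range
// likewise converts samples to centiseconds; a sketch of the arithmetic:
#include <cstddef>

int main() {
    constexpr int sampleRate = 16000;          // sphinxSampleRate above
    constexpr std::size_t sampleCount = 32000; // a two-second buffer
    static_assert(100 * sampleCount / sampleRate == 200, "2 s == 200 cs");

    // A word reported for frames 37..52 covers centiseconds [37, 53).
    constexpr int lastFrame = 52;
    static_assert(lastFrame + 1 == 53, "end of range is exclusive");
}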


@@ -11,23 +11,23 @@ extern "C" {
}
typedef std::function<lambda_unique_ptr<ps_decoder_t>(
boost::optional<std::string> dialog
)> decoderFactory;
typedef std::function<Timeline<Phone>(
const AudioClip& audioClip,
TimeRange utteranceTimeRange,
ps_decoder_t& decoder,
ProgressSink& utteranceProgressSink
)> utteranceToPhonesFunction;
BoundedTimeline<Phone> recognizePhones(
const AudioClip& inputAudioClip,
boost::optional<std::string> dialog,
decoderFactory createDecoder,
utteranceToPhonesFunction utteranceToPhones,
int maxThreadCount,
ProgressSink& progressSink
);
constexpr int sphinxSampleRate = 16000;
@@ -37,6 +37,6 @@ const std::filesystem::path& getSphinxModelDirectory();
JoiningTimeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone>& phones);
BoundedTimeline<std::string> recognizeWords(
const std::vector<int16_t>& audioBuffer,
ps_decoder_t& decoder
);


@@ -19,117 +19,117 @@ using boost::optional;
using std::function;
lambda_unique_ptr<cst_voice> createDummyVoice() {
lambda_unique_ptr<cst_voice> voice(new_voice(), [](cst_voice* voice) { delete_voice(voice); });
voice->name = "dummy_voice";
usenglish_init(voice.get());
cst_lexicon* lexicon = cmu_lex_init();
feat_set(voice->features, "lexicon", lexicon_val(lexicon));
return voice;
}
static const cst_synth_module synth_method_normalize[] = {
{ "tokenizer_func", default_tokenization }, // split text into tokens
{ "textanalysis_func", default_textanalysis }, // transform tokens into words
{ nullptr, nullptr }
{ "tokenizer_func", default_tokenization }, // split text into tokens
{ "textanalysis_func", default_textanalysis }, // transform tokens into words
{ nullptr, nullptr }
};
vector<string> tokenizeViaFlite(const string& text) {
// Convert text to ASCII
const string asciiText = utf8ToAscii(text);
// Create utterance object with text
lambda_unique_ptr<cst_utterance> utterance(
new_utterance(),
[](cst_utterance* utterance) { delete_utterance(utterance); }
);
utt_set_input_text(utterance.get(), asciiText.c_str());
lambda_unique_ptr<cst_voice> voice = createDummyVoice();
utt_init(utterance.get(), voice.get());
// Perform tokenization and text normalization
if (!apply_synth_method(utterance.get(), synth_method_normalize)) {
throw runtime_error("Error normalizing text using Flite.");
}
vector<string> result;
for (
cst_item* item = relation_head(utt_relation(utterance.get(), "Word"));
item;
item = item_next(item)
) {
const char* word = item_feat_string(item, "name");
result.emplace_back(word);
}
return result;
}
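// A hedged illustration of the normalization step (the exact expansions are
// Flite's and may differ): digits are read out as words, so a usage like the
// following is expected to print something like i / have / two / cats.
// Note that tokenizeViaFlite is internal to the translation unit above.
#include <iostream>
#include <string>
#include <vector>

int main() {
    const std::vector<std::string> tokens = tokenizeViaFlite("I have 2 cats");
    for (const std::string& token : tokens) {
        std::cout << token << "\n";
    }
}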
optional<string> findSimilarDictionaryWord(
const string& word,
const function<bool(const string&)>& dictionaryContains
) {
for (bool addPeriod : { false, true }) {
for (int apostropheIndex = -1; apostropheIndex <= static_cast<int>(word.size()); ++apostropheIndex) {
string modified = word;
if (apostropheIndex != -1) {
modified.insert(apostropheIndex, "'");
}
if (addPeriod) {
modified += ".";
}
if (dictionaryContains(modified)) {
return modified;
}
}
}
return boost::none;
}
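// Usage sketch: the nested loops above try every apostrophe position plus an
// optional trailing period, so stripped tokens can be matched back to their
// dictionary spellings. The mini-dictionary here is hypothetical.
#include <cassert>
#include <set>
#include <string>

int main() {
    const std::set<std::string> dictionary { "can't", "mr." };
    const auto contains = [&](const std::string& word) {
        return dictionary.count(word) > 0;
    };

    assert(*findSimilarDictionaryWord("cant", contains) == "can't"); // apostrophe inserted at index 3
    assert(*findSimilarDictionaryWord("mr", contains) == "mr.");     // trailing period appended
}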
vector<string> tokenizeText(
const string& text,
const function<bool(const string&)>& dictionaryContains
) {
vector<string> words = tokenizeViaFlite(text);
// Join words separated by apostrophes
for (int i = static_cast<int>(words.size()) - 1; i > 0; --i) {
if (!words[i].empty() && words[i][0] == '\'') {
words[i - 1].append(words[i]);
words.erase(words.begin() + i);
}
}
// Turn some symbols into words, remove the rest
const static vector<pair<regex, string>> replacements {
{ regex("&"), "and" },
{ regex("\\*"), "times" },
{ regex("\\+"), "plus" },
{ regex("="), "equals" },
{ regex("@"), "at" },
{ regex("[^a-z']"), "" }
};
for (auto& word : words) {
for (const auto& replacement : replacements) {
word = regex_replace(word, replacement.first, replacement.second);
}
}
// Remove empty words
words.erase(
std::remove_if(words.begin(), words.end(), [](const string& s) { return s.empty(); }),
words.end()
);
// Try to replace words that are not in the dictionary with similar ones that are
for (auto& word : words) {
if (!dictionaryContains(word)) {
optional<string> modifiedWord = findSimilarDictionaryWord(word, dictionaryContains);
if (modifiedWord) {
word = *modifiedWord;
}
}
}
return words;
}
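// End-to-end sketch, assuming Flite passes "&" through as its own token so
// the replacement table above can turn it into "and" (the accept-everything
// dictionary is a stand-in for the real lexicon check):
#include <iostream>
#include <string>

int main() {
    const auto acceptAll = [](const std::string&) { return true; };
    for (const std::string& word : tokenizeText("Tom & Jerry", acceptAll)) {
        std::cout << word << "\n"; // expected: tom / and / jerry
    }
}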


@@ -5,6 +5,6 @@
#include <string>
std::vector<std::string> tokenizeText(
const std::string& text,
const std::function<bool(const std::string&)>& dictionaryContains
);


@@ -3,27 +3,27 @@
using std::string;
ExportFormatConverter& ExportFormatConverter::get() {
static ExportFormatConverter converter;
return converter;
}
string ExportFormatConverter::getTypeName() {
return "ExportFormat";
return "ExportFormat";
}
EnumConverter<ExportFormat>::member_data ExportFormatConverter::getMemberData() {
return member_data {
{ ExportFormat::Dat, "dat" },
{ ExportFormat::Tsv, "tsv" },
{ ExportFormat::Xml, "xml" },
{ ExportFormat::Json, "json" }
};
}
std::ostream& operator<<(std::ostream& stream, ExportFormat value) {
return ExportFormatConverter::get().write(stream, value);
}
std::istream& operator>>(std::istream& stream, ExportFormat& value) {
return ExportFormatConverter::get().read(stream, value);
}
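// Usage sketch: the EnumConverter machinery wires ExportFormat into iostream,
// so values can be parsed from and written to streams (e.g. by command-line
// option parsing):
#include <iostream>
#include <sstream>

int main() {
    ExportFormat format = ExportFormat::Dat;
    std::istringstream input("json");
    input >> format;             // parsed via ExportFormatConverter
    std::cout << format << "\n"; // prints "json"
}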


@@ -3,18 +3,18 @@
#include "tools/EnumConverter.h"
enum class ExportFormat {
Dat,
Tsv,
Xml,
Json
};
class ExportFormatConverter : public EnumConverter<ExportFormat> {
public:
static ExportFormatConverter& get();
protected:
std::string getTypeName() override;
member_data getMemberData() override;
};
std::ostream& operator<<(std::ostream& stream, ExportFormat value);


@@ -3,25 +3,25 @@
using std::string;
RecognizerTypeConverter& RecognizerTypeConverter::get() {
static RecognizerTypeConverter converter;
return converter;
}
string RecognizerTypeConverter::getTypeName() {
return "RecognizerType";
return "RecognizerType";
}
EnumConverter<RecognizerType>::member_data RecognizerTypeConverter::getMemberData() {
return member_data {
{ RecognizerType::PocketSphinx, "pocketSphinx" },
{ RecognizerType::Phonetic, "phonetic" }
};
}
std::ostream& operator<<(std::ostream& stream, RecognizerType value) {
return RecognizerTypeConverter::get().write(stream, value);
}
std::istream& operator>>(std::istream& stream, RecognizerType& value) {
return RecognizerTypeConverter::get().read(stream, value);
}


@@ -3,16 +3,16 @@
#include "tools/EnumConverter.h"
enum class RecognizerType {
PocketSphinx,
Phonetic
};
class RecognizerTypeConverter : public EnumConverter<RecognizerType> {
public:
static RecognizerTypeConverter& get();
protected:
std::string getTypeName() override;
member_data getMemberData() override;
};
std::ostream& operator<<(std::ostream& stream, RecognizerType value);
