Indent code files with spaces rather than tabs
parent 71259421a9
commit b365c4c1d5
@@ -14,17 +14,17 @@ add_subdirectory("extras/EsotericSoftwareSpine")

# Install misc. files
install(
    FILES README.adoc LICENSE.md CHANGELOG.md
    DESTINATION .
)

# Configure CPack
function(get_short_system_name variable)
    if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
        set(${variable} "macOS" PARENT_SCOPE)
    else()
        set(${variable} "${CMAKE_SYSTEM_NAME}" PARENT_SCOPE)
    endif()
endfunction()

set(CPACK_PACKAGE_NAME ${appName})

@@ -1,11 +1,11 @@
cmake_minimum_required(VERSION 3.2)

set(afterEffectsFiles
    "Rhubarb Lip Sync.jsx"
    "README.adoc"
)

install(
    FILES ${afterEffectsFiles}
    DESTINATION "extras/AdobeAfterEffects"
)

File diff suppressed because it is too large
@@ -1,18 +1,18 @@
cmake_minimum_required(VERSION 3.2)

add_custom_target(
    rhubarbForSpine ALL
    "./gradlew" "build"
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    COMMENT "Building Rhubarb for Spine through Gradle."
)

install(
    DIRECTORY "build/libs/"
    DESTINATION "extras/EsotericSoftwareSpine"
)

install(
    FILES README.adoc
    DESTINATION "extras/EsotericSoftwareSpine"
)

@@ -14,112 +14,112 @@ import tornadofx.setValue
import java.util.concurrent.ExecutorService

class AnimationFileModel(val parentModel: MainModel, animationFilePath: Path, private val executor: ExecutorService) {
    val spineJson = SpineJson(animationFilePath)

    val slotsProperty = SimpleObjectProperty<ObservableList<String>>()
    private var slots: ObservableList<String> by slotsProperty

    val mouthSlotProperty: SimpleStringProperty = SimpleStringProperty().alsoListen {
        val mouthSlot = this.mouthSlot
        val mouthNaming = if (mouthSlot != null)
            MouthNaming.guess(spineJson.getSlotAttachmentNames(mouthSlot))
        else null
        this.mouthNaming = mouthNaming

        mouthShapes = if (mouthSlot != null && mouthNaming != null) {
            val mouthNames = spineJson.getSlotAttachmentNames(mouthSlot)
            MouthShape.values().filter { mouthNames.contains(mouthNaming.getName(it)) }
        } else listOf()

        mouthSlotError = if (mouthSlot != null)
            null
        else
            "No slot with mouth drawings specified."
    }
    private var mouthSlot: String? by mouthSlotProperty

    val mouthSlotErrorProperty = SimpleStringProperty()
    private var mouthSlotError: String? by mouthSlotErrorProperty

    val mouthNamingProperty = SimpleObjectProperty<MouthNaming>()
    private var mouthNaming: MouthNaming? by mouthNamingProperty

    val mouthShapesProperty = SimpleObjectProperty<List<MouthShape>>().alsoListen {
        mouthShapesError = getMouthShapesErrorString()
    }
    var mouthShapes: List<MouthShape> by mouthShapesProperty
        private set

    val mouthShapesErrorProperty = SimpleStringProperty()
    private var mouthShapesError: String? by mouthShapesErrorProperty

    val audioFileModelsProperty = SimpleListProperty<AudioFileModel>(
        spineJson.audioEvents
            .map { event ->
                var audioFileModel: AudioFileModel? = null
                val reportResult: (List<MouthCue>) -> Unit =
                    { result -> saveAnimation(audioFileModel!!.animationName, event.name, result) }
                audioFileModel = AudioFileModel(event, this, executor, reportResult)
                return@map audioFileModel
            }
            .asObservable()
    )
    val audioFileModels: ObservableList<AudioFileModel> by audioFileModelsProperty

    val busyProperty = SimpleBooleanProperty().apply {
        bind(object : BooleanBinding() {
            init {
                for (audioFileModel in audioFileModels) {
                    super.bind(audioFileModel.busyProperty)
                }
            }
            override fun computeValue(): Boolean {
                return audioFileModels.any { it.busy }
            }
        })
    }
    val busy by busyProperty

    val validProperty = SimpleBooleanProperty().apply {
        val errorProperties = arrayOf(mouthSlotErrorProperty, mouthShapesErrorProperty)
        bind(object : BooleanBinding() {
            init {
                super.bind(*errorProperties)
            }
            override fun computeValue(): Boolean {
                return errorProperties.all { it.value == null }
            }
        })
    }

    private fun saveAnimation(animationName: String, audioEventName: String, mouthCues: List<MouthCue>) {
        spineJson.createOrUpdateAnimation(mouthCues, audioEventName, animationName, mouthSlot!!, mouthNaming!!)
        spineJson.save()
    }

    init {
        slots = spineJson.slots.asObservable()
        mouthSlot = spineJson.guessMouthSlot()
    }

    private fun getMouthShapesErrorString(): String? {
        val missingBasicShapes = MouthShape.basicShapes
            .filter { !mouthShapes.contains(it) }
        if (missingBasicShapes.isEmpty()) return null

        val result = StringBuilder()
        val missingShapesString = missingBasicShapes.joinToString()
        result.appendln(
            if (missingBasicShapes.count() > 1)
                "Mouth shapes $missingShapesString are missing."
            else
                "Mouth shape $missingShapesString is missing."
        )

        val first = MouthShape.basicShapes.first()
        val last = MouthShape.basicShapes.last()
        result.append("At least the basic mouth shapes $first-$last need corresponding image attachments.")
        return result.toString()
    }
}

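The alsoListen extension used throughout this model is defined elsewhere in the repository and does not appear in this diff. Judging by its usage, it registers a change listener and fires it once with the current value, so derived state is initialized immediately. A minimal sketch under that assumption:

import javafx.beans.property.Property

// Hypothetical sketch of the project's alsoListen helper: invoke `listener`
// once with the current value, then again on every change, and return the
// receiver so it can be used directly in property initializers.
fun <T, P : Property<T>> P.alsoListen(listener: (T) -> Unit): P {
    addListener { _, _, newValue -> listener(newValue) }
    listener(value)
    return this
}
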
@@ -16,181 +16,181 @@ import java.util.concurrent.ExecutorService
import java.util.concurrent.Future

class AudioFileModel(
    audioEvent: SpineJson.AudioEvent,
    private val parentModel: AnimationFileModel,
    private val executor: ExecutorService,
    private val reportResult: (List<MouthCue>) -> Unit
) {
    private val spineJson = parentModel.spineJson

    private val audioFilePath: Path = spineJson.audioDirectoryPath.resolve(audioEvent.relativeAudioFilePath)

    val eventNameProperty = SimpleStringProperty(audioEvent.name)
    val eventName: String by eventNameProperty

    val displayFilePathProperty = SimpleStringProperty(audioEvent.relativeAudioFilePath)

    val animationNameProperty = SimpleStringProperty().apply {
        val mainModel = parentModel.parentModel
        bind(object : ObjectBinding<String>() {
            init {
                super.bind(
                    mainModel.animationPrefixProperty,
                    eventNameProperty,
                    mainModel.animationSuffixProperty
                )
            }
            override fun computeValue(): String {
                return mainModel.animationPrefix + eventName + mainModel.animationSuffix
            }
        })
    }
    val animationName: String by animationNameProperty

    val dialogProperty = SimpleStringProperty(audioEvent.dialog)
    private val dialog: String? by dialogProperty

    val animationProgressProperty = SimpleObjectProperty<Double?>(null)
    var animationProgress: Double? by animationProgressProperty
        private set

    private val animatedProperty = SimpleBooleanProperty().apply {
        bind(object : ObjectBinding<Boolean>() {
            init {
                super.bind(animationNameProperty, parentModel.spineJson.animationNames)
            }
            override fun computeValue(): Boolean {
                return parentModel.spineJson.animationNames.contains(animationName)
            }
        })
    }
    private var animated by animatedProperty

    private val futureProperty = SimpleObjectProperty<Future<*>?>()
    private var future by futureProperty

    val audioFileStateProperty = SimpleObjectProperty<AudioFileState>().apply {
        bind(object : ObjectBinding<AudioFileState>() {
            init {
                super.bind(animatedProperty, futureProperty, animationProgressProperty)
            }
            override fun computeValue(): AudioFileState {
                return if (future != null) {
                    if (animationProgress != null)
                        if (future!!.isCancelled)
                            AudioFileState(AudioFileStatus.Canceling)
                        else
                            AudioFileState(AudioFileStatus.Animating, animationProgress)
                    else
                        AudioFileState(AudioFileStatus.Pending)
                } else {
                    if (animated)
                        AudioFileState(AudioFileStatus.Done)
                    else
                        AudioFileState(AudioFileStatus.NotAnimated)
                }
            }
        })
    }

    val busyProperty = SimpleBooleanProperty().apply {
        bind(object : BooleanBinding() {
            init {
                super.bind(futureProperty)
            }
            override fun computeValue(): Boolean {
                return future != null
            }
        })
    }
    val busy by busyProperty

    val actionLabelProperty = SimpleStringProperty().apply {
        bind(object : StringBinding() {
            init {
                super.bind(futureProperty)
            }
            override fun computeValue(): String {
                return if (future != null)
                    "Cancel"
                else
                    "Animate"
            }
        })
    }

    fun performAction() {
        if (future == null) {
            if (animated) {
                Alert(Alert.AlertType.CONFIRMATION).apply {
                    headerText = "Animation '$animationName' already exists."
                    contentText = "Do you want to replace the existing animation?"
                    val result = showAndWait()
                    if (result.get() != ButtonType.OK) {
                        return
                    }
                }
            }

            startAnimation()
        } else {
            cancelAnimation()
        }
    }

    private fun startAnimation() {
        val wrapperTask = Runnable {
            val recognizer = parentModel.parentModel.recognizer.value
            val extendedMouthShapes = parentModel.mouthShapes.filter { it.isExtended }.toSet()
            val reportProgress: (Double?) -> Unit = {
                progress -> runAndWait { this@AudioFileModel.animationProgress = progress }
            }
            val rhubarbTask = RhubarbTask(audioFilePath, recognizer, dialog, extendedMouthShapes, reportProgress)
            try {
                try {
                    val result = rhubarbTask.call()
                    runAndWait {
                        reportResult(result)
                    }
                } finally {
                    runAndWait {
                        animationProgress = null
                        future = null
                    }
                }
            } catch (e: InterruptedException) {
            } catch (e: Exception) {
                e.printStackTrace(System.err)

                Platform.runLater {
                    Alert(Alert.AlertType.ERROR).apply {
                        headerText = "Error performing lip sync for event '$eventName'."
                        contentText = if (e is EndUserException)
                            e.message
                        else
                            ("An internal error occurred.\n"
                                + "Please report an issue, including the following information.\n"
                                + getStackTrace(e))
                        show()
                    }
                }
            }
        }
        future = executor.submit(wrapperTask)
    }

    private fun cancelAnimation() {
        future?.cancel(true)
    }
}

enum class AudioFileStatus {
    NotAnimated,
    Pending,
    Animating,
    Canceling,
    Done
}

data class AudioFileState(val status: AudioFileStatus, val progress: Double? = null)

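runAndWait is likewise a project helper outside this diff. It appears to marshal a block onto the JavaFX application thread and block the calling worker thread until it has run, which is why the Rhubarb worker above can safely write to UI-bound properties. A sketch under that assumption:

import javafx.application.Platform
import java.util.concurrent.CountDownLatch

// Hypothetical sketch: run `action` on the JavaFX application thread and wait
// for it to complete. Executes directly when already on that thread.
fun runAndWait(action: () -> Unit) {
    if (Platform.isFxApplicationThread()) {
        action()
        return
    }
    val latch = CountDownLatch(1)
    Platform.runLater {
        try {
            action()
        } finally {
            latch.countDown()
        }
    }
    latch.await()
}
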
@@ -14,67 +14,67 @@ import tornadofx.rectangle
import tornadofx.removeFromParent

fun renderErrorIndicator(): Node {
    return Group().apply {
        isManaged = false
        circle {
            radius = 7.0
            fill = Color.ORANGERED
        }
        rectangle {
            x = -1.0
            y = -5.0
            width = 2.0
            height = 7.0
            fill = Color.WHITE
        }
        rectangle {
            x = -1.0
            y = 3.0
            width = 2.0
            height = 2.0
            fill = Color.WHITE
        }
    }
}

fun Parent.errorProperty(): StringProperty {
    return properties.getOrPut("rhubarb.errorProperty", {
        val errorIndicator: Node = renderErrorIndicator()
        val tooltip = Tooltip()
        val property = SimpleStringProperty()

        fun updateTooltipVisibility() {
            if (tooltip.text.isNotEmpty() && isFocused) {
                val bounds = localToScreen(boundsInLocal)
                tooltip.show(scene.window, bounds.minX + 5, bounds.maxY + 2)
            } else {
                tooltip.hide()
            }
        }

        focusedProperty().addListener({
            _: ObservableValue<out Boolean>, _: Boolean, _: Boolean ->
            updateTooltipVisibility()
        })

        property.addListener({
            _: ObservableValue<out String?>, _: String?, newValue: String? ->

            if (newValue != null) {
                this.addChildIfPossible(errorIndicator)

                tooltip.text = newValue
                Tooltip.install(this, tooltip)
                updateTooltipVisibility()
            } else {
                errorIndicator.removeFromParent()

                tooltip.text = ""
                tooltip.hide()
                Tooltip.uninstall(this, tooltip)
                updateTooltipVisibility()
            }
        })
        return@getOrPut property
    }) as StringProperty
}

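addChildIfPossible is another helper that is not part of this diff. Presumably it attaches the indicator node to the receiver when the concrete Parent subtype exposes a public child list; a rough sketch under that assumption:

import javafx.scene.Group
import javafx.scene.Node
import javafx.scene.Parent
import javafx.scene.layout.Pane

// Hypothetical sketch: best-effort attachment of `child`. Parent.getChildren()
// is protected, so only subtypes with a public children list are handled here.
fun Parent.addChildIfPossible(child: Node) {
    when (this) {
        is Group -> children.add(child)
        is Pane -> children.add(child)
        else -> Unit
    }
}
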
@@ -8,18 +8,18 @@ import java.lang.reflect.Method
import javax.swing.ImageIcon

class MainApp : App(MainView::class) {
    override fun start(stage: Stage) {
        super.start(stage)
        setIcon()
    }

    private fun setIcon() {
        // Set icon for windows
        for (iconSize in listOf(16, 32, 48, 256)) {
            addStageIcon(Image(this.javaClass.getResourceAsStream("/icon-$iconSize.png")))
        }

        // OS X requires the dock icon to be changed separately.
        // Not all JDKs contain the class com.apple.eawt.Application, so we have to use reflection.
        val classLoader = this.javaClass.classLoader
        try {

@@ -37,6 +37,6 @@ class MainApp : App(MainView::class) {
        } catch (e: Exception) {
            // Works only on OS X
        }
    }
}

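The body of the try block is elided by the diff. Given the surrounding comments, it presumably calls com.apple.eawt.Application.getApplication().setDockIconImage(...) via reflection; a sketch of that pattern (the method names are the standard Apple eAWT API, the helper itself is hypothetical):

import javax.swing.ImageIcon

// Hypothetical sketch of the elided reflection code: set the macOS dock icon
// without a compile-time dependency on com.apple.eawt.Application.
fun setDockIcon(classLoader: ClassLoader, iconUrl: java.net.URL) {
    val applicationClass = classLoader.loadClass("com.apple.eawt.Application")
    val application = applicationClass.getMethod("getApplication").invoke(null)
    val dockImage = ImageIcon(iconUrl).image
    applicationClass
        .getMethod("setDockIconImage", java.awt.Image::class.java)
        .invoke(application, dockImage)
}
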
@@ -13,51 +13,51 @@ import java.nio.file.Paths
import java.util.concurrent.ExecutorService

class MainModel(private val executor: ExecutorService) {
    val filePathStringProperty = SimpleStringProperty(getDefaultPathString()).alsoListen { value ->
        filePathError = getExceptionMessage {
            animationFileModel = null
            if (value.isNullOrBlank()) {
                throw EndUserException("No input file specified.")
            }

            val path = try {
                val trimmed = value.removeSurrounding("\"")
                Paths.get(trimmed)
            } catch (e: InvalidPathException) {
                throw EndUserException("Not a valid file path.")
            }

            if (!Files.exists(path)) {
                throw EndUserException("File does not exist.")
            }

            animationFileModel = AnimationFileModel(this, path, executor)
        }
    }

    val filePathErrorProperty = SimpleStringProperty()
    private var filePathError: String? by filePathErrorProperty

    val animationFileModelProperty = SimpleObjectProperty<AnimationFileModel?>()
    var animationFileModel by animationFileModelProperty
        private set

    val recognizersProperty = SimpleObjectProperty<ObservableList<Recognizer>>(FXCollections.observableArrayList(
        Recognizer("pocketSphinx", "PocketSphinx (use for English recordings)"),
        Recognizer("phonetic", "Phonetic (use for non-English recordings)")
    ))
    private var recognizers: ObservableList<Recognizer> by recognizersProperty

    val recognizerProperty = SimpleObjectProperty<Recognizer>(recognizers[0])
    var recognizer: Recognizer by recognizerProperty

    val animationPrefixProperty = SimpleStringProperty("say_")
    var animationPrefix: String by animationPrefixProperty

    val animationSuffixProperty = SimpleStringProperty("")
    var animationSuffix: String by animationSuffixProperty

    private fun getDefaultPathString() = FX.application.parameters.raw.firstOrNull()
}

class Recognizer(val value: String, val description: String)

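getExceptionMessage is a small utility that does not appear in this diff. From its use here, it runs a block, returns null on success, and converts a thrown EndUserException into its user-facing message. A sketch under that assumption:

// Hypothetical sketch: map success to null and an expected EndUserException
// to its message, so the result can be bound directly to an error property.
fun getExceptionMessage(action: () -> Unit): String? {
    return try {
        action()
        null
    } catch (e: EndUserException) {
        e.message
    }
}
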
@@ -23,235 +23,235 @@ import java.io.File
import java.util.concurrent.Executors

class MainView : View() {
    private val executor = Executors.newSingleThreadExecutor()
    private val mainModel = MainModel(executor)

    init {
        title = "Rhubarb Lip Sync for Spine"
    }

    override val root = form {
        var filePathTextField: TextField? = null
        var filePathButton: Button? = null

        val fileModelProperty = mainModel.animationFileModelProperty

        minWidth = 800.0
        prefWidth = 1000.0
        fieldset("Settings") {
            disableProperty().bind(fileModelProperty.select { it!!.busyProperty })
            field("Spine JSON file") {
                filePathTextField = textfield {
                    textProperty().bindBidirectional(mainModel.filePathStringProperty)
                    errorProperty().bind(mainModel.filePathErrorProperty)
                }
                filePathButton = button("...")
            }
            field("Mouth slot") {
                combobox<String> {
                    itemsProperty().bind(fileModelProperty.select { it!!.slotsProperty })
                    valueProperty().bindBidirectional(fileModelProperty.select { it!!.mouthSlotProperty })
                    errorProperty().bind(fileModelProperty.select { it!!.mouthSlotErrorProperty })
                }
            }
            field("Mouth naming") {
                label {
                    textProperty().bind(
                        fileModelProperty
                            .select { it!!.mouthNamingProperty }
                            .select { SimpleStringProperty(it.displayString) }
                    )
                }
            }
            field("Mouth shapes") {
                hbox {
                    errorProperty().bind(fileModelProperty.select { it!!.mouthShapesErrorProperty })
                    gridpane {
                        hgap = 10.0
                        vgap = 3.0
                        row {
                            label("Basic:")
                            for (shape in MouthShape.basicShapes) {
                                renderShapeCheckbox(shape, fileModelProperty, this)
                            }
                        }
                        row {
                            label("Extended:")
                            for (shape in MouthShape.extendedShapes) {
                                renderShapeCheckbox(shape, fileModelProperty, this)
                            }
                        }
                    }
                }
            }
            field("Dialog recognizer") {
                combobox<Recognizer> {
                    itemsProperty().bind(mainModel.recognizersProperty)
                    this.converter = object : StringConverter<Recognizer>() {
                        override fun toString(recognizer: Recognizer?): String {
                            return recognizer?.description ?: ""
                        }
                        override fun fromString(string: String?): Recognizer {
                            throw NotImplementedError()
                        }
                    }
                    valueProperty().bindBidirectional(mainModel.recognizerProperty)
                }
            }
            field("Animation naming") {
                textfield {
                    maxWidth = 100.0
                    textProperty().bindBidirectional(mainModel.animationPrefixProperty)
                }
                label("<audio event name>")
                textfield {
                    maxWidth = 100.0
                    textProperty().bindBidirectional(mainModel.animationSuffixProperty)
                }
            }
        }
        fieldset("Audio events") {
            tableview<AudioFileModel> {
                placeholder = Label("There are no events with associated audio files.")
                columnResizePolicy = SmartResize.POLICY
                column("Event", AudioFileModel::eventNameProperty)
                    .weightedWidth(1.0)
                column("Animation name", AudioFileModel::animationNameProperty)
                    .weightedWidth(1.0)
                column("Audio file", AudioFileModel::displayFilePathProperty)
                    .weightedWidth(1.0)
                column("Dialog", AudioFileModel::dialogProperty).apply {
                    weightedWidth(3.0)
                    // Make dialog column wrap
                    setCellFactory { tableColumn ->
                        return@setCellFactory TableCell<AudioFileModel, String>().also { cell ->
                            cell.graphic = Text().apply {
                                textProperty().bind(cell.itemProperty())
                                fillProperty().bind(cell.textFillProperty())
                                val widthProperty = tableColumn.widthProperty()
                                    .minus(cell.paddingLeftProperty)
                                    .minus(cell.paddingRightProperty)
                                wrappingWidthProperty().bind(widthProperty)
                            }
                            cell.prefHeight = Control.USE_COMPUTED_SIZE
                        }
                    }
                }
                column("Status", AudioFileModel::audioFileStateProperty).apply {
                    weightedWidth(1.0)
                    setCellFactory {
                        return@setCellFactory object : TableCell<AudioFileModel, AudioFileState>() {
                            override fun updateItem(state: AudioFileState?, empty: Boolean) {
                                super.updateItem(state, empty)
                                graphic = if (state != null) {
                                    when (state.status) {
                                        AudioFileStatus.NotAnimated -> Text("Not animated").apply {
                                            fill = Color.GRAY
                                        }
                                        AudioFileStatus.Pending,
                                        AudioFileStatus.Animating -> HBox().apply {
                                            val progress: Double? = state.progress
                                            val indeterminate = -1.0
                                            val bar = progressbar(progress ?: indeterminate) {
                                                maxWidth = Double.MAX_VALUE
                                            }
                                            HBox.setHgrow(bar, Priority.ALWAYS)
                                            hbox {
                                                minWidth = 30.0
                                                if (progress != null) {
                                                    text("${(progress * 100).toInt()}%") {
                                                        alignment = Pos.BASELINE_RIGHT
                                                    }
                                                }
                                            }
                                        }
                                        AudioFileStatus.Canceling -> Text("Canceling")
                                        AudioFileStatus.Done -> Text("Done").apply {
                                            font = Font.font(font.family, FontWeight.BOLD, font.size)
                                        }
                                    }
                                } else null
                            }
                        }
                    }
                }
                column("", AudioFileModel::actionLabelProperty).apply {
                    weightedWidth(1.0)
                    // Show button
                    setCellFactory {
                        return@setCellFactory object : TableCell<AudioFileModel, String>() {
                            override fun updateItem(item: String?, empty: Boolean) {
                                super.updateItem(item, empty)
                                graphic = if (!empty)
                                    Button(item).apply {
                                        this.maxWidth = Double.MAX_VALUE
                                        setOnAction {
                                            val audioFileModel = this@tableview.items[index]
                                            audioFileModel.performAction()
                                        }
                                        val invalidProperty: Property<Boolean> = fileModelProperty
                                            .select { it!!.validProperty }
                                            .select { SimpleBooleanProperty(!it) }
                                        disableProperty().bind(invalidProperty)
                                    }
                                else
                                    null
                            }
                        }
                    }
                }
                itemsProperty().bind(fileModelProperty.select { it!!.audioFileModelsProperty })
            }
        }

        onDragOver = EventHandler<DragEvent> { event ->
            if (event.dragboard.hasFiles() && mainModel.animationFileModel?.busy != true) {
                event.acceptTransferModes(TransferMode.COPY)
                event.consume()
            }
        }
        onDragDropped = EventHandler<DragEvent> { event ->
            if (event.dragboard.hasFiles() && mainModel.animationFileModel?.busy != true) {
                filePathTextField!!.text = event.dragboard.files.firstOrNull()?.path
                event.isDropCompleted = true
                event.consume()
            }
        }

        whenUndocked {
            executor.shutdownNow()
        }

        filePathButton!!.onAction = EventHandler<ActionEvent> {
            val fileChooser = FileChooser().apply {
                title = "Open Spine JSON file"
                extensionFilters.addAll(
                    FileChooser.ExtensionFilter("Spine JSON file (*.json)", "*.json"),
                    FileChooser.ExtensionFilter("All files (*.*)", "*.*")
                )
                val lastDirectory = filePathTextField!!.text?.let { File(it).parentFile }
                if (lastDirectory != null && lastDirectory.isDirectory) {
                    initialDirectory = lastDirectory
                }
            }
            val file = fileChooser.showOpenDialog(this@MainView.primaryStage)
            if (file != null) {
                filePathTextField!!.text = file.path
            }
        }
    }

    private fun renderShapeCheckbox(shape: MouthShape, fileModelProperty: SimpleObjectProperty<AnimationFileModel?>, parent: EventTarget) {
        parent.label {
            textProperty().bind(
                fileModelProperty
                    .select { it!!.mouthShapesProperty }
                    .select { mouthShapes ->
                        val hairSpace = "\u200A"
                        val result = shape.toString() + hairSpace + if (mouthShapes.contains(shape)) "☑" else "☐"
                        return@select SimpleStringProperty(result)
                    }
            )
        }
    }
}

@@ -4,52 +4,52 @@ import java.util.*

class MouthNaming(private val prefix: String, private val suffix: String, private val mouthShapeCasing: MouthShapeCasing) {

    companion object {
        fun guess(mouthNames: List<String>): MouthNaming {
            if (mouthNames.isEmpty()) {
                return MouthNaming("", "", guessMouthShapeCasing(""))
            }

            val commonPrefix = mouthNames.commonPrefix
            val commonSuffix = mouthNames.commonSuffix
            val firstMouthName = mouthNames.first()
            if (commonPrefix.length + commonSuffix.length >= firstMouthName.length) {
                return MouthNaming(commonPrefix, "", guessMouthShapeCasing(""))
            }

            val shapeName = firstMouthName.substring(
                commonPrefix.length,
                firstMouthName.length - commonSuffix.length)
            val mouthShapeCasing = guessMouthShapeCasing(shapeName)
            return MouthNaming(commonPrefix, commonSuffix, mouthShapeCasing)
        }

        private fun guessMouthShapeCasing(shapeName: String): MouthShapeCasing {
            return if (shapeName.isBlank() || shapeName[0].isLowerCase())
                MouthShapeCasing.Lower
            else
                MouthShapeCasing.Upper
        }
    }

    fun getName(mouthShape: MouthShape): String {
        val name = if (mouthShapeCasing == MouthShapeCasing.Upper)
            mouthShape.toString()
        else
            mouthShape.toString().toLowerCase(Locale.ROOT)
        return "$prefix$name$suffix"
    }

    val displayString: String get() {
        val casing = if (mouthShapeCasing == MouthShapeCasing.Upper)
            "<UPPER-CASE SHAPE NAME>"
        else
            "<lower-case shape name>"
        return "\"$prefix$casing$suffix\""
    }
}

enum class MouthShapeCasing {
    Upper,
    Lower
}

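A worked example of the guessing logic above, using hypothetical attachment names rather than names taken from the diff:

// Attachments "mouth_a", "mouth_b", "mouth_x" share the prefix "mouth_" and
// no common suffix; the remaining shape name "a" is lower-case.
val naming = MouthNaming.guess(listOf("mouth_a", "mouth_b", "mouth_x"))
println(naming.getName(MouthShape.A)) // mouth_a
println(naming.displayString)         // "mouth_<lower-case shape name>"
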
@@ -1,19 +1,19 @@
package com.rhubarb_lip_sync.rhubarb_for_spine

enum class MouthShape {
    A, B, C, D, E, F, G, H, X;

    val isBasic: Boolean
        get() = this.ordinal < basicShapeCount

    val isExtended: Boolean
        get() = !this.isBasic

    companion object {
        const val basicShapeCount = 6

        val basicShapes = MouthShape.values().take(basicShapeCount)

        val extendedShapes = MouthShape.values().drop(basicShapeCount)
    }
}

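For clarity, the split the companion object produces from the declaration order (derived directly from the code above):

check(MouthShape.basicShapes == listOf(
    MouthShape.A, MouthShape.B, MouthShape.C,
    MouthShape.D, MouthShape.E, MouthShape.F))
check(MouthShape.extendedShapes == listOf(MouthShape.G, MouthShape.H, MouthShape.X))
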
@@ -10,157 +10,157 @@ import java.nio.file.Path
import java.util.concurrent.Callable

class RhubarbTask(
    val audioFilePath: Path,
    val recognizer: String,
    val dialog: String?,
    val extendedMouthShapes: Set<MouthShape>,
    val reportProgress: (Double?) -> Unit
) : Callable<List<MouthCue>> {

    override fun call(): List<MouthCue> {
        if (Thread.currentThread().isInterrupted) {
            throw InterruptedException()
        }
        if (!Files.exists(audioFilePath)) {
            throw EndUserException("File '$audioFilePath' does not exist.")
        }

        val dialogFile = if (dialog != null) TemporaryTextFile(dialog) else null
        val outputFile = TemporaryTextFile()
        dialogFile.use { outputFile.use {
            val processBuilder = ProcessBuilder(createProcessBuilderArgs(dialogFile?.filePath)).apply {
                // See http://java-monitor.com/forum/showthread.php?t=4067
                redirectOutput(outputFile.filePath.toFile())
            }
            val process: Process = processBuilder.start()
            val stderr = BufferedReader(InputStreamReader(process.errorStream, StandardCharsets.UTF_8))
            try {
                while (true) {
                    val line = stderr.interruptibleReadLine()
                    val message = parseJsonObject(line)
                    when (message.string("type")!!) {
                        "progress" -> {
                            reportProgress(message.double("value")!!)
                        }
                        "success" -> {
                            reportProgress(1.0)
                            val resultString = String(Files.readAllBytes(outputFile.filePath), StandardCharsets.UTF_8)
                            return parseRhubarbResult(resultString)
                        }
                        "failure" -> {
                            throw EndUserException(message.string("reason") ?: "Rhubarb failed without reason.")
                        }
                    }
                }
            } catch (e: InterruptedException) {
                process.destroyForcibly()
                throw e
            } catch (e: EOFException) {
                throw EndUserException("Rhubarb terminated unexpectedly.")
            } finally {
                process.waitFor()
            }
        }}

        throw EndUserException("Audio file processing terminated in an unexpected way.")
    }

    private fun parseRhubarbResult(jsonString: String): List<MouthCue> {
        val json = parseJsonObject(jsonString)
        val mouthCues = json.array<JsonObject>("mouthCues")!!
        return mouthCues.map { mouthCue ->
            val time = mouthCue.double("start")!!
            val mouthShape = MouthShape.valueOf(mouthCue.string("value")!!)
            return@map MouthCue(time, mouthShape)
        }
    }

    private val jsonParser = JsonParser.default()
    private fun parseJsonObject(jsonString: String): JsonObject {
        return jsonParser.parse(StringReader(jsonString)) as JsonObject
    }

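The parser above only reads the start and value fields of each mouth cue. An abridged example of the JSON produced by Rhubarb's json export format (values illustrative; the real file also carries metadata and cue end times):

{
  "mouthCues": [
    { "start": 0.00, "value": "X" },
    { "start": 0.27, "value": "B" }
  ]
}
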
    private fun createProcessBuilderArgs(dialogFilePath: Path?): List<String> {
        val extendedMouthShapesString =
            if (extendedMouthShapes.any()) extendedMouthShapes.joinToString(separator = "")
            else "\"\""
        return mutableListOf(
            rhubarbBinFilePath.toString(),
            "--machineReadable",
            "--recognizer", recognizer,
            "--exportFormat", "json",
            "--extendedShapes", extendedMouthShapesString
        ).apply {
            if (dialogFilePath != null) {
                addAll(listOf(
                    "--dialogFile", dialogFilePath.toString()
                ))
            }
        }.apply {
            add(audioFilePath.toString())
        }
    }

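Assembled, the argument list yields an invocation along these lines (paths and shape selection illustrative):

rhubarb --machineReadable --recognizer pocketSphinx --exportFormat json \
    --extendedShapes GHX --dialogFile /tmp/dialog.txt recording.wav
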
    private val guiBinDirectory: Path by lazy {
        val path = urlToPath(getLocation(RhubarbTask::class.java))
        return@lazy if (Files.isDirectory(path)) path.parent else path
    }

    private val rhubarbBinFilePath: Path by lazy {
        val rhubarbBinName = if (IS_OS_WINDOWS) "rhubarb.exe" else "rhubarb"
        var currentDirectory: Path? = guiBinDirectory
        while (currentDirectory != null) {
            val candidate: Path = currentDirectory.resolve(rhubarbBinName)
            if (Files.exists(candidate)) {
                return@lazy candidate
            }
            currentDirectory = currentDirectory.parent
        }
        throw EndUserException("Could not find Rhubarb Lip Sync executable '$rhubarbBinName'."
            + " Expected to find it in '$guiBinDirectory' or any directory above.")
    }

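    // Worked example (hypothetical layout): if the GUI runs from
    // /opt/rhubarb/extras/EsotericSoftwareSpine, the search checks
    //   /opt/rhubarb/extras/EsotericSoftwareSpine/rhubarb
    //   /opt/rhubarb/extras/rhubarb
    //   /opt/rhubarb/rhubarb
    // and so on up to the filesystem root before giving up.
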
    private class TemporaryTextFile(text: String = "") : AutoCloseable {
        val filePath: Path = Files.createTempFile(null, null).also {
            Files.write(it, text.toByteArray(StandardCharsets.UTF_8))
        }

        override fun close() {
            Files.delete(filePath)
        }
    }

    // Same as readLine, but can be interrupted.
    // Note that this function handles linebreak characters differently from readLine.
    // It only consumes the first linebreak character before returning and swallows any leading
    // linebreak characters.
    // This behavior is much easier to implement and doesn't make any difference for our purposes.
    private fun BufferedReader.interruptibleReadLine(): String {
        val result = StringBuilder()
        while (true) {
            val char = interruptibleReadChar()
            if (char == '\r' || char == '\n') {
                if (result.isNotEmpty()) return result.toString()
            } else {
                result.append(char)
            }
        }
    }

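    // Worked example: reading the character stream "\n\nstatus\nok\n" yields "status"
    // on the first call (leading linebreaks are swallowed) and "ok" on the second;
    // an empty line is never returned.
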
    private fun BufferedReader.interruptibleReadChar(): Char {
        while (true) {
            if (Thread.currentThread().isInterrupted) {
                throw InterruptedException()
            }
            if (ready()) {
                val result: Int = read()
                if (result == -1) {
                    throw EOFException()
                }
                return result.toChar()
            }
            Thread.yield()
        }
    }
}

@@ -7,157 +7,157 @@ import java.nio.file.Files
import java.nio.file.Path

class SpineJson(private val filePath: Path) {
    private val fileDirectoryPath: Path = filePath.parent
    private val json: JsonObject
    private val skeleton: JsonObject

    init {
        if (!Files.exists(filePath)) {
            throw EndUserException("File '$filePath' does not exist.")
        }
        try {
            json = Parser.default().parse(filePath.toString()) as JsonObject
        } catch (e: Exception) {
            throw EndUserException("Wrong file format. This is not a valid JSON file.")
        }
        skeleton = json.obj("skeleton") ?: throw EndUserException("JSON file is corrupted.")

        validateProperties()
    }

    private fun validateProperties() {
        // Evaluating these properties runs their validation logic as a side effect.
        imagesDirectoryPath
        audioDirectoryPath
    }

    private val imagesDirectoryPath: Path get() {
        val relativeImagesDirectory = skeleton.string("images")
            ?: throw EndUserException("JSON file is incomplete: Images path is missing."
                + " Make sure to check 'Nonessential data' when exporting.")

        val imagesDirectoryPath = fileDirectoryPath.resolve(relativeImagesDirectory).normalize()
        if (!Files.exists(imagesDirectoryPath)) {
            throw EndUserException("Could not find images directory relative to the JSON file."
                + " Make sure the JSON file is in the same directory as the original Spine file.")
        }

        return imagesDirectoryPath
    }

    val audioDirectoryPath: Path get() {
        val relativeAudioDirectory = skeleton.string("audio")
            ?: throw EndUserException("JSON file is incomplete: Audio path is missing."
                + " Make sure to check 'Nonessential data' when exporting.")

        val audioDirectoryPath = fileDirectoryPath.resolve(relativeAudioDirectory).normalize()
        if (!Files.exists(audioDirectoryPath)) {
            throw EndUserException("Could not find audio directory relative to the JSON file."
                + " Make sure the JSON file is in the same directory as the original Spine file.")
        }

        return audioDirectoryPath
    }

    val frameRate: Double get() {
        return skeleton.double("fps") ?: 30.0
    }

    val slots: List<String> get() {
        val slots = json.array("slots") ?: listOf<JsonObject>()
        return slots.mapNotNull { it.string("name") }
    }

    fun guessMouthSlot(): String? {
        return slots.firstOrNull { it.contains("mouth", ignoreCase = true) }
            ?: slots.firstOrNull()
    }

    data class AudioEvent(val name: String, val relativeAudioFilePath: String, val dialog: String?)

    val audioEvents: List<AudioEvent> get() {
        val events = json.obj("events") ?: JsonObject()
        val result = mutableListOf<AudioEvent>()
        for ((name, value) in events) {
            if (value !is JsonObject) throw EndUserException("Invalid event found.")

            val relativeAudioFilePath = value.string("audio") ?: continue

            val dialog = value.string("string")
            result.add(AudioEvent(name, relativeAudioFilePath, dialog))
        }
        return result
    }

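    // Worked example (event layout as exported by Spine): an entry such as
    //   "events": { "1-have-you-heard": { "audio": "1-have-you-heard.wav" } }
    // maps to AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null); a "string"
    // value, when present, is carried over as the optional dialog text.
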
    fun getSlotAttachmentNames(slotName: String): List<String> {
        @Suppress("UNCHECKED_CAST")
        val skins: Collection<JsonObject> = when (val skinsObject = json["skins"]) {
            is JsonObject -> skinsObject.values as Collection<JsonObject>
            is JsonArray<*> -> skinsObject as Collection<JsonObject>
            else -> emptyList()
        }

        // Get attachment names for all skins
        return skins
            .flatMap { skin ->
                skin.obj(slotName)?.keys?.toList()
                    ?: skin.obj("attachments")?.obj(slotName)?.keys?.toList()
                    ?: emptyList<String>()
            }
            .distinct()
    }

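    // Worked example (assumed export shapes): Spine 3.7 exports skins as an object, e.g.
    //   "skins": { "default": { "mouth": { "mouth_a": {}, "mouth_b": {} } } }
    // while Spine 3.8 exports an array with a nested "attachments" object, e.g.
    //   "skins": [ { "name": "default", "attachments": { "mouth": { "mouth_a": {} } } } ]
    // Both branches above reduce to the attachment names ["mouth_a", "mouth_b", ...].
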
    val animationNames = observableSet<String>(
        json.obj("animations")?.map { it.key }?.toMutableSet() ?: mutableSetOf()
    )

    fun createOrUpdateAnimation(mouthCues: List<MouthCue>, eventName: String, animationName: String,
        mouthSlot: String, mouthNaming: MouthNaming
    ) {
        if (!json.containsKey("animations")) {
            json["animations"] = JsonObject()
        }
        val animations: JsonObject = json.obj("animations")!!

        // Round times to full frames. Always round down.
        // If events coincide, prefer the latest one.
        val keyframes = mutableMapOf<Int, MouthShape>()
        for (mouthCue in mouthCues) {
            val frameNumber = (mouthCue.time * frameRate).toInt()
            keyframes[frameNumber] = mouthCue.mouthShape
        }

        animations[animationName] = JsonObject().apply {
            this["slots"] = JsonObject().apply {
                this[mouthSlot] = JsonObject().apply {
                    this["attachment"] = JsonArray(
                        keyframes
                            .toSortedMap()
                            .map { (frameNumber, mouthShape) ->
                                JsonObject().apply {
                                    this["time"] = frameNumber / frameRate
                                    this["name"] = mouthNaming.getName(mouthShape)
                                }
                            }
                    )
                }
            }
            this["events"] = JsonArray(
                JsonObject().apply {
                    this["time"] = 0.0
                    this["name"] = eventName
                    this["string"] = ""
                }
            )
        }

        animationNames.add(animationName)
    }

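    // Worked example: at frameRate = 30, cues at 0.034 s and 0.066 s land on frames
    // floor(0.034 * 30) = 1 and floor(0.066 * 30) = 1. Both map to the same frame, the
    // later cue wins, and the keyframe is written back at time 1 / 30 ≈ 0.0333 s.
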
    override fun toString(): String {
        return json.toJsonString(prettyPrint = true)
    }

    fun save() {
        Files.write(filePath, listOf(toString()), StandardCharsets.UTF_8)
    }
}

@@ -24,37 +24,37 @@ import java.nio.file.Paths
 * @param c The class whose location is desired.
 */
fun getLocation(c: Class<*>): URL {
    // Try the easy way first
    try {
        val codeSourceLocation = c.protectionDomain.codeSource.location
        if (codeSourceLocation != null) return codeSourceLocation
    } catch (e: SecurityException) {
        // Cannot access protection domain
    } catch (e: NullPointerException) {
        // Protection domain or code source is null
    }

    // The easy way failed, so we try the hard way. We ask for the class
    // itself as a resource, then strip the class's path from the URL string,
    // leaving the base path.

    // Get the class's raw resource path
    val classResource = c.getResource(c.simpleName + ".class")
        ?: throw Exception("Cannot find class resource.")

    val url = classResource.toString()
    val suffix = c.canonicalName.replace('.', '/') + ".class"
    if (!url.endsWith(suffix)) throw Exception("Malformed URL.")

    // strip the class's path from the URL string
    val base = url.substring(0, url.length - suffix.length)

    var path = base

    // remove the "jar:" prefix and "!/" suffix, if present
    if (path.startsWith("jar:")) path = path.substring(4, path.length - 2)

    return URL(path)
}

/**

@@ -64,29 +64,29 @@ fun getLocation(c: Class<*>): URL {
 * @return A file path suitable for use with e.g. [FileInputStream]
 */
fun urlToPath(url: URL): Path {
    var pathString = url.toString()

    if (pathString.startsWith("jar:")) {
        // Remove "jar:" prefix and "!/" suffix
        val index = pathString.indexOf("!/")
        pathString = pathString.substring(4, index)
    }

    try {
        if (IS_OS_WINDOWS && pathString.matches("file:[A-Za-z]:.*".toRegex())) {
            pathString = "file:/" + pathString.substring(5)
        }
        return Paths.get(URL(pathString).toURI())
    } catch (e: MalformedURLException) {
        // URL is not completely well-formed.
    } catch (e: URISyntaxException) {
        // URL is not completely well-formed.
    }

    if (pathString.startsWith("file:")) {
        // Pass through the URL as-is, minus "file:" prefix
        pathString = pathString.substring(5)
        return Paths.get(pathString)
    }
    throw IllegalArgumentException("Invalid URL: $url")
}

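// Worked examples (hypothetical URLs):
//   urlToPath(URL("jar:file:/opt/app/app.jar!/"))  → /opt/app/app.jar
//   urlToPath(URL("file:C:/app/classes/"))         → C:\app\classes (after the
//     Windows drive-letter fix-up inserts the missing slash)
//   urlToPath(URL("file:/home/user/classes/"))     → /home/user/classes
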
@@ -3,5 +3,5 @@ package com.rhubarb_lip_sync.rhubarb_for_spine
import javafx.application.Application

fun main(args: Array<String>) {
    Application.launch(MainApp::class.java, *args)
}

@@ -8,31 +8,31 @@ import java.io.PrintWriter
import java.io.StringWriter

val List<String>.commonPrefix: String get() {
    return if (isEmpty()) "" else this.reduce { result, string -> result.commonPrefixWith(string) }
}

val List<String>.commonSuffix: String get() {
    return if (isEmpty()) "" else this.reduce { result, string -> result.commonSuffixWith(string) }
}

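// Worked example: listOf("mouth_a", "mouth_b", "mouth_c").commonPrefix == "mouth_",
// and listOf("a_mouth", "b_mouth").commonSuffix == "_mouth"; useful for stripping
// the shared parts of mouth attachment names.
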
fun <TValue, TProperty : Property<TValue>> TProperty.alsoListen(listener: (TValue) -> Unit) : TProperty {
    // Notify the listener of the initial value.
    // If we did this synchronously, the listener's state would have to be fully initialized the
    // moment this function is called. So calling this function during object initialization might
    // result in access to uninitialized state.
    Platform.runLater { listener(this.value) }

    addListener({ _, _, newValue -> listener(newValue) })
    return this
}

fun getExceptionMessage(action: () -> Unit): String? {
    try {
        action()
    } catch (e: Exception) {
        return e.message
    }
    return null
}

/**

@@ -44,32 +44,32 @@ fun getExceptionMessage(action: () -> Unit): String? {
 * @throws Throwable An exception occurred in the run method of the Runnable
 */
fun runAndWait(action: () -> Unit) {
    if (Platform.isFxApplicationThread()) {
        action()
    } else {
        val lock = ReentrantLock()
        lock.withLock {
            val doneCondition = lock.newCondition()
            var throwable: Throwable? = null
            Platform.runLater {
                lock.withLock {
                    try {
                        action()
                    } catch (e: Throwable) {
                        throwable = e
                    } finally {
                        doneCondition.signal()
                    }
                }
            }
            doneCondition.await()
            throwable?.let { throw it }
        }
    }
}

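// Usage sketch (hypothetical call site): a worker thread that must update UI state
// synchronously before continuing can write
//   runAndWait { progressLabel.text = "Done" }
// and is guaranteed that the JavaFX thread has applied the change (or rethrown its
// exception) before the next line runs.
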
fun getStackTrace(e: Exception): String {
    val stringWriter = StringWriter()
    e.printStackTrace(PrintWriter(stringWriter))
    return stringWriter.toString()
}

@@ -7,63 +7,63 @@ import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.api.Assertions.catchThrowable

class SpineJsonTest {
    @Nested
    inner class `file format 3_7` {
        @Test
        fun `correctly reads valid file`() {
            val path = Paths.get("src/test/data/jsonFiles/matt-3.7.json").toAbsolutePath()
            val spine = SpineJson(path)

            assertThat(spine.audioDirectoryPath)
                .isEqualTo(Paths.get("src/test/data/jsonFiles/audio").toAbsolutePath())
            assertThat(spine.frameRate).isEqualTo(30.0)
            assertThat(spine.slots).containsExactly("legs", "torso", "head", "mouth")
            assertThat(spine.guessMouthSlot()).isEqualTo("mouth")
            assertThat(spine.audioEvents).containsExactly(
                SpineJson.AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null),
                SpineJson.AudioEvent("2-it's-a-tool", "2-it's-a-tool.wav", null),
                SpineJson.AudioEvent("3-and-now-you-can", "3-and-now-you-can.wav", null)
            )
            assertThat(spine.getSlotAttachmentNames("mouth")).isEqualTo(('a'..'h').map { "mouth_$it" })
            assertThat(spine.animationNames).containsExactly("shake_head", "walk")
        }

        @Test
        fun `throws on file without nonessential data`() {
            val path = Paths.get("src/test/data/jsonFiles/matt-3.7-essential.json").toAbsolutePath()
            val throwable = catchThrowable { SpineJson(path) }
            assertThat(throwable)
                .hasMessage("JSON file is incomplete: Images path is missing. Make sure to check 'Nonessential data' when exporting.")
        }
    }

    @Nested
    inner class `file format 3_8` {
        @Test
        fun `correctly reads valid file`() {
            val path = Paths.get("src/test/data/jsonFiles/matt-3.8.json").toAbsolutePath()
            val spine = SpineJson(path)

            assertThat(spine.audioDirectoryPath)
                .isEqualTo(Paths.get("src/test/data/jsonFiles/audio").toAbsolutePath())
            assertThat(spine.frameRate).isEqualTo(30.0)
            assertThat(spine.slots).containsExactly("legs", "torso", "head", "mouth")
            assertThat(spine.guessMouthSlot()).isEqualTo("mouth")
            assertThat(spine.audioEvents).containsExactly(
                SpineJson.AudioEvent("1-have-you-heard", "1-have-you-heard.wav", null),
                SpineJson.AudioEvent("2-it's-a-tool", "2-it's-a-tool.wav", null),
                SpineJson.AudioEvent("3-and-now-you-can", "3-and-now-you-can.wav", null)
            )
            assertThat(spine.getSlotAttachmentNames("mouth")).isEqualTo(('a'..'h').map { "mouth_$it" })
            assertThat(spine.animationNames).containsExactly("shake_head", "walk")
        }

        @Test
        fun `throws on file without nonessential data`() {
            val path = Paths.get("src/test/data/jsonFiles/matt-3.8-essential.json").toAbsolutePath()
            val throwable = catchThrowable { SpineJson(path) }
            assertThat(throwable)
                .hasMessage("JSON file is incomplete: Images path is missing. Make sure to check 'Nonessential data' when exporting.")
        }
    }
}

@@ -1,14 +1,14 @@
cmake_minimum_required(VERSION 3.2)

set(vegasFiles
    "Debug Rhubarb.cs"
    "Debug Rhubarb.cs.config"
    "Import Rhubarb.cs"
    "Import Rhubarb.cs.config"
    "README.adoc"
)

install(
    FILES ${vegasFiles}
    DESTINATION "extras/MagixVegas"
)

@@ -17,329 +17,329 @@ using ScriptPortal.Vegas; // For older versions, this should say Sony.Vegas
using Region = ScriptPortal.Vegas.Region; // For older versions, this should say Sony.Vegas.Region

public class EntryPoint {
    public void FromVegas(Vegas vegas) {
        Config config = Config.Load();
        ImportDialog importDialog = new ImportDialog(config, delegate { Import(config, vegas); });
        importDialog.ShowDialog();
        config.Save();
    }

    private void Import(Config config, Vegas vegas) {
        Project project = vegas.Project;

        // Clear markers and regions
        if (config.ClearMarkers) {
            project.Markers.Clear();
        }
        if (config.ClearRegions) {
            project.Regions.Clear();
        }

        // Load log file
        if (!File.Exists(config.LogFile)) {
            throw new Exception("Log file does not exist.");
        }
        Dictionary<EventType, List<TimedEvent>> timedEvents = ParseLogFile(config);

        // Add markers/regions
        foreach (EventType eventType in timedEvents.Keys) {
            foreach (Visualization visualization in config.Visualizations) {
                if (visualization.EventType != eventType) continue;

                List<TimedEvent> filteredEvents = FilterEvents(timedEvents[eventType], visualization.Regex);
                foreach (TimedEvent timedEvent in filteredEvents) {
                    Timecode start = Timecode.FromSeconds(timedEvent.Start);
                    Timecode end = Timecode.FromSeconds(timedEvent.End);
                    Timecode length = end - start;
                    if (config.LoopRegionOnly) {
                        Timecode loopRegionStart = vegas.Transport.LoopRegionStart;
                        Timecode loopRegionEnd = loopRegionStart + vegas.Transport.LoopRegionLength;
                        if (start < loopRegionStart || start > loopRegionEnd || end < loopRegionStart || end > loopRegionEnd) {
                            continue;
                        }
                    }
                    switch (visualization.VisualizationType) {
                        case VisualizationType.Marker:
                            project.Markers.Add(new Marker(start, timedEvent.Value));
                            break;
                        case VisualizationType.Region:
                            project.Regions.Add(new Region(start, length, timedEvent.Value));
                            break;
                    }
                }
            }
        }
    }

    private List<TimedEvent> FilterEvents(List<TimedEvent> timedEvents, Regex filterRegex) {
        if (filterRegex == null) return timedEvents;

        StringBuilder stringBuilder = new StringBuilder();
        Dictionary<int, TimedEvent> timedEventsByCharPosition = new Dictionary<int, TimedEvent>();
        foreach (TimedEvent timedEvent in timedEvents) {
            string inAngleBrackets = "<" + timedEvent.Value + ">";
            for (int charPosition = stringBuilder.Length;
                charPosition < stringBuilder.Length + inAngleBrackets.Length;
                charPosition++) {
                timedEventsByCharPosition[charPosition] = timedEvent;
            }
            stringBuilder.Append(inAngleBrackets);
        }

        MatchCollection matches = filterRegex.Matches(stringBuilder.ToString());
        List<TimedEvent> result = new List<TimedEvent>();
        foreach (Match match in matches) {
            if (match.Length == 0) continue;

            for (int charPosition = match.Index; charPosition < match.Index + match.Length; charPosition++) {
                TimedEvent matchedEvent = timedEventsByCharPosition[charPosition];
                if (!result.Contains(matchedEvent)) {
                    result.Add(matchedEvent);
                }
            }
        }
        return result;
    }

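    // Worked example: events AO, T, IY are concatenated to the string "<AO><T><IY>".
    // The filter '<AO>(?=<T>)' matches the characters of "<AO>", which all map back to
    // the AO event, so only AO survives the filter. (Event values are assumed not to
    // contain angle brackets themselves.)
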
    private static Dictionary<EventType, List<TimedEvent>> ParseLogFile(Config config) {
        string[] lines = File.ReadAllLines(config.LogFile);
        Regex structuredLogLine = new Regex(@"##(\w+)\[(\d*\.\d*)-(\d*\.\d*)\]: (.*)");
        Dictionary<EventType, List<TimedEvent>> timedEvents = new Dictionary<EventType, List<TimedEvent>>();
        foreach (string line in lines) {
            Match match = structuredLogLine.Match(line);
            if (!match.Success) continue;

            EventType eventType = (EventType) Enum.Parse(typeof(EventType), match.Groups[1].Value, true);
            double start = double.Parse(match.Groups[2].Value, CultureInfo.InvariantCulture);
            double end = double.Parse(match.Groups[3].Value, CultureInfo.InvariantCulture);
            string value = match.Groups[4].Value;

            if (!timedEvents.ContainsKey(eventType)) {
                timedEvents[eventType] = new List<TimedEvent>();
            }
            timedEvents[eventType].Add(new TimedEvent(eventType, start, end, value));
        }
        return timedEvents;
    }
}

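// Worked example (assumed log format): a machine-readable log line such as
//   ##shape[0.25-0.31]: B
// matches the regex above and yields eventType = Shape, start = 0.25, end = 0.31,
// value = "B"; lines without the "##" prefix are ignored.
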
public class TimedEvent {
    private readonly EventType eventType;
    private readonly double start;
    private readonly double end;
    private readonly string value;

    public TimedEvent(EventType eventType, double start, double end, string value) {
        this.eventType = eventType;
        this.start = start;
        this.end = end;
        this.value = value;
    }

    public EventType EventType {
        get { return eventType; }
    }

    public double Start {
        get { return start; }
    }

    public double End {
        get { return end; }
    }

    public string Value {
        get { return value; }
    }
}

public class Config {
    private string logFile;
    private bool clearMarkers;
    private bool clearRegions;
    private bool loopRegionOnly;
    private List<Visualization> visualizations = new List<Visualization>();

    [DisplayName("Log File")]
    [Description("A log file generated by Rhubarb Lip Sync.")]
    [Editor(typeof(FileNameEditor), typeof(UITypeEditor))]
    public string LogFile {
        get { return logFile; }
        set { logFile = value; }
    }

    [DisplayName("Clear Markers")]
    [Description("Clear all markers in the current project.")]
    public bool ClearMarkers {
        get { return clearMarkers; }
        set { clearMarkers = value; }
    }

    [DisplayName("Clear Regions")]
    [Description("Clear all regions in the current project.")]
    public bool ClearRegions {
        get { return clearRegions; }
        set { clearRegions = value; }
    }

    [DisplayName("Loop region only")]
    [Description("Adds regions or markers to the loop region only.")]
    public bool LoopRegionOnly {
        get { return loopRegionOnly; }
        set { loopRegionOnly = value; }
    }

    [DisplayName("Visualization rules")]
    [Description("Specify how to visualize various log events.")]
    [Editor(typeof(CollectionEditor), typeof(UITypeEditor))]
    [XmlIgnore]
    public List<Visualization> Visualizations {
        get { return visualizations; }
        set { visualizations = value; }
    }

    // Serialized in place of Visualizations, which is marked [XmlIgnore].
    [Browsable(false)]
    public Visualization[] VisualizationArray {
        get { return visualizations.ToArray(); }
        set { visualizations = new List<Visualization>(value); }
    }

    private static string ConfigFileName {
        get {
            string folder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
            return Path.Combine(folder, "DebugRhubarbSettings.xml");
        }
    }

    public static Config Load() {
        try {
            XmlSerializer serializer = new XmlSerializer(typeof(Config));
            using (FileStream file = File.OpenRead(ConfigFileName)) {
                return (Config) serializer.Deserialize(file);
            }
        } catch (Exception) {
            // Fall back to defaults if the settings file is missing or unreadable.
            return new Config();
        }
    }

    public void Save() {
        XmlSerializer serializer = new XmlSerializer(typeof(Config));
        using (StreamWriter file = File.CreateText(ConfigFileName)) {
            XmlWriterSettings settings = new XmlWriterSettings();
            settings.Indent = true;
            settings.IndentChars = "\t";
            using (XmlWriter writer = XmlWriter.Create(file, settings)) {
                serializer.Serialize(writer, this);
            }
        }
    }
}

public class Visualization {
    private EventType eventType;
    private string regexString;
    private VisualizationType visualizationType = VisualizationType.Marker;

    [DisplayName("Event Type")]
    [Description("The type of event to visualize.")]
    public EventType EventType {
        get { return eventType; }
        set { eventType = value; }
    }

    [DisplayName("Regular Expression")]
    [Description("A regular expression used to filter events. Leave empty to disable filtering.\nInput is a string of events in angle brackets. Example: '<AO>(?=<T>)' finds every AO phone followed by a T phone.")]
    public string RegexString {
        get { return regexString; }
        set { regexString = value; }
    }

    [Browsable(false)]
    public Regex Regex {
        get { return string.IsNullOrEmpty(RegexString) ? null : new Regex(RegexString); }
    }

    [DisplayName("Visualization Type")]
    [Description("Specify how to visualize events.")]
    public VisualizationType VisualizationType {
        get { return visualizationType; }
        set { visualizationType = value; }
    }

    public override string ToString() {
        return string.Format("{0} -> {1}", EventType, VisualizationType);
    }
}

public enum EventType {
    Utterance,
    Word,
    RawPhone,
    Phone,
    Shape,
    Segment
}

public enum VisualizationType {
    None,
    Marker,
    Region
}

public delegate void ImportAction();

public class ImportDialog : Form {
    private readonly Config config;
    private readonly ImportAction import;

    public ImportDialog(Config config, ImportAction import) {
        this.config = config;
        this.import = import;
        SuspendLayout();
        InitializeComponent();
        ResumeLayout(false);
    }

    private void InitializeComponent() {
        // Configure dialog
        Text = "Debug Rhubarb";
        Size = new Size(600, 400);
        Font = new Font(Font.FontFamily, 10);

        // Add property grid
        PropertyGrid propertyGrid1 = new PropertyGrid();
        propertyGrid1.SelectedObject = config;
        Controls.Add(propertyGrid1);
        propertyGrid1.Dock = DockStyle.Fill;

        // Add button panel
        FlowLayoutPanel buttonPanel = new FlowLayoutPanel();
        buttonPanel.FlowDirection = FlowDirection.RightToLeft;
        buttonPanel.AutoSize = true;
        buttonPanel.Dock = DockStyle.Bottom;
        Controls.Add(buttonPanel);

        // Add Cancel button
        Button cancelButton1 = new Button();
        cancelButton1.Text = "Cancel";
        cancelButton1.DialogResult = DialogResult.Cancel;
        buttonPanel.Controls.Add(cancelButton1);
        CancelButton = cancelButton1;

        // Add OK button
        Button okButton1 = new Button();
        okButton1.Text = "OK";
        okButton1.Click += OkButtonClickedHandler;
        buttonPanel.Controls.Add(okButton1);
        AcceptButton = okButton1;
    }

    private void OkButtonClickedHandler(object sender, EventArgs e) {
        try {
            import();
            DialogResult = DialogResult.OK;
        } catch (Exception exception) {
            MessageBox.Show(exception.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
}

@@ -14,161 +14,161 @@ using System.Xml.Serialization;
using ScriptPortal.Vegas; // For older versions, this should say Sony.Vegas

public class EntryPoint {
    public void FromVegas(Vegas vegas) {
        Config config = Config.Load();
        ImportDialog importDialog = new ImportDialog(config, delegate { Import(config, vegas); });
        importDialog.ShowDialog();
        config.Save();
    }

    private void Import(Config config, Vegas vegas) {
        // Load XML file
        if (!File.Exists(config.XmlFile)) {
            throw new Exception("XML file does not exist.");
        }
        XmlDocument xmlDocument = new XmlDocument();
        xmlDocument.Load(config.XmlFile);

        // Determine image file names
        XmlNodeList mouthCueElements = xmlDocument.SelectNodes("//mouthCue");
        List<string> shapeNames = new List<string>();
        foreach (XmlElement mouthCueElement in mouthCueElements) {
            if (!shapeNames.Contains(mouthCueElement.InnerText)) {
                shapeNames.Add(mouthCueElement.InnerText);
            }
        }
        Dictionary<string, string> imageFileNames = GetImageFileNames(config.OneImageFile, shapeNames.ToArray());

        // Create new project
        bool promptSave = !config.DiscardChanges;
        bool showDialog = false;
        Project project = new Project(promptSave, showDialog);

        // Set frame size
        Bitmap testImage = new Bitmap(config.OneImageFile);
        project.Video.Width = testImage.Width;
        project.Video.Height = testImage.Height;

        // Set frame rate
        if (config.FrameRate < 0.1 || config.FrameRate > 100) {
            throw new Exception("Invalid frame rate.");
        }
        project.Video.FrameRate = config.FrameRate;

        // Set other video settings
        project.Video.FieldOrder = VideoFieldOrder.ProgressiveScan;
        project.Video.PixelAspectRatio = 1;

        // Add video track with images
        VideoTrack videoTrack = vegas.Project.AddVideoTrack();
        foreach (XmlElement mouthCueElement in mouthCueElements) {
            Timecode start = GetTimecode(mouthCueElement.Attributes["start"]);
            Timecode length = GetTimecode(mouthCueElement.Attributes["end"]) - start;
            VideoEvent videoEvent = videoTrack.AddVideoEvent(start, length);
            Media imageMedia = new Media(imageFileNames[mouthCueElement.InnerText]);
            videoEvent.AddTake(imageMedia.GetVideoStreamByIndex(0));
        }

        // Add audio track with original sound file
        AudioTrack audioTrack = vegas.Project.AddAudioTrack();
        Media audioMedia = new Media(xmlDocument.SelectSingleNode("//soundFile").InnerText);
        AudioEvent audioEvent = audioTrack.AddAudioEvent(new Timecode(0), audioMedia.Length);
        audioEvent.AddTake(audioMedia.GetAudioStreamByIndex(0));
    }

private static Timecode GetTimecode(XmlAttribute valueAttribute) {
|
||||
double seconds = Double.Parse(valueAttribute.Value, CultureInfo.InvariantCulture);
|
||||
return Timecode.FromSeconds(seconds);
|
||||
}
|
||||
private static Timecode GetTimecode(XmlAttribute valueAttribute) {
|
||||
double seconds = Double.Parse(valueAttribute.Value, CultureInfo.InvariantCulture);
|
||||
return Timecode.FromSeconds(seconds);
|
||||
}
|
||||
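
// Note: Double.Parse above uses CultureInfo.InvariantCulture so that a cue time such as
// "0.25" is read with a decimal point even on systems whose locale uses a comma separator.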

private Dictionary<string, string> GetImageFileNames(string oneImageFile, string[] shapeNames) {
    if (oneImageFile == null) {
        throw new Exception("Image file name not set.");
    }
    Regex nameRegex = new Regex(@"(?<=-)([^-]*)(?=\.[^.]+$)");
    if (!nameRegex.IsMatch(oneImageFile)) {
        throw new Exception("Image file name doesn't have expected format.");
    }
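
    // Illustration (the file name is a made-up example): for "mouth-A.png", the pattern
    // matches the shape name "A" between the last hyphen and the file extension, so
    // nameRegex.Replace("mouth-A.png", "B") yields "mouth-B.png".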
    Dictionary<string, string> result = new Dictionary<string, string>();
    foreach (string shapeName in shapeNames) {
        string imageFileName = nameRegex.Replace(oneImageFile, shapeName);
        if (!File.Exists(imageFileName)) {
            throw new Exception(string.Format("Image file '{0}' not found.", imageFileName));
        }
        result[shapeName] = imageFileName;
    }
    return result;
}

}

public class Config {

    private string xmlFile;
    private string oneImageFile;
    private double frameRate = 100;
    private bool discardChanges = false;

    [DisplayName("XML File")]
    [Description("An XML file generated by Rhubarb Lip Sync.")]
    [Editor(typeof(XmlFileEditor), typeof(UITypeEditor))]
    public string XmlFile {
        get { return xmlFile; }
        set { xmlFile = value; }
    }

    [DisplayName("One image file")]
    [Description("Any image file out of the set of image files representing the mouth chart.")]
    [Editor(typeof(FileNameEditor), typeof(UITypeEditor))]
    public string OneImageFile {
        get { return oneImageFile; }
        set { oneImageFile = value; }
    }

    [DisplayName("Frame rate")]
    [Description("The frame rate for the new project.")]
    public double FrameRate {
        get { return frameRate; }
        set { frameRate = value; }
    }

    [DisplayName("Discard Changes")]
    [Description("Discard all changes to the current project without prompting to save.")]
    public bool DiscardChanges {
        get { return discardChanges; }
        set { discardChanges = value; }
    }

    private static string ConfigFileName {
        get {
            string folder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
            return Path.Combine(folder, "ImportRhubarbSettings.xml");
        }
    }

    public static Config Load() {
        try {
            XmlSerializer serializer = new XmlSerializer(typeof(Config));
            using (FileStream file = File.OpenRead(ConfigFileName)) {
                return (Config) serializer.Deserialize(file);
            }
        } catch (Exception) {
            return new Config();
        }
    }

    public void Save() {
        XmlSerializer serializer = new XmlSerializer(typeof(Config));
        using (StreamWriter file = File.CreateText(ConfigFileName)) {
            XmlWriterSettings settings = new XmlWriterSettings();
            settings.Indent = true;
            settings.IndentChars = "\t";
            using (XmlWriter writer = XmlWriter.Create(file, settings)) {
                serializer.Serialize(writer, this);
            }
        }
    }

}
@@ -176,58 +176,58 @@ public delegate void ImportAction();

public class ImportDialog : Form {

    private readonly Config config;
    private readonly ImportAction import;

    public ImportDialog(Config config, ImportAction import) {
        this.config = config;
        this.import = import;
        SuspendLayout();
        InitializeComponent();
        ResumeLayout(false);
    }

    private void InitializeComponent() {
        // Configure dialog
        Text = "Import Rhubarb";
        Size = new Size(600, 400);
        Font = new Font(Font.FontFamily, 10);

        // Add property grid
        PropertyGrid propertyGrid1 = new PropertyGrid();
        propertyGrid1.SelectedObject = config;
        Controls.Add(propertyGrid1);
        propertyGrid1.Dock = DockStyle.Fill;

        // Add button panel
        FlowLayoutPanel buttonPanel = new FlowLayoutPanel();
        buttonPanel.FlowDirection = FlowDirection.RightToLeft;
        buttonPanel.AutoSize = true;
        buttonPanel.Dock = DockStyle.Bottom;
        Controls.Add(buttonPanel);

        // Add Cancel button
        Button cancelButton1 = new Button();
        cancelButton1.Text = "Cancel";
        cancelButton1.DialogResult = DialogResult.Cancel;
        buttonPanel.Controls.Add(cancelButton1);
        CancelButton = cancelButton1;

        // Add OK button
        Button okButton1 = new Button();
        okButton1.Text = "OK";
        okButton1.Click += OkButtonClickedHandler;
        buttonPanel.Controls.Add(okButton1);
        AcceptButton = okButton1;
    }
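
    // Cancel closes the dialog automatically through its DialogResult; OK instead runs
    // the import delegate first (see the handler below) and only reports DialogResult.OK
    // on success, so a failed import keeps the dialog open.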

    private void OkButtonClickedHandler(object sender, EventArgs e) {
        try {
            import();
            DialogResult = DialogResult.OK;
        } catch (Exception exception) {
            MessageBox.Show(exception.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }

}
@@ -11,37 +11,37 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Enable POSIX threads
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
endif()

# Use static run-time
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
    add_compile_options(/MT$<$<CONFIG:Debug>:d>)
endif()
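
# Note on the flag above: "$<$<CONFIG:Debug>:d>" is a generator expression that expands
# to "d" in Debug builds and to nothing otherwise, yielding /MTd (static debug CRT) in
# Debug and /MT (static release CRT) everywhere else.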

# Set global flags and define flags variables for later use
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
    set(enableWarningsFlags "-Wall;-Wextra")
    set(disableWarningsFlags "-w")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
    set(enableWarningsFlags "/W4")
    set(disableWarningsFlags "/W0")

    # Disable warning C4458: declaration of '...' hides class member.
    # I'm doing that on purpose.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4458")

    # Assume UTF-8 encoding for source files and encode string constants in UTF-8
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")
endif()

# Use UTF-8 throughout
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
    add_compile_options("/utf-8")
endif()

if(UNIX)
    add_definitions(-DHAVE_UNISTD_H)
endif()

# Enable project folders
@@ -69,9 +69,9 @@ set_target_properties(cppFormat PROPERTIES FOLDER lib)

FILE(GLOB_RECURSE sphinxbaseFiles "lib/sphinxbase-rev13216/src/libsphinxbase/*.c")
add_library(sphinxbase ${sphinxbaseFiles})
target_include_directories(sphinxbase SYSTEM PUBLIC
    "lib/sphinxbase-rev13216/include"
    "lib/sphinxbase-rev13216/src"
    "lib/sphinx_config"
)
target_compile_options(sphinxbase PRIVATE ${disableWarningsFlags})
target_compile_definitions(sphinxbase PUBLIC __SPHINXBASE_EXPORT_H__=1 SPHINXBASE_EXPORT=) # Compile as static lib

@@ -81,8 +81,8 @@ set_target_properties(sphinxbase PROPERTIES FOLDER lib)

FILE(GLOB pocketSphinxFiles "lib/pocketsphinx-rev13216/src/libpocketsphinx/*.c")
add_library(pocketSphinx ${pocketSphinxFiles})
target_include_directories(pocketSphinx SYSTEM PUBLIC
    "lib/pocketsphinx-rev13216/include"
    "lib/pocketsphinx-rev13216/src/libpocketsphinx"
)
target_link_libraries(pocketSphinx sphinxbase)
target_compile_options(pocketSphinx PRIVATE ${disableWarningsFlags})
@@ -108,23 +108,23 @@ include_directories(SYSTEM "lib/gsl/include")

# ... WebRTC
set(webRtcFiles
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/cross_correlation.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/division_operations.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/downsample_fast.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/energy.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/get_scaling_square.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/min_max_operations.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_48khz.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_by_2_internal.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/resample_fractional.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/spl_init.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/spl_inl.c
    lib/webrtc-8d2248ff/webrtc/common_audio/signal_processing/vector_scaling_operations.c
    lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_core.c
    lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_filterbank.c
    lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_gmm.c
    lib/webrtc-8d2248ff/webrtc/common_audio/vad/vad_sp.c
    lib/webrtc-8d2248ff/webrtc/common_audio/vad/webrtc_vad.c
)
add_library(webRtc ${webRtcFiles})
target_include_directories(webRtc SYSTEM PUBLIC "lib/webrtc-8d2248ff")

@@ -133,7 +133,7 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    target_compile_options(webRtc PRIVATE -pthread -lpthread)
endif()
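
# Presumably the vendored WebRTC sources key their POSIX-specific code paths off the
# WEBRTC_POSIX macro, which is why it is defined on every platform except Windows.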
if (NOT WIN32)
    target_compile_definitions(webRtc PRIVATE WEBRTC_POSIX)
endif()
set_target_properties(webRtc PROPERTIES FOLDER lib)

@@ -144,76 +144,76 @@ set_target_properties(whereami PROPERTIES FOLDER lib)

# ... Flite
set(fliteFiles
    lib/flite-1.4/lang/cmulex/cmu_lex.c
    lib/flite-1.4/lang/cmulex/cmu_lex_data.c
    lib/flite-1.4/lang/cmulex/cmu_lex_entries.c
    lib/flite-1.4/lang/cmulex/cmu_lts_model.c
    lib/flite-1.4/lang/cmulex/cmu_lts_rules.c
    lib/flite-1.4/lang/cmulex/cmu_postlex.c
    lib/flite-1.4/lang/usenglish/us_aswd.c
    lib/flite-1.4/lang/usenglish/us_dur_stats.c
    lib/flite-1.4/lang/usenglish/us_durz_cart.c
    lib/flite-1.4/lang/usenglish/us_expand.c
    lib/flite-1.4/lang/usenglish/us_f0_model.c
    lib/flite-1.4/lang/usenglish/us_f0lr.c
    lib/flite-1.4/lang/usenglish/us_ffeatures.c
    lib/flite-1.4/lang/usenglish/us_gpos.c
    lib/flite-1.4/lang/usenglish/us_int_accent_cart.c
    lib/flite-1.4/lang/usenglish/us_int_tone_cart.c
    lib/flite-1.4/lang/usenglish/us_nums_cart.c
    lib/flite-1.4/lang/usenglish/us_phoneset.c
    lib/flite-1.4/lang/usenglish/us_phrasing_cart.c
    lib/flite-1.4/lang/usenglish/us_pos_cart.c
    lib/flite-1.4/lang/usenglish/us_text.c
    lib/flite-1.4/lang/usenglish/usenglish.c
    lib/flite-1.4/src/audio/au_none.c
    lib/flite-1.4/src/audio/au_streaming.c
    lib/flite-1.4/src/audio/audio.c
    lib/flite-1.4/src/hrg/cst_ffeature.c
    lib/flite-1.4/src/hrg/cst_item.c
    lib/flite-1.4/src/hrg/cst_relation.c
    lib/flite-1.4/src/hrg/cst_utterance.c
    lib/flite-1.4/src/lexicon/cst_lexicon.c
    lib/flite-1.4/src/lexicon/cst_lts.c
    lib/flite-1.4/src/regex/cst_regex.c
    lib/flite-1.4/src/regex/regexp.c
    lib/flite-1.4/src/speech/cst_lpcres.c
    lib/flite-1.4/src/speech/cst_track.c
    lib/flite-1.4/src/speech/cst_wave.c
    lib/flite-1.4/src/speech/cst_wave_io.c
    lib/flite-1.4/src/speech/cst_wave_utils.c
    lib/flite-1.4/src/speech/rateconv.c
    lib/flite-1.4/src/stats/cst_cart.c
    lib/flite-1.4/src/synth/cst_ffeatures.c
    lib/flite-1.4/src/synth/cst_phoneset.c
    lib/flite-1.4/src/synth/cst_synth.c
    lib/flite-1.4/src/synth/cst_utt_utils.c
    lib/flite-1.4/src/synth/cst_voice.c
    lib/flite-1.4/src/synth/flite.c
    lib/flite-1.4/src/utils/cst_alloc.c
    lib/flite-1.4/src/utils/cst_endian.c
    lib/flite-1.4/src/utils/cst_error.c
    lib/flite-1.4/src/utils/cst_features.c
    lib/flite-1.4/src/utils/cst_file_stdio.c
    lib/flite-1.4/src/utils/cst_string.c
    lib/flite-1.4/src/utils/cst_tokenstream.c
    lib/flite-1.4/src/utils/cst_val.c
    lib/flite-1.4/src/utils/cst_val_const.c
    lib/flite-1.4/src/utils/cst_val_user.c
)
add_library(flite ${fliteFiles})
target_include_directories(flite SYSTEM PUBLIC
    "lib/flite-1.4/include"
    "lib/flite-1.4"
)
target_compile_options(flite PRIVATE ${disableWarningsFlags})
set_target_properties(flite PROPERTIES FOLDER lib)

# ... UTF8-CPP
add_library(utfcpp
    lib/header-only.c
    lib/utfcpp-2.3.5/source/utf8.h
)
target_include_directories(utfcpp SYSTEM PUBLIC "lib/utfcpp-2.3.5/source")
target_compile_options(utfcpp PRIVATE ${disableWarningsFlags})
@@ -221,8 +221,8 @@ set_target_properties(utfcpp PROPERTIES FOLDER lib)

# ... utf8proc
add_library(utf8proc
    lib/utf8proc-2.2.0/utf8proc.c
    lib/utf8proc-2.2.0/utf8proc.h
)
target_include_directories(utf8proc SYSTEM PUBLIC "lib/utf8proc-2.2.0")
target_compile_options(utf8proc PRIVATE ${disableWarningsFlags})
@@ -231,9 +231,9 @@ set_target_properties(utf8proc PROPERTIES FOLDER lib)

# ... Ogg
add_library(ogg
    lib/ogg-1.3.3/include/ogg/ogg.h
    lib/ogg-1.3.3/src/bitwise.c
    lib/ogg-1.3.3/src/framing.c
)
target_include_directories(ogg SYSTEM PUBLIC "lib/ogg-1.3.3/include")
target_compile_options(ogg PRIVATE ${disableWarningsFlags})
@@ -241,30 +241,30 @@ set_target_properties(ogg PROPERTIES FOLDER lib)

# ... Vorbis
add_library(vorbis
    lib/vorbis-1.3.6/include/vorbis/vorbisfile.h
    lib/vorbis-1.3.6/lib/bitrate.c
    lib/vorbis-1.3.6/lib/block.c
    lib/vorbis-1.3.6/lib/codebook.c
    lib/vorbis-1.3.6/lib/envelope.c
    lib/vorbis-1.3.6/lib/floor0.c
    lib/vorbis-1.3.6/lib/floor1.c
    lib/vorbis-1.3.6/lib/info.c
    lib/vorbis-1.3.6/lib/lpc.c
    lib/vorbis-1.3.6/lib/lsp.c
    lib/vorbis-1.3.6/lib/mapping0.c
    lib/vorbis-1.3.6/lib/mdct.c
    lib/vorbis-1.3.6/lib/psy.c
    lib/vorbis-1.3.6/lib/registry.c
    lib/vorbis-1.3.6/lib/res0.c
    lib/vorbis-1.3.6/lib/sharedbook.c
    lib/vorbis-1.3.6/lib/smallft.c
    lib/vorbis-1.3.6/lib/synthesis.c
    lib/vorbis-1.3.6/lib/vorbisfile.c
    lib/vorbis-1.3.6/lib/window.c
)
target_include_directories(vorbis SYSTEM PUBLIC "lib/vorbis-1.3.6/include")
target_link_libraries(vorbis
    ogg
)
target_compile_options(vorbis PRIVATE ${disableWarningsFlags})
set_target_properties(vorbis PROPERTIES FOLDER lib)
@@ -275,303 +275,303 @@ include_directories("src")

# ... rhubarb-animation
add_library(rhubarb-animation
    src/animation/animationRules.cpp
    src/animation/animationRules.h
    src/animation/mouthAnimation.cpp
    src/animation/mouthAnimation.h
    src/animation/pauseAnimation.cpp
    src/animation/pauseAnimation.h
    src/animation/roughAnimation.cpp
    src/animation/roughAnimation.h
    src/animation/ShapeRule.cpp
    src/animation/ShapeRule.h
    src/animation/shapeShorthands.h
    src/animation/staticSegments.cpp
    src/animation/staticSegments.h
    src/animation/targetShapeSet.cpp
    src/animation/targetShapeSet.h
    src/animation/timingOptimization.cpp
    src/animation/timingOptimization.h
    src/animation/tweening.cpp
    src/animation/tweening.h
)
target_include_directories(rhubarb-animation PRIVATE "src/animation")
target_link_libraries(rhubarb-animation
    rhubarb-core
    rhubarb-logging
    rhubarb-time
)

# ... rhubarb-audio
add_library(rhubarb-audio
    src/audio/AudioClip.cpp
    src/audio/AudioClip.h
    src/audio/audioFileReading.cpp
    src/audio/audioFileReading.h
    src/audio/AudioSegment.cpp
    src/audio/AudioSegment.h
    src/audio/DcOffset.cpp
    src/audio/DcOffset.h
    src/audio/ioTools.h
    src/audio/OggVorbisFileReader.cpp
    src/audio/OggVorbisFileReader.h
    src/audio/processing.cpp
    src/audio/processing.h
    src/audio/SampleRateConverter.cpp
    src/audio/SampleRateConverter.h
    src/audio/voiceActivityDetection.cpp
    src/audio/voiceActivityDetection.h
    src/audio/WaveFileReader.cpp
    src/audio/WaveFileReader.h
    src/audio/waveFileWriting.cpp
    src/audio/waveFileWriting.h
)
target_include_directories(rhubarb-audio PRIVATE "src/audio")
target_link_libraries(rhubarb-audio
    webRtc
    vorbis
    rhubarb-logging
    rhubarb-time
    rhubarb-tools
)

# ... rhubarb-core
configure_file(src/core/appInfo.cpp.in appInfo.cpp ESCAPE_QUOTES)
add_library(rhubarb-core
    ${CMAKE_CURRENT_BINARY_DIR}/appInfo.cpp
    src/core/appInfo.h
    src/core/Phone.cpp
    src/core/Phone.h
    src/core/Shape.cpp
    src/core/Shape.h
)
target_include_directories(rhubarb-core PRIVATE "src/core")
target_link_libraries(rhubarb-core
    rhubarb-tools
)

# ... rhubarb-exporters
add_library(rhubarb-exporters
    src/exporters/DatExporter.cpp
    src/exporters/DatExporter.h
    src/exporters/Exporter.h
    src/exporters/exporterTools.cpp
    src/exporters/exporterTools.h
    src/exporters/JsonExporter.cpp
    src/exporters/JsonExporter.h
    src/exporters/TsvExporter.cpp
    src/exporters/TsvExporter.h
    src/exporters/XmlExporter.cpp
    src/exporters/XmlExporter.h
)
target_include_directories(rhubarb-exporters PRIVATE "src/exporters")
target_link_libraries(rhubarb-exporters
    rhubarb-animation
    rhubarb-core
    rhubarb-time
)

# ... rhubarb-lib
add_library(rhubarb-lib
    src/lib/rhubarbLib.cpp
    src/lib/rhubarbLib.h
)
target_include_directories(rhubarb-lib PRIVATE "src/lib")
target_link_libraries(rhubarb-lib
    rhubarb-animation
    rhubarb-audio
    rhubarb-core
    rhubarb-recognition
    rhubarb-time
    rhubarb-tools
)

# ... rhubarb-logging
add_library(rhubarb-logging
    src/logging/Entry.cpp
    src/logging/Entry.h
    src/logging/Formatter.h
    src/logging/formatters.cpp
    src/logging/formatters.h
    src/logging/Level.cpp
    src/logging/Level.h
    src/logging/logging.cpp
    src/logging/logging.h
    src/logging/Sink.h
    src/logging/sinks.cpp
    src/logging/sinks.h
)
target_include_directories(rhubarb-logging PRIVATE "src/logging")
target_link_libraries(rhubarb-logging
    rhubarb-tools
)

# ... rhubarb-recognition
add_library(rhubarb-recognition
    src/recognition/g2p.cpp
    src/recognition/g2p.h
    src/recognition/languageModels.cpp
    src/recognition/languageModels.h
    src/recognition/PhoneticRecognizer.cpp
    src/recognition/PhoneticRecognizer.h
    src/recognition/PocketSphinxRecognizer.cpp
    src/recognition/PocketSphinxRecognizer.h
    src/recognition/pocketSphinxTools.cpp
    src/recognition/pocketSphinxTools.h
    src/recognition/Recognizer.h
    src/recognition/tokenization.cpp
    src/recognition/tokenization.h
)
target_include_directories(rhubarb-recognition PRIVATE "src/recognition")
target_link_libraries(rhubarb-recognition
    flite
    pocketSphinx
    rhubarb-audio
    rhubarb-core
    rhubarb-logging
)

# ... rhubarb-time
add_library(rhubarb-time
    src/time/BoundedTimeline.h
    src/time/centiseconds.cpp
    src/time/centiseconds.h
    src/time/ContinuousTimeline.h
    src/time/Timed.h
    src/time/timedLogging.h
    src/time/Timeline.h
    src/time/TimeRange.cpp
    src/time/TimeRange.h
)
target_include_directories(rhubarb-time PRIVATE "src/time")
target_link_libraries(rhubarb-time
    cppFormat
    rhubarb-logging
)

# ... rhubarb-tools
add_library(rhubarb-tools
    src/tools/array.h
    src/tools/EnumConverter.h
    src/tools/exceptions.cpp
    src/tools/exceptions.h
    src/tools/fileTools.cpp
    src/tools/fileTools.h
    src/tools/Lazy.h
    src/tools/nextCombination.h
    src/tools/NiceCmdLineOutput.cpp
    src/tools/NiceCmdLineOutput.h
    src/tools/ObjectPool.h
    src/tools/pairs.h
    src/tools/parallel.h
    src/tools/platformTools.cpp
    src/tools/platformTools.h
    src/tools/progress.cpp
    src/tools/progress.h
    src/tools/ProgressBar.cpp
    src/tools/ProgressBar.h
    src/tools/stringTools.cpp
    src/tools/stringTools.h
    src/tools/TablePrinter.cpp
    src/tools/TablePrinter.h
    src/tools/textFiles.cpp
    src/tools/textFiles.h
    src/tools/tools.cpp
    src/tools/tools.h
    src/tools/tupleHash.h
)
target_include_directories(rhubarb-tools PRIVATE "src/tools")
target_link_libraries(rhubarb-tools
    cppFormat
    whereami
    utfcpp
    utf8proc
)

# Define Rhubarb executable
add_executable(rhubarb
    src/rhubarb/main.cpp
    src/rhubarb/ExportFormat.cpp
    src/rhubarb/ExportFormat.h
    src/rhubarb/RecognizerType.cpp
    src/rhubarb/RecognizerType.h
    src/rhubarb/semanticEntries.cpp
    src/rhubarb/semanticEntries.h
    src/rhubarb/sinks.cpp
    src/rhubarb/sinks.h
)
target_include_directories(rhubarb PUBLIC "src/rhubarb")
target_link_libraries(rhubarb
    rhubarb-exporters
    rhubarb-lib
)
target_compile_options(rhubarb PUBLIC ${enableWarningsFlags})

# Define test project
#include_directories("${gtest_SOURCE_DIR}/include")
set(TEST_FILES
    tests/stringToolsTests.cpp
    tests/TimelineTests.cpp
    tests/BoundedTimelineTests.cpp
    tests/ContinuousTimelineTests.cpp
    tests/pairsTests.cpp
    tests/tokenizationTests.cpp
    tests/g2pTests.cpp
    tests/LazyTests.cpp
    tests/WaveFileReaderTests.cpp
)
add_executable(runTests ${TEST_FILES})
target_link_libraries(runTests
    gtest
    gmock
    gmock_main
    rhubarb-recognition
    rhubarb-time
    rhubarb-audio
)

# Copies the specified files in a post-build event, then installs them
function(copy_and_install sourceGlob relativeTargetDirectory)
    # Set `sourcePaths`
    file(GLOB sourcePaths "${sourceGlob}")

    foreach(sourcePath ${sourcePaths})
        if(NOT IS_DIRECTORY ${sourcePath})
            # Set `fileName`
            get_filename_component(fileName "${sourcePath}" NAME)

            # Copy file during build
            add_custom_command(TARGET rhubarb POST_BUILD
                COMMAND ${CMAKE_COMMAND} -E copy "${sourcePath}" "$<TARGET_FILE_DIR:rhubarb>/${relativeTargetDirectory}/${fileName}"
                COMMENT "Creating '${relativeTargetDirectory}/${fileName}'"
            )

            # Install file
            install(
                FILES "${sourcePath}"
                DESTINATION "${relativeTargetDirectory}"
            )
        endif()
    endforeach()
endfunction()
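
# In both copy helpers, "$<TARGET_FILE_DIR:rhubarb>" is a generator expression that
# expands at build time to the directory containing the built rhubarb executable, so
# the copied files end up next to the binary for any configuration.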

# Copies the specified files in a post-build event
function(copy sourceGlob relativeTargetDirectory)
    # Set `sourcePaths`
    file(GLOB sourcePaths "${sourceGlob}")

    foreach(sourcePath ${sourcePaths})
        if(NOT IS_DIRECTORY ${sourcePath})
            # Set `fileName`
            get_filename_component(fileName "${sourcePath}" NAME)

            # Copy file during build
            add_custom_command(TARGET rhubarb POST_BUILD
                COMMAND ${CMAKE_COMMAND} -E copy "${sourcePath}" "$<TARGET_FILE_DIR:rhubarb>/${relativeTargetDirectory}/${fileName}"
                COMMENT "Creating '${relativeTargetDirectory}/${fileName}'"
            )
        endif()
    endforeach()
endfunction()

copy_and_install("lib/pocketsphinx-rev13216/model/en-us/*" "res/sphinx")
@@ -580,7 +580,7 @@ copy_and_install("lib/cmusphinx-en-us-5.2/*" "res/sphinx/acoustic-model")
copy_and_install("tests/resources/*" "tests/resources")

install(
    TARGETS rhubarb
    RUNTIME
    DESTINATION .
)
@@ -8,79 +8,79 @@ using boost::adaptors::transformed;
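
// Widens a BoundedTimeline<T> into a ContinuousTimeline<optional<T>> whose gaps carry
// boost::none, so that iterating it also visits the silent stretches between values.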
template<typename T, bool AutoJoin>
ContinuousTimeline<optional<T>, AutoJoin> boundedTimelinetoContinuousOptional(
    const BoundedTimeline<T, AutoJoin>& timeline
) {
    return {
        timeline.getRange(),
        boost::none,
        timeline | transformed([](const Timed<T>& timedValue) {
            return Timed<optional<T>>(timedValue.getTimeRange(), timedValue.getValue());
        })
    };
}

ShapeRule::ShapeRule(
    ShapeSet shapeSet,
    optional<Phone> phone,
    TimeRange phoneTiming
) :
    shapeSet(std::move(shapeSet)),
    phone(std::move(phone)),
    phoneTiming(phoneTiming)
{}

ShapeRule ShapeRule::getInvalid() {
    return { {}, boost::none, { 0_cs, 0_cs } };
}

bool ShapeRule::operator==(const ShapeRule& rhs) const {
    return shapeSet == rhs.shapeSet && phone == rhs.phone && phoneTiming == rhs.phoneTiming;
}

bool ShapeRule::operator!=(const ShapeRule& rhs) const {
    return !operator==(rhs);
}

bool ShapeRule::operator<(const ShapeRule& rhs) const {
    // Compare the fields lexicographically; a plain chain of `||` comparisons would not
    // form a strict weak ordering.
    if (shapeSet != rhs.shapeSet) return shapeSet < rhs.shapeSet;
    if (phone != rhs.phone) return phone < rhs.phone;
    if (phoneTiming.getStart() != rhs.phoneTiming.getStart()) {
        return phoneTiming.getStart() < rhs.phoneTiming.getStart();
    }
    return phoneTiming.getEnd() < rhs.phoneTiming.getEnd();
}

ContinuousTimeline<ShapeRule> getShapeRules(const BoundedTimeline<Phone>& phones) {
    // Convert to continuous timeline so that silences aren't skipped when iterating
    auto continuousPhones = boundedTimelinetoContinuousOptional(phones);

    // Create timeline of shape rules
    ContinuousTimeline<ShapeRule> shapeRules(
        phones.getRange(),
        { { Shape::X }, boost::none, { 0_cs, 0_cs } }
    );
    centiseconds previousDuration = 0_cs;
    for (const auto& timedPhone : continuousPhones) {
        optional<Phone> phone = timedPhone.getValue();
        const centiseconds duration = timedPhone.getDuration();

        if (phone) {
            // Animate one phone
            Timeline<ShapeSet> phoneShapeSets = getShapeSets(*phone, duration, previousDuration);

            // Result timing is relative to phone. Make absolute.
            phoneShapeSets.shift(timedPhone.getStart());

            // Copy to timeline.
            // Later shape sets may overwrite earlier ones if overlapping.
            for (const auto& timedShapeSet : phoneShapeSets) {
                shapeRules.set(
                    timedShapeSet.getTimeRange(),
                    ShapeRule(timedShapeSet.getValue(), phone, timedPhone.getTimeRange())
                );
            }
        }

        previousDuration = duration;
    }

    return shapeRules;
}
@@ -7,17 +7,17 @@
#include "time/TimeRange.h"

struct ShapeRule {
    ShapeSet shapeSet;
    boost::optional<Phone> phone;
    TimeRange phoneTiming;

    ShapeRule(ShapeSet shapeSet, boost::optional<Phone> phone, TimeRange phoneTiming);

    static ShapeRule getInvalid();

    bool operator==(const ShapeRule&) const;
    bool operator!=(const ShapeRule&) const;
    bool operator<(const ShapeRule&) const;
};

// Returns shape rules for an entire timeline of phones.
@@ -14,153 +14,153 @@ using std::map;
constexpr size_t shapeValueCount = static_cast<size_t>(Shape::EndSentinel);

Shape getBasicShape(Shape shape) {
    static constexpr array<Shape, shapeValueCount> basicShapes =
        make_array(A, B, C, D, E, F, A, C, A);
    return basicShapes[static_cast<size_t>(shape)];
}

Shape relax(Shape shape) {
    static constexpr array<Shape, shapeValueCount> relaxedShapes =
        make_array(A, B, B, C, C, B, X, B, X);
    return relaxedShapes[static_cast<size_t>(shape)];
}
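
// Worked examples from the tables above: getBasicShape(H) == C, relax(D) == C,
// and relax(F) == B (shapes are indexed in the order A through F, then G, H, X).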

Shape getClosestShape(Shape reference, ShapeSet shapes) {
    if (shapes.empty()) {
        throw std::invalid_argument("Cannot select from empty set of shapes.");
    }

    // A matrix that for each shape contains all shapes in ascending order of effort required to
    // move to them
    constexpr static array<array<Shape, shapeValueCount>, shapeValueCount> effortMatrix = make_array(
        /* A */ make_array(A, X, G, B, C, H, E, D, F),
        /* B */ make_array(B, G, A, X, C, H, E, D, F),
        /* C */ make_array(C, H, B, G, D, A, X, E, F),
        /* D */ make_array(D, C, H, B, G, A, X, E, F),
        /* E */ make_array(E, C, H, B, G, A, X, D, F),
        /* F */ make_array(F, B, G, A, X, C, H, E, D),
        /* G */ make_array(G, A, B, C, H, X, E, D, F),
        /* H */ make_array(H, C, B, G, D, A, X, E, F), // Like C
        /* X */ make_array(X, A, G, B, C, H, E, D, F) // Like A
    );

    auto& closestShapes = effortMatrix.at(static_cast<size_t>(reference));
    for (Shape closestShape : closestShapes) {
        if (shapes.find(closestShape) != shapes.end()) {
            return closestShape;
        }
    }

    throw std::invalid_argument("Unable to find closest shape.");
}
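
// Example: getClosestShape(D, { B, F }) scans row D above, (D, C, H, B, G, A, X, E, F),
// and returns B, the first entry of that row contained in the given set.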

optional<pair<Shape, TweenTiming>> getTween(Shape first, Shape second) {
    // Note that most of the following rules work in one direction only.
    // That's because in animation, the mouth should usually "pop" open without inbetweens,
    // then close slowly.
    static const map<pair<Shape, Shape>, pair<Shape, TweenTiming>> lookup {
        { { D, A }, { C, TweenTiming::Early } },
        { { D, B }, { C, TweenTiming::Centered } },
        { { D, G }, { C, TweenTiming::Early } },
        { { D, X }, { C, TweenTiming::Late } },
        { { C, F }, { E, TweenTiming::Centered } }, { { F, C }, { E, TweenTiming::Centered } },
        { { D, F }, { E, TweenTiming::Centered } },
        { { H, F }, { E, TweenTiming::Late } }, { { F, H }, { E, TweenTiming::Early } }
    };
    const auto it = lookup.find({ first, second });
    return it != lookup.end() ? it->second : optional<pair<Shape, TweenTiming>>();
}
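
// Example: getTween(D, A) yields the tween { C, TweenTiming::Early }, while the reverse
// direction getTween(A, D) has no entry and returns an empty optional.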

Timeline<ShapeSet> getShapeSets(Phone phone, centiseconds duration, centiseconds previousDuration) {
    // Returns a timeline with a single shape set
    const auto single = [duration](ShapeSet value) {
        return Timeline<ShapeSet> { { 0_cs, duration, value } };
    };

    // Returns a timeline with two shape sets, timed as a diphthong
    const auto diphthong = [duration](ShapeSet first, ShapeSet second) {
        const centiseconds firstDuration = duration_cast<centiseconds>(duration * 0.6);
        return Timeline<ShapeSet> {
            { 0_cs, firstDuration, first },
            { firstDuration, duration, second }
        };
    };
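
    // Example: diphthong({ C }, { B }) over a 10_cs phone yields { C } for the first
    // 6_cs and { B } for the remaining 4_cs, following the 60/40 split above.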

    // Returns a timeline with two shape sets, timed as a plosive
    const auto plosive = [duration, previousDuration](ShapeSet first, ShapeSet second) {
        const centiseconds minOcclusionDuration = 4_cs;
        const centiseconds maxOcclusionDuration = 12_cs;
        const centiseconds occlusionDuration =
            clamp(previousDuration / 2, minOcclusionDuration, maxOcclusionDuration);
        return Timeline<ShapeSet> {
            { -occlusionDuration, 0_cs, first },
            { 0_cs, duration, second }
        };
    };
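
    // Example: with previousDuration == 6_cs, the occlusion lasts
    // clamp(3_cs, 4_cs, 12_cs) == 4_cs, so the closure shape starts 4_cs before the
    // phone proper and the release shape covers the phone itself.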

    // Returns the result of `getShapeSets` when called with identical arguments
    // except for a different phone.
    const auto like = [duration, previousDuration](Phone referencePhone) {
        return getShapeSets(referencePhone, duration, previousDuration);
    };

    static const ShapeSet any { A, B, C, D, E, F, G, H, X };
    static const ShapeSet anyOpen { B, C, D, E, F, G, H };

    // Note:
    // The shapes {A, B, G, X} are very similar. You should avoid regular shape sets containing more
    // than one of these shapes.
    // Otherwise, the resulting shape may be more or less random and might not be a good fit.
    // As an exception, a very flexible rule may contain *all* these shapes.

    switch (phone) {
        case Phone::AO: return single({ E });
        case Phone::AA: return single({ D });
        case Phone::IY: return single({ B });
        case Phone::UW: return single({ F });
        case Phone::EH: return single({ C });
        case Phone::IH: return single({ B });
        case Phone::UH: return single({ F });
        case Phone::AH: return duration < 20_cs ? single({ C }) : single({ D });
        case Phone::Schwa: return single({ B, C });
        case Phone::AE: return single({ C });
        case Phone::EY: return diphthong({ C }, { B });
        case Phone::AY: return duration < 20_cs ? diphthong({ C }, { B }) : diphthong({ D }, { B });
        case Phone::OW: return diphthong({ E }, { F });
        case Phone::AW: return duration < 30_cs ? diphthong({ C }, { E }) : diphthong({ D }, { E });
        case Phone::OY: return diphthong({ E }, { B });
        case Phone::ER: return duration < 7_cs ? like(Phone::Schwa) : single({ E });

        case Phone::P:
        case Phone::B: return plosive({ A }, any);
        case Phone::T:
        case Phone::D: return plosive({ B, F }, anyOpen);
        case Phone::K:
        case Phone::G: return plosive({ B, C, E, F, H }, anyOpen);
        case Phone::CH:
        case Phone::JH: return single({ B, F });
        case Phone::F:
        case Phone::V: return single({ G });
        case Phone::TH:
        case Phone::DH:
        case Phone::S:
        case Phone::Z:
        case Phone::SH:
        case Phone::ZH: return single({ B, F });
        case Phone::HH: return single(any); // think "m-hm"
        case Phone::M: return single({ A });
        case Phone::N: return single({ B, C, F, H });
|
||||
case Phone::NG: return single({ B, C, E, F });
|
||||
case Phone::L: return duration < 20_cs ? single({ B, E, F, H }) : single({ H });
|
||||
case Phone::R: return single({ B, E, F });
|
||||
case Phone::Y: return single({ B, C, F });
|
||||
case Phone::W: return single({ F });
|
||||
case Phone::P:
|
||||
case Phone::B: return plosive({ A }, any);
|
||||
case Phone::T:
|
||||
case Phone::D: return plosive({ B, F }, anyOpen);
|
||||
case Phone::K:
|
||||
case Phone::G: return plosive({ B, C, E, F, H }, anyOpen);
|
||||
case Phone::CH:
|
||||
case Phone::JH: return single({ B, F });
|
||||
case Phone::F:
|
||||
case Phone::V: return single({ G });
|
||||
case Phone::TH:
|
||||
case Phone::DH:
|
||||
case Phone::S:
|
||||
case Phone::Z:
|
||||
case Phone::SH:
|
||||
case Phone::ZH: return single({ B, F });
|
||||
case Phone::HH: return single(any); // think "m-hm"
|
||||
case Phone::M: return single({ A });
|
||||
case Phone::N: return single({ B, C, F, H });
|
||||
case Phone::NG: return single({ B, C, E, F });
|
||||
case Phone::L: return duration < 20_cs ? single({ B, E, F, H }) : single({ H });
|
||||
case Phone::R: return single({ B, E, F });
|
||||
case Phone::Y: return single({ B, C, F });
|
||||
case Phone::W: return single({ F });
|
||||
|
||||
case Phone::Breath:
|
||||
case Phone::Cough:
|
||||
case Phone::Smack: return single({ C });
|
||||
case Phone::Noise: return single({ B });
|
||||
case Phone::Breath:
|
||||
case Phone::Cough:
|
||||
case Phone::Smack: return single({ C });
|
||||
case Phone::Noise: return single({ B });
|
||||
|
||||
default: throw std::invalid_argument("Unexpected phone.");
|
||||
}
|
||||
default: throw std::invalid_argument("Unexpected phone.");
|
||||
}
|
||||
}
|
||||
|
|
|
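
For a concrete sense of the diphthong split above: multiplying an integer-backed duration by 0.6 and casting back truncates to whole centiseconds, so a 25 cs phone gets a 15 cs first shape and a 10 cs second. A minimal sketch, assuming centiseconds is a std::chrono duration as in this codebase:

#include <chrono>
#include <iostream>

using centiseconds = std::chrono::duration<long long, std::centi>;

int main() {
    const centiseconds duration(25);
    // Multiplying by a double yields a double-based duration; the cast
    // truncates back to whole centiseconds.
    const auto firstDuration = std::chrono::duration_cast<centiseconds>(duration * 0.6);
    const auto secondDuration = duration - firstDuration;
    std::cout << firstDuration.count() << " " << secondDuration.count() << "\n"; // 15 10
}
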
@@ -16,14 +16,14 @@ Shape getClosestShape(Shape reference, ShapeSet shapes);

// Indicates how to time a tween between two mouth shapes
enum class TweenTiming {
    // Tween should end at the original transition
    Early,

    // Tween should overlap both original mouth shapes equally
    Centered,

    // Tween should begin at the original transition
    Late
};

// Returns the tween shape and timing to use to transition between the specified two mouth shapes.

@@ -9,33 +9,33 @@
#include "staticSegments.h"

JoiningContinuousTimeline<Shape> animate(
    const BoundedTimeline<Phone>& phones,
    const ShapeSet& targetShapeSet
) {
    // Create timeline of shape rules
    ContinuousTimeline<ShapeRule> shapeRules = getShapeRules(phones);

    // Modify shape rules to only contain allowed shapes -- plus X, which is needed for pauses and
    // will be replaced later
    ShapeSet targetShapeSetPlusX = targetShapeSet;
    targetShapeSetPlusX.insert(Shape::X);
    shapeRules = convertToTargetShapeSet(shapeRules, targetShapeSetPlusX);

    // Animate in multiple steps
    const auto performMainAnimationSteps = [&targetShapeSet](const auto& shapeRules) {
        JoiningContinuousTimeline<Shape> animation = animateRough(shapeRules);
        animation = optimizeTiming(animation);
        animation = animatePauses(animation);
        animation = insertTweens(animation);
        animation = convertToTargetShapeSet(animation, targetShapeSet);
        return animation;
    };
    const JoiningContinuousTimeline<Shape> result =
        avoidStaticSegments(shapeRules, performMainAnimationSteps);

    for (const auto& timedShape : result) {
        logTimedEvent("shape", timedShape);
    }

    return result;
}
@@ -6,6 +6,6 @@
#include "targetShapeSet.h"

JoiningContinuousTimeline<Shape> animate(
    const BoundedTimeline<Phone>& phones,
    const ShapeSet& targetShapeSet
);
@@ -2,47 +2,47 @@
#include "animationRules.h"

Shape getPauseShape(Shape previous, Shape next, centiseconds duration) {
    // For very short pauses: Just hold the previous shape
    if (duration < 12_cs) {
        return previous;
    }

    // For short pauses: Relax the mouth
    if (duration <= 35_cs) {
        // It looks odd if the pause shape is identical to the next shape.
        // Make sure we find a relaxed shape that's different from the next one.
        for (Shape currentRelaxedShape = previous;;) {
            const Shape nextRelaxedShape = relax(currentRelaxedShape);
            if (nextRelaxedShape != next) {
                return nextRelaxedShape;
            }
            if (nextRelaxedShape == currentRelaxedShape) {
                // We're going in circles
                break;
            }
            currentRelaxedShape = nextRelaxedShape;
        }
    }

    // For longer pauses: Close the mouth
    return Shape::X;
}
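
The loop above is a cycle-safe fixed-point search: keep relaxing until the result differs from the next shape, and give up once relaxing no longer changes anything. The same pattern in isolation, with an invented toy relax() standing in for the project's shape-relaxation rule:

#include <optional>

int relax(int shape) { return shape > 0 ? shape - 1 : 0; } // toy stand-in

std::optional<int> relaxedShapeOtherThan(int previous, int next) {
    for (int current = previous;;) {
        const int relaxed = relax(current);
        if (relaxed != next) return relaxed;
        if (relaxed == current) return std::nullopt; // fixed point: give up
        current = relaxed;
    }
}
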
JoiningContinuousTimeline<Shape> animatePauses(const JoiningContinuousTimeline<Shape>& animation) {
    JoiningContinuousTimeline<Shape> result(animation);

    for_each_adjacent(
        animation.begin(),
        animation.end(),
        [&](const Timed<Shape>& previous, const Timed<Shape>& pause, const Timed<Shape>& next) {
            if (pause.getValue() != Shape::X) return;

            result.set(
                pause.getTimeRange(),
                getPauseShape(previous.getValue(), next.getValue(), pause.getDuration())
            );
        }
    );

    return result;
}
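
for_each_adjacent() is a project helper, not a standard algorithm. A minimal stand-in for its two-argument flavor, assuming the three-argument flavor used above works analogously on consecutive triples:

#include <iterator>

template <typename It, typename F>
void forEachAdjacent(It first, It last, F f) {
    if (first == last) return;
    // Call f for every pair of neighboring elements.
    for (It second = std::next(first); second != last; ++first, ++second) {
        f(*first, *second);
    }
}
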
@@ -13,48 +13,48 @@
// So whenever we come across a one-shape vowel, we backtrack a little, spreading that shape to
// the left.
JoiningContinuousTimeline<Shape> animateRough(const ContinuousTimeline<ShapeRule>& shapeRules) {
    JoiningContinuousTimeline<Shape> animation(shapeRules.getRange(), Shape::X);

    Shape referenceShape = Shape::X;
    // Animate forwards
    centiseconds lastAnticipatedShapeStart = -1_cs;
    for (auto it = shapeRules.begin(); it != shapeRules.end(); ++it) {
        const ShapeRule shapeRule = it->getValue();
        const Shape shape = getClosestShape(referenceShape, shapeRule.shapeSet);
        animation.set(it->getTimeRange(), shape);
        const bool anticipateShape = shapeRule.phone
            && isVowel(*shapeRule.phone)
            && shapeRule.shapeSet.size() == 1;
        if (anticipateShape) {
            // Animate backwards a little
            const Shape anticipatedShape = shape;
            const centiseconds anticipatedShapeStart = it->getStart();
            referenceShape = anticipatedShape;
            for (auto reverseIt = it; reverseIt != shapeRules.begin();) {
                --reverseIt;

                // Make sure we haven't animated too far back
                centiseconds anticipatingShapeStart = reverseIt->getStart();
                if (anticipatingShapeStart == lastAnticipatedShapeStart) break;
                const centiseconds maxAnticipationDuration = 20_cs;
                const centiseconds anticipationDuration =
                    anticipatedShapeStart - anticipatingShapeStart;
                if (anticipationDuration > maxAnticipationDuration) break;

                // Overwrite forward-animated shape with backwards-animated, anticipating shape
                const Shape anticipatingShape =
                    getClosestShape(referenceShape, reverseIt->getValue().shapeSet);
                animation.set(reverseIt->getTimeRange(), anticipatingShape);

                // Make sure the new, backwards-animated shape still resembles the anticipated shape
                if (getBasicShape(anticipatingShape) != getBasicShape(anticipatedShape)) break;

                referenceShape = anticipatingShape;
            }
            lastAnticipatedShapeStart = anticipatedShapeStart;
        }
        referenceShape = anticipateShape ? shape : relax(shape);
    }

    return animation;
}
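
Stripped of the shape logic, the backtracking above is a reverse walk with a time budget: step left from the anchor element and stop once the span exceeds a cap. A sketch over invented toy data:

#include <cstddef>
#include <vector>

struct Seg { int start; }; // toy stand-in for a timed shape rule

// Returns how many elements left of index i fall within maxSpan of segs[i].
std::size_t backtrackCount(const std::vector<Seg>& segs, std::size_t i, int maxSpan) {
    std::size_t count = 0;
    const int anchor = segs[i].start;
    while (i > 0) {
        --i;
        if (anchor - segs[i].start > maxSpan) break; // animated too far back
        ++count; // the real code overwrites the shape here
    }
    return count;
}
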
@@ -6,71 +6,71 @@
using std::vector;

int getSyllableCount(const ContinuousTimeline<ShapeRule>& shapeRules, TimeRange timeRange) {
    if (timeRange.empty()) return 0;

    const auto begin = shapeRules.find(timeRange.getStart());
    const auto end = std::next(shapeRules.find(timeRange.getEnd(), FindMode::SampleLeft));

    // Treat every vowel as one syllable
    int syllableCount = 0;
    for (auto it = begin; it != end; ++it) {
        const ShapeRule shapeRule = it->getValue();

        // Disregard phones that are mostly outside the specified time range.
        const centiseconds phoneMiddle = shapeRule.phoneTiming.getMiddle();
        if (phoneMiddle < timeRange.getStart() || phoneMiddle >= timeRange.getEnd()) continue;

        auto phone = shapeRule.phone;
        if (phone && isVowel(*phone)) {
            ++syllableCount;
        }
    }

    return syllableCount;
}
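
The midpoint test above is worth isolating: a phone counts toward a range only if its temporal middle lies inside it, which keeps border-straddling phones from being counted by two adjacent ranges. A self-contained sketch with invented integer timestamps:

#include <vector>

struct TimedPhone { int start, end; bool isVowel; }; // toy stand-in

int countSyllables(const std::vector<TimedPhone>& phones, int rangeStart, int rangeEnd) {
    int count = 0;
    for (const TimedPhone& phone : phones) {
        const int middle = (phone.start + phone.end) / 2;
        // Disregard phones that are mostly outside the range
        if (middle < rangeStart || middle >= rangeEnd) continue;
        if (phone.isVowel) ++count;
    }
    return count;
}
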
// A static segment is a prolonged period during which the mouth shape doesn't change
vector<TimeRange> getStaticSegments(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const JoiningContinuousTimeline<Shape>& animation
) {
    // A static segment must contain a certain number of syllables to look distractingly static
    const int minSyllableCount = 3;
    // It must also have a minimum duration. The same number of syllables in fast speech usually
    // looks good.
    const centiseconds minDuration = 75_cs;

    vector<TimeRange> result;
    for (const auto& timedShape : animation) {
        const TimeRange timeRange = timedShape.getTimeRange();
        const bool isStatic = timeRange.getDuration() >= minDuration
            && getSyllableCount(shapeRules, timeRange) >= minSyllableCount;
        if (isStatic) {
            result.push_back(timeRange);
        }
    }

    return result;
}

// Indicates whether this shape rule can potentially be replaced by a modified version that breaks
// up long static segments
bool canChange(const ShapeRule& rule) {
    return rule.phone && isVowel(*rule.phone) && rule.shapeSet.size() == 1;
}

// Returns a new shape rule that is identical to the specified one, except that it leads to a
// slightly different visualization
ShapeRule getChangedShapeRule(const ShapeRule& rule) {
    assert(canChange(rule));

    ShapeRule result(rule);
    // So far, I've only encountered B as a static shape.
    // If there is ever a problem with another static shape, this function can easily be extended.
    if (rule.shapeSet == ShapeSet { Shape::B }) {
        result.shapeSet = { Shape::C };
    }
    return result;
}

// Contains the start times of all rules to be changed
@@ -78,162 +78,162 @@ using RuleChanges = vector<centiseconds>;

// Replaces the indicated shape rules with slightly different ones, breaking up long static segments
ContinuousTimeline<ShapeRule> applyChanges(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const RuleChanges& changes
) {
    ContinuousTimeline<ShapeRule> result(shapeRules);
    for (centiseconds changedRuleStart : changes) {
        const Timed<ShapeRule> timedOriginalRule = *shapeRules.get(changedRuleStart);
        const ShapeRule changedRule = getChangedShapeRule(timedOriginalRule.getValue());
        result.set(timedOriginalRule.getTimeRange(), changedRule);
    }
    return result;
}

class RuleChangeScenario {
public:
    RuleChangeScenario(
        const ContinuousTimeline<ShapeRule>& originalRules,
        const RuleChanges& changes,
        const AnimationFunction& animate
    ) :
        changedRules(applyChanges(originalRules, changes)),
        animation(animate(changedRules)),
        staticSegments(getStaticSegments(changedRules, animation))
    {}

    bool isBetterThan(const RuleChangeScenario& rhs) const {
        // We want zero static segments
        if (staticSegments.empty() && !rhs.staticSegments.empty()) return true;

        // Short shapes are better than long ones. Minimize sum-of-squares.
        if (getSumOfShapeDurationSquares() < rhs.getSumOfShapeDurationSquares()) return true;

        return false;
    }

    int getStaticSegmentCount() const {
        return static_cast<int>(staticSegments.size());
    }

    ContinuousTimeline<ShapeRule> getChangedRules() const {
        return changedRules;
    }

private:
    ContinuousTimeline<ShapeRule> changedRules;
    JoiningContinuousTimeline<Shape> animation;
    vector<TimeRange> staticSegments;

    double getSumOfShapeDurationSquares() const {
        return std::accumulate(
            animation.begin(),
            animation.end(),
            0.0,
            [](const double sum, const Timed<Shape>& timedShape) {
                const double duration = std::chrono::duration_cast<std::chrono::duration<double>>(
                    timedShape.getDuration()
                ).count();
                return sum + duration * duration;
            }
        );
    }
};
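
The sum-of-squares metric above encodes "prefer many short shapes over few long ones": for a fixed total duration, the sum of squared durations is smallest when the durations are equal and grows as they become lopsided. Reduced to its core:

#include <numeric>
#include <vector>

double sumOfDurationSquares(const std::vector<double>& durations) {
    return std::accumulate(
        durations.begin(), durations.end(), 0.0,
        [](double sum, double duration) { return sum + duration * duration; }
    );
}
// For the same total of 6: { 2, 2, 2 } scores 12, while { 5, 1 } scores 26.
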
RuleChanges getPossibleRuleChanges(const ContinuousTimeline<ShapeRule>& shapeRules) {
    RuleChanges result;
    for (auto it = shapeRules.begin(); it != shapeRules.end(); ++it) {
        const ShapeRule rule = it->getValue();
        if (canChange(rule)) {
            result.push_back(it->getStart());
        }
    }
    return result;
}

ContinuousTimeline<ShapeRule> fixStaticSegmentRules(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const AnimationFunction& animate
) {
    // The complexity of this function is exponential with the number of replacements.
    // So let's cap that value.
    const int maxReplacementCount = 3;

    // All potential changes
    const RuleChanges possibleRuleChanges = getPossibleRuleChanges(shapeRules);

    // Find best solution. Start with a single replacement, then increase as necessary.
    RuleChangeScenario bestScenario(shapeRules, {}, animate);
    for (
        int replacementCount = 1;
        bestScenario.getStaticSegmentCount() > 0
            && replacementCount <= std::min(static_cast<int>(possibleRuleChanges.size()), maxReplacementCount);
        ++replacementCount
    ) {
        // Only the first <replacementCount> elements of `currentRuleChanges` count
        auto currentRuleChanges(possibleRuleChanges);
        do {
            RuleChangeScenario currentScenario(
                shapeRules,
                { currentRuleChanges.begin(), currentRuleChanges.begin() + replacementCount },
                animate
            );
            if (currentScenario.isBetterThan(bestScenario)) {
                bestScenario = currentScenario;
            }
        } while (next_combination(
            currentRuleChanges.begin(),
            currentRuleChanges.begin() + replacementCount,
            currentRuleChanges.end()
        ));
    }

    return bestScenario.getChangedRules();
}
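
next_combination() above is a non-standard helper that reorders the range so its first replacementCount elements step through every combination. An equivalent, self-contained way to visit all k-element subsets (a sketch, not the project's implementation) is to permute a selection mask with std::prev_permutation:

#include <algorithm>
#include <cstddef>
#include <vector>

template <typename T, typename Visit>
void forEachCombination(const std::vector<T>& items, std::size_t k, Visit visit) {
    // Requires k <= items.size().
    std::vector<char> selected(items.size(), 0);
    std::fill(selected.begin(), selected.begin() + k, 1); // start: first k chosen
    do {
        std::vector<T> subset;
        for (std::size_t i = 0; i < items.size(); ++i) {
            if (selected[i]) subset.push_back(items[i]);
        }
        visit(subset);
    } while (std::prev_permutation(selected.begin(), selected.end()));
}
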
// Indicates whether the specified shape rule may result in different shapes depending on context
bool isFlexible(const ShapeRule& rule) {
    return rule.shapeSet.size() > 1;
}

// Extends the specified time range until it starts and ends with a non-flexible shape rule, if
// possible
TimeRange extendToFixedRules(
    const TimeRange& timeRange,
    const ContinuousTimeline<ShapeRule>& shapeRules
) {
    auto first = shapeRules.find(timeRange.getStart());
    while (first != shapeRules.begin() && isFlexible(first->getValue())) {
        --first;
    }
    auto last = shapeRules.find(timeRange.getEnd(), FindMode::SampleLeft);
    while (std::next(last) != shapeRules.end() && isFlexible(last->getValue())) {
        ++last;
    }
    return { first->getStart(), last->getEnd() };
}

JoiningContinuousTimeline<Shape> avoidStaticSegments(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const AnimationFunction& animate
) {
    const auto animation = animate(shapeRules);
    const vector<TimeRange> staticSegments = getStaticSegments(shapeRules, animation);
    if (staticSegments.empty()) {
        return animation;
    }

    // Modify shape rules to eliminate static segments
    ContinuousTimeline<ShapeRule> fixedShapeRules(shapeRules);
    for (const TimeRange& staticSegment : staticSegments) {
        // Extend time range to the left and right so we don't lose adjacent rules that might
        // influence the animation
        const TimeRange extendedStaticSegment = extendToFixedRules(staticSegment, shapeRules);

        // Fix shape rules within the static segment
        const auto fixedSegmentShapeRules = fixStaticSegmentRules(
            { extendedStaticSegment, ShapeRule::getInvalid(), fixedShapeRules },
            animate
        );
        for (const auto& timedShapeRule : fixedSegmentShapeRules) {
            fixedShapeRules.set(timedShapeRule);
        }
    }

    return animate(fixedShapeRules);
}
@@ -13,6 +13,6 @@ using AnimationFunction = std::function<JoiningContinuousTimeline<Shape>(const C
// Static segments happen rather often.
// See http://animateducated.blogspot.de/2016/10/lip-sync-animation-2.html?showComment=1478861729702#c2940729096183546458.
JoiningContinuousTimeline<Shape> avoidStaticSegments(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const AnimationFunction& animate
);
@@ -1,48 +1,48 @@
#include "targetShapeSet.h"

Shape convertToTargetShapeSet(Shape shape, const ShapeSet& targetShapeSet) {
    if (targetShapeSet.find(shape) != targetShapeSet.end()) {
        return shape;
    }
    const Shape basicShape = getBasicShape(shape);
    if (targetShapeSet.find(basicShape) == targetShapeSet.end()) {
        throw std::invalid_argument(
            fmt::format("Target shape set must contain basic shape {}.", basicShape));
    }
    return basicShape;
}

ShapeSet convertToTargetShapeSet(const ShapeSet& shapes, const ShapeSet& targetShapeSet) {
    ShapeSet result;
    for (Shape shape : shapes) {
        result.insert(convertToTargetShapeSet(shape, targetShapeSet));
    }
    return result;
}

ContinuousTimeline<ShapeRule> convertToTargetShapeSet(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const ShapeSet& targetShapeSet
) {
    ContinuousTimeline<ShapeRule> result(shapeRules);
    for (const auto& timedShapeRule : shapeRules) {
        ShapeRule rule = timedShapeRule.getValue();
        rule.shapeSet = convertToTargetShapeSet(rule.shapeSet, targetShapeSet);
        result.set(timedShapeRule.getTimeRange(), rule);
    }
    return result;
}

JoiningContinuousTimeline<Shape> convertToTargetShapeSet(
    const JoiningContinuousTimeline<Shape>& animation,
    const ShapeSet& targetShapeSet
) {
    JoiningContinuousTimeline<Shape> result(animation);
    for (const auto& timedShape : animation) {
        result.set(
            timedShape.getTimeRange(),
            convertToTargetShapeSet(timedShape.getValue(), targetShapeSet)
        );
    }
    return result;
}
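
The single-shape conversion above follows a simple fallback ladder: keep the shape if the target set has it, otherwise fall back to its basic shape, and fail loudly if even that is missing. The control flow in isolation, with an invented toy basicOf() in place of getBasicShape():

#include <set>
#include <stdexcept>

char basicOf(char shape) { return shape == 'H' ? 'C' : shape; } // toy mapping

char convertToTargetSet(char shape, const std::set<char>& targetSet) {
    if (targetSet.count(shape)) return shape;
    const char basic = basicOf(shape);
    if (!targetSet.count(basic)) {
        throw std::invalid_argument("Target shape set must contain basic shape.");
    }
    return basic;
}
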
@@ -12,13 +12,13 @@ ShapeSet convertToTargetShapeSet(const ShapeSet& shapes, const ShapeSet& targetS

// Replaces each shape in each rule with the closest shape that occurs in the target shape set.
ContinuousTimeline<ShapeRule> convertToTargetShapeSet(
    const ContinuousTimeline<ShapeRule>& shapeRules,
    const ShapeSet& targetShapeSet
);

// Replaces each shape in the specified animation with the closest shape that occurs in the target
// shape set.
JoiningContinuousTimeline<Shape> convertToTargetShapeSet(
    const JoiningContinuousTimeline<Shape>& animation,
    const ShapeSet& targetShapeSet
);
@@ -9,255 +9,255 @@ using std::string;
using std::map;

string getShapesString(const JoiningContinuousTimeline<Shape>& shapes) {
    string result;
    for (const auto& timedShape : shapes) {
        if (!result.empty()) {
            result.append(" ");
        }
        result.append(boost::lexical_cast<std::string>(timedShape.getValue()));
    }
    return result;
}

Shape getRepresentativeShape(const JoiningTimeline<Shape>& timeline) {
    if (timeline.empty()) {
        throw std::invalid_argument("Cannot determine representative shape from empty timeline.");
    }

    // Collect candidate shapes with weights
    map<Shape, centiseconds> candidateShapeWeights;
    for (const auto& timedShape : timeline) {
        candidateShapeWeights[timedShape.getValue()] += timedShape.getDuration();
    }

    // Select shape with highest total duration within the candidate range
    const Shape bestShape = std::max_element(
        candidateShapeWeights.begin(), candidateShapeWeights.end(),
        [](auto a, auto b) { return a.second < b.second; }
    )->first;

    // Shapes C and D are similar, but D is more interesting.
    const bool substituteD = bestShape == Shape::C && candidateShapeWeights[Shape::D] > 0_cs;
    return substituteD ? Shape::D : bestShape;
}
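
getRepresentativeShape() above is essentially a duration-weighted mode: accumulate each value's total duration, then pick the heaviest. The core of that pattern, with plain ints for durations:

#include <algorithm>
#include <map>
#include <utility>
#include <vector>

char representative(const std::vector<std::pair<char, int>>& timedValues) {
    std::map<char, int> weights;
    for (const auto& [value, duration] : timedValues) {
        weights[value] += duration;
    }
    // max_element walks (value, weight) pairs; order by accumulated weight only.
    return std::max_element(
        weights.begin(), weights.end(),
        [](const auto& a, const auto& b) { return a.second < b.second; }
    )->first;
}
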
struct ShapeReduction {
    ShapeReduction(const JoiningTimeline<Shape>& sourceShapes) :
        sourceShapes(sourceShapes),
        shape(getRepresentativeShape(sourceShapes)) {}

    ShapeReduction(const JoiningTimeline<Shape>& sourceShapes, TimeRange candidateRange) :
        ShapeReduction(JoiningBoundedTimeline<Shape>(candidateRange, sourceShapes)) {}

    JoiningTimeline<Shape> sourceShapes;
    Shape shape;
};

// Returns a time range of candidate shapes for the next shape to draw.
// Guaranteed to be non-empty.
TimeRange getNextMinimalCandidateRange(const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange, const centiseconds writePosition) {
    if (sourceShapes.empty()) {
        throw std::invalid_argument("Cannot determine candidate range for empty source timeline.");
    }

    // Too short, and we get flickering. Too long, and too many shapes are lost.
    // Good values turn out to be 5 to 7 cs, with 7 cs sometimes looking just marginally better.
    const centiseconds minShapeDuration = 7_cs;

    // If the remaining time can hold more than one shape, but not two: split it evenly
    const centiseconds remainingTargetDuration = writePosition - targetRange.getStart();
    const bool canFitOneOrLess = remainingTargetDuration <= minShapeDuration;
    const bool canFitTwo = remainingTargetDuration >= 2 * minShapeDuration;
    const centiseconds duration = canFitOneOrLess || canFitTwo
        ? minShapeDuration
        : remainingTargetDuration / 2;

    TimeRange candidateRange(writePosition - duration, writePosition);
    if (writePosition == targetRange.getEnd()) {
        // This is the first iteration.
        // Extend the candidate range to the right in order to consider all source shapes after the
        // target range.
        candidateRange.setEndIfLater(sourceShapes.getRange().getEnd());
    }
    if (candidateRange.getStart() >= sourceShapes.getRange().getEnd()) {
        // We haven't reached the source range yet.
        // Extend the candidate range to the left in order to encompass the right-most source shape.
        candidateRange.setStart(sourceShapes.rbegin()->getStart());
    }
    if (candidateRange.getEnd() <= sourceShapes.getRange().getStart()) {
        // We're past the source range. This can happen in corner cases.
        // Extend the candidate range to the right in order to encompass the left-most source shape.
        candidateRange.setEnd(sourceShapes.begin()->getEnd());
    }

    return candidateRange;
}
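
The three-way duration decision above avoids leaving an unusably short remainder: if the remaining time fits one minimum-length shape or less, or at least two, take the minimum; in between, split the remainder evenly. As a pure function over integer centisecond counts:

int nextShapeDuration(int remaining, int minShapeDuration) {
    const bool fitsOneOrLess = remaining <= minShapeDuration;
    const bool fitsTwo = remaining >= 2 * minShapeDuration;
    return fitsOneOrLess || fitsTwo ? minShapeDuration : remaining / 2;
}
// With minShapeDuration = 7: remaining 6 -> 7 (the caller clips to the target
// range), 10 -> 5, 20 -> 7.
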
ShapeReduction getNextShapeReduction(
    const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange,
    centiseconds writePosition
) {
    // Determine the next time range of candidate shapes. Consider two scenarios:

    // ... the shortest-possible candidate range
    const ShapeReduction minReduction(sourceShapes,
        getNextMinimalCandidateRange(sourceShapes, targetRange, writePosition));

    // ... a candidate range extended to the left to fully encompass its left-most shape
    const ShapeReduction extendedReduction(sourceShapes,
        {
            minReduction.sourceShapes.begin()->getStart(),
            minReduction.sourceShapes.getRange().getEnd()
        }
    );

    // Determine the shape that might be picked *next* if we choose the shortest-possible candidate
    // range now
    const ShapeReduction nextReduction(
        sourceShapes,
        getNextMinimalCandidateRange(
            sourceShapes, targetRange, minReduction.sourceShapes.getRange().getStart())
    );

    const bool minEqualsExtended = minReduction.shape == extendedReduction.shape;
    const bool extendedIsSpecial = extendedReduction.shape != minReduction.shape
        && extendedReduction.shape != nextReduction.shape;

    return minEqualsExtended || extendedIsSpecial ? extendedReduction : minReduction;
}
// Modifies the timing of the given animation to fit into the specified target time range without
// jitter.
JoiningContinuousTimeline<Shape> retime(const JoiningContinuousTimeline<Shape>& sourceShapes,
    const TimeRange targetRange) {
    logTimedEvent("segment", targetRange, getShapesString(sourceShapes));

    JoiningContinuousTimeline<Shape> result(targetRange, Shape::X);
    if (sourceShapes.empty()) return result;

    // Animate backwards
    centiseconds writePosition = targetRange.getEnd();
    while (writePosition > targetRange.getStart()) {

        // Decide which shape to show next, possibly discarding short shapes
        const ShapeReduction shapeReduction =
            getNextShapeReduction(sourceShapes, targetRange, writePosition);

        // Determine how long to display the shape
        TimeRange targetShapeRange(shapeReduction.sourceShapes.getRange());
        if (targetShapeRange.getStart() <= sourceShapes.getRange().getStart()) {
            // We've used up the left-most source shape. Fill the entire remaining target range.
            targetShapeRange.setStartIfEarlier(targetRange.getStart());
        }
        targetShapeRange.trimRight(writePosition);

        // Draw shape
        result.set(targetShapeRange, shapeReduction.shape);

        writePosition = targetShapeRange.getStart();
    }

    return result;
}

JoiningContinuousTimeline<Shape> retime(
    const JoiningContinuousTimeline<Shape>& animation,
    TimeRange sourceRange,
    TimeRange targetRange
) {
    const auto sourceShapes = JoiningContinuousTimeline<Shape>(sourceRange, Shape::X, animation);
    return retime(sourceShapes, targetRange);
}
|
||||
enum class MouthState {
|
||||
Idle,
|
||||
Closed,
|
||||
Open
|
||||
Idle,
|
||||
Closed,
|
||||
Open
|
||||
};
|
||||
|
||||
JoiningContinuousTimeline<Shape> optimizeTiming(const JoiningContinuousTimeline<Shape>& animation) {
|
||||
// Identify segments with idle, closed, and open mouth shapes
|
||||
JoiningContinuousTimeline<MouthState> segments(animation.getRange(), MouthState::Idle);
|
||||
for (const auto& timedShape : animation) {
|
||||
const Shape shape = timedShape.getValue();
|
||||
const MouthState mouthState =
|
||||
shape == Shape::X
|
||||
? MouthState::Idle
|
||||
: shape == Shape::A
|
||||
? MouthState::Closed
|
||||
: MouthState::Open;
|
||||
segments.set(timedShape.getTimeRange(), mouthState);
|
||||
}
|
||||
// Identify segments with idle, closed, and open mouth shapes
|
||||
JoiningContinuousTimeline<MouthState> segments(animation.getRange(), MouthState::Idle);
|
||||
for (const auto& timedShape : animation) {
|
||||
const Shape shape = timedShape.getValue();
|
||||
const MouthState mouthState =
|
||||
shape == Shape::X
|
||||
? MouthState::Idle
|
||||
: shape == Shape::A
|
||||
? MouthState::Closed
|
||||
: MouthState::Open;
|
||||
segments.set(timedShape.getTimeRange(), mouthState);
|
||||
}
|
||||
|
||||
// The minimum duration a segment of open or closed mouth shapes must have to visually register
|
||||
const centiseconds minSegmentDuration = 8_cs;
|
||||
// The maximum amount by which the start of a shape can be brought forward
|
||||
const centiseconds maxExtensionDuration = 6_cs;
|
||||
// The minimum duration a segment of open or closed mouth shapes must have to visually register
|
||||
const centiseconds minSegmentDuration = 8_cs;
|
||||
// The maximum amount by which the start of a shape can be brought forward
|
||||
const centiseconds maxExtensionDuration = 6_cs;
|
||||
|
||||
// Make sure all open and closed segments are long enough to register visually.
|
||||
JoiningContinuousTimeline<Shape> result(animation.getRange(), Shape::X);
|
||||
// ... we're filling the result timeline from right to left, so `resultStart` points to the
|
||||
// earliest shape already written
|
||||
centiseconds resultStart = result.getRange().getEnd();
|
||||
for (auto segmentIt = segments.rbegin(); segmentIt != segments.rend(); ++segmentIt) {
|
||||
// We don't care about idle shapes at this point.
|
||||
if (segmentIt->getValue() == MouthState::Idle) continue;
|
||||
// Make sure all open and closed segments are long enough to register visually.
|
||||
JoiningContinuousTimeline<Shape> result(animation.getRange(), Shape::X);
|
||||
// ... we're filling the result timeline from right to left, so `resultStart` points to the
|
||||
// earliest shape already written
|
||||
centiseconds resultStart = result.getRange().getEnd();
|
||||
	for (auto segmentIt = segments.rbegin(); segmentIt != segments.rend(); ++segmentIt) {
	    // We don't care about idle shapes at this point.
	    if (segmentIt->getValue() == MouthState::Idle) continue;

	    resultStart = std::min(segmentIt->getEnd(), resultStart);
	    if (resultStart - segmentIt->getStart() >= minSegmentDuration) {
	        // The segment is long enough; we don't have to extend it to the left.
	        const TimeRange targetRange(segmentIt->getStart(), resultStart);
	        const auto retimedSegment = retime(animation, segmentIt->getTimeRange(), targetRange);
	        for (const auto& timedShape : retimedSegment) {
	            result.set(timedShape);
	        }
	        resultStart = targetRange.getStart();
	    } else {
	        // The segment is too short; we have to extend it to the left.
	        // Find all adjacent segments to our left that are also too short, then distribute them
	        // evenly.
	        const auto begin = segmentIt;
	        auto end = std::next(begin);
	        while (
	            end != segments.rend()
	            && end->getValue() != MouthState::Idle
	            && end->getDuration() < minSegmentDuration
	        ) {
	            ++end;
	        }

	        // Determine how much we should extend the entire set of short segments to the left
	        const size_t shortSegmentCount = std::distance(begin, end);
	        const centiseconds desiredDuration = minSegmentDuration * shortSegmentCount;
	        const centiseconds currentDuration = begin->getEnd() - std::prev(end)->getStart();
	        const centiseconds desiredExtensionDuration = desiredDuration - currentDuration;
	        const centiseconds availableExtensionDuration = end != segments.rend()
	            ? end->getDuration() - 1_cs
	            : 0_cs;
	        const centiseconds extensionDuration = std::min({
	            desiredExtensionDuration, availableExtensionDuration, maxExtensionDuration
	        });

	        // Distribute available time range evenly among all short segments
	        const centiseconds shortSegmentsTargetStart =
	            std::prev(end)->getStart() - extensionDuration;
	        for (auto shortSegmentIt = begin; shortSegmentIt != end; ++shortSegmentIt) {
	            size_t remainingShortSegmentCount = std::distance(shortSegmentIt, end);
	            const centiseconds segmentDuration = (resultStart - shortSegmentsTargetStart) /
	                remainingShortSegmentCount;
	            const TimeRange segmentTargetRange(resultStart - segmentDuration, resultStart);
	            const auto retimedSegment =
	                retime(animation, shortSegmentIt->getTimeRange(), segmentTargetRange);
	            for (const auto& timedShape : retimedSegment) {
	                result.set(timedShape);
	            }
	            resultStart = segmentTargetRange.getStart();
	        }

	        segmentIt = std::prev(end);
	    }
	}

	return result;
}
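The redistribution loop above gives each too-short segment an equal slice of the reclaimed time range. A quick numeric walkthrough with made-up values (a sketch, not taken from the diff):

    // Assumed: three short segments, resultStart = 30_cs, shortSegmentsTargetStart = 18_cs.
    // Iteration 1: remaining = 3, segmentDuration = (30_cs - 18_cs) / 3 = 4_cs -> range [26_cs, 30_cs)
    // Iteration 2: remaining = 2, segmentDuration = (26_cs - 18_cs) / 2 = 4_cs -> range [22_cs, 26_cs)
    // Iteration 3: remaining = 1, segmentDuration = (22_cs - 18_cs) / 1 = 4_cs -> range [18_cs, 22_cs)
    // Integer division makes the slices only approximately equal in general; here they divide evenly.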
@@ -2,53 +2,53 @@
#include "animationRules.h"

JoiningContinuousTimeline<Shape> insertTweens(const JoiningContinuousTimeline<Shape>& animation) {
	const centiseconds minTweenDuration = 4_cs;
	const centiseconds maxTweenDuration = 8_cs;

	JoiningContinuousTimeline<Shape> result(animation);

	for_each_adjacent(animation.begin(), animation.end(), [&](const auto& first, const auto& second) {
	    auto pair = getTween(first.getValue(), second.getValue());
	    if (!pair) return;

	    Shape tweenShape;
	    TweenTiming tweenTiming;
	    std::tie(tweenShape, tweenTiming) = *pair;
	    TimeRange firstTimeRange = first.getTimeRange();
	    TimeRange secondTimeRange = second.getTimeRange();

	    centiseconds tweenStart, tweenDuration;
	    switch (tweenTiming) {
	        case TweenTiming::Early:
	        {
	            tweenDuration = std::min(firstTimeRange.getDuration() / 3, maxTweenDuration);
	            tweenStart = firstTimeRange.getEnd() - tweenDuration;
	            break;
	        }
	        case TweenTiming::Centered:
	        {
	            tweenDuration = std::min({
	                firstTimeRange.getDuration() / 4, secondTimeRange.getDuration() / 4, maxTweenDuration
	            });
	            tweenStart = firstTimeRange.getEnd() - tweenDuration / 2;
	            break;
	        }
	        case TweenTiming::Late:
	        {
	            tweenDuration = std::min(secondTimeRange.getDuration() / 3, maxTweenDuration);
	            tweenStart = secondTimeRange.getStart();
	            break;
	        }
	        default:
	        {
	            throw std::runtime_error("Unexpected tween timing.");
	        }
	    }

	    if (tweenDuration < minTweenDuration) return;

	    result.set(tweenStart, tweenStart + tweenDuration, tweenShape);
	});

	return result;
}
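To make the switch above concrete, here is the Centered case with assumed durations (illustrative numbers only):

    // First shape lasts 24_cs, second 12_cs:
    //   tweenDuration = min({24_cs / 4, 12_cs / 4, 8_cs}) = 3_cs, which is below
    //   minTweenDuration (4_cs), so the later check skips the tween entirely.
    // First shape lasts 40_cs, second 32_cs:
    //   tweenDuration = min({10_cs, 8_cs, 8_cs}) = 8_cs,
    //   tweenStart = firstTimeRange.getEnd() - 4_cs, i.e. the tween straddles the boundary.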
@@ -4,66 +4,66 @@
using std::invalid_argument;

TimeRange AudioClip::getTruncatedRange() const {
	return TimeRange(0_cs, centiseconds(100 * size() / getSampleRate()));
}

class SafeSampleReader {
public:
	SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size);
	AudioClip::value_type operator()(AudioClip::size_type index);
private:
	SampleReader unsafeRead;
	AudioClip::size_type size;
	AudioClip::size_type lastIndex = -1;
	AudioClip::value_type lastSample = 0;
};

SafeSampleReader::SafeSampleReader(SampleReader unsafeRead, AudioClip::size_type size) :
	unsafeRead(unsafeRead),
	size(size)
{}

inline AudioClip::value_type SafeSampleReader::operator()(AudioClip::size_type index) {
	if (index < 0) {
	    throw invalid_argument(fmt::format("Cannot read from sample index {}. Index < 0.", index));
	}
	if (index >= size) {
	    throw invalid_argument(fmt::format(
	        "Cannot read from sample index {}. Clip size is {}.",
	        index,
	        size
	    ));
	}
	if (index == lastIndex) {
	    return lastSample;
	}

	lastIndex = index;
	lastSample = unsafeRead(index);
	return lastSample;
}

SampleReader AudioClip::createSampleReader() const {
	return SafeSampleReader(createUnsafeSampleReader(), size());
}

AudioClip::iterator AudioClip::begin() const {
	return SampleIterator(*this, 0);
}

AudioClip::iterator AudioClip::end() const {
	return SampleIterator(*this, size());
}

std::unique_ptr<AudioClip> operator|(std::unique_ptr<AudioClip> clip, const AudioEffect& effect) {
	return effect(std::move(clip));
}

SampleIterator::SampleIterator() :
	sampleIndex(0)
{}

SampleIterator::SampleIterator(const AudioClip& audioClip, size_type sampleIndex) :
	sampleReader([&audioClip] { return audioClip.createSampleReader(); }),
	sampleIndex(sampleIndex)
{}
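The operator| overload above is what lets effects be chained left to right. A minimal usage sketch, assuming the removeDcOffset and resample effects defined further down in this commit and a hypothetical clip variable:

    // `clip` is any std::unique_ptr<AudioClip> (hypothetical).
    clip = std::move(clip)
        | removeDcOffset(0.001f) // AudioEffect from DcOffset.cpp
        | resample(16000);       // AudioEffect from SampleRateConverter.cpp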
@@ -10,22 +10,22 @@ class SampleIterator;

class AudioClip {
public:
	using value_type = float;
	using size_type = int64_t;
	using difference_type = int64_t;
	using iterator = SampleIterator;
	using SampleReader = std::function<value_type(size_type)>;

	virtual ~AudioClip() {}
	virtual std::unique_ptr<AudioClip> clone() const = 0;
	virtual int getSampleRate() const = 0;
	virtual size_type size() const = 0;
	TimeRange getTruncatedRange() const;
	SampleReader createSampleReader() const;
	iterator begin() const;
	iterator end() const;
private:
	virtual SampleReader createUnsafeSampleReader() const = 0;
};

using AudioEffect = std::function<std::unique_ptr<AudioClip>(std::unique_ptr<AudioClip>)>;
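Only four members are virtual: clone, getSampleRate, size, and the private createUnsafeSampleReader hook. A minimal hypothetical subclass, just to illustrate the pattern (not part of this commit):

    // Hypothetical clip that produces silence.
    class SilenceClip : public AudioClip {
    public:
        SilenceClip(int sampleRate, size_type sampleCount) :
            sampleRate(sampleRate), sampleCount(sampleCount) {}
        std::unique_ptr<AudioClip> clone() const override {
            return std::make_unique<SilenceClip>(*this);
        }
        int getSampleRate() const override { return sampleRate; }
        size_type size() const override { return sampleCount; }
    private:
        SampleReader createUnsafeSampleReader() const override {
            return [](size_type) { return 0.0f; };
        }
        int sampleRate;
        size_type sampleCount;
    };

The base class then wraps the unsafe reader in SafeSampleReader and provides iteration for free.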
@@ -36,107 +36,107 @@ using SampleReader = AudioClip::SampleReader;

class SampleIterator {
public:
	using value_type = AudioClip::value_type;
	using size_type = AudioClip::size_type;
	using difference_type = AudioClip::difference_type;

	SampleIterator();

	size_type getSampleIndex() const;
	void seek(size_type sampleIndex);
	value_type operator*() const;
	value_type operator[](difference_type n) const;

private:
	friend AudioClip;
	SampleIterator(const AudioClip& audioClip, size_type sampleIndex);

	Lazy<SampleReader> sampleReader;
	size_type sampleIndex;
};

inline SampleIterator::size_type SampleIterator::getSampleIndex() const {
	return sampleIndex;
}

inline void SampleIterator::seek(size_type sampleIndex) {
	this->sampleIndex = sampleIndex;
}

inline SampleIterator::value_type SampleIterator::operator*() const {
	return (*sampleReader)(sampleIndex);
}

inline SampleIterator::value_type SampleIterator::operator[](difference_type n) const {
	return (*sampleReader)(sampleIndex + n);
}

inline bool operator==(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() == rhs.getSampleIndex();
}

inline bool operator!=(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() != rhs.getSampleIndex();
}

inline bool operator<(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() < rhs.getSampleIndex();
}

inline bool operator>(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() > rhs.getSampleIndex();
}

inline bool operator<=(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() <= rhs.getSampleIndex();
}

inline bool operator>=(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() >= rhs.getSampleIndex();
}

inline SampleIterator& operator+=(SampleIterator& it, SampleIterator::difference_type n) {
	it.seek(it.getSampleIndex() + n);
	return it;
}

inline SampleIterator& operator-=(SampleIterator& it, SampleIterator::difference_type n) {
	it.seek(it.getSampleIndex() - n);
	return it;
}

inline SampleIterator& operator++(SampleIterator& it) {
	return operator+=(it, 1);
}

inline SampleIterator operator++(SampleIterator& it, int) {
	SampleIterator tmp(it);
	operator++(it);
	return tmp;
}

inline SampleIterator& operator--(SampleIterator& it) {
	return operator-=(it, 1);
}

inline SampleIterator operator--(SampleIterator& it, int) {
	SampleIterator tmp(it);
	operator--(it);
	return tmp;
}

inline SampleIterator operator+(const SampleIterator& it, SampleIterator::difference_type n) {
	SampleIterator result(it);
	result += n;
	return result;
}

inline SampleIterator operator-(const SampleIterator& it, SampleIterator::difference_type n) {
	SampleIterator result(it);
	result -= n;
	return result;
}

inline SampleIterator::difference_type operator-(const SampleIterator& lhs, const SampleIterator& rhs) {
	return lhs.getSampleIndex() - rhs.getSampleIndex();
}
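Together these operators make SampleIterator usable like a random-access iterator. A small sketch, assuming clip is any AudioClip:

    // Average all samples of a clip via begin()/end().
    double sum = 0;
    for (auto it = clip.begin(); it != clip.end(); ++it) {
        sum += *it;
    }
    const double average = clip.size() > 0 ? sum / clip.size() : 0.0;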
@@ -4,27 +4,27 @@ using std::unique_ptr;
using std::make_unique;

AudioSegment::AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range) :
	inputClip(std::move(inputClip)),
	sampleOffset(static_cast<int64_t>(range.getStart().count()) * this->inputClip->getSampleRate() / 100),
	sampleCount(static_cast<int64_t>(range.getDuration().count()) * this->inputClip->getSampleRate() / 100)
{
	if (sampleOffset < 0 || sampleOffset + sampleCount > this->inputClip->size()) {
	    throw std::invalid_argument("Segment extends beyond input clip.");
	}
}

unique_ptr<AudioClip> AudioSegment::clone() const {
	return make_unique<AudioSegment>(*this);
}

SampleReader AudioSegment::createUnsafeSampleReader() const {
	return [read = inputClip->createSampleReader(), sampleOffset = sampleOffset](size_type index) {
	    return read(index + sampleOffset);
	};
}

AudioEffect segment(const TimeRange& range) {
	return [range](unique_ptr<AudioClip> inputClip) {
	    return make_unique<AudioSegment>(std::move(inputClip), range);
	};
}
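Like the other effects, segment composes with the pipe operator. A sketch with an assumed range and clip variable:

    // Keep only the first second (100 centiseconds) of a clip.
    auto firstSecond = std::move(clip) | segment(TimeRange(0_cs, 100_cs));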
@@ -4,24 +4,24 @@

class AudioSegment : public AudioClip {
public:
	AudioSegment(std::unique_ptr<AudioClip> inputClip, const TimeRange& range);
	std::unique_ptr<AudioClip> clone() const override;
	int getSampleRate() const override;
	size_type size() const override;

private:
	SampleReader createUnsafeSampleReader() const override;

	std::shared_ptr<AudioClip> inputClip;
	size_type sampleOffset, sampleCount;
};

inline int AudioSegment::getSampleRate() const {
	return inputClip->getSampleRate();
}

inline AudioClip::size_type AudioSegment::size() const {
	return sampleCount;
}

AudioEffect segment(const TimeRange& range);
@@ -5,65 +5,65 @@ using std::unique_ptr;
using std::make_unique;

DcOffset::DcOffset(unique_ptr<AudioClip> inputClip, float offset) :
	inputClip(std::move(inputClip)),
	offset(offset),
	factor(1 / (1 + std::abs(offset)))
{}

unique_ptr<AudioClip> DcOffset::clone() const {
	return make_unique<DcOffset>(*this);
}

SampleReader DcOffset::createUnsafeSampleReader() const {
	return [
	    read = inputClip->createSampleReader(),
	    factor = factor,
	    offset = offset
	](size_type index) {
	    const float sample = read(index);
	    return sample * factor + offset;
	};
}

float getDcOffset(const AudioClip& audioClip) {
	int flatMeanSampleCount, fadingMeanSampleCount;
	const int sampleRate = audioClip.getSampleRate();
	if (audioClip.size() > 4 * sampleRate) {
	    // Long audio file. Average over the first 3 seconds, then fade out over the 4th.
	    flatMeanSampleCount = 3 * sampleRate;
	    fadingMeanSampleCount = 1 * sampleRate;
	} else {
	    // Short audio file. Average over the entire duration.
	    flatMeanSampleCount = static_cast<int>(audioClip.size());
	    fadingMeanSampleCount = 0;
	}

	const auto read = audioClip.createSampleReader();
	double sum = 0;
	for (int i = 0; i < flatMeanSampleCount; ++i) {
	    sum += read(i);
	}
	for (int i = 0; i < fadingMeanSampleCount; ++i) {
	    const double weight =
	        static_cast<double>(fadingMeanSampleCount - i) / fadingMeanSampleCount;
	    sum += read(flatMeanSampleCount + i) * weight;
	}

	const double totalWeight = flatMeanSampleCount + (fadingMeanSampleCount + 1) / 2.0;
	const double offset = sum / totalWeight;
	return static_cast<float>(offset);
}

AudioEffect addDcOffset(float offset, float epsilon) {
	return [offset, epsilon](unique_ptr<AudioClip> inputClip) -> unique_ptr<AudioClip> {
	    if (std::abs(offset) < epsilon) return inputClip;
	    return make_unique<DcOffset>(std::move(inputClip), offset);
	};
}

AudioEffect removeDcOffset(float epsilon) {
	return [epsilon](unique_ptr<AudioClip> inputClip) {
	    const float offset = getDcOffset(*inputClip);
	    return std::move(inputClip) | addDcOffset(-offset, epsilon);
	};
}
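The totalWeight line in getDcOffset relies on the fade weights summing to (n + 1) / 2; a quick check of that identity:

    // For n = fadingMeanSampleCount, the weights are (n - i) / n for i = 0 .. n-1:
    // sum = (n + (n-1) + ... + 1) / n = (n * (n+1) / 2) / n = (n + 1) / 2,
    // which is exactly the (fadingMeanSampleCount + 1) / 2.0 term in totalWeight.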
@@ -6,24 +6,24 @@
// to prevent clipping
class DcOffset : public AudioClip {
public:
	DcOffset(std::unique_ptr<AudioClip> inputClip, float offset);
	std::unique_ptr<AudioClip> clone() const override;
	int getSampleRate() const override;
	size_type size() const override;
private:
	SampleReader createUnsafeSampleReader() const override;

	std::shared_ptr<AudioClip> inputClip;
	float offset;
	float factor;
};

inline int DcOffset::getSampleRate() const {
	return inputClip->getSampleRate();
}

inline AudioClip::size_type DcOffset::size() const {
	return inputClip->size();
}

float getDcOffset(const AudioClip& audioClip);
@@ -13,154 +13,154 @@ using std::ifstream;
using std::ios_base;

std::string vorbisErrorToString(int64_t errorCode) {
	switch (errorCode) {
	    case OV_EREAD:
	        return "Read error while fetching compressed data for decode.";
	    case OV_EFAULT:
	        return "Internal logic fault; indicates a bug or heap/stack corruption.";
	    case OV_EIMPL:
	        return "Feature not implemented";
	    case OV_EINVAL:
	        return "Either an invalid argument, or incompletely initialized argument passed to a call.";
	    case OV_ENOTVORBIS:
	        return "The given file/data was not recognized as Ogg Vorbis data.";
	    case OV_EBADHEADER:
	        return "The file/data is apparently an Ogg Vorbis stream, but contains a corrupted or undecipherable header.";
	    case OV_EVERSION:
	        return "The bitstream format revision of the given Vorbis stream is not supported.";
	    case OV_ENOTAUDIO:
	        return "Packet is not an audio packet.";
	    case OV_EBADPACKET:
	        return "Error in packet.";
	    case OV_EBADLINK:
	        return "The given link exists in the Vorbis data stream, but is not decipherable due to garbage or corruption.";
	    case OV_ENOSEEK:
	        return "The given stream is not seekable.";
	    default:
	        return "An unexpected Vorbis error occurred.";
	}
}

template<typename T>
T throwOnError(T code) {
	// OV_HOLE, though technically an error code, is only informational
	const bool error = code < 0 && code != OV_HOLE;
	if (error) {
	    const std::string message =
	        fmt::format("{} (Vorbis error {})", vorbisErrorToString(code), code);
	    throw std::runtime_error(message);
	}
	return code;
}

size_t readCallback(void* buffer, size_t elementSize, size_t elementCount, void* dataSource) {
	assert(elementSize == 1);

	ifstream& stream = *static_cast<ifstream*>(dataSource);
	stream.read(static_cast<char*>(buffer), elementCount);
	const std::streamsize bytesRead = stream.gcount();
	stream.clear(); // In case we read past EOF
	return static_cast<size_t>(bytesRead);
}

int seekCallback(void* dataSource, ogg_int64_t offset, int origin) {
	static const vector<ios_base::seekdir> seekDirections {
	    ios_base::beg, ios_base::cur, ios_base::end
	};

	ifstream& stream = *static_cast<ifstream*>(dataSource);
	stream.seekg(offset, seekDirections.at(origin));
	stream.clear(); // In case we sought to EOF
	return 0;
}

long tellCallback(void* dataSource) {
	ifstream& stream = *static_cast<ifstream*>(dataSource);
	const auto position = stream.tellg();
	assert(position >= 0);
	return static_cast<long>(position);
}

// RAII wrapper around OggVorbis_File
class OggVorbisFile final {
public:
	OggVorbisFile(const path& filePath);

	OggVorbisFile(const OggVorbisFile&) = delete;
	OggVorbisFile& operator=(const OggVorbisFile&) = delete;

	OggVorbis_File* get() {
	    return &oggVorbisHandle;
	}

	~OggVorbisFile() {
	    ov_clear(&oggVorbisHandle);
	}

private:
	OggVorbis_File oggVorbisHandle;
	ifstream stream;
};

OggVorbisFile::OggVorbisFile(const path& filePath) :
	oggVorbisHandle(),
	stream(openFile(filePath))
{
	// Throw only on badbit, not on failbit.
	// Ogg Vorbis expects read operations past the end of the file to
	// succeed, not to throw.
	stream.exceptions(ifstream::badbit);

	// Ogg Vorbis normally uses the `FILE` API from the C standard library.
	// This doesn't handle Unicode paths on Windows.
	// Use wrapper functions around `ifstream` instead.
	const ov_callbacks callbacks { readCallback, seekCallback, nullptr, tellCallback };
	throwOnError(ov_open_callbacks(&stream, &oggVorbisHandle, nullptr, 0, callbacks));
}

OggVorbisFileReader::OggVorbisFileReader(const path& filePath) :
	filePath(filePath)
{
	OggVorbisFile file(filePath);

	vorbis_info* vorbisInfo = ov_info(file.get(), -1);
	sampleRate = vorbisInfo->rate;
	channelCount = vorbisInfo->channels;

	sampleCount = throwOnError(ov_pcm_total(file.get(), -1));
}

std::unique_ptr<AudioClip> OggVorbisFileReader::clone() const {
	return std::make_unique<OggVorbisFileReader>(*this);
}

SampleReader OggVorbisFileReader::createUnsafeSampleReader() const {
	return [
	    channelCount = channelCount,
	    file = make_shared<OggVorbisFile>(filePath),
	    buffer = static_cast<value_type**>(nullptr),
	    bufferStart = size_type(0),
	    bufferSize = size_type(0)
	](size_type index) mutable {
	    if (index < bufferStart || index >= bufferStart + bufferSize) {
	        // Seek
	        throwOnError(ov_pcm_seek(file->get(), index));

	        // Read a block of samples
	        constexpr int maxSize = 1024;
	        bufferStart = index;
	        bufferSize = throwOnError(ov_read_float(file->get(), &buffer, maxSize, nullptr));
	        if (bufferSize == 0) {
	            throw std::runtime_error("Unexpected end of file.");
	        }
	    }

	    // Downmix channels
	    const size_type bufferIndex = index - bufferStart;
	    value_type sum = 0.0f;
	    for (int channel = 0; channel < channelCount; ++channel) {
	        sum += buffer[channel][bufferIndex];
	    }
	    return sum / channelCount;
	};
}
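A usage sketch for the reader (file name hypothetical; createSampleReader comes from the AudioClip base class):

    OggVorbisFileReader reader("speech.ogg"); // hypothetical path
    const auto read = reader.createSampleReader(); // bounds-checked via SafeSampleReader
    const float firstSample = read(0); // seeks, fills the block buffer, downmixes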
@@ -5,16 +5,16 @@

class OggVorbisFileReader : public AudioClip {
public:
	OggVorbisFileReader(const std::filesystem::path& filePath);
	std::unique_ptr<AudioClip> clone() const override;
	int getSampleRate() const override { return sampleRate; }
	size_type size() const override { return sampleCount; }

private:
	SampleReader createUnsafeSampleReader() const override;

	std::filesystem::path filePath;
	int sampleRate;
	int channelCount;
	size_type sampleCount;
};
@@ -8,63 +8,63 @@ using std::unique_ptr;
using std::make_unique;

SampleRateConverter::SampleRateConverter(unique_ptr<AudioClip> inputClip, int outputSampleRate) :
	inputClip(std::move(inputClip)),
	downscalingFactor(static_cast<double>(this->inputClip->getSampleRate()) / outputSampleRate),
	outputSampleRate(outputSampleRate),
	outputSampleCount(std::lround(this->inputClip->size() / downscalingFactor))
{
	if (outputSampleRate <= 0) {
	    throw invalid_argument("Sample rate must be positive.");
	}
	if (this->inputClip->getSampleRate() < outputSampleRate) {
	    throw invalid_argument(fmt::format(
	        "Upsampling not supported. Input sample rate must not be below {}Hz.",
	        outputSampleRate
	    ));
	}
}

unique_ptr<AudioClip> SampleRateConverter::clone() const {
	return make_unique<SampleRateConverter>(*this);
}

float mean(double inputStart, double inputEnd, const SampleReader& read) {
	// Calculate weighted sum...
	double sum = 0;

	// ... first sample (weight <= 1)
	const int64_t startIndex = static_cast<int64_t>(inputStart);
	sum += read(startIndex) * ((startIndex + 1) - inputStart);

	// ... middle samples (weight 1 each)
	const int64_t endIndex = static_cast<int64_t>(inputEnd);
	for (int64_t index = startIndex + 1; index < endIndex; ++index) {
	    sum += read(index);
	}

	// ... last sample (weight < 1)
	if (endIndex < inputEnd) {
	    sum += read(endIndex) * (inputEnd - endIndex);
	}

	return static_cast<float>(sum / (inputEnd - inputStart));
}

SampleReader SampleRateConverter::createUnsafeSampleReader() const {
	return [
	    read = inputClip->createSampleReader(),
	    downscalingFactor = downscalingFactor,
	    size = inputClip->size()
	](size_type index) {
	    const double inputStart = index * downscalingFactor;
	    const double inputEnd =
	        std::min((index + 1) * downscalingFactor, static_cast<double>(size));
	    return mean(inputStart, inputEnd, read);
	};
}

AudioEffect resample(int sampleRate) {
	return [sampleRate](unique_ptr<AudioClip> inputClip) {
	    return make_unique<SampleRateConverter>(std::move(inputClip), sampleRate);
	};
}
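A worked example of the weighting in mean, with assumed bounds:

    // inputStart = 2.5, inputEnd = 5.25:
    //   sample 2 gets weight (3 - 2.5) = 0.5
    //   samples 3 and 4 get weight 1 each
    //   sample 5 gets weight (5.25 - 5) = 0.25
    // Total weight = 2.75 = inputEnd - inputStart, so the result is a true
    // area-weighted average of the covered input samples.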
@@ -5,25 +5,25 @@

class SampleRateConverter : public AudioClip {
public:
	SampleRateConverter(std::unique_ptr<AudioClip> inputClip, int outputSampleRate);
	std::unique_ptr<AudioClip> clone() const override;
	int getSampleRate() const override;
	size_type size() const override;
private:
	SampleReader createUnsafeSampleReader() const override;

	std::shared_ptr<AudioClip> inputClip;
	double downscalingFactor; // input sample rate / output sample rate
	int outputSampleRate;
	int64_t outputSampleCount;
};

AudioEffect resample(int sampleRate);

inline int SampleRateConverter::getSampleRate() const {
	return outputSampleRate;
}

inline AudioClip::size_type SampleRateConverter::size() const {
	return outputSampleCount;
}
@@ -20,478 +20,478 @@ using std::streamoff;

// Converts an int in the range min..max to a float in the range -1..1
float toNormalizedFloat(int value, int min, int max) {
	const float fMin = static_cast<float>(min);
	const float fMax = static_cast<float>(max);
	const float fValue = static_cast<float>(value);
	return ((fValue - fMin) / (fMax - fMin) * 2) - 1;
}

streamoff roundUpToEven(streamoff i) {
	return (i + 1) & (~1);
}
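Spot checks for the two helpers above (assumed inputs):

    // toNormalizedFloat(0, 0, 255) == -1.0f; toNormalizedFloat(255, 0, 255) == 1.0f;
    // toNormalizedFloat(128, 0, 255) is approximately 0.004f.
    // roundUpToEven pads odd RIFF chunk sizes: roundUpToEven(5) == 6, roundUpToEven(6) == 6.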
|
||||
namespace Codec {
|
||||
constexpr int Pcm = 0x01;
|
||||
constexpr int Float = 0x03;
|
||||
constexpr int Extensible = 0xFFFE;
|
||||
constexpr int Pcm = 0x01;
|
||||
constexpr int Float = 0x03;
|
||||
constexpr int Extensible = 0xFFFE;
|
||||
};
|
||||
|
||||
string codecToString(int codec);
|
||||
|
||||
WaveFormatInfo getWaveFormatInfo(const path& filePath) {
|
||||
WaveFormatInfo formatInfo {};
|
||||
WaveFormatInfo formatInfo {};
|
||||
|
||||
auto file = openFile(filePath);
|
||||
auto file = openFile(filePath);
|
||||
|
||||
file.seekg(0, std::ios_base::end);
|
||||
const streamoff fileSize = file.tellg();
|
||||
file.seekg(0);
|
||||
file.seekg(0, std::ios_base::end);
|
||||
const streamoff fileSize = file.tellg();
|
||||
file.seekg(0);
|
||||
|
||||
auto remaining = [&](int byteCount) {
|
||||
const streamoff filePosition = file.tellg();
|
||||
return byteCount <= fileSize - filePosition;
|
||||
};
|
||||
auto remaining = [&](int byteCount) {
|
||||
const streamoff filePosition = file.tellg();
|
||||
return byteCount <= fileSize - filePosition;
|
||||
};
|
||||
|
||||
// Read header
|
||||
if (!remaining(10)) {
|
||||
throw runtime_error("WAVE file is corrupt. Header not found.");
|
||||
}
|
||||
const auto rootChunkId = read<uint32_t>(file);
|
||||
if (rootChunkId != fourcc('R', 'I', 'F', 'F')) {
|
||||
throw runtime_error("Unknown file format. Only WAVE files are supported.");
|
||||
}
|
||||
read<uint32_t>(file); // Chunk size
|
||||
const uint32_t waveId = read<uint32_t>(file);
|
||||
if (waveId != fourcc('W', 'A', 'V', 'E')) {
|
||||
throw runtime_error(format("File format is not WAVE, but {}.", fourccToString(waveId)));
|
||||
}
|
||||
// Read header
|
||||
if (!remaining(10)) {
|
||||
throw runtime_error("WAVE file is corrupt. Header not found.");
|
||||
}
|
||||
const auto rootChunkId = read<uint32_t>(file);
|
||||
if (rootChunkId != fourcc('R', 'I', 'F', 'F')) {
|
||||
throw runtime_error("Unknown file format. Only WAVE files are supported.");
|
||||
}
|
||||
read<uint32_t>(file); // Chunk size
|
||||
const uint32_t waveId = read<uint32_t>(file);
|
||||
if (waveId != fourcc('W', 'A', 'V', 'E')) {
|
||||
throw runtime_error(format("File format is not WAVE, but {}.", fourccToString(waveId)));
|
||||
}
|
||||
|
||||
// Read chunks until we reach the data chunk
|
||||
bool processedFormatChunk = false;
|
||||
bool processedDataChunk = false;
|
||||
while ((!processedFormatChunk || !processedDataChunk) && remaining(8)) {
|
||||
const uint32_t chunkId = read<uint32_t>(file);
|
||||
const streamoff chunkSize = read<int32_t>(file);
|
||||
const streamoff chunkEnd = roundUpToEven(file.tellg() + chunkSize);
|
||||
switch (chunkId) {
|
||||
case fourcc('f', 'm', 't', ' '):
|
||||
{
|
||||
// Read relevant data
|
||||
uint16_t codec = read<uint16_t>(file);
|
||||
formatInfo.channelCount = read<uint16_t>(file);
|
||||
formatInfo.frameRate = read<int32_t>(file);
|
||||
read<uint32_t>(file); // Bytes per second
|
||||
const int bytesPerFrame = read<uint16_t>(file);
|
||||
const int bitsPerSampleOnDisk = read<uint16_t>(file);
|
||||
int bitsPerSample = bitsPerSampleOnDisk;
|
||||
if (chunkSize > 16) {
|
||||
const int extensionSize = read<uint16_t>(file);
|
||||
if (extensionSize >= 22) {
|
||||
// Read extension fields
|
||||
bitsPerSample = read<uint16_t>(file);
|
||||
read<uint32_t>(file); // Skip channel mask
|
||||
const uint16_t codecOverride = read<uint16_t>(file);
|
||||
if (codec == Codec::Extensible) {
|
||||
codec = codecOverride;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Read chunks until we reach the data chunk
|
||||
bool processedFormatChunk = false;
|
||||
bool processedDataChunk = false;
|
||||
while ((!processedFormatChunk || !processedDataChunk) && remaining(8)) {
|
||||
const uint32_t chunkId = read<uint32_t>(file);
|
||||
const streamoff chunkSize = read<int32_t>(file);
|
||||
const streamoff chunkEnd = roundUpToEven(file.tellg() + chunkSize);
|
||||
switch (chunkId) {
|
||||
case fourcc('f', 'm', 't', ' '):
|
||||
{
|
||||
// Read relevant data
|
||||
uint16_t codec = read<uint16_t>(file);
|
||||
formatInfo.channelCount = read<uint16_t>(file);
|
||||
formatInfo.frameRate = read<int32_t>(file);
|
||||
read<uint32_t>(file); // Bytes per second
|
||||
const int bytesPerFrame = read<uint16_t>(file);
|
||||
const int bitsPerSampleOnDisk = read<uint16_t>(file);
|
||||
int bitsPerSample = bitsPerSampleOnDisk;
|
||||
if (chunkSize > 16) {
|
||||
const int extensionSize = read<uint16_t>(file);
|
||||
if (extensionSize >= 22) {
|
||||
// Read extension fields
|
||||
bitsPerSample = read<uint16_t>(file);
|
||||
read<uint32_t>(file); // Skip channel mask
|
||||
const uint16_t codecOverride = read<uint16_t>(file);
|
||||
if (codec == Codec::Extensible) {
|
||||
codec = codecOverride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine sample format
|
||||
int bytesPerSample;
|
||||
switch (codec) {
|
||||
case Codec::Pcm:
|
||||
// Determine sample size.
|
||||
// According to the WAVE standard, sample sizes that are not multiples of 8
|
||||
// bits (e.g. 12 bits) can be treated like the next-larger byte size.
|
||||
if (bitsPerSample == 8) {
|
||||
formatInfo.sampleFormat = SampleFormat::UInt8;
|
||||
bytesPerSample = 1;
|
||||
} else if (bitsPerSample <= 16) {
|
||||
formatInfo.sampleFormat = SampleFormat::Int16;
|
||||
bytesPerSample = 2;
|
||||
} else if (bitsPerSample <= 24) {
|
||||
formatInfo.sampleFormat = SampleFormat::Int24;
|
||||
bytesPerSample = 3;
|
||||
} else if (bitsPerSample <= 32) {
|
||||
formatInfo.sampleFormat = SampleFormat::Int32;
|
||||
bytesPerSample = 4;
|
||||
} else {
|
||||
throw runtime_error(
|
||||
format("Unsupported sample format: {}-bit PCM.", bitsPerSample));
|
||||
}
|
||||
if (bytesPerSample != bytesPerFrame / formatInfo.channelCount) {
|
||||
throw runtime_error("Unsupported sample organization.");
|
||||
}
|
||||
break;
|
||||
case Codec::Float:
|
||||
if (bitsPerSample == 32) {
|
||||
formatInfo.sampleFormat = SampleFormat::Float32;
|
||||
bytesPerSample = 4;
|
||||
} else if (bitsPerSample == 64) {
|
||||
formatInfo.sampleFormat = SampleFormat::Float64;
|
||||
bytesPerSample = 8;
|
||||
} else {
|
||||
throw runtime_error(
|
||||
format("Unsupported sample format: {}-bit IEEE Float.", bitsPerSample)
|
||||
);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw runtime_error(format(
|
||||
"Unsupported audio codec: '{}'. Only uncompressed codecs ('{}' and '{}') are supported.",
|
||||
codecToString(codec), codecToString(Codec::Pcm), codecToString(Codec::Float)
|
||||
));
|
||||
}
|
||||
formatInfo.bytesPerFrame = bytesPerSample * formatInfo.channelCount;
|
||||
processedFormatChunk = true;
|
||||
break;
|
||||
}
|
||||
case fourcc('d', 'a', 't', 'a'):
|
||||
{
|
||||
formatInfo.dataOffset = file.tellg();
|
||||
formatInfo.frameCount = chunkSize / formatInfo.bytesPerFrame;
|
||||
processedDataChunk = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
// Ignore unknown chunk
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Determine sample format
|
||||
int bytesPerSample;
|
||||
switch (codec) {
|
||||
case Codec::Pcm:
|
||||
// Determine sample size.
|
||||
// According to the WAVE standard, sample sizes that are not multiples of 8
|
||||
// bits (e.g. 12 bits) can be treated like the next-larger byte size.
|
||||
if (bitsPerSample == 8) {
|
||||
formatInfo.sampleFormat = SampleFormat::UInt8;
|
||||
bytesPerSample = 1;
|
||||
} else if (bitsPerSample <= 16) {
|
||||
formatInfo.sampleFormat = SampleFormat::Int16;
|
||||
bytesPerSample = 2;
|
||||
} else if (bitsPerSample <= 24) {
|
||||
formatInfo.sampleFormat = SampleFormat::Int24;
|
||||
bytesPerSample = 3;
|
||||
} else if (bitsPerSample <= 32) {
|
||||
formatInfo.sampleFormat = SampleFormat::Int32;
|
||||
bytesPerSample = 4;
|
||||
} else {
|
||||
throw runtime_error(
|
||||
format("Unsupported sample format: {}-bit PCM.", bitsPerSample));
|
||||
}
|
||||
if (bytesPerSample != bytesPerFrame / formatInfo.channelCount) {
|
||||
throw runtime_error("Unsupported sample organization.");
|
||||
}
|
||||
break;
|
||||
case Codec::Float:
|
||||
if (bitsPerSample == 32) {
|
||||
formatInfo.sampleFormat = SampleFormat::Float32;
|
||||
bytesPerSample = 4;
|
||||
} else if (bitsPerSample == 64) {
|
||||
formatInfo.sampleFormat = SampleFormat::Float64;
|
||||
bytesPerSample = 8;
|
||||
} else {
|
||||
throw runtime_error(
|
||||
format("Unsupported sample format: {}-bit IEEE Float.", bitsPerSample)
|
||||
);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw runtime_error(format(
|
||||
"Unsupported audio codec: '{}'. Only uncompressed codecs ('{}' and '{}') are supported.",
|
||||
codecToString(codec), codecToString(Codec::Pcm), codecToString(Codec::Float)
|
||||
));
|
||||
}
|
||||
formatInfo.bytesPerFrame = bytesPerSample * formatInfo.channelCount;
|
||||
processedFormatChunk = true;
|
||||
break;
|
||||
}
|
||||
case fourcc('d', 'a', 't', 'a'):
|
||||
{
|
||||
formatInfo.dataOffset = file.tellg();
|
||||
formatInfo.frameCount = chunkSize / formatInfo.bytesPerFrame;
|
||||
processedDataChunk = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
// Ignore unknown chunk
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Seek to end of chunk
|
||||
file.seekg(chunkEnd, std::ios_base::beg);
|
||||
}
|
||||
// Seek to end of chunk
|
||||
file.seekg(chunkEnd, std::ios_base::beg);
|
||||
}
|
||||
|
||||
if (!processedFormatChunk) throw runtime_error("Missing format chunk.");
|
||||
if (!processedDataChunk) throw runtime_error("Missing data chunk.");
|
||||
if (!processedFormatChunk) throw runtime_error("Missing format chunk.");
|
||||
if (!processedDataChunk) throw runtime_error("Missing data chunk.");
|
||||
|
||||
return formatInfo;
|
||||
return formatInfo;
|
||||
}
|
||||
|
||||
WaveFileReader::WaveFileReader(const path& filePath) :
|
||||
filePath(filePath),
|
||||
formatInfo(getWaveFormatInfo(filePath)) {}
|
||||
filePath(filePath),
|
||||
formatInfo(getWaveFormatInfo(filePath)) {}
|
||||
|
||||
unique_ptr<AudioClip> WaveFileReader::clone() const {
|
||||
return make_unique<WaveFileReader>(*this);
|
||||
return make_unique<WaveFileReader>(*this);
|
||||
}
|
||||
|
||||
inline AudioClip::value_type readSample(
|
||||
std::ifstream& file,
|
||||
SampleFormat sampleFormat,
|
||||
int channelCount
|
||||
std::ifstream& file,
|
||||
SampleFormat sampleFormat,
|
||||
int channelCount
|
||||
) {
|
||||
float sum = 0;
|
||||
for (int channelIndex = 0; channelIndex < channelCount; channelIndex++) {
|
||||
switch (sampleFormat) {
|
||||
case SampleFormat::UInt8:
|
||||
{
|
||||
const uint8_t raw = read<uint8_t>(file);
|
||||
sum += toNormalizedFloat(raw, 0, UINT8_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Int16:
|
||||
{
|
||||
const int16_t raw = read<int16_t>(file);
|
||||
sum += toNormalizedFloat(raw, INT16_MIN, INT16_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Int24:
|
||||
{
|
||||
int raw = read<int, 24>(file);
|
||||
if (raw & 0x800000) raw |= 0xFF000000; // Fix two's complement
|
||||
sum += toNormalizedFloat(raw, INT24_MIN, INT24_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Int32:
|
||||
{
|
||||
const int32_t raw = read<int32_t>(file);
|
||||
sum += toNormalizedFloat(raw, INT32_MIN, INT32_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Float32:
|
||||
{
|
||||
sum += read<float>(file);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Float64:
|
||||
{
|
||||
sum += static_cast<float>(read<double>(file));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
float sum = 0;
|
||||
for (int channelIndex = 0; channelIndex < channelCount; channelIndex++) {
|
||||
switch (sampleFormat) {
|
||||
case SampleFormat::UInt8:
|
||||
{
|
||||
const uint8_t raw = read<uint8_t>(file);
|
||||
sum += toNormalizedFloat(raw, 0, UINT8_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Int16:
|
||||
{
|
||||
const int16_t raw = read<int16_t>(file);
|
||||
sum += toNormalizedFloat(raw, INT16_MIN, INT16_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Int24:
|
||||
{
|
||||
int raw = read<int, 24>(file);
|
||||
if (raw & 0x800000) raw |= 0xFF000000; // Fix two's complement
|
||||
sum += toNormalizedFloat(raw, INT24_MIN, INT24_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Int32:
|
||||
{
|
||||
const int32_t raw = read<int32_t>(file);
|
||||
sum += toNormalizedFloat(raw, INT32_MIN, INT32_MAX);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Float32:
|
||||
{
|
||||
sum += read<float>(file);
|
||||
break;
|
||||
}
|
||||
case SampleFormat::Float64:
|
||||
{
|
||||
sum += static_cast<float>(read<double>(file));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sum / channelCount;
|
||||
return sum / channelCount;
|
||||
}
|
||||
|
||||
SampleReader WaveFileReader::createUnsafeSampleReader() const {
|
||||
return
|
||||
[
|
||||
formatInfo = formatInfo,
|
||||
file = std::make_shared<std::ifstream>(openFile(filePath)),
|
||||
filePos = std::streampos(0)
|
||||
](size_type index) mutable {
|
||||
const std::streampos newFilePos = formatInfo.dataOffset
|
||||
+ static_cast<streamoff>(index * formatInfo.bytesPerFrame);
|
||||
if (newFilePos != filePos) {
|
||||
file->seekg(newFilePos);
|
||||
}
|
||||
const value_type result =
|
||||
readSample(*file, formatInfo.sampleFormat, formatInfo.channelCount);
|
||||
filePos = newFilePos + static_cast<streamoff>(formatInfo.bytesPerFrame);
|
||||
return result;
|
||||
};
|
||||
return
|
||||
[
|
||||
formatInfo = formatInfo,
|
||||
file = std::make_shared<std::ifstream>(openFile(filePath)),
|
||||
filePos = std::streampos(0)
|
||||
](size_type index) mutable {
|
||||
const std::streampos newFilePos = formatInfo.dataOffset
|
||||
+ static_cast<streamoff>(index * formatInfo.bytesPerFrame);
|
||||
if (newFilePos != filePos) {
|
||||
file->seekg(newFilePos);
|
||||
}
|
||||
const value_type result =
|
||||
readSample(*file, formatInfo.sampleFormat, formatInfo.channelCount);
|
||||
filePos = newFilePos + static_cast<streamoff>(formatInfo.bytesPerFrame);
|
||||
return result;
|
||||
};
|
||||
}
|
||||

string codecToString(int codec) {
    switch (codec) {
        case 0x0001: return "PCM";
        case 0x0002: return "Microsoft ADPCM";
        case 0x0003: return "IEEE Float";
        case 0x0004: return "Compaq VSELP";
        case 0x0005: return "IBM CVSD";
        case 0x0006: return "Microsoft a-Law";
        case 0x0007: return "Microsoft u-Law";
        case 0x0008: return "Microsoft DTS";
        case 0x0009: return "DRM";
        case 0x000a: return "WMA 9 Speech";
        case 0x000b: return "Microsoft Windows Media RT Voice";
        case 0x0010: return "OKI-ADPCM";
        case 0x0011: return "Intel IMA/DVI-ADPCM";
        case 0x0012: return "Videologic Mediaspace ADPCM";
        case 0x0013: return "Sierra ADPCM";
        case 0x0014: return "Antex G.723 ADPCM";
        case 0x0015: return "DSP Solutions DIGISTD";
        case 0x0016: return "DSP Solutions DIGIFIX";
        case 0x0017: return "Dialogic OKI ADPCM";
        case 0x0018: return "Media Vision ADPCM";
        case 0x0019: return "HP CU";
        case 0x001a: return "HP Dynamic Voice";
        case 0x0020: return "Yamaha ADPCM";
        case 0x0021: return "SONARC Speech Compression";
        case 0x0022: return "DSP Group True Speech";
        case 0x0023: return "Echo Speech Corp.";
        case 0x0024: return "Virtual Music Audiofile AF36";
        case 0x0025: return "Audio Processing Tech.";
        case 0x0026: return "Virtual Music Audiofile AF10";
        case 0x0027: return "Aculab Prosody 1612";
        case 0x0028: return "Merging Tech. LRC";
        case 0x0030: return "Dolby AC2";
        case 0x0031: return "Microsoft GSM610";
        case 0x0032: return "MSN Audio";
        case 0x0033: return "Antex ADPCME";
        case 0x0034: return "Control Resources VQLPC";
        case 0x0035: return "DSP Solutions DIGIREAL";
        case 0x0036: return "DSP Solutions DIGIADPCM";
        case 0x0037: return "Control Resources CR10";
        case 0x0038: return "Natural MicroSystems VBX ADPCM";
        case 0x0039: return "Crystal Semiconductor IMA ADPCM";
        case 0x003a: return "Echo Speech ECHOSC3";
        case 0x003b: return "Rockwell ADPCM";
        case 0x003c: return "Rockwell DIGITALK";
        case 0x003d: return "Xebec Multimedia";
        case 0x0040: return "Antex G.721 ADPCM";
        case 0x0041: return "Antex G.728 CELP";
        case 0x0042: return "Microsoft MSG723";
        case 0x0043: return "IBM AVC ADPCM";
        case 0x0045: return "ITU-T G.726";
        case 0x0050: return "Microsoft MPEG";
        case 0x0051: return "RT23 or PAC";
        case 0x0052: return "InSoft RT24";
        case 0x0053: return "InSoft PAC";
        case 0x0055: return "MP3";
        case 0x0059: return "Cirrus";
        case 0x0060: return "Cirrus Logic";
        case 0x0061: return "ESS Tech. PCM";
        case 0x0062: return "Voxware Inc.";
        case 0x0063: return "Canopus ATRAC";
        case 0x0064: return "APICOM G.726 ADPCM";
        case 0x0065: return "APICOM G.722 ADPCM";
        case 0x0066: return "Microsoft DSAT";
        case 0x0067: return "Microsoft DSAT DISPLAY";
        case 0x0069: return "Voxware Byte Aligned";
        case 0x0070: return "Voxware AC8";
        case 0x0071: return "Voxware AC10";
        case 0x0072: return "Voxware AC16";
        case 0x0073: return "Voxware AC20";
        case 0x0074: return "Voxware MetaVoice";
        case 0x0075: return "Voxware MetaSound";
        case 0x0076: return "Voxware RT29HW";
        case 0x0077: return "Voxware VR12";
        case 0x0078: return "Voxware VR18";
        case 0x0079: return "Voxware TQ40";
        case 0x007a: return "Voxware SC3";
        case 0x007b: return "Voxware SC3";
        case 0x0080: return "Soundsoft";
        case 0x0081: return "Voxware TQ60";
        case 0x0082: return "Microsoft MSRT24";
        case 0x0083: return "AT&T G.729A";
        case 0x0084: return "Motion Pixels MVI MV12";
        case 0x0085: return "DataFusion G.726";
        case 0x0086: return "DataFusion GSM610";
        case 0x0088: return "Iterated Systems Audio";
        case 0x0089: return "Onlive";
        case 0x008a: return "Multitude, Inc. FT SX20";
        case 0x008b: return "Infocom ITS A/S G.721 ADPCM";
        case 0x008c: return "Convedia G729";
        case 0x008d: return "Not specified congruency, Inc.";
        case 0x0091: return "Siemens SBC24";
        case 0x0092: return "Sonic Foundry Dolby AC3 APDIF";
        case 0x0093: return "MediaSonic G.723";
        case 0x0094: return "Aculab Prosody 8kbps";
        case 0x0097: return "ZyXEL ADPCM";
        case 0x0098: return "Philips LPCBB";
        case 0x0099: return "Studer Professional Audio Packed";
        case 0x00a0: return "Malden PhonyTalk";
        case 0x00a1: return "Racal Recorder GSM";
        case 0x00a2: return "Racal Recorder G720.a";
        case 0x00a3: return "Racal G723.1";
        case 0x00a4: return "Racal Tetra ACELP";
        case 0x00b0: return "NEC AAC NEC Corporation";
        case 0x00ff: return "AAC";
        case 0x0100: return "Rhetorex ADPCM";
        case 0x0101: return "IBM u-Law";
        case 0x0102: return "IBM a-Law";
        case 0x0103: return "IBM ADPCM";
        case 0x0111: return "Vivo G.723";
        case 0x0112: return "Vivo Siren";
        case 0x0120: return "Philips Speech Processing CELP";
        case 0x0121: return "Philips Speech Processing GRUNDIG";
        case 0x0123: return "Digital G.723";
        case 0x0125: return "Sanyo LD ADPCM";
        case 0x0130: return "Sipro Lab ACEPLNET";
        case 0x0131: return "Sipro Lab ACELP4800";
        case 0x0132: return "Sipro Lab ACELP8V3";
        case 0x0133: return "Sipro Lab G.729";
        case 0x0134: return "Sipro Lab G.729A";
        case 0x0135: return "Sipro Lab Kelvin";
        case 0x0136: return "VoiceAge AMR";
        case 0x0140: return "Dictaphone G.726 ADPCM";
        case 0x0150: return "Qualcomm PureVoice";
        case 0x0151: return "Qualcomm HalfRate";
        case 0x0155: return "Ring Zero Systems TUBGSM";
        case 0x0160: return "Microsoft Audio1";
        case 0x0161: return "Windows Media Audio V2 V7 V8 V9 / DivX audio (WMA) / Alex AC3 Audio";
        case 0x0162: return "Windows Media Audio Professional V9";
        case 0x0163: return "Windows Media Audio Lossless V9";
        case 0x0164: return "WMA Pro over S/PDIF";
        case 0x0170: return "UNISYS NAP ADPCM";
        case 0x0171: return "UNISYS NAP ULAW";
        case 0x0172: return "UNISYS NAP ALAW";
        case 0x0173: return "UNISYS NAP 16K";
        case 0x0174: return "MM SYCOM ACM SYC008 SyCom Technologies";
        case 0x0175: return "MM SYCOM ACM SYC701 G726L SyCom Technologies";
        case 0x0176: return "MM SYCOM ACM SYC701 CELP54 SyCom Technologies";
        case 0x0177: return "MM SYCOM ACM SYC701 CELP68 SyCom Technologies";
        case 0x0178: return "Knowledge Adventure ADPCM";
        case 0x0180: return "Fraunhofer IIS MPEG2AAC";
        case 0x0190: return "Digital Theater Systems DTS DS";
        case 0x0200: return "Creative Labs ADPCM";
        case 0x0202: return "Creative Labs FASTSPEECH8";
        case 0x0203: return "Creative Labs FASTSPEECH10";
        case 0x0210: return "UHER ADPCM";
        case 0x0215: return "Ulead DV ACM";
        case 0x0216: return "Ulead DV ACM";
        case 0x0220: return "Quarterdeck Corp.";
        case 0x0230: return "I-Link VC";
        case 0x0240: return "Aureal Semiconductor Raw Sport";
        case 0x0241: return "ESST AC3";
        case 0x0250: return "Interactive Products HSX";
        case 0x0251: return "Interactive Products RPELP";
        case 0x0260: return "Consistent CS2";
        case 0x0270: return "Sony SCX";
        case 0x0271: return "Sony SCY";
        case 0x0272: return "Sony ATRAC3";
        case 0x0273: return "Sony SPC";
        case 0x0280: return "TELUM Telum Inc.";
        case 0x0281: return "TELUMIA Telum Inc.";
        case 0x0285: return "Norcom Voice Systems ADPCM";
        case 0x0300: return "Fujitsu FM TOWNS SND";
        case 0x0301:
        case 0x0302:
        case 0x0303:
        case 0x0304:
        case 0x0305:
        case 0x0306:
        case 0x0307:
        case 0x0308: return "Fujitsu (not specified)";
        case 0x0350: return "Micronas Semiconductors, Inc. Development";
        case 0x0351: return "Micronas Semiconductors, Inc. CELP833";
        case 0x0400: return "Brooktree Digital";
        case 0x0401: return "Intel Music Coder (IMC)";
        case 0x0402: return "Ligos Indeo Audio";
        case 0x0450: return "QDesign Music";
        case 0x0500: return "On2 VP7 On2 Technologies";
        case 0x0501: return "On2 VP6 On2 Technologies";
        case 0x0680: return "AT&T VME VMPCM";
        case 0x0681: return "AT&T TCP";
        case 0x0700: return "YMPEG Alpha (dummy for MPEG-2 compressor)";
        case 0x08ae: return "ClearJump LiteWave (lossless)";
        case 0x1000: return "Olivetti GSM";
        case 0x1001: return "Olivetti ADPCM";
        case 0x1002: return "Olivetti CELP";
        case 0x1003: return "Olivetti SBC";
        case 0x1004: return "Olivetti OPR";
        case 0x1100: return "Lernout & Hauspie";
        case 0x1101: return "Lernout & Hauspie CELP codec";
        case 0x1102:
        case 0x1103:
        case 0x1104: return "Lernout & Hauspie SBC codec";
        case 0x1400: return "Norris Comm. Inc.";
        case 0x1401: return "ISIAudio";
        case 0x1500: return "AT&T Soundspace Music Compression";
        case 0x181c: return "VoxWare RT24 speech codec";
        case 0x181e: return "Lucent elemedia AX24000P Music codec";
        case 0x1971: return "Sonic Foundry LOSSLESS";
        case 0x1979: return "Innings Telecom Inc. ADPCM";
        case 0x1c07: return "Lucent SX8300P speech codec";
        case 0x1c0c: return "Lucent SX5363S G.723 compliant codec";
        case 0x1f03: return "CUseeMe DigiTalk (ex-Rockwell)";
        case 0x1fc4: return "NCT Soft ALF2CD ACM";
        case 0x2000: return "FAST Multimedia DVM";
        case 0x2001: return "Dolby DTS (Digital Theater System)";
        case 0x2002: return "RealAudio 1 / 2 14.4";
        case 0x2003: return "RealAudio 1 / 2 28.8";
        case 0x2004: return "RealAudio G2 / 8 Cook (low bitrate)";
        case 0x2005: return "RealAudio 3 / 4 / 5 Music (DNET)";
        case 0x2006: return "RealAudio 10 AAC (RAAC)";
        case 0x2007: return "RealAudio 10 AAC+ (RACP)";
        case 0x2500: return "Reserved range to 0x2600 Microsoft";
        case 0x3313: return "makeAVIS (ffvfw fake AVI sound from AviSynth scripts)";
        case 0x4143: return "Divio MPEG-4 AAC audio";
        case 0x4201: return "Nokia adaptive multirate";
        case 0x4243: return "Divio G726 Divio, Inc.";
        case 0x434c: return "LEAD Speech";
        case 0x564c: return "LEAD Vorbis";
        case 0x5756: return "WavPack Audio";
        case 0x674f: return "Ogg Vorbis (mode 1)";
        case 0x6750: return "Ogg Vorbis (mode 2)";
        case 0x6751: return "Ogg Vorbis (mode 3)";
        case 0x676f: return "Ogg Vorbis (mode 1+)";
        case 0x6770: return "Ogg Vorbis (mode 2+)";
        case 0x6771: return "Ogg Vorbis (mode 3+)";
        case 0x7000: return "3COM NBX 3Com Corporation";
        case 0x706d: return "FAAD AAC";
        case 0x7a21: return "GSM-AMR (CBR, no SID)";
        case 0x7a22: return "GSM-AMR (VBR, including SID)";
        case 0xa100: return "Comverse Infosys Ltd. G723 1";
        case 0xa101: return "Comverse Infosys Ltd. AVQSBC";
        case 0xa102: return "Comverse Infosys Ltd. OLDSBC";
        case 0xa103: return "Symbol Technologies G729A";
        case 0xa104: return "VoiceAge AMR WB VoiceAge Corporation";
        case 0xa105: return "Ingenient Technologies Inc. G726";
        case 0xa106: return "ISO/MPEG-4 advanced audio Coding";
        case 0xa107: return "Encore Software Ltd G726";
        case 0xa109: return "Speex ACM Codec xiph.org";
        case 0xdfac: return "DebugMode SonicFoundry Vegas FrameServer ACM Codec";
        case 0xf1ac: return "Free Lossless Audio Codec FLAC";
        case 0xfffe: return "Extensible";
        case 0xffff: return "Development";
        default:
            return format("{0:#x}", codec);
    }
}
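Unknown codec IDs fall through to the default branch and come back as hex:

    codecToString(0x0001); // "PCM"
    codecToString(0x1234); // no table entry, so format("{0:#x}", ...) yields "0x1234"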
@@ -4,43 +4,43 @@
#include "AudioClip.h"

enum class SampleFormat {
    UInt8,
    Int16,
    Int24,
    Int32,
    Float32,
    Float64
};

struct WaveFormatInfo {
    int bytesPerFrame;
    SampleFormat sampleFormat;
    int frameRate;
    int64_t frameCount;
    int channelCount;
    std::streampos dataOffset;
};

WaveFormatInfo getWaveFormatInfo(const std::filesystem::path& filePath);

class WaveFileReader : public AudioClip {
public:
    WaveFileReader(const std::filesystem::path& filePath);
    std::unique_ptr<AudioClip> clone() const override;
    int getSampleRate() const override;
    size_type size() const override;

private:
    SampleReader createUnsafeSampleReader() const override;

    std::filesystem::path filePath;
    WaveFormatInfo formatInfo;
};

inline int WaveFileReader::getSampleRate() const {
    return formatInfo.frameRate;
}

inline AudioClip::size_type WaveFileReader::size() const {
    return formatInfo.frameCount;
}
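A minimal usage sketch for this header (hypothetical file name; error handling omitted):

    WaveFileReader reader("speech.wav");                    // reads the WAVE header into formatInfo
    const int sampleRate = reader.getSampleRate();          // frames per second
    const AudioClip::size_type frameCount = reader.size();  // total frame count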
@@ -10,20 +10,20 @@ using std::runtime_error;
using fmt::format;

std::unique_ptr<AudioClip> createAudioFileClip(path filePath) {
    try {
        const string extension =
            boost::algorithm::to_lower_copy(filePath.extension().u8string());
        if (extension == ".wav") {
            return std::make_unique<WaveFileReader>(filePath);
        }
        if (extension == ".ogg") {
            return std::make_unique<OggVorbisFileReader>(filePath);
        }
        throw runtime_error(format(
            "Unsupported file extension '{}'. Supported extensions are '.wav' and '.ogg'.",
            extension
        ));
    } catch (...) {
        std::throw_with_nested(runtime_error(format("Could not open sound file {}.", filePath.u8string())));
    }
}
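Because failures are wrapped with std::throw_with_nested, a caller that unwinds the nesting sees both the outer and the inner message. A hypothetical example:

    try {
        const auto clip = createAudioFileClip("dialog.mp3");
    } catch (const std::exception& e) {
        // e.what():      "Could not open sound file dialog.mp3."
        // nested cause:  "Unsupported file extension '.mp3'. ..."
        // (reachable via std::rethrow_if_nested in a recursive printer)
    }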
@@ -4,43 +4,43 @@

namespace little_endian {

template<typename Type, int bitsToRead = 8 * sizeof(Type)>
Type read(std::istream& stream) {
    static_assert(bitsToRead % 8 == 0, "Cannot read fractional bytes.");
    static_assert(bitsToRead <= sizeof(Type) * 8, "Bits to read exceed target type size.");

    Type result = 0;
    char* p = reinterpret_cast<char*>(&result);
    const int bytesToRead = bitsToRead / 8;
    for (int byteIndex = 0; byteIndex < bytesToRead; byteIndex++) {
        *(p + byteIndex) = static_cast<char>(stream.get());
    }
    return result;
}

template<typename Type, int bitsToWrite = 8 * sizeof(Type)>
void write(Type value, std::ostream& stream) {
    static_assert(bitsToWrite % 8 == 0, "Cannot write fractional bytes.");
    static_assert(bitsToWrite <= sizeof(Type) * 8, "Bits to write exceed target type size.");

    char* p = reinterpret_cast<char*>(&value);
    const int bytesToWrite = bitsToWrite / 8;
    for (int byteIndex = 0; byteIndex < bytesToWrite; byteIndex++) {
        stream.put(*(p + byteIndex));
    }
}

constexpr uint32_t fourcc(
    unsigned char c0,
    unsigned char c1,
    unsigned char c2,
    unsigned char c3
) {
    return c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
}

inline std::string fourccToString(uint32_t fourcc) {
    return std::string(reinterpret_cast<char*>(&fourcc), 4);
}

}
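A worked example for fourcc: the first character lands in the least significant byte, so the constant matches the on-disk byte order of a RIFF tag.

    static_assert(little_endian::fourcc('R', 'I', 'F', 'F') == 0x46464952,
        "'R' = 0x52 ends up in the lowest byte");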
@@ -6,53 +6,53 @@ using std::vector;

// Converts a float in the range -1..1 to a signed 16-bit int
inline int16_t floatSampleToInt16(float sample) {
    sample = std::max(sample, -1.0f);
    sample = std::min(sample, 1.0f);
    return static_cast<int16_t>(((sample + 1) / 2) * (INT16_MAX - INT16_MIN) + INT16_MIN);
}

void process16bitAudioClip(
    const AudioClip& audioClip,
    const function<void(const vector<int16_t>&)>& processBuffer,
    size_t bufferCapacity,
    ProgressSink& progressSink
) {
    // Process entire sound stream
    vector<int16_t> buffer;
    buffer.reserve(bufferCapacity);
    size_t sampleCount = 0;
    auto it = audioClip.begin();
    const auto end = audioClip.end();
    do {
        // Read to buffer
        buffer.clear();
        for (; buffer.size() < bufferCapacity && it != end; ++it) {
            // Read sample to buffer
            buffer.push_back(floatSampleToInt16(*it));
        }

        // Process buffer
        processBuffer(buffer);

        sampleCount += buffer.size();
        progressSink.reportProgress(static_cast<double>(sampleCount) / static_cast<double>(audioClip.size()));
    } while (!buffer.empty());
}

void process16bitAudioClip(
    const AudioClip& audioClip,
    const function<void(const vector<int16_t>&)>& processBuffer,
    ProgressSink& progressSink
) {
    const size_t capacity = 1600; // 0.1 second capacity
    process16bitAudioClip(audioClip, processBuffer, capacity, progressSink);
}

vector<int16_t> copyTo16bitBuffer(const AudioClip& audioClip) {
    vector<int16_t> result(static_cast<size_t>(audioClip.size()));
    int index = 0;
    for (float sample : audioClip) {
        result[index++] = floatSampleToInt16(sample);
    }
    return result;
}
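Worked values for the float-to-int16 mapping above (the final cast truncates toward zero):

    // sample = -1.0f  ->  (0 / 2) * 65535 - 32768  =  -32768  (INT16_MIN)
    // sample =  1.0f  ->  (2 / 2) * 65535 - 32768  =   32767  (INT16_MAX)
    // sample =  0.0f  ->  0.5 * 65535 - 32768  =  -0.5, truncated to 0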
@@ -6,16 +6,16 @@
#include "tools/progress.h"

void process16bitAudioClip(
    const AudioClip& audioClip,
    const std::function<void(const std::vector<int16_t>&)>& processBuffer,
    size_t bufferCapacity,
    ProgressSink& progressSink
);

void process16bitAudioClip(
    const AudioClip& audioClip,
    const std::function<void(const std::vector<int16_t>&)>& processBuffer,
    ProgressSink& progressSink
);

std::vector<int16_t> copyTo16bitBuffer(const AudioClip& audioClip);
@@ -17,79 +17,79 @@ using std::runtime_error;
using std::unique_ptr;

JoiningBoundedTimeline<void> detectVoiceActivity(
    const AudioClip& inputAudioClip,
    ProgressSink& progressSink
) {
    // Prepare audio for VAD
    constexpr int webRtcSamplingRate = 8000;
    const unique_ptr<AudioClip> audioClip = inputAudioClip.clone()
        | resample(webRtcSamplingRate)
        | removeDcOffset();

    VadInst* vadHandle = WebRtcVad_Create();
    if (!vadHandle) throw runtime_error("Error creating WebRTC VAD handle.");

    auto freeHandle = gsl::finally([&]() { WebRtcVad_Free(vadHandle); });

    int error = WebRtcVad_Init(vadHandle);
    if (error) throw runtime_error("Error initializing WebRTC VAD.");

    const int aggressiveness = 2; // 0..3. The higher, the more is cut off.
    error = WebRtcVad_set_mode(vadHandle, aggressiveness);
    if (error) throw runtime_error("Error setting WebRTC VAD aggressiveness.");

    // Detect activity
    JoiningBoundedTimeline<void> activity(audioClip->getTruncatedRange());
    centiseconds time = 0_cs;
    const size_t frameSize = webRtcSamplingRate / 100;
    const auto processBuffer = [&](const vector<int16_t>& buffer) {
        // WebRTC is picky regarding buffer size
        if (buffer.size() < frameSize) return;

        const int result = WebRtcVad_Process(
            vadHandle,
            webRtcSamplingRate,
            buffer.data(),
            buffer.size()
        );
        if (result == -1) throw runtime_error("Error processing audio buffer using WebRTC VAD.");

        // Ignore the result of WebRtcVad_Process; instead, directly interpret the internal VAD flag.
        // The result of WebRtcVad_Process stays 1 for a number of frames after the last detected
        // activity.
        const bool isActive = reinterpret_cast<VadInstT*>(vadHandle)->vad == 1;

        if (isActive) {
            activity.set(time, time + 1_cs);
        }

        time += 1_cs;
    };
    process16bitAudioClip(*audioClip, processBuffer, frameSize, progressSink);

    // Fill small gaps in activity
    const centiseconds maxGap(10);
    for (const auto& pair : getPairs(activity)) {
        if (pair.second.getStart() - pair.first.getEnd() <= maxGap) {
            activity.set(pair.first.getEnd(), pair.second.getStart());
        }
    }

    // Discard very short segments of activity
    const centiseconds minSegmentLength(5);
    for (const auto& segment : Timeline<void>(activity)) {
        if (segment.getDuration() < minSegmentLength) {
            activity.clear(segment.getTimeRange());
        }
    }

    logging::debugFormat(
        "Found {} sections of voice activity: {}",
        activity.size(),
        join(activity | transformed([](const Timed<void>& t) {
            return format("{0}-{1}", t.getStart(), t.getEnd());
        }), ", ")
    );

    return activity;
}
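The buffer arithmetic is what ties the VAD to the timeline:

    // 8000 samples/s / 100 = 80 samples per buffer = 10 ms = 1_cs,
    // which is why the handler advances `time` by exactly 1_cs per buffer.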
@@ -4,6 +4,6 @@
#include "tools/progress.h"

JoiningBoundedTimeline<void> detectVoiceActivity(
    const AudioClip& audioClip,
    ProgressSink& progressSink
);
@@ -5,39 +5,39 @@
using namespace little_endian;

void createWaveFile(const AudioClip& audioClip, std::string fileName) {
    // Open file
    std::ofstream file;
    file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
    file.open(fileName, std::ios::out | std::ios::binary);

    // Write RIFF chunk
    write<uint32_t>(fourcc('R', 'I', 'F', 'F'), file);
    const uint32_t formatChunkSize = 16;
    const uint16_t channelCount = 1;
    const uint16_t frameSize = static_cast<uint16_t>(channelCount * sizeof(float));
    const uint32_t dataChunkSize = static_cast<uint32_t>(audioClip.size() * frameSize);
    const uint32_t riffChunkSize = 4 + (8 + formatChunkSize) + (8 + dataChunkSize);
    write<uint32_t>(riffChunkSize, file);
    write<uint32_t>(fourcc('W', 'A', 'V', 'E'), file);

    // Write format chunk
    write<uint32_t>(fourcc('f', 'm', 't', ' '), file);
    write<uint32_t>(formatChunkSize, file);
    const uint16_t codec = 0x03; // 32-bit float
    write<uint16_t>(codec, file);
    write<uint16_t>(channelCount, file);
    const uint32_t frameRate = static_cast<uint32_t>(audioClip.getSampleRate());
    write<uint32_t>(frameRate, file);
    const uint32_t bytesPerSecond = frameRate * frameSize;
    write<uint32_t>(bytesPerSecond, file);
    write<uint16_t>(frameSize, file);
    const uint16_t bitsPerSample = 8 * sizeof(float);
    write<uint16_t>(bitsPerSample, file);

    // Write data chunk
    write<uint32_t>(fourcc('d', 'a', 't', 'a'), file);
    write<uint32_t>(dataChunkSize, file);
    for (float sample : audioClip) {
        write<float>(sample, file);
    }
}
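A quick check of the chunk sizes written above:

    // riffChunkSize counts everything after the RIFF header's own 8 bytes:
    //   "WAVE" tag .................. 4
    // + "fmt " id and size field .... 8
    // + format chunk payload ........ 16  (formatChunkSize)
    // + "data" id and size field .... 8
    // + sample data ................. dataChunkSize
    // = 4 + (8 + 16) + (8 + dataChunkSize)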
@@ -4,89 +4,89 @@ using std::string;
using boost::optional;

PhoneConverter& PhoneConverter::get() {
    static PhoneConverter converter;
    return converter;
}

string PhoneConverter::getTypeName() {
    return "Phone";
}

EnumConverter<Phone>::member_data PhoneConverter::getMemberData() {
    return member_data {
        { Phone::AO, "AO" },
        { Phone::AA, "AA" },
        { Phone::IY, "IY" },
        { Phone::UW, "UW" },
        { Phone::EH, "EH" },
        { Phone::IH, "IH" },
        { Phone::UH, "UH" },
        { Phone::AH, "AH" },
        { Phone::Schwa, "Schwa" },
        { Phone::AE, "AE" },
        { Phone::EY, "EY" },
        { Phone::AY, "AY" },
        { Phone::OW, "OW" },
        { Phone::AW, "AW" },
        { Phone::OY, "OY" },
        { Phone::ER, "ER" },

        { Phone::P, "P" },
        { Phone::B, "B" },
        { Phone::T, "T" },
        { Phone::D, "D" },
        { Phone::K, "K" },
        { Phone::G, "G" },
        { Phone::CH, "CH" },
        { Phone::JH, "JH" },
        { Phone::F, "F" },
        { Phone::V, "V" },
        { Phone::TH, "TH" },
        { Phone::DH, "DH" },
        { Phone::S, "S" },
        { Phone::Z, "Z" },
        { Phone::SH, "SH" },
        { Phone::ZH, "ZH" },
        { Phone::HH, "HH" },
        { Phone::M, "M" },
        { Phone::N, "N" },
        { Phone::NG, "NG" },
        { Phone::L, "L" },
        { Phone::R, "R" },
        { Phone::Y, "Y" },
        { Phone::W, "W" },

        { Phone::Breath, "Breath" },
        { Phone::Cough, "Cough" },
        { Phone::Smack, "Smack" },
        { Phone::Noise, "Noise" }
    };
}

optional<Phone> PhoneConverter::tryParse(const string& s) {
    auto result = EnumConverter<Phone>::tryParse(s);
    if (result) return result;

    if (s == "+BREATH+") {
        return Phone::Breath;
    }
    if (s == "+COUGH+") {
        return Phone::Cough;
    }
    if (s == "+SMACK+") {
        return Phone::Smack;
    }
    return Phone::Noise;
}

std::ostream& operator<<(std::ostream& stream, Phone value) {
    return PhoneConverter::get().write(stream, value);
}

std::istream& operator>>(std::istream& stream, Phone& value) {
    return PhoneConverter::get().read(stream, value);
}

bool isVowel(Phone phone) {
    return phone <= Phone::LastVowel;
}
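One behavior of tryParse worth noting: a token that is neither a known Arpabet name nor one of the +BREATH+/+COUGH+/+SMACK+ markers maps to Phone::Noise instead of failing:

    PhoneConverter::get().tryParse("AO");      // Phone::AO
    PhoneConverter::get().tryParse("+SMACK+"); // Phone::Smack
    PhoneConverter::get().tryParse("???");     // Phone::Noise, not an error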
@@ -4,88 +4,88 @@

// Defines a subset of the Arpabet
enum class Phone {
    /////////
    // Vowels
    /////////

    // ... monophthongs
    AO,    // [ɔ] as in [o]ff, f[a]ll, fr[o]st
    AA,    // [ɑ] as in f[a]ther
    IY,    // [i] as in b[ee], sh[e]
    UW,    // [u] as in y[ou], n[ew], f[oo]d
    EH,    // [ɛ] as in r[e]d, m[e]n
    IH,    // [ɪ] as in b[i]g, w[i]n
    UH,    // [ʊ] as in sh[ou]ld, c[ou]ld
    AH,    // [ʌ] as in b[u]t, s[u]n
    Schwa, // [ə] as in [a]lone, disc[u]s
    AE,    // [æ] as in [a]t, b[a]t

    // ... diphthongs
    EY, // [eɪ] as in s[ay], [ei]ght
    AY, // [aɪ] as in m[y], wh[y], r[i]de
    OW, // [oʊ] as in sh[ow], c[oa]t
    AW, // [aʊ] as in h[ow], n[ow]
    OY, // [ɔɪ] as in b[oy], t[oy]

    // ... r-colored
    ER, // [ɝ] as in h[er], b[ir]d, h[ur]t
    LastVowel = ER,

    /////////////
    // Consonants
    /////////////

    // ... stops
    P, // [p] as in [p]ay
    B, // [b] as in [b]uy
    T, // [t] as in [t]ake
    D, // [d] as in [d]ay
    K, // [k] as in [k]ey
    G, // [g] as in [g]o

    // ... affricates
    CH, // [tʃ] as in [ch]air
    JH, // [dʒ] as in [j]ust

    // ... fricatives
    F,  // [f] as in [f]or
    V,  // [v] as in [v]ery
    TH, // [θ] as in [th]anks
    DH, // [ð] as in [th]at
    S,  // [s] as in [s]ay
    Z,  // [z] as in [z]oo
    SH, // [ʃ] as in [sh]ow
    ZH, // [ʒ] as in mea[s]ure, plea[s]ure
    HH, // [h] as in [h]ouse

    // ... nasals
    M,  // [m] as in [m]an
    N,  // [n] as in [n]o
    NG, // [ŋ] as in si[ng]

    // ... liquids
    L, // [ɫ] as in [l]ate
    R, // [r, ɹ] as in [r]un

    // ... semivowels
    Y, // [j] as in [y]es
    W, // [w] as in [w]ay

    /////////////
    // Misc.
    /////////////

    Breath,
    Cough,
    Smack,
    Noise
};

class PhoneConverter : public EnumConverter<Phone> {
public:
    static PhoneConverter& get();
protected:
    std::string getTypeName() override;
    member_data getMemberData() override;
public:
    boost::optional<Phone> tryParse(const std::string& s) override;
};

std::ostream& operator<<(std::ostream& stream, Phone value);
@@ -4,54 +4,54 @@ using std::string;
using std::set;

ShapeConverter& ShapeConverter::get() {
    static ShapeConverter converter;
    return converter;
}

set<Shape> ShapeConverter::getBasicShapes() {
    static const set<Shape> result = [] {
        set<Shape> result;
        for (int i = 0; i <= static_cast<int>(Shape::LastBasicShape); ++i) {
            result.insert(static_cast<Shape>(i));
        }
        return result;
    }();
    return result;
}

set<Shape> ShapeConverter::getExtendedShapes() {
    static const set<Shape> result = [] {
        set<Shape> result;
        for (int i = static_cast<int>(Shape::LastBasicShape) + 1; i < static_cast<int>(Shape::EndSentinel); ++i) {
            result.insert(static_cast<Shape>(i));
        }
        return result;
    }();
    return result;
}

string ShapeConverter::getTypeName() {
    return "Shape";
}

EnumConverter<Shape>::member_data ShapeConverter::getMemberData() {
    return member_data {
        { Shape::A, "A" },
        { Shape::B, "B" },
        { Shape::C, "C" },
        { Shape::D, "D" },
        { Shape::E, "E" },
        { Shape::F, "F" },
        { Shape::G, "G" },
        { Shape::H, "H" },
        { Shape::X, "X" }
    };
}

std::ostream& operator<<(std::ostream& stream, Shape value) {
    return ShapeConverter::get().write(stream, value);
}

std::istream& operator>>(std::istream& stream, Shape& value) {
    return ShapeConverter::get().read(stream, value);
}
@@ -7,33 +7,33 @@
// For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
// For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
enum class Shape {
    // Basic shapes

    A, // Closed mouth (M, B, P)
    B, // Clenched teeth (most consonants, some vowels like EE as in b[ee])
    C, // Open mouth (vowels like m[e]n, s[u]n, s[a]y)
    D, // Mouth wide open (vowels like f[a]ther, b[a]t, wh[y])
    E, // Rounded mouth (vowels like [o]ff)
    F, // Puckered lips (y[ou], b[o]y, [w]ay)
    LastBasicShape = F,

    // Extended shapes

    G, // "F", "V"
    H, // "L"
    X, // Idle

    EndSentinel
};

class ShapeConverter : public EnumConverter<Shape> {
public:
    static ShapeConverter& get();
    static std::set<Shape> getBasicShapes();
    static std::set<Shape> getExtendedShapes();
protected:
    std::string getTypeName() override;
    member_data getMemberData() override;
};

std::ostream& operator<<(std::ostream& stream, Shape value);
@@ -41,7 +41,7 @@ std::ostream& operator<<(std::ostream& stream, Shape value);
std::istream& operator>>(std::istream& stream, Shape& value);

inline bool isClosed(Shape shape) {
    return shape == Shape::A || shape == Shape::X;
}

// A set of mouth shapes.
@@ -7,66 +7,66 @@ using std::chrono::duration_cast;
using std::string;

DatExporter::DatExporter(const ShapeSet& targetShapeSet, double frameRate, bool convertToPrestonBlair) :
    frameRate(frameRate),
    convertToPrestonBlair(convertToPrestonBlair),
    prestonBlairShapeNames {
        { Shape::A, "MBP" },
        { Shape::B, "etc" },
        { Shape::C, "E" },
        { Shape::D, "AI" },
        { Shape::E, "O" },
        { Shape::F, "U" },
        { Shape::G, "FV" },
        { Shape::H, "L" },
        { Shape::X, "rest" },
    }
{
    // Animation works with a fixed frame rate of 100.
    // Downsampling to much less than 25 fps may result in dropped frames.
    // Upsampling to more than 100 fps doesn't make sense.
    const double minFrameRate = 24.0;
    const double maxFrameRate = 100.0;

    if (frameRate < minFrameRate || frameRate > maxFrameRate) {
        throw std::runtime_error(fmt::format("Frame rate must be between {} and {} fps.", minFrameRate, maxFrameRate));
    }

    if (convertToPrestonBlair) {
        for (Shape shape : targetShapeSet) {
            if (prestonBlairShapeNames.find(shape) == prestonBlairShapeNames.end()) {
                throw std::runtime_error(fmt::format("Mouth shape {} cannot be converted to Preston Blair shape names.", shape));
            }
        }
    }
}
|
||||
void DatExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
|
||||
outputStream << "MohoSwitch1" << "\n";
|
||||
outputStream << "MohoSwitch1" << "\n";
|
||||
|
||||
// Output shapes with start times
|
||||
int lastFrameNumber = 0;
|
||||
for (auto& timedShape : input.animation) {
|
||||
const int frameNumber = toFrameNumber(timedShape.getStart());
|
||||
if (frameNumber == lastFrameNumber) continue;
|
||||
// Output shapes with start times
|
||||
int lastFrameNumber = 0;
|
||||
for (auto& timedShape : input.animation) {
|
||||
const int frameNumber = toFrameNumber(timedShape.getStart());
|
||||
if (frameNumber == lastFrameNumber) continue;
|
||||
|
||||
const string shapeName = toString(timedShape.getValue());
|
||||
outputStream << frameNumber << " " << shapeName << "\n";
|
||||
lastFrameNumber = frameNumber;
|
||||
}
|
||||
const string shapeName = toString(timedShape.getValue());
|
||||
outputStream << frameNumber << " " << shapeName << "\n";
|
||||
lastFrameNumber = frameNumber;
|
||||
}
|
||||
|
||||
// Output closed mouth with end time
|
||||
int frameNumber = toFrameNumber(input.animation.getRange().getEnd());
|
||||
if (frameNumber == lastFrameNumber) ++frameNumber;
|
||||
const string shapeName = toString(convertToTargetShapeSet(Shape::X, input.targetShapeSet));
|
||||
outputStream << frameNumber << " " << shapeName << "\n";
|
||||
// Output closed mouth with end time
|
||||
int frameNumber = toFrameNumber(input.animation.getRange().getEnd());
|
||||
if (frameNumber == lastFrameNumber) ++frameNumber;
|
||||
const string shapeName = toString(convertToTargetShapeSet(Shape::X, input.targetShapeSet));
|
||||
outputStream << frameNumber << " " << shapeName << "\n";
|
||||
}
|
||||
|
||||
string DatExporter::toString(Shape shape) const {
|
||||
return convertToPrestonBlair
|
||||
? prestonBlairShapeNames.at(shape)
|
||||
: boost::lexical_cast<std::string>(shape);
|
||||
return convertToPrestonBlair
|
||||
? prestonBlairShapeNames.at(shape)
|
||||
: boost::lexical_cast<std::string>(shape);
|
||||
}
|
||||
|
||||
int DatExporter::toFrameNumber(centiseconds time) const {
|
||||
return 1 + static_cast<int>(frameRate * duration_cast<duration<double>>(time).count());
|
||||
return 1 + static_cast<int>(frameRate * duration_cast<duration<double>>(time).count());
|
||||
}
|
||||
|
|
|
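As a worked example of toFrameNumber (values invented): with frameRate = 24, a start time of 100 cs converts to 1.0 s, giving 1 + static_cast<int>(24 * 1.0) = frame 25; the leading 1 makes frame numbering 1-based.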
@@ -8,14 +8,14 @@
// Exporter for Moho's switch data file format
class DatExporter : public Exporter {
public:
    DatExporter(const ShapeSet& targetShapeSet, double frameRate, bool convertToPrestonBlair);
    void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;

private:
    int toFrameNumber(centiseconds time) const;
    std::string toString(Shape shape) const;

    double frameRate;
    bool convertToPrestonBlair;
    std::map<Shape, std::string> prestonBlairShapeNames;
};
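Put together, exportAnimation writes the MohoSwitch1 magic line followed by one "<frame> <shape>" pair per keyframe, closing the mouth on the final frame. A sample with invented timings, using Preston Blair names:

MohoSwitch1
1 rest
25 MBP
31 AI
48 rest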
@@ -6,21 +6,21 @@

class ExporterInput {
public:
    ExporterInput(
        const std::filesystem::path& inputFilePath,
        const JoiningContinuousTimeline<Shape>& animation,
        const ShapeSet& targetShapeSet) :
        inputFilePath(inputFilePath),
        animation(animation),
        targetShapeSet(targetShapeSet) {}

    std::filesystem::path inputFilePath;
    JoiningContinuousTimeline<Shape> animation;
    ShapeSet targetShapeSet;
};

class Exporter {
public:
    virtual ~Exporter() {}
    virtual void exportAnimation(const ExporterInput& input, std::ostream& outputStream) = 0;
};
@@ -5,24 +5,24 @@
using std::string;

void JsonExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
    // Export as JSON.
    // I'm not using a library because the code is short enough without one and it lets me control
    // the formatting.
    outputStream << "{\n";
    outputStream << "  \"metadata\": {\n";
    outputStream << "    \"soundFile\": \"" << escapeJsonString(absolute(input.inputFilePath).u8string()) << "\",\n";
    outputStream << "    \"duration\": " << formatDuration(input.animation.getRange().getDuration()) << "\n";
    outputStream << "  },\n";
    outputStream << "  \"mouthCues\": [\n";
    bool isFirst = true;
    for (auto& timedShape : dummyShapeIfEmpty(input.animation, input.targetShapeSet)) {
        if (!isFirst) outputStream << ",\n";
        isFirst = false;
        outputStream << "    { \"start\": " << formatDuration(timedShape.getStart())
            << ", \"end\": " << formatDuration(timedShape.getEnd())
            << ", \"value\": \"" << timedShape.getValue() << "\" }";
    }
    outputStream << "\n";
    outputStream << "  ]\n";
    outputStream << "}\n";
}
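The resulting document has this shape (path and timings invented):

{
  "metadata": {
    "soundFile": "/tmp/dialog.wav",
    "duration": 1.25
  },
  "mouthCues": [
    { "start": 0.00, "end": 0.25, "value": "X" },
    { "start": 0.25, "end": 1.25, "value": "B" }
  ]
}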
@@ -4,5 +4,5 @@

class JsonExporter : public Exporter {
public:
    void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
};
@@ -2,19 +2,19 @@
#include "animation/targetShapeSet.h"

void TsvExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
    // Output shapes with start times
    for (auto& timedShape : input.animation) {
        outputStream
            << formatDuration(timedShape.getStart())
            << "\t"
            << timedShape.getValue()
            << "\n";
    }

    // Output closed mouth with end time
    outputStream
        << formatDuration(input.animation.getRange().getEnd())
        << "\t"
        << convertToTargetShapeSet(Shape::X, input.targetShapeSet)
        << "\n";
}
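Each line is thus a tab-separated "<start> <shape>" pair, with a final row closing the mouth at the clip's end (timings invented):

0.00	X
0.25	B
1.25	X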
@@ -4,6 +4,6 @@

class TsvExporter : public Exporter {
public:
    void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
};
@@ -8,33 +8,33 @@ using std::string;
using boost::property_tree::ptree;

void XmlExporter::exportAnimation(const ExporterInput& input, std::ostream& outputStream) {
    ptree tree;

    // Add metadata
    tree.put("rhubarbResult.metadata.soundFile", absolute(input.inputFilePath).u8string());
    tree.put(
        "rhubarbResult.metadata.duration",
        formatDuration(input.animation.getRange().getDuration())
    );

    // Add mouth cues
    for (auto& timedShape : dummyShapeIfEmpty(input.animation, input.targetShapeSet)) {
        ptree& mouthCueElement = tree.add(
            "rhubarbResult.mouthCues.mouthCue",
            timedShape.getValue()
        );
        mouthCueElement.put("<xmlattr>.start", formatDuration(timedShape.getStart()));
        mouthCueElement.put("<xmlattr>.end", formatDuration(timedShape.getEnd()));
    }

#ifndef BOOST_VERSION // present in version.hpp
    #error "Could not detect Boost version."
#endif

#if BOOST_VERSION < 105600 // Support legacy syntax
    using writer_setting = boost::property_tree::xml_writer_settings<char>;
#else
    using writer_setting = boost::property_tree::xml_writer_settings<string>;
#endif
    write_xml(outputStream, tree, writer_setting(' ', 2));
}
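Given the property-tree paths above, the serialized output looks roughly like this (timings invented; the exact XML declaration depends on the Boost version):

<?xml version="1.0" encoding="utf-8"?>
<rhubarbResult>
  <metadata>
    <soundFile>/tmp/dialog.wav</soundFile>
    <duration>1.25</duration>
  </metadata>
  <mouthCues>
    <mouthCue start="0.00" end="0.25">X</mouthCue>
    <mouthCue start="0.25" end="1.25">B</mouthCue>
  </mouthCues>
</rhubarbResult>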
@@ -4,5 +4,5 @@

class XmlExporter : public Exporter {
public:
    void exportAnimation(const ExporterInput& input, std::ostream& outputStream) override;
};
@@ -3,14 +3,14 @@

// Makes sure there is at least one mouth shape
std::vector<Timed<Shape>> dummyShapeIfEmpty(
    const JoiningTimeline<Shape>& animation,
    const ShapeSet& targetShapeSet
) {
    std::vector<Timed<Shape>> result;
    std::copy(animation.begin(), animation.end(), std::back_inserter(result));
    if (result.empty()) {
        // Add zero-length empty mouth
        result.emplace_back(0_cs, 0_cs, convertToTargetShapeSet(Shape::X, targetShapeSet));
    }
    return result;
}
@@ -5,6 +5,6 @@

// Makes sure there is at least one mouth shape
std::vector<Timed<Shape>> dummyShapeIfEmpty(
    const JoiningTimeline<Shape>& animation,
    const ShapeSet& targetShapeSet
);
@@ -9,27 +9,27 @@ using std::string;
using std::filesystem::path;

JoiningContinuousTimeline<Shape> animateAudioClip(
    const AudioClip& audioClip,
    const optional<string>& dialog,
    const Recognizer& recognizer,
    const ShapeSet& targetShapeSet,
    int maxThreadCount,
    ProgressSink& progressSink)
{
    const BoundedTimeline<Phone> phones =
        recognizer.recognizePhones(audioClip, dialog, maxThreadCount, progressSink);
    JoiningContinuousTimeline<Shape> result = animate(phones, targetShapeSet);
    return result;
}

JoiningContinuousTimeline<Shape> animateWaveFile(
    path filePath,
    const optional<string>& dialog,
    const Recognizer& recognizer,
    const ShapeSet& targetShapeSet,
    int maxThreadCount,
    ProgressSink& progressSink)
{
    const auto audioClip = createAudioFileClip(filePath);
    return animateAudioClip(*audioClip, dialog, recognizer, targetShapeSet, maxThreadCount, progressSink);
}
@@ -9,17 +9,17 @@
#include "recognition/Recognizer.h"

JoiningContinuousTimeline<Shape> animateAudioClip(
    const AudioClip& audioClip,
    const boost::optional<std::string>& dialog,
    const Recognizer& recognizer,
    const ShapeSet& targetShapeSet,
    int maxThreadCount,
    ProgressSink& progressSink);

JoiningContinuousTimeline<Shape> animateWaveFile(
    std::filesystem::path filePath,
    const boost::optional<std::string>& dialog,
    const Recognizer& recognizer,
    const ShapeSet& targetShapeSet,
    int maxThreadCount,
    ProgressSink& progressSink);
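A minimal calling sketch for this entry point (hypothetical: NullProgressSink and targetShapeSet are stand-ins not defined in this diff, and PhoneticRecognizer is assumed default-constructible):

PhoneticRecognizer recognizer;
NullProgressSink progressSink; // hypothetical no-op ProgressSink implementation
JoiningContinuousTimeline<Shape> animation = animateWaveFile(
    "dialog.wav",                                // input sound file
    boost::optional<std::string>("Hello world"), // or boost::none when no transcript is available
    recognizer,
    targetShapeSet,                              // assumed ShapeSet configured elsewhere
    4,                                           // maxThreadCount
    progressSink);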
@@ -10,30 +10,30 @@ using std::string;

namespace logging {

    // Returns an int representing the current thread.
    // This used to be a simple thread_local variable, but Xcode doesn't support that yet
    int getThreadCounter() {
        using thread_id = std::thread::id;

        static std::mutex counterMutex;
        lock_guard<std::mutex> lock(counterMutex);

        static unordered_map<thread_id, int> threadCounters;
        static int lastThreadId = 0;
        thread_id threadId = std::this_thread::get_id();
        if (threadCounters.find(threadId) == threadCounters.end()) {
            threadCounters.insert({ threadId, ++lastThreadId });
        }
        return threadCounters.find(threadId)->second;
    }

    Entry::Entry(Level level, const string& message) :
        timestamp(),
        level(level),
        message(message)
    {
        time(&timestamp);
        this->threadCounter = getThreadCounter();
    }

}
@@ -3,15 +3,15 @@
#include "Level.h"

namespace logging {

    struct Entry {
        Entry(Level level, const std::string& message);
        virtual ~Entry() = default;

        time_t timestamp;
        int threadCounter;
        Level level;
        std::string message;
    };

}
@@ -5,10 +5,10 @@

namespace logging {

    class Formatter {
    public:
        virtual ~Formatter() = default;
        virtual std::string format(const Entry& entry) = 0;
    };

}
@@ -4,32 +4,32 @@ using std::string;

namespace logging {

    LevelConverter& LevelConverter::get() {
        static LevelConverter converter;
        return converter;
    }

    string LevelConverter::getTypeName() {
        return "Level";
    }

    EnumConverter<Level>::member_data LevelConverter::getMemberData() {
        return member_data {
            { Level::Trace, "Trace" },
            { Level::Debug, "Debug" },
            { Level::Info, "Info" },
            { Level::Warn, "Warn" },
            { Level::Error, "Error" },
            { Level::Fatal, "Fatal" }
        };
    }

    std::ostream& operator<<(std::ostream& stream, Level value) {
        return LevelConverter::get().write(stream, value);
    }

    std::istream& operator >>(std::istream& stream, Level& value) {
        return LevelConverter::get().read(stream, value);
    }

}
@@ -4,26 +4,26 @@

namespace logging {

    enum class Level {
        Trace,
        Debug,
        Info,
        Warn,
        Error,
        Fatal,
        EndSentinel
    };

    class LevelConverter : public EnumConverter<Level> {
    public:
        static LevelConverter& get();
    protected:
        std::string getTypeName() override;
        member_data getMemberData() override;
    };

    std::ostream& operator<<(std::ostream& stream, Level value);

    std::istream& operator >>(std::istream& stream, Level& value);

}
@@ -4,10 +4,10 @@

namespace logging {

    class Sink {
    public:
        virtual ~Sink() = default;
        virtual void receive(const Entry& entry) = 0;
    };

}
@@ -7,17 +7,17 @@ using std::string;

namespace logging {

    string SimpleConsoleFormatter::format(const Entry& entry) {
        return fmt::format("[{0}] {1}", entry.level, entry.message);
    }

    string SimpleFileFormatter::format(const Entry& entry) {
        return fmt::format(
            "[{0}] {1} {2}",
            formatTime(entry.timestamp, "%F %H:%M:%S"),
            entry.threadCounter,
            consoleFormatter.format(entry)
        );
    }

}
@@ -4,16 +4,16 @@

namespace logging {

    class SimpleConsoleFormatter : public Formatter {
    public:
        std::string format(const Entry& entry) override;
    };

    class SimpleFileFormatter : public Formatter {
    public:
        std::string format(const Entry& entry) override;
    private:
        SimpleConsoleFormatter consoleFormatter;
    };

}
@@ -10,46 +10,46 @@ using std::shared_ptr;
using std::lock_guard;

std::mutex& getLogMutex() {
    static std::mutex mutex;
    return mutex;
}

vector<shared_ptr<Sink>>& getSinks() {
    static vector<shared_ptr<Sink>> sinks;
    return sinks;
}

bool logging::addSink(shared_ptr<Sink> sink) {
    lock_guard<std::mutex> lock(getLogMutex());

    auto& sinks = getSinks();
    if (std::find(sinks.begin(), sinks.end(), sink) == sinks.end()) {
        sinks.push_back(sink);
        return true;
    }
    return false;
}

bool logging::removeSink(std::shared_ptr<Sink> sink) {
    lock_guard<std::mutex> lock(getLogMutex());

    auto& sinks = getSinks();
    const auto it = std::find(sinks.begin(), sinks.end(), sink);
    if (it != sinks.end()) {
        sinks.erase(it);
        return true;
    }
    return false;
}

void logging::log(const Entry& entry) {
    lock_guard<std::mutex> lock(getLogMutex());
    for (auto& sink : getSinks()) {
        sink->receive(entry);
    }
}

void logging::log(Level level, const string& message) {
    const Entry entry = Entry(level, message);
    log(entry);
}
@@ -6,32 +6,32 @@

namespace logging {

    bool addSink(std::shared_ptr<Sink> sink);

    bool removeSink(std::shared_ptr<Sink> sink);

    void log(const Entry& entry);

    void log(Level level, const std::string& message);

    template<typename... Args>
    void logFormat(Level level, fmt::CStringRef format, const Args&... args) {
        log(level, fmt::format(format, args...));
    }

#define LOG_WITH_LEVEL(levelName, levelEnum) \
    inline void levelName(const std::string& message) { \
        log(Level::levelEnum, message); \
    } \
    template <typename... Args> \
    void levelName ## Format(fmt::CStringRef format, const Args&... args) { \
        logFormat(Level::levelEnum, format, args...); \
    }

    LOG_WITH_LEVEL(trace, Trace)
    LOG_WITH_LEVEL(debug, Debug)
    LOG_WITH_LEVEL(info, Info)
    LOG_WITH_LEVEL(warn, Warn)
    LOG_WITH_LEVEL(error, Error)
    LOG_WITH_LEVEL(fatal, Fatal)
}
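The macro stamps out one plain and one ...Format overload per level, so call sites read like this (messages and counts invented):

logging::debug("Starting export");
logging::infoFormat("Processed {} of {} utterances", done, total); // done/total: hypothetical locals
logging::error("Could not open sound file");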
@@ -7,29 +7,29 @@ using std::shared_ptr;

namespace logging {

    LevelFilter::LevelFilter(shared_ptr<Sink> innerSink, Level minLevel) :
        innerSink(innerSink),
        minLevel(minLevel)
    {}

    void LevelFilter::receive(const Entry& entry) {
        if (entry.level >= minLevel) {
            innerSink->receive(entry);
        }
    }

    StreamSink::StreamSink(shared_ptr<std::ostream> stream, shared_ptr<Formatter> formatter) :
        stream(stream),
        formatter(formatter)
    {}

    void StreamSink::receive(const Entry& entry) {
        const string line = formatter->format(entry);
        *stream << line << std::endl;
    }

    StdErrSink::StdErrSink(shared_ptr<Formatter> formatter) :
        StreamSink(std::shared_ptr<std::ostream>(&std::cerr, [](void*) {}), formatter)
    {}

}
@@ -5,29 +5,29 @@
#include "Formatter.h"

namespace logging {
    enum class Level;

    class LevelFilter : public Sink {
    public:
        LevelFilter(std::shared_ptr<Sink> innerSink, Level minLevel);
        void receive(const Entry& entry) override;
    private:
        std::shared_ptr<Sink> innerSink;
        Level minLevel;
    };

    class StreamSink : public Sink {
    public:
        StreamSink(std::shared_ptr<std::ostream> stream, std::shared_ptr<Formatter> formatter);
        void receive(const Entry& entry) override;
    private:
        std::shared_ptr<std::ostream> stream;
        std::shared_ptr<Formatter> formatter;
    };

    class StdErrSink : public StreamSink {
    public:
        explicit StdErrSink(std::shared_ptr<Formatter> formatter);
    };

}
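Composed, a typical setup registers a level-filtered stderr sink; a sketch (the Warn threshold is an invented choice):

using namespace logging;
addSink(std::make_shared<LevelFilter>(
    std::make_shared<StdErrSink>(std::make_shared<SimpleConsoleFormatter>()),
    Level::Warn)); // only Warn and above reach stderr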
@@ -11,103 +11,103 @@ using std::string;
using boost::optional;

static lambda_unique_ptr<ps_decoder_t> createDecoder(optional<std::string> dialog) {
    UNUSED(dialog);

    lambda_unique_ptr<cmd_ln_t> config(
        cmd_ln_init(
            nullptr, ps_args(), true,
            // Set acoustic model
            "-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(),
            // Set phonetic language model
            "-allphone", (getSphinxModelDirectory() / "en-us-phone.lm.bin").u8string().c_str(),
            "-allphone_ci", "yes",
            // Set language model probability weight.
            // Low values (<= 0.4) can lead to fluttering animation.
            // High values (>= 1.0) can lead to imprecise or freezing animation.
            "-lw", "0.8",
            // Add noise against zero silence
            // (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor)
            "-dither", "yes",
            // Disable VAD -- we're doing that ourselves
            "-remove_silence", "no",
            // Perform per-utterance cepstral mean normalization
            "-cmn", "batch",

            // The following settings are recommended at
            // http://cmusphinx.sourceforge.net/wiki/phonemerecognition

            // Set beam width applied to every frame in Viterbi search
            "-beam", "1e-20",
            // Set beam width applied to phone transitions
            "-pbeam", "1e-20",
            nullptr),
        [](cmd_ln_t* config) { cmd_ln_free_r(config); });
    if (!config) throw runtime_error("Error creating configuration.");

    lambda_unique_ptr<ps_decoder_t> decoder(
        ps_init(config.get()),
        [](ps_decoder_t* recognizer) { ps_free(recognizer); });
    if (!decoder) throw runtime_error("Error creating speech decoder.");

    return decoder;
}

static Timeline<Phone> utteranceToPhones(
    const AudioClip& audioClip,
    TimeRange utteranceTimeRange,
    ps_decoder_t& decoder,
    ProgressSink& utteranceProgressSink
) {
    // Pad time range to give PocketSphinx some breathing room
    TimeRange paddedTimeRange = utteranceTimeRange;
    const centiseconds padding(3);
    paddedTimeRange.grow(padding);
    paddedTimeRange.trim(audioClip.getTruncatedRange());

    const unique_ptr<AudioClip> clipSegment = audioClip.clone()
        | segment(paddedTimeRange)
        | resample(sphinxSampleRate);
    const auto audioBuffer = copyTo16bitBuffer(*clipSegment);

    // Detect phones (returned as words)
    BoundedTimeline<string> phoneStrings = recognizeWords(audioBuffer, decoder);
    phoneStrings.shift(paddedTimeRange.getStart());
    Timeline<Phone> utterancePhones;
    for (const auto& timedPhoneString : phoneStrings) {
        Phone phone = PhoneConverter::get().parse(timedPhoneString.getValue());
        if (phone == Phone::AH && timedPhoneString.getDuration() < 6_cs) {
            // Heuristic: < 6_cs is schwa. PocketSphinx doesn't differentiate.
            phone = Phone::Schwa;
        }
        utterancePhones.set(timedPhoneString.getTimeRange(), phone);
    }

    // Log raw phones
    for (const auto& timedPhone : utterancePhones) {
        logTimedEvent("rawPhone", timedPhone);
    }

    // Guess positions of noise sounds
    JoiningTimeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones);
    for (const auto& noiseSound : noiseSounds) {
        utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise);
    }

    // Log phones
    for (const auto& timedPhone : utterancePhones) {
        logTimedEvent("phone", timedPhone);
    }

    utteranceProgressSink.reportProgress(1.0);

    return utterancePhones;
}

BoundedTimeline<Phone> PhoneticRecognizer::recognizePhones(
    const AudioClip& inputAudioClip,
    optional<std::string> dialog,
    int maxThreadCount,
    ProgressSink& progressSink
) const {
    return ::recognizePhones(inputAudioClip, dialog, &createDecoder, &utteranceToPhones, maxThreadCount, progressSink);
}
@@ -5,10 +5,10 @@

class PhoneticRecognizer : public Recognizer {
public:
    BoundedTimeline<Phone> recognizePhones(
        const AudioClip& inputAudioClip,
        boost::optional<std::string> dialog,
        int maxThreadCount,
        ProgressSink& progressSink
    ) const override;
};
@ -27,316 +27,316 @@ using boost::optional;
|
|||
using std::array;
|
||||
|
||||
bool dictionaryContains(dict_t& dictionary, const string& word) {
|
||||
return dict_wordid(&dictionary, word.c_str()) != BAD_S3WID;
|
||||
return dict_wordid(&dictionary, word.c_str()) != BAD_S3WID;
|
||||
}
|
||||
|
||||
s3wid_t getWordId(const string& word, dict_t& dictionary) {
|
||||
const s3wid_t wordId = dict_wordid(&dictionary, word.c_str());
|
||||
if (wordId == BAD_S3WID) throw invalid_argument(fmt::format("Unknown word '{}'.", word));
|
||||
return wordId;
|
||||
const s3wid_t wordId = dict_wordid(&dictionary, word.c_str());
|
||||
if (wordId == BAD_S3WID) throw invalid_argument(fmt::format("Unknown word '{}'.", word));
|
||||
return wordId;
|
||||
}
|
||||
|
||||
void addMissingDictionaryWords(const vector<string>& words, ps_decoder_t& decoder) {
|
||||
map<string, string> missingPronunciations;
|
||||
for (const string& word : words) {
|
||||
if (!dictionaryContains(*decoder.dict, word)) {
|
||||
string pronunciation;
|
||||
for (Phone phone : wordToPhones(word)) {
|
||||
if (pronunciation.length() > 0) pronunciation += " ";
|
||||
pronunciation += PhoneConverter::get().toString(phone);
|
||||
}
|
||||
missingPronunciations[word] = pronunciation;
|
||||
}
|
||||
}
|
||||
for (auto it = missingPronunciations.begin(); it != missingPronunciations.end(); ++it) {
|
||||
const bool isLast = it == --missingPronunciations.end();
|
||||
logging::infoFormat("Unknown word '{}'. Guessing pronunciation '{}'.", it->first, it->second);
|
||||
ps_add_word(&decoder, it->first.c_str(), it->second.c_str(), isLast);
|
||||
}
|
||||
map<string, string> missingPronunciations;
|
||||
for (const string& word : words) {
|
||||
if (!dictionaryContains(*decoder.dict, word)) {
|
||||
string pronunciation;
|
||||
for (Phone phone : wordToPhones(word)) {
|
||||
if (pronunciation.length() > 0) pronunciation += " ";
|
||||
pronunciation += PhoneConverter::get().toString(phone);
|
||||
}
|
||||
missingPronunciations[word] = pronunciation;
|
||||
}
|
||||
}
|
||||
for (auto it = missingPronunciations.begin(); it != missingPronunciations.end(); ++it) {
|
||||
const bool isLast = it == --missingPronunciations.end();
|
||||
logging::infoFormat("Unknown word '{}'. Guessing pronunciation '{}'.", it->first, it->second);
|
||||
ps_add_word(&decoder, it->first.c_str(), it->second.c_str(), isLast);
|
||||
}
|
||||
}
|
||||
|
||||
lambda_unique_ptr<ngram_model_t> createDefaultLanguageModel(ps_decoder_t& decoder) {
|
||||
path modelPath = getSphinxModelDirectory() / "en-us.lm.bin";
|
||||
lambda_unique_ptr<ngram_model_t> result(
|
||||
ngram_model_read(decoder.config, modelPath.u8string().c_str(), NGRAM_AUTO, decoder.lmath),
|
||||
[](ngram_model_t* lm) { ngram_model_free(lm); });
|
||||
if (!result) {
|
||||
throw runtime_error(fmt::format("Error reading language model from {}.", modelPath.u8string()));
|
||||
}
|
||||
path modelPath = getSphinxModelDirectory() / "en-us.lm.bin";
|
||||
lambda_unique_ptr<ngram_model_t> result(
|
||||
ngram_model_read(decoder.config, modelPath.u8string().c_str(), NGRAM_AUTO, decoder.lmath),
|
||||
[](ngram_model_t* lm) { ngram_model_free(lm); });
|
||||
if (!result) {
|
||||
throw runtime_error(fmt::format("Error reading language model from {}.", modelPath.u8string()));
|
||||
}
|
||||
|
||||
return result;
|
||||
return result;
|
||||
}
|
||||
|
||||
lambda_unique_ptr<ngram_model_t> createDialogLanguageModel(
|
||||
ps_decoder_t& decoder,
|
||||
const string& dialog
|
||||
ps_decoder_t& decoder,
|
||||
const string& dialog
|
||||
) {
|
||||
// Split dialog into normalized words
|
||||
vector<string> words = tokenizeText(
|
||||
dialog,
|
||||
[&](const string& word) { return dictionaryContains(*decoder.dict, word); }
|
||||
);
|
||||
// Split dialog into normalized words
|
||||
vector<string> words = tokenizeText(
|
||||
dialog,
|
||||
[&](const string& word) { return dictionaryContains(*decoder.dict, word); }
|
||||
);
|
||||
|
||||
// Add dialog-specific words to the dictionary
|
||||
addMissingDictionaryWords(words, decoder);
|
||||
// Add dialog-specific words to the dictionary
|
||||
addMissingDictionaryWords(words, decoder);
|
||||
|
||||
// Create dialog-specific language model
|
||||
words.insert(words.begin(), "<s>");
|
||||
words.emplace_back("</s>");
|
||||
return createLanguageModel(words, decoder);
|
||||
// Create dialog-specific language model
|
||||
words.insert(words.begin(), "<s>");
|
||||
words.emplace_back("</s>");
|
||||
return createLanguageModel(words, decoder);
|
||||
}
|
||||
|
||||
lambda_unique_ptr<ngram_model_t> createBiasedLanguageModel(
|
||||
ps_decoder_t& decoder,
|
||||
const string& dialog
|
||||
ps_decoder_t& decoder,
|
||||
const string& dialog
|
||||
) {
|
||||
auto defaultLanguageModel = createDefaultLanguageModel(decoder);
|
||||
auto dialogLanguageModel = createDialogLanguageModel(decoder, dialog);
|
||||
constexpr int modelCount = 2;
|
||||
array<ngram_model_t*, modelCount> languageModels {
|
||||
defaultLanguageModel.get(),
|
||||
dialogLanguageModel.get()
|
||||
};
|
||||
array<const char*, modelCount> modelNames { "defaultLM", "dialogLM" };
|
||||
array<float, modelCount> modelWeights { 0.1f, 0.9f };
|
||||
lambda_unique_ptr<ngram_model_t> result(
|
||||
ngram_model_set_init(
|
||||
nullptr,
|
||||
languageModels.data(),
|
||||
const_cast<char**>(modelNames.data()),
|
||||
modelWeights.data(),
|
||||
modelCount
|
||||
),
|
||||
[](ngram_model_t* lm) { ngram_model_free(lm); });
|
||||
if (!result) {
|
||||
throw runtime_error("Error creating biased language model.");
|
||||
}
|
||||
auto defaultLanguageModel = createDefaultLanguageModel(decoder);
|
||||
auto dialogLanguageModel = createDialogLanguageModel(decoder, dialog);
|
||||
constexpr int modelCount = 2;
|
||||
array<ngram_model_t*, modelCount> languageModels {
|
||||
defaultLanguageModel.get(),
|
||||
dialogLanguageModel.get()
|
||||
};
|
||||
array<const char*, modelCount> modelNames { "defaultLM", "dialogLM" };
|
||||
array<float, modelCount> modelWeights { 0.1f, 0.9f };
|
||||
lambda_unique_ptr<ngram_model_t> result(
|
||||
ngram_model_set_init(
|
||||
nullptr,
|
||||
languageModels.data(),
|
||||
const_cast<char**>(modelNames.data()),
|
||||
modelWeights.data(),
|
||||
modelCount
|
||||
),
|
||||
[](ngram_model_t* lm) { ngram_model_free(lm); });
|
||||
if (!result) {
|
||||
throw runtime_error("Error creating biased language model.");
|
||||
}
|
||||
|
||||
return result;
|
||||
return result;
|
||||
}
|
||||
|
||||
static lambda_unique_ptr<ps_decoder_t> createDecoder(optional<std::string> dialog) {
|
||||
lambda_unique_ptr<cmd_ln_t> config(
|
||||
cmd_ln_init(
|
||||
nullptr, ps_args(), true,
|
||||
// Set acoustic model
|
||||
"-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(),
|
||||
// Set pronunciation dictionary
|
||||
"-dict", (getSphinxModelDirectory() / "cmudict-en-us.dict").u8string().c_str(),
|
||||
// Add noise against zero silence
|
||||
// (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor)
|
||||
"-dither", "yes",
|
||||
// Disable VAD -- we're doing that ourselves
|
||||
"-remove_silence", "no",
|
||||
// Perform per-utterance cepstral mean normalization
|
||||
"-cmn", "batch",
|
||||
nullptr),
|
||||
[](cmd_ln_t* config) { cmd_ln_free_r(config); });
|
||||
if (!config) throw runtime_error("Error creating configuration.");
|
||||
lambda_unique_ptr<cmd_ln_t> config(
|
||||
cmd_ln_init(
|
||||
nullptr, ps_args(), true,
|
||||
// Set acoustic model
|
||||
"-hmm", (getSphinxModelDirectory() / "acoustic-model").u8string().c_str(),
|
||||
// Set pronunciation dictionary
|
||||
"-dict", (getSphinxModelDirectory() / "cmudict-en-us.dict").u8string().c_str(),
|
||||
// Add noise against zero silence
|
||||
// (see http://cmusphinx.sourceforge.net/wiki/faq#qwhy_my_accuracy_is_poor)
|
||||
"-dither", "yes",
|
||||
// Disable VAD -- we're doing that ourselves
|
||||
"-remove_silence", "no",
|
||||
// Perform per-utterance cepstral mean normalization
|
||||
"-cmn", "batch",
|
||||
nullptr),
|
||||
[](cmd_ln_t* config) { cmd_ln_free_r(config); });
|
||||
if (!config) throw runtime_error("Error creating configuration.");
|
||||
|
||||
lambda_unique_ptr<ps_decoder_t> decoder(
|
||||
ps_init(config.get()),
|
||||
[](ps_decoder_t* recognizer) { ps_free(recognizer); });
|
||||
if (!decoder) throw runtime_error("Error creating speech decoder.");
|
||||
lambda_unique_ptr<ps_decoder_t> decoder(
|
||||
ps_init(config.get()),
|
||||
[](ps_decoder_t* recognizer) { ps_free(recognizer); });
|
||||
if (!decoder) throw runtime_error("Error creating speech decoder.");
|
||||
|
||||
// Set language model
|
||||
lambda_unique_ptr<ngram_model_t> languageModel(dialog
|
||||
? createBiasedLanguageModel(*decoder, *dialog)
|
||||
: createDefaultLanguageModel(*decoder));
|
||||
ps_set_lm(decoder.get(), "lm", languageModel.get());
|
||||
ps_set_search(decoder.get(), "lm");
|
||||
// Set language model
|
||||
lambda_unique_ptr<ngram_model_t> languageModel(dialog
|
||||
? createBiasedLanguageModel(*decoder, *dialog)
|
||||
: createDefaultLanguageModel(*decoder));
|
||||
ps_set_lm(decoder.get(), "lm", languageModel.get());
|
||||
ps_set_search(decoder.get(), "lm");
|
||||
|
||||
return decoder;
|
||||
return decoder;
|
||||
}
|
||||
|
||||
optional<Timeline<Phone>> getPhoneAlignment(
|
||||
const vector<s3wid_t>& wordIds,
|
||||
const vector<int16_t>& audioBuffer,
|
||||
ps_decoder_t& decoder)
|
||||
const vector<s3wid_t>& wordIds,
|
||||
const vector<int16_t>& audioBuffer,
|
||||
ps_decoder_t& decoder)
|
||||
{
|
||||
if (wordIds.empty()) return boost::none;
|
||||
if (wordIds.empty()) return boost::none;
|
||||
|
||||
// Create alignment list
|
||||
lambda_unique_ptr<ps_alignment_t> alignment(
|
||||
ps_alignment_init(decoder.d2p),
|
||||
[](ps_alignment_t* alignment) { ps_alignment_free(alignment); });
|
||||
if (!alignment) throw runtime_error("Error creating alignment.");
|
||||
for (s3wid_t wordId : wordIds) {
|
||||
// Add word. Initial value for duration is ignored.
|
||||
ps_alignment_add_word(alignment.get(), wordId, 0);
|
||||
}
|
||||
int error = ps_alignment_populate(alignment.get());
|
||||
if (error) throw runtime_error("Error populating alignment struct.");
|
||||
// Create alignment list
|
||||
lambda_unique_ptr<ps_alignment_t> alignment(
|
||||
ps_alignment_init(decoder.d2p),
|
||||
[](ps_alignment_t* alignment) { ps_alignment_free(alignment); });
|
||||
if (!alignment) throw runtime_error("Error creating alignment.");
|
||||
for (s3wid_t wordId : wordIds) {
|
||||
// Add word. Initial value for duration is ignored.
|
||||
ps_alignment_add_word(alignment.get(), wordId, 0);
|
||||
}
|
||||
int error = ps_alignment_populate(alignment.get());
|
||||
if (error) throw runtime_error("Error populating alignment struct.");
|
||||
|
||||
// Create search structure
|
||||
acmod_t* acousticModel = decoder.acmod;
|
||||
lambda_unique_ptr<ps_search_t> search(
|
||||
state_align_search_init("state_align", decoder.config, acousticModel, alignment.get()),
|
||||
[](ps_search_t* search) { ps_search_free(search); });
|
||||
if (!search) throw runtime_error("Error creating search.");
|
||||
// Create search structure
|
||||
acmod_t* acousticModel = decoder.acmod;
|
||||
lambda_unique_ptr<ps_search_t> search(
|
||||
state_align_search_init("state_align", decoder.config, acousticModel, alignment.get()),
|
||||
[](ps_search_t* search) { ps_search_free(search); });
|
||||
if (!search) throw runtime_error("Error creating search.");
|
||||
|
||||
// Start recognition
|
||||
error = acmod_start_utt(acousticModel);
|
||||
if (error) throw runtime_error("Error starting utterance processing for alignment.");
|
||||
// Start recognition
|
||||
error = acmod_start_utt(acousticModel);
|
||||
if (error) throw runtime_error("Error starting utterance processing for alignment.");
|
||||
|
||||
{
|
||||
// Eventually end recognition
|
||||
auto endRecognition = gsl::finally([&]() { acmod_end_utt(acousticModel); });
|
||||
{
|
||||
// Eventually end recognition
|
||||
auto endRecognition = gsl::finally([&]() { acmod_end_utt(acousticModel); });
|
||||
|
||||
// Start search
|
||||
ps_search_start(search.get());
|
||||
// Start search
|
||||
ps_search_start(search.get());
|
||||
|
||||
// Process entire audio clip
|
||||
const int16* nextSample = audioBuffer.data();
|
||||
size_t remainingSamples = audioBuffer.size();
|
||||
const bool fullUtterance = true;
|
||||
while (acmod_process_raw(acousticModel, &nextSample, &remainingSamples, fullUtterance) > 0) {
|
||||
while (acousticModel->n_feat_frame > 0) {
|
||||
ps_search_step(search.get(), acousticModel->output_frame);
|
||||
acmod_advance(acousticModel);
|
||||
}
|
||||
}
|
||||
// Process entire audio clip
|
||||
const int16* nextSample = audioBuffer.data();
|
||||
size_t remainingSamples = audioBuffer.size();
|
||||
const bool fullUtterance = true;
|
||||
while (acmod_process_raw(acousticModel, &nextSample, &remainingSamples, fullUtterance) > 0) {
|
||||
while (acousticModel->n_feat_frame > 0) {
|
||||
ps_search_step(search.get(), acousticModel->output_frame);
|
||||
acmod_advance(acousticModel);
|
||||
}
|
||||
}
|
||||
|
||||
// End search
|
||||
error = ps_search_finish(search.get());
|
||||
if (error) return boost::none;
|
||||
}
|
||||
// End search
|
||||
error = ps_search_finish(search.get());
|
||||
if (error) return boost::none;
|
||||
}
|
||||
|
||||
// Extract phones with timestamps
|
||||
char** phoneNames = decoder.dict->mdef->ciname;
|
||||
Timeline<Phone> result;
|
||||
for (
|
||||
ps_alignment_iter_t* it = ps_alignment_phones(alignment.get());
|
||||
it;
|
||||
it = ps_alignment_iter_next(it)
|
||||
) {
|
||||
// Get phone
|
||||
ps_alignment_entry_t* phoneEntry = ps_alignment_iter_get(it);
|
||||
const s3cipid_t phoneId = phoneEntry->id.pid.cipid;
|
||||
string phoneName = phoneNames[phoneId];
|
||||
// Extract phones with timestamps
|
||||
char** phoneNames = decoder.dict->mdef->ciname;
|
||||
Timeline<Phone> result;
|
||||
for (
|
||||
ps_alignment_iter_t* it = ps_alignment_phones(alignment.get());
|
||||
it;
|
||||
it = ps_alignment_iter_next(it)
|
||||
) {
|
||||
// Get phone
|
||||
ps_alignment_entry_t* phoneEntry = ps_alignment_iter_get(it);
|
||||
const s3cipid_t phoneId = phoneEntry->id.pid.cipid;
|
||||
string phoneName = phoneNames[phoneId];
|
||||
|
||||
if (phoneName == "SIL") continue;
|
||||
if (phoneName == "SIL") continue;
|
||||
|
||||
// Add entry
|
||||
centiseconds start(phoneEntry->start);
|
||||
centiseconds duration(phoneEntry->duration);
|
||||
Phone phone = PhoneConverter::get().parse(phoneName);
|
||||
if (phone == Phone::AH && duration < 6_cs) {
|
||||
// Heuristic: < 6_cs is schwa. PocketSphinx doesn't differentiate.
|
||||
phone = Phone::Schwa;
|
||||
}
|
||||
const Timed<Phone> timedPhone(start, start + duration, phone);
|
||||
result.set(timedPhone);
|
||||
}
|
||||
return result;
|
||||
// Add entry
|
||||
centiseconds start(phoneEntry->start);
|
||||
centiseconds duration(phoneEntry->duration);
|
||||
Phone phone = PhoneConverter::get().parse(phoneName);
|
||||
if (phone == Phone::AH && duration < 6_cs) {
|
||||
// Heuristic: < 6_cs is schwa. PocketSphinx doesn't differentiate.
|
||||
phone = Phone::Schwa;
|
||||
}
|
||||
const Timed<Phone> timedPhone(start, start + duration, phone);
|
||||
result.set(timedPhone);
|
||||
}
|
||||
return result;
|
||||
}

// Some words have multiple pronunciations, one of which results in better animation than the others.
// This function returns the optimal pronunciation for a select set of these words.
string fixPronunciation(const string& word) {
    const static map<string, string> replacements {
        { "into(2)", "into" },
        { "to(2)", "to" },
        { "to(3)", "to" },
        { "today(2)", "today" },
        { "tomorrow(2)", "tomorrow" },
        { "tonight(2)", "tonight" }
    };

    const auto pair = replacements.find(word);
    return pair != replacements.end() ? pair->second : word;
}
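
// Example: fixPronunciation("to(3)") returns "to"; a word without a
// replacement entry, such as "hello", is returned unchanged.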

static Timeline<Phone> utteranceToPhones(
    const AudioClip& audioClip,
    TimeRange utteranceTimeRange,
    ps_decoder_t& decoder,
    ProgressSink& utteranceProgressSink
) {
    ProgressMerger utteranceProgressMerger(utteranceProgressSink);
    ProgressSink& wordRecognitionProgressSink =
        utteranceProgressMerger.addSource("word recognition (PocketSphinx recognizer)", 1.0);
    ProgressSink& alignmentProgressSink =
        utteranceProgressMerger.addSource("alignment (PocketSphinx recognizer)", 0.5);

    // Pad time range to give PocketSphinx some breathing room
    TimeRange paddedTimeRange = utteranceTimeRange;
    const centiseconds padding(3);
    paddedTimeRange.grow(padding);
    paddedTimeRange.trim(audioClip.getTruncatedRange());

    const unique_ptr<AudioClip> clipSegment = audioClip.clone()
        | segment(paddedTimeRange)
        | resample(sphinxSampleRate);
    const auto audioBuffer = copyTo16bitBuffer(*clipSegment);

    // Get words
    BoundedTimeline<string> words = recognizeWords(audioBuffer, decoder);
    wordRecognitionProgressSink.reportProgress(1.0);

    // Log utterance text
    string text;
    for (auto& timedWord : words) {
        string word = timedWord.getValue();
        // Skip details
        if (word == "<s>" || word == "</s>" || word == "<sil>") {
            continue;
        }
        word = regex_replace(word, regex("\\(\\d\\)"), "");
        if (!text.empty()) {
            text += " ";
        }
        text += word;
    }
    logTimedEvent("utterance", utteranceTimeRange, text);

    // Log words
    for (Timed<string> timedWord : words) {
        timedWord.getTimeRange().shift(paddedTimeRange.getStart());
        logTimedEvent("word", timedWord);
    }

    // Convert word strings to word IDs using dictionary
    vector<s3wid_t> wordIds;
    for (const auto& timedWord : words) {
        const string fixedWord = fixPronunciation(timedWord.getValue());
        wordIds.push_back(getWordId(fixedWord, *decoder.dict));
    }

    // Align the words' phones with speech
#if BOOST_VERSION < 105600 // Support legacy syntax
    #define value_or get_value_or
#endif
    Timeline<Phone> utterancePhones = getPhoneAlignment(wordIds, audioBuffer, decoder)
        .value_or(ContinuousTimeline<Phone>(clipSegment->getTruncatedRange(), Phone::Noise));
    alignmentProgressSink.reportProgress(1.0);
    utterancePhones.shift(paddedTimeRange.getStart());

    // Log raw phones
    for (const auto& timedPhone : utterancePhones) {
        logTimedEvent("rawPhone", timedPhone);
    }

    // Guess positions of noise sounds
    JoiningTimeline<void> noiseSounds = getNoiseSounds(utteranceTimeRange, utterancePhones);
    for (const auto& noiseSound : noiseSounds) {
        utterancePhones.set(noiseSound.getTimeRange(), Phone::Noise);
    }

    // Log phones
    for (const auto& timedPhone : utterancePhones) {
        logTimedEvent("phone", timedPhone);
    }

    return utterancePhones;
}
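
// The weights passed to addSource above (1.0 vs. 0.5) mean that, per
// utterance, word recognition is assumed to account for roughly twice as much
// of the reported progress as phone alignment.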

BoundedTimeline<Phone> PocketSphinxRecognizer::recognizePhones(
    const AudioClip& inputAudioClip,
    optional<std::string> dialog,
    int maxThreadCount,
    ProgressSink& progressSink
) const {
    return ::recognizePhones(
        inputAudioClip, dialog, &createDecoder, &utteranceToPhones, maxThreadCount, progressSink);
}

@@ -5,10 +5,10 @@

class PocketSphinxRecognizer : public Recognizer {
public:
    BoundedTimeline<Phone> recognizePhones(
        const AudioClip& inputAudioClip,
        boost::optional<std::string> dialog,
        int maxThreadCount,
        ProgressSink& progressSink
    ) const override;
};

@@ -7,12 +7,12 @@

class Recognizer {
public:
    virtual ~Recognizer() = default;

    virtual BoundedTimeline<Phone> recognizePhones(
        const AudioClip& audioClip,
        boost::optional<std::string> dialog,
        int maxThreadCount,
        ProgressSink& progressSink
    ) const = 0;
};

@@ -11,101 +11,101 @@ using std::invalid_argument;
using std::pair;

const vector<pair<wregex, wstring>>& getReplacementRules() {
    static vector<pair<wregex, wstring>> rules {
        #include "g2pRules.cpp"

        // Turn bigrams into unigrams for easier conversion
        { wregex(L"ôw"), L"Ω" },
        { wregex(L"öy"), L"ω" },
        { wregex(L"@r"), L"ɝ" }
    };
    return rules;
}

Phone charToPhone(wchar_t c) {
    // For reference, see http://www.zompist.com/spell.html
    switch (c) {
        case L'ä': return Phone::EY;
        case L'â': return Phone::AE;
        case L'ë': return Phone::IY;
        case L'ê': return Phone::EH;
        case L'ï': return Phone::AY;
        case L'î': return Phone::IH;
        case L'ö': return Phone::OW;
        case L'ô': return Phone::AA; // could also be AO/AH
        case L'ü': return Phone::UW; // really Y+UW
        case L'û': return Phone::AH; // [ʌ] as in b[u]t
        case L'u': return Phone::UW;
        case L'ò': return Phone::AO;
        case L'ù': return Phone::UH;
        case L'@': return Phone::AH; // [ə] as in [a]lone
        case L'Ω': return Phone::AW;
        case L'ω': return Phone::OY;
        case L'y': return Phone::Y;
        case L'w': return Phone::W;
        case L'ɝ': return Phone::ER;
        case L'p': return Phone::P;
        case L'b': return Phone::B;
        case L't': return Phone::T;
        case L'd': return Phone::D;
        case L'g': return Phone::G;
        case L'k': return Phone::K;
        case L'm': return Phone::M;
        case L'n': return Phone::N;
        case L'ñ': return Phone::NG;
        case L'f': return Phone::F;
        case L'v': return Phone::V;
        case L'+': return Phone::TH; // also covers DH
        case L's': return Phone::S;
        case L'z': return Phone::Z;
        case L'$': return Phone::SH; // also covers ZH
        case L'ç': return Phone::CH;
        case L'j': return Phone::JH;
        case L'r': return Phone::R;
        case L'l': return Phone::L;
        case L'h': return Phone::HH;
        default:
            return Phone::Noise;
    }
}

vector<Phone> wordToPhones(const std::string& word) {
    static regex validWord("^[a-z']*$");
    if (!regex_match(word, validWord)) {
        throw invalid_argument(fmt::format("Word '{}' contains illegal characters.", word));
    }

    wstring wideWord = latin1ToWide(word);
    for (const auto& rule : getReplacementRules()) {
        const wregex& regex = rule.first;
        const wstring& replacement = rule.second;

        // Repeatedly apply rule until there is no more change
        bool changed;
        do {
            wstring tmp = regex_replace(wideWord, regex, replacement);
            changed = tmp != wideWord;
            wideWord = tmp;
        } while (changed);
    }

    // Remove duplicate phones
    vector<Phone> result;
    Phone lastPhone = Phone::Noise;
    for (wchar_t c : wideWord) {
        Phone phone = charToPhone(c);
        if (phone == Phone::Noise) {
            logging::errorFormat(
                "G2P error determining pronunciation for '{}': Character '{}' is not a recognized phone shorthand.",
                word,
                static_cast<char>(c)
            );
        }

        if (phone != lastPhone) {
            result.push_back(phone);
        }
        lastPhone = phone;
    }
    return result;
}
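
// Example: because consecutive identical phones are collapsed, a rule output
// such as L"ss" contributes a single Phone::S to the result.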

@@ -24,178 +24,178 @@ using Bigram = tuple<string, string>;
using Trigram = tuple<string, string, string>;

map<Unigram, int> getUnigramCounts(const vector<string>& words) {
    map<Unigram, int> unigramCounts;
    for (const Unigram& unigram : words) {
        ++unigramCounts[unigram];
    }
    return unigramCounts;
}

map<Bigram, int> getBigramCounts(const vector<string>& words) {
    map<Bigram, int> bigramCounts;
    for (auto it = words.begin(); it < words.end() - 1; ++it) {
        ++bigramCounts[Bigram(*it, *(it + 1))];
    }
    return bigramCounts;
}

map<Trigram, int> getTrigramCounts(const vector<string>& words) {
    map<Trigram, int> trigramCounts;
    if (words.size() >= 3) {
        for (auto it = words.begin(); it < words.end() - 2; ++it) {
            ++trigramCounts[Trigram(*it, *(it + 1), *(it + 2))];
        }
    }
    return trigramCounts;
}

map<Unigram, double> getUnigramProbabilities(
    const vector<string>& words,
    const map<Unigram, int>& unigramCounts,
    const double deflator
) {
    map<Unigram, double> unigramProbabilities;
    for (const auto& pair : unigramCounts) {
        const Unigram& unigram = get<0>(pair);
        const int unigramCount = get<1>(pair);
        unigramProbabilities[unigram] = double(unigramCount) / words.size() * deflator;
    }
    return unigramProbabilities;
}

map<Bigram, double> getBigramProbabilities(
    const map<Unigram, int>& unigramCounts,
    const map<Bigram, int>& bigramCounts,
    const double deflator
) {
    map<Bigram, double> bigramProbabilities;
    for (const auto& pair : bigramCounts) {
        Bigram bigram = get<0>(pair);
        const int bigramCount = get<1>(pair);
        const int unigramPrefixCount = unigramCounts.at(get<0>(bigram));
        bigramProbabilities[bigram] = double(bigramCount) / unigramPrefixCount * deflator;
    }
    return bigramProbabilities;
}

map<Trigram, double> getTrigramProbabilities(
    const map<Bigram, int>& bigramCounts,
    const map<Trigram, int>& trigramCounts,
    const double deflator
) {
    map<Trigram, double> trigramProbabilities;
    for (const auto& pair : trigramCounts) {
        Trigram trigram = get<0>(pair);
        const int trigramCount = get<1>(pair);
        const int bigramPrefixCount = bigramCounts.at(Bigram(get<0>(trigram), get<1>(trigram)));
        trigramProbabilities[trigram] = double(trigramCount) / bigramPrefixCount * deflator;
    }
    return trigramProbabilities;
}
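
// All three estimators above are maximum-likelihood relative frequencies
// scaled by 'deflator' (i.e. 1 - discountMass), so a fixed share of
// probability mass is held back for the back-off weights computed below.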

map<Unigram, double> getUnigramBackoffWeights(
    const map<Unigram, int>& unigramCounts,
    const map<Unigram, double>& unigramProbabilities,
    const map<Bigram, int>& bigramCounts,
    const double discountMass)
{
    map<Unigram, double> unigramBackoffWeights;
    for (const Unigram& unigram : unigramCounts | boost::adaptors::map_keys) {
        double denominator = 1;
        for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) {
            if (get<0>(bigram) == unigram) {
                denominator -= unigramProbabilities.at(get<1>(bigram));
            }
        }
        unigramBackoffWeights[unigram] = discountMass / denominator;
    }
    return unigramBackoffWeights;
}
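
// In effect, the back-off weight of a unigram w is
//   alpha(w) = discountMass / (1 - sum of P(w') over all w' seen after w),
// which redistributes the held-back mass to unseen successors, in the manner
// of a simple Katz-style back-off scheme.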

map<Bigram, double> getBigramBackoffWeights(
    const map<Bigram, int>& bigramCounts,
    const map<Bigram, double>& bigramProbabilities,
    const map<Trigram, int>& trigramCounts,
    const double discountMass)
{
    map<Bigram, double> bigramBackoffWeights;
    for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) {
        double denominator = 1;
        for (const Trigram& trigram : trigramCounts | boost::adaptors::map_keys) {
            if (Bigram(get<0>(trigram), get<1>(trigram)) == bigram) {
                denominator -= bigramProbabilities.at(Bigram(get<1>(trigram), get<2>(trigram)));
            }
        }
        bigramBackoffWeights[bigram] = discountMass / denominator;
    }
    return bigramBackoffWeights;
}

void createLanguageModelFile(const vector<string>& words, const path& filePath) {
    const double discountMass = 0.5;
    const double deflator = 1.0 - discountMass;

    map<Unigram, int> unigramCounts = getUnigramCounts(words);
    map<Bigram, int> bigramCounts = getBigramCounts(words);
    map<Trigram, int> trigramCounts = getTrigramCounts(words);

    map<Unigram, double> unigramProbabilities =
        getUnigramProbabilities(words, unigramCounts, deflator);
    map<Bigram, double> bigramProbabilities =
        getBigramProbabilities(unigramCounts, bigramCounts, deflator);
    map<Trigram, double> trigramProbabilities =
        getTrigramProbabilities(bigramCounts, trigramCounts, deflator);

    map<Unigram, double> unigramBackoffWeights =
        getUnigramBackoffWeights(unigramCounts, unigramProbabilities, bigramCounts, discountMass);
    map<Bigram, double> bigramBackoffWeights =
        getBigramBackoffWeights(bigramCounts, bigramProbabilities, trigramCounts, discountMass);

    std::ofstream file(filePath);
    file << "Generated by " << appName << " " << appVersion << endl << endl;

    file << "\\data\\" << endl;
    file << "ngram 1=" << unigramCounts.size() << endl;
    file << "ngram 2=" << bigramCounts.size() << endl;
    file << "ngram 3=" << trigramCounts.size() << endl << endl;

    file.setf(std::ios::fixed, std::ios::floatfield);
    file.precision(4);
    file << "\\1-grams:" << endl;
    for (const Unigram& unigram : unigramCounts | boost::adaptors::map_keys) {
        file << log10(unigramProbabilities.at(unigram))
            << " " << unigram
            << " " << log10(unigramBackoffWeights.at(unigram)) << endl;
    }
    file << endl;

    file << "\\2-grams:" << endl;
    for (const Bigram& bigram : bigramCounts | boost::adaptors::map_keys) {
        file << log10(bigramProbabilities.at(bigram))
            << " " << get<0>(bigram) << " " << get<1>(bigram)
            << " " << log10(bigramBackoffWeights.at(bigram)) << endl;
    }
    file << endl;

    file << "\\3-grams:" << endl;
    for (const Trigram& trigram : trigramCounts | boost::adaptors::map_keys) {
        file << log10(trigramProbabilities.at(trigram))
            << " " << get<0>(trigram) << " " << get<1>(trigram) << " " << get<2>(trigram) << endl;
    }
    file << endl;

    file << "\\end\\" << endl;
}
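
// The file written above follows the ARPA n-gram format: one n-gram per line,
// prefixed with its log10 probability and, where back-off applies, suffixed
// with its log10 back-off weight. A sketch with made-up numbers:
//
//   \data\
//   ngram 1=2
//   ngram 2=1
//   ngram 3=1
//
//   \1-grams:
//   -0.4771 hello -0.3010
//   ...
//   \end\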

lambda_unique_ptr<ngram_model_t> createLanguageModel(
    const vector<string>& words,
    ps_decoder_t& decoder
) {
    path tempFilePath = getTempFilePath();
    createLanguageModelFile(words, tempFilePath);
    auto deleteTempFile = gsl::finally([&]() { std::filesystem::remove(tempFilePath); });

    return lambda_unique_ptr<ngram_model_t>(
        ngram_model_read(decoder.config, tempFilePath.u8string().c_str(), NGRAM_ARPA, decoder.lmath),
        [](ngram_model_t* lm) { ngram_model_free(lm); });
}

@@ -9,6 +9,6 @@ extern "C" {
}

lambda_unique_ptr<ngram_model_t> createLanguageModel(
    const std::vector<std::string>& words,
    ps_decoder_t& decoder
);

@@ -23,223 +23,223 @@ using std::filesystem::path;
using std::regex;
using boost::optional;
using std::chrono::duration_cast;

logging::Level convertSphinxErrorLevel(err_lvl_t errorLevel) {
    switch (errorLevel) {
        case ERR_DEBUG:
        case ERR_INFO:
        case ERR_INFOCONT:
            return logging::Level::Trace;
        case ERR_WARN:
            return logging::Level::Warn;
        case ERR_ERROR:
            return logging::Level::Error;
        case ERR_FATAL:
            return logging::Level::Fatal;
        default:
            throw invalid_argument("Unknown log level.");
    }
}

void sphinxLogCallback(void* user_data, err_lvl_t errorLevel, const char* format, ...) {
    UNUSED(user_data);

    // Create varArgs list
    va_list args;
    va_start(args, format);
    auto _ = gsl::finally([&args]() { va_end(args); });

    // Format message
    const int initialSize = 256;
    vector<char> chars(initialSize);
    bool success = false;
    while (!success) {
        // Work on a copy: vsnprintf consumes the va_list, so reusing it on a
        // retry after resizing would be undefined behavior.
        va_list argsCopy;
        va_copy(argsCopy, args);
        const int charsWritten = vsnprintf(chars.data(), chars.size(), format, argsCopy);
        va_end(argsCopy);
        if (charsWritten < 0) throw runtime_error("Error formatting PocketSphinx log message.");

        success = charsWritten < static_cast<int>(chars.size());
        if (!success) chars.resize(chars.size() * 2);
    }
    const regex waste("^(DEBUG|INFO|INFOCONT|WARN|ERROR|FATAL): ");
    string message =
        std::regex_replace(chars.data(), waste, "", std::regex_constants::format_first_only);
    boost::algorithm::trim(message);

    const logging::Level logLevel = convertSphinxErrorLevel(errorLevel);
    logging::log(logLevel, message);
}

void redirectPocketSphinxOutput() {
    static bool redirected = false;
    if (redirected) return;

    // Discard PocketSphinx output
    err_set_logfp(nullptr);

    // Redirect PocketSphinx output to log
    err_set_callback(sphinxLogCallback, nullptr);

    redirected = true;
}

BoundedTimeline<Phone> recognizePhones(
    const AudioClip& inputAudioClip,
    optional<std::string> dialog,
    decoderFactory createDecoder,
    utteranceToPhonesFunction utteranceToPhones,
    int maxThreadCount,
    ProgressSink& progressSink
) {
    ProgressMerger totalProgressMerger(progressSink);
    ProgressSink& voiceActivationProgressSink =
        totalProgressMerger.addSource("VAD (PocketSphinx tools)", 1.0);
    ProgressSink& dialogProgressSink =
        totalProgressMerger.addSource("recognition (PocketSphinx tools)", 15.0);

    // Make sure audio stream has no DC offset
    const unique_ptr<AudioClip> audioClip = inputAudioClip.clone() | removeDcOffset();

    // Split audio into utterances
    JoiningBoundedTimeline<void> utterances;
    try {
        utterances = detectVoiceActivity(*audioClip, voiceActivationProgressSink);
    } catch (...) {
        std::throw_with_nested(runtime_error("Error detecting segments of speech."));
    }

    redirectPocketSphinxOutput();

    // Prepare pool of decoders
    ObjectPool<ps_decoder_t, lambda_unique_ptr<ps_decoder_t>> decoderPool(
        [&] { return createDecoder(dialog); });

    BoundedTimeline<Phone> phones(audioClip->getTruncatedRange());
    std::mutex resultMutex;
    const auto processUtterance = [&](Timed<void> timedUtterance, ProgressSink& utteranceProgressSink) {
        // Detect phones for utterance
        const auto decoder = decoderPool.acquire();
        Timeline<Phone> utterancePhones = utteranceToPhones(
            *audioClip,
            timedUtterance.getTimeRange(),
            *decoder,
            utteranceProgressSink
        );

        // Copy phones to result timeline
        std::lock_guard<std::mutex> lock(resultMutex);
        for (const auto& timedPhone : utterancePhones) {
            phones.set(timedPhone);
        }
    };

    const auto getUtteranceProgressWeight = [](const Timed<void> timedUtterance) {
        return timedUtterance.getDuration().count();
    };

    // Perform speech recognition
    try {
        // Determine how many parallel threads to use
        int threadCount = std::min({
            maxThreadCount,
            // Don't use more threads than there are utterances to be processed
            static_cast<int>(utterances.size()),
            // Don't waste time creating additional threads (and decoders!) if the recording is short
            static_cast<int>(
                duration_cast<std::chrono::seconds>(audioClip->getTruncatedRange().getDuration()).count() / 5
            )
        });
        if (threadCount < 1) {
            threadCount = 1;
        }
        logging::debugFormat("Speech recognition using {} threads -- start", threadCount);
        runParallel(
            "speech recognition (PocketSphinx tools)",
            processUtterance,
            utterances,
            threadCount,
            dialogProgressSink,
            getUtteranceProgressWeight
        );
        logging::debug("Speech recognition -- end");
    } catch (...) {
        std::throw_with_nested(runtime_error("Error performing speech recognition via PocketSphinx tools."));
    }

    return phones;
}
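
// Worked example for the thread count heuristic: a 60-second recording allows
// at most 60 / 5 = 12 threads; with, say, 8 utterances and maxThreadCount 16,
// min({16, 8, 12}) yields 8 worker threads, each drawing a decoder from the
// pool.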

const path& getSphinxModelDirectory() {
    static path sphinxModelDirectory(getBinDirectory() / "res" / "sphinx");
    return sphinxModelDirectory;
}

JoiningTimeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone>& phones) {
    JoiningTimeline<void> noiseSounds;

    // Find utterance parts without recognized phones
    noiseSounds.set(utteranceTimeRange);
    for (const auto& timedPhone : phones) {
        noiseSounds.clear(timedPhone.getTimeRange());
    }

    // Remove undesired elements
    const centiseconds minSoundDuration = 12_cs;
    for (const auto& unknownSound : JoiningTimeline<void>(noiseSounds)) {
        const bool startsAtZero = unknownSound.getStart() == 0_cs;
        const bool tooShort = unknownSound.getDuration() < minSoundDuration;
        if (startsAtZero || tooShort) {
            noiseSounds.clear(unknownSound.getTimeRange());
        }
    }

    return noiseSounds;
}

BoundedTimeline<string> recognizeWords(const vector<int16_t>& audioBuffer, ps_decoder_t& decoder) {
    // Restart timing at 0
    ps_start_stream(&decoder);

    // Start recognition
    int error = ps_start_utt(&decoder);
    if (error) throw runtime_error("Error starting utterance processing for word recognition.");

    // Process entire audio clip
    const bool noRecognition = false;
    const bool fullUtterance = true;
    const int searchedFrameCount =
        ps_process_raw(&decoder, audioBuffer.data(), audioBuffer.size(), noRecognition, fullUtterance);
    if (searchedFrameCount < 0) {
        throw runtime_error("Error analyzing raw audio data for word recognition.");
    }

    // End recognition
    error = ps_end_utt(&decoder);
    if (error) throw runtime_error("Error ending utterance processing for word recognition.");

    BoundedTimeline<string> result(
        TimeRange(0_cs, centiseconds(100 * audioBuffer.size() / sphinxSampleRate))
    );
    const bool phonetic = cmd_ln_boolean_r(decoder.config, "-allphone_ci");
    if (!phonetic) {
        // If the decoder is in word mode (as opposed to phonetic recognition), it expects each
        // utterance to contain speech. If it doesn't, ps_seg_word() logs the annoying error
        // "Couldn't find <s> in first frame".
        // Not every utterance does contain speech, however. In this case, we exit early to prevent
        // the log output.
        // We *don't* do that in phonetic mode because there, the same code would omit valid phones.
        const bool noWordsRecognized = reinterpret_cast<ngram_search_t*>(decoder.search)->bpidx == 0;
        if (noWordsRecognized) {
            return result;
        }
    }

    // Collect words
    for (ps_seg_t* it = ps_seg_iter(&decoder); it; it = ps_seg_next(it)) {
        const char* word = ps_seg_word(it);
        int firstFrame, lastFrame;
        ps_seg_frames(it, &firstFrame, &lastFrame);
        result.set(centiseconds(firstFrame), centiseconds(lastFrame + 1), word);
    }

    return result;
}
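
// The result range above converts samples to centiseconds via
// 100 * sampleCount / sampleRate: at the 16 kHz Sphinx sample rate, 48000
// samples map to 100 * 48000 / 16000 = 300 cs, i.e. three seconds of audio.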

@@ -11,23 +11,23 @@ extern "C" {
}

typedef std::function<lambda_unique_ptr<ps_decoder_t>(
    boost::optional<std::string> dialog
)> decoderFactory;

typedef std::function<Timeline<Phone>(
    const AudioClip& audioClip,
    TimeRange utteranceTimeRange,
    ps_decoder_t& decoder,
    ProgressSink& utteranceProgressSink
)> utteranceToPhonesFunction;

BoundedTimeline<Phone> recognizePhones(
    const AudioClip& inputAudioClip,
    boost::optional<std::string> dialog,
    decoderFactory createDecoder,
    utteranceToPhonesFunction utteranceToPhones,
    int maxThreadCount,
    ProgressSink& progressSink
);

constexpr int sphinxSampleRate = 16000;

@@ -37,6 +37,6 @@ const std::filesystem::path& getSphinxModelDirectory();
JoiningTimeline<void> getNoiseSounds(TimeRange utteranceTimeRange, const Timeline<Phone>& phones);

BoundedTimeline<std::string> recognizeWords(
    const std::vector<int16_t>& audioBuffer,
    ps_decoder_t& decoder
);

@@ -19,117 +19,117 @@ using boost::optional;
using std::function;

lambda_unique_ptr<cst_voice> createDummyVoice() {
    lambda_unique_ptr<cst_voice> voice(new_voice(), [](cst_voice* voice) { delete_voice(voice); });
    voice->name = "dummy_voice";
    usenglish_init(voice.get());
    cst_lexicon* lexicon = cmu_lex_init();
    feat_set(voice->features, "lexicon", lexicon_val(lexicon));
    return voice;
}

static const cst_synth_module synth_method_normalize[] = {
    { "tokenizer_func", default_tokenization },     // split text into tokens
    { "textanalysis_func", default_textanalysis },  // transform tokens into words
    { nullptr, nullptr }
};

vector<string> tokenizeViaFlite(const string& text) {
    // Convert text to ASCII
    const string asciiText = utf8ToAscii(text);

    // Create utterance object with text
    lambda_unique_ptr<cst_utterance> utterance(
        new_utterance(),
        [](cst_utterance* utterance) { delete_utterance(utterance); }
    );
    utt_set_input_text(utterance.get(), asciiText.c_str());
    lambda_unique_ptr<cst_voice> voice = createDummyVoice();
    utt_init(utterance.get(), voice.get());

    // Perform tokenization and text normalization
    if (!apply_synth_method(utterance.get(), synth_method_normalize)) {
        throw runtime_error("Error normalizing text using Flite.");
    }

    vector<string> result;
    for (
        cst_item* item = relation_head(utt_relation(utterance.get(), "Word"));
        item;
        item = item_next(item)
    ) {
        const char* word = item_feat_string(item, "name");
        result.emplace_back(word);
    }
    return result;
}

optional<string> findSimilarDictionaryWord(
    const string& word,
    const function<bool(const string&)>& dictionaryContains
) {
    for (bool addPeriod : { false, true }) {
        for (int apostropheIndex = -1; apostropheIndex <= static_cast<int>(word.size()); ++apostropheIndex) {
            string modified = word;
            if (apostropheIndex != -1) {
                modified.insert(apostropheIndex, "'");
            }
            if (addPeriod) {
                modified += ".";
            }

            if (dictionaryContains(modified)) {
                return modified;
            }
        }
    }

    return boost::none;
}
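
// Example: for an out-of-dictionary token like "dont", inserting an apostrophe
// can produce a dictionary match such as "don't"; the trailing-period variant
// covers abbreviations.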

vector<string> tokenizeText(
    const string& text,
    const function<bool(const string&)>& dictionaryContains
) {
    vector<string> words = tokenizeViaFlite(text);

    // Join words separated by apostrophes
    for (int i = static_cast<int>(words.size()) - 1; i > 0; --i) {
        if (!words[i].empty() && words[i][0] == '\'') {
            words[i - 1].append(words[i]);
            words.erase(words.begin() + i);
        }
    }

    // Turn some symbols into words, remove the rest
    const static vector<pair<regex, string>> replacements {
        { regex("&"), "and" },
        { regex("\\*"), "times" },
        { regex("\\+"), "plus" },
        { regex("="), "equals" },
        { regex("@"), "at" },
        { regex("[^a-z']"), "" }
    };
    for (auto& word : words) {
        for (const auto& replacement : replacements) {
            word = regex_replace(word, replacement.first, replacement.second);
        }
    }

    // Remove empty words
    words.erase(
        std::remove_if(words.begin(), words.end(), [](const string& s) { return s.empty(); }),
        words.end()
    );

    // Try to replace words that are not in the dictionary with similar ones that are
    for (auto& word : words) {
        if (!dictionaryContains(word)) {
            optional<string> modifiedWord = findSimilarDictionaryWord(word, dictionaryContains);
            if (modifiedWord) {
                word = *modifiedWord;
            }
        }
    }

    return words;
}
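
// Example: after Flite tokenization, the symbol pass rewrites "+" to "plus"
// and "=" to "equals", and the final rule regex("[^a-z']") strips every
// remaining character outside lowercase letters and apostrophes.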

@@ -5,6 +5,6 @@
#include <string>

std::vector<std::string> tokenizeText(
    const std::string& text,
    const std::function<bool(const std::string&)>& dictionaryContains
);

@@ -3,27 +3,27 @@
using std::string;

ExportFormatConverter& ExportFormatConverter::get() {
    static ExportFormatConverter converter;
    return converter;
}

string ExportFormatConverter::getTypeName() {
    return "ExportFormat";
}

EnumConverter<ExportFormat>::member_data ExportFormatConverter::getMemberData() {
    return member_data {
        { ExportFormat::Dat, "dat" },
        { ExportFormat::Tsv, "tsv" },
        { ExportFormat::Xml, "xml" },
        { ExportFormat::Json, "json" }
    };
}

std::ostream& operator<<(std::ostream& stream, ExportFormat value) {
    return ExportFormatConverter::get().write(stream, value);
}

std::istream& operator>>(std::istream& stream, ExportFormat& value) {
    return ExportFormatConverter::get().read(stream, value);
}

@@ -3,18 +3,18 @@
#include "tools/EnumConverter.h"

enum class ExportFormat {
    Dat,
    Tsv,
    Xml,
    Json
};

class ExportFormatConverter : public EnumConverter<ExportFormat> {
public:
    static ExportFormatConverter& get();
protected:
    std::string getTypeName() override;
    member_data getMemberData() override;
};

std::ostream& operator<<(std::ostream& stream, ExportFormat value);

@@ -3,25 +3,25 @@
using std::string;

RecognizerTypeConverter& RecognizerTypeConverter::get() {
    static RecognizerTypeConverter converter;
    return converter;
}

string RecognizerTypeConverter::getTypeName() {
    return "RecognizerType";
}

EnumConverter<RecognizerType>::member_data RecognizerTypeConverter::getMemberData() {
    return member_data {
        { RecognizerType::PocketSphinx, "pocketSphinx" },
        { RecognizerType::Phonetic, "phonetic" }
    };
}

std::ostream& operator<<(std::ostream& stream, RecognizerType value) {
    return RecognizerTypeConverter::get().write(stream, value);
}

std::istream& operator>>(std::istream& stream, RecognizerType& value) {
    return RecognizerTypeConverter::get().read(stream, value);
}

@@ -3,16 +3,16 @@
#include "tools/EnumConverter.h"

enum class RecognizerType {
    PocketSphinx,
    Phonetic
};

class RecognizerTypeConverter : public EnumConverter<RecognizerType> {
public:
    static RecognizerTypeConverter& get();
protected:
    std::string getTypeName() override;
    member_data getMemberData() override;
};

std::ostream& operator<<(std::ostream& stream, RecognizerType value);