Added a number of string-related tools.
This commit is contained in:
parent
3c63c9c58b
commit
acd13e2890
|
@ -99,6 +99,7 @@ set(SOURCE_FILES
|
||||||
src/audio_input/SampleRateConverter.cpp
|
src/audio_input/SampleRateConverter.cpp
|
||||||
src/audio_input/WaveFileReader.cpp
|
src/audio_input/WaveFileReader.cpp
|
||||||
src/audio_input/wave_file_writing.cpp
|
src/audio_input/wave_file_writing.cpp
|
||||||
|
src/string_tools.cpp
|
||||||
)
|
)
|
||||||
add_executable(rhubarb ${SOURCE_FILES})
|
add_executable(rhubarb ${SOURCE_FILES})
|
||||||
target_link_libraries(rhubarb ${Boost_LIBRARIES} cppFormat sphinxbase pocketSphinx)
|
target_link_libraries(rhubarb ${Boost_LIBRARIES} cppFormat sphinxbase pocketSphinx)
|
||||||
|
@ -106,7 +107,10 @@ target_compile_options(rhubarb PUBLIC ${enableWarningsFlags})
|
||||||
|
|
||||||
# Define test project
|
# Define test project
|
||||||
#include_directories("${gtest_SOURCE_DIR}/include")
|
#include_directories("${gtest_SOURCE_DIR}/include")
|
||||||
set(TEST_FILES "")
|
set(TEST_FILES
|
||||||
|
tests/string_tools_tests.cpp
|
||||||
|
src/string_tools.cpp
|
||||||
|
)
|
||||||
add_executable(runTests ${TEST_FILES})
|
add_executable(runTests ${TEST_FILES})
|
||||||
target_link_libraries(runTests gtest gmock gmock_main)
|
target_link_libraries(runTests gtest gmock gmock_main)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,78 @@
|
||||||
|
#include "string_tools.h"
|
||||||
|
#include <boost/algorithm/string/trim.hpp>
|
||||||
|
|
||||||
|
using std::string;
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
|
// Splits s into lines at every '\n' character.
//
// Leading and trailing '\r' characters are removed from each resulting line, so
// "\r\n" line endings are handled as well. The result always contains at least one
// element: an empty input yields a single empty line, and a trailing '\n' yields a
// trailing empty line.
std::vector<std::string> splitIntoLines(const std::string& s) {
	std::vector<std::string> lines;

	// Work with indices rather than iterators: the position just behind the final
	// line may lie past the end of the string, which is not a valid iterator value.
	std::string::size_type lineBegin = 0;
	while (true) {
		// npos means there is no further line break, i.e. this is the last line
		const std::string::size_type lineEnd = s.find('\n', lineBegin);
		const std::string::size_type lineLength =
			(lineEnd == std::string::npos) ? std::string::npos : lineEnd - lineBegin;
		std::string line = s.substr(lineBegin, lineLength);

		// Trim \r characters from both ends
		const std::string::size_type first = line.find_first_not_of('\r');
		if (first == std::string::npos) {
			line.clear();
		} else {
			const std::string::size_type last = line.find_last_not_of('\r');
			line = line.substr(first, last - first + 1);
		}
		lines.push_back(line);

		if (lineEnd == std::string::npos) break;
		lineBegin = lineEnd + 1;
	}

	return lines;
}
|
||||||
|
|
||||||
|
// Wraps the single-line string s so that no resulting line is longer than
// lineLength characters.
//
// Lines are broken at spaces where possible; a word that does not fit into a line
// on its own is split mid-word. Spaces at a wrap position and whitespace at the end
// of a line are discarded, whereas leading spaces of s are kept. An empty s yields
// a single empty line.
//
// Throws std::invalid_argument if lineLength is not positive or if s contains tabs
// or line breaks.
std::vector<std::string> wrapSingleLineString(const std::string& s, int lineLength) {
	if (lineLength <= 0) throw std::invalid_argument("lineLength must be > 0.");
	if (s.find('\t') != std::string::npos) throw std::invalid_argument("s must not contain tabs.");
	if (s.find('\n') != std::string::npos) throw std::invalid_argument("s must not contain line breaks.");

	std::vector<std::string> lines;
	const std::string::size_type length = s.size();
	const std::string::size_type maxLineLength = static_cast<std::string::size_type>(lineLength);

	// Indices are used instead of iterators because the scan position is advanced
	// one step beyond s.size() to terminate the loop, which iterators do not permit.
	std::string::size_type pos = 0;
	std::string::size_type lineBegin = 0;
	// Last word boundary seen in the current line; equal to lineBegin while none was found
	std::string::size_type lineEnd = 0;

	// Iterate over input string, including the position just behind the last character
	while (pos <= length) {
		const bool atEnd = (pos == length);

		// If we're at a word boundary: update lineEnd
		if (atEnd || s[pos] == ' ') {
			lineEnd = pos;
		}

		// If we've hit lineLength or the end of the string: add a new result line
		if (atEnd || pos - lineBegin == maxLineLength) {
			if (lineEnd == lineBegin) {
				// The line contains a single word, which is too long. Split mid-word.
				lineEnd = pos;
			}

			// Add right-trimmed line to list
			std::string line = s.substr(lineBegin, lineEnd - lineBegin);
			const std::string::size_type lastNonSpace = line.find_last_not_of(" \t\n\v\f\r");
			line.erase(lastNonSpace == std::string::npos ? 0 : lastNonSpace + 1);
			lines.push_back(line);

			// Resume after the last line, skipping spaces
			pos = lineEnd;
			while (pos != length && s[pos] == ' ') ++pos;
			lineBegin = lineEnd = pos;
		}

		++pos;
	}

	return lines;
}
|
||||||
|
|
||||||
|
vector<string> wrapString(const string& s, int lineLength) {
|
||||||
|
vector<string> lines;
|
||||||
|
for (string paragraph : splitIntoLines(s)) {
|
||||||
|
auto paragraphLines = wrapSingleLineString(paragraph, lineLength);
|
||||||
|
copy(paragraphLines.cbegin(), paragraphLines.cend(), back_inserter(lines));
|
||||||
|
}
|
||||||
|
|
||||||
|
return lines;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
// Declarations of string helpers for splitting text into lines and wrapping it
// to a maximum line length.
#ifndef RHUBARB_LIP_SYNC_STRING_TOOLS_H
|
||||||
|
#define RHUBARB_LIP_SYNC_STRING_TOOLS_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
// Splits s at '\n' characters and strips leading and trailing '\r' characters from
// every line. An empty string yields one empty line.
std::vector<std::string> splitIntoLines(const std::string& s);
|
||||||
|
|
||||||
|
// Wraps a string without tabs or line breaks into lines of at most lineLength
// characters, breaking at spaces where possible and mid-word otherwise.
// Throws std::invalid_argument if lineLength <= 0 or s contains '\t' or '\n'.
std::vector<std::string> wrapSingleLineString(const std::string& s, int lineLength);
|
||||||
|
|
||||||
|
// Splits s into lines (see splitIntoLines) and wraps each of them
// (see wrapSingleLineString), returning all resulting lines in order.
std::vector<std::string> wrapString(const std::string& s, int lineLength);
|
||||||
|
|
||||||
|
#endif //RHUBARB_LIP_SYNC_STRING_TOOLS_H
|
|
@ -0,0 +1,73 @@
|
||||||
|
#include <gmock/gmock.h>
|
||||||
|
#include "string_tools.h"
|
||||||
|
|
||||||
|
using namespace testing;
|
||||||
|
|
||||||
|
// splitIntoLines
|
||||||
|
|
||||||
|
// Both "\n" and "\r\n" act as line separators; the "\r" is not part of the result.
TEST(splitIntoLines, splitsOnLineBreaks) {
|
||||||
|
EXPECT_THAT(splitIntoLines("this\nis\r\na\r\ntest"), ElementsAre("this", "is", "a", "test"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty lines are preserved: leading, consecutive and trailing line breaks each
// produce an empty element, and an empty input produces exactly one empty element.
TEST(splitIntoLines, handlesEmptyElements) {
|
||||||
|
EXPECT_THAT(splitIntoLines("\n1\n\n\n2\n"), ElementsAre("", "1", "", "", "2", ""));
|
||||||
|
EXPECT_THAT(splitIntoLines("\n"), ElementsAre("", ""));
|
||||||
|
EXPECT_THAT(splitIntoLines(""), ElementsAre(""));
|
||||||
|
}
|
||||||
|
|
||||||
|
// wrapSingleLineString
|
||||||
|
|
||||||
|
// Wraps a longer sentence at word boundaries so that no line exceeds 30 characters.
TEST(wrapSingleLineString, basic) {
|
||||||
|
const char* lipsum = "Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua.";
|
||||||
|
EXPECT_THAT(wrapSingleLineString(lipsum, 30), ElementsAre("Lorem ipsum dolor sit amet,", "consectetur adipisici elit,", "sed eiusmod tempor incidunt ut", "labore et dolore magna aliqua."));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks the exact wrap position for every line length from 5 down to 1: a line
// may be exactly lineLength characters long, but never longer.
TEST(wrapSingleLineString, preciseWrapPosition) {
|
||||||
|
const char* test = "a b c";
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 5), ElementsAre("a b c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 4), ElementsAre("a b", "c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 3), ElementsAre("a b", "c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 2), ElementsAre("a", "b", "c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 1), ElementsAre("a", "b", "c"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Words longer than lineLength are split mid-word; the remainder of such a word
// starts the next line and may be followed by further words.
TEST(wrapSingleLineString, overlongLines) {
|
||||||
|
EXPECT_THAT(wrapSingleLineString("aaa bbbb ccc", 3), ElementsAre("aaa", "bbb", "b", "ccc"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString("aaa bbbb c", 3), ElementsAre("aaa", "bbb", "b c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString("a bbbb c", 5), ElementsAre("a", "bbbb", "c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString("aa b", 1), ElementsAre("a", "a", "b"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Spaces at wrap positions and at the end of the string are dropped, while leading
// spaces of the input are kept.
// NOTE(review): with the single spaces shown in these literals, the expectations do
// not match what wrapSingleLineString produces (e.g. a limit of 6 would yield the
// single line " a b c"). Presumably runs of spaces were collapsed when this listing
// was rendered - verify the literals against the repository.
TEST(wrapSingleLineString, discardingSpacesAtWrapPositionsAndEnd) {
|
||||||
|
const char* test = " a b c ";
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 20), ElementsAre(" a b c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 6), ElementsAre(" a b", "c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 5), ElementsAre(" a", "b c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 4), ElementsAre(" a", "b c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 3), ElementsAre(" a", "b", "c"));
|
||||||
|
EXPECT_THAT(wrapSingleLineString(test, 2), ElementsAre("", "a", "b", "c"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Argument validation: valid arguments (including an empty string) must not throw,
// whereas a non-positive lineLength, tabs and line breaks must be rejected.
TEST(wrapSingleLineString, errorHandling) {
|
||||||
|
EXPECT_NO_THROW(wrapSingleLineString("test", 1));
|
||||||
|
EXPECT_NO_THROW(wrapSingleLineString("", 1));
|
||||||
|
|
||||||
|
// Throw if lineLength < 1
|
||||||
|
EXPECT_ANY_THROW(wrapSingleLineString("test", 0));
|
||||||
|
EXPECT_ANY_THROW(wrapSingleLineString("", 0));
|
||||||
|
EXPECT_ANY_THROW(wrapSingleLineString("test", -1));
|
||||||
|
EXPECT_ANY_THROW(wrapSingleLineString("", -1));
|
||||||
|
|
||||||
|
// Throw if string contains tabs
|
||||||
|
EXPECT_ANY_THROW(wrapSingleLineString("a\tb", 10));
|
||||||
|
|
||||||
|
// Throw if string contains line breaks
|
||||||
|
EXPECT_ANY_THROW(wrapSingleLineString("a\nb", 10));
|
||||||
|
}
|
||||||
|
|
||||||
|
// wrapString
|
||||||
|
|
||||||
|
// Multi-line input: every input line is wrapped on its own, and empty input lines
// (including the one after the trailing line break) are preserved.
TEST(wrapString, basic) {
|
||||||
|
EXPECT_THAT(wrapString("\n\nLine no 3\n\nLine no 4\n", 8), ElementsAre("", "", "Line no", "3", "", "Line no", "4", ""));
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue