Added a number of string-related tools.

This commit is contained in:
Daniel Wolf 2015-12-29 11:47:22 +01:00
parent 3c63c9c58b
commit acd13e2890
4 changed files with 169 additions and 1 deletions

View File

@ -99,6 +99,7 @@ set(SOURCE_FILES
src/audio_input/SampleRateConverter.cpp
src/audio_input/WaveFileReader.cpp
src/audio_input/wave_file_writing.cpp
src/string_tools.cpp
)
add_executable(rhubarb ${SOURCE_FILES})
target_link_libraries(rhubarb ${Boost_LIBRARIES} cppFormat sphinxbase pocketSphinx)
@ -106,7 +107,10 @@ target_compile_options(rhubarb PUBLIC ${enableWarningsFlags})
# Define test project
#include_directories("${gtest_SOURCE_DIR}/include")
set(TEST_FILES "")
set(TEST_FILES
tests/string_tools_tests.cpp
src/string_tools.cpp
)
add_executable(runTests ${TEST_FILES})
target_link_libraries(runTests gtest gmock gmock_main)

78
src/string_tools.cpp Normal file
View File

@ -0,0 +1,78 @@
#include "string_tools.h"
#include <stdexcept>
#include <boost/algorithm/string/trim.hpp>
using std::string;
using std::vector;
// Splits a string into lines at every '\n' character.
//
// Leading and trailing '\r' characters are stripped from each line, so "\r\n"
// line breaks are handled as well. The result always contains at least one
// element: an empty input yields a single empty line, and a trailing '\n'
// yields a trailing empty line.
std::vector<std::string> splitIntoLines(const std::string& s) {
    std::vector<std::string> lines;
    std::string::size_type lineBegin = 0;
    while (true) {
        // Index of the next line break, or npos if this is the last line
        const std::string::size_type lineBreak = s.find('\n', lineBegin);
        std::string::size_type first = lineBegin;
        std::string::size_type last = (lineBreak == std::string::npos) ? s.size() : lineBreak;

        // Trim \r characters from both ends of the line
        while (first < last && s[first] == '\r') ++first;
        while (last > first && s[last - 1] == '\r') --last;
        lines.emplace_back(s, first, last - first);

        // Stop once the final line has been added, rather than stepping
        // beyond the end of the string
        if (lineBreak == std::string::npos) break;
        lineBegin = lineBreak + 1;
    }
    return lines;
}
// Wraps a single line of text so that no resulting line is longer than
// lineLength characters.
//
// Lines are broken at space characters where possible. A word that does not
// fit into a line on its own is split mid-word. Spaces at a wrap position are
// dropped and trailing whitespace is removed from every resulting line;
// leading spaces of the input are kept. An empty input yields one empty line.
//
// Throws std::invalid_argument if lineLength is not positive or if s contains
// tabs or line breaks.
std::vector<std::string> wrapSingleLineString(const std::string& s, int lineLength) {
    if (lineLength <= 0) throw std::invalid_argument("lineLength must be > 0.");
    if (s.find('\t') != std::string::npos) throw std::invalid_argument("s must not contain tabs.");
    if (s.find('\n') != std::string::npos) throw std::invalid_argument("s must not contain line breaks.");

    using Index = std::string::size_type;
    const Index length = s.size();
    const Index maxLineLength = static_cast<Index>(lineLength);

    std::vector<std::string> lines;
    Index lineBegin = 0;
    Index lineEnd = 0;  // Last known position at which the current line may be broken
    Index pos = 0;

    // Iterate over input string, including the position one past the last character
    while (true) {
        const bool atEnd = (pos == length);

        // If we're at a word boundary: remember it as a possible line end
        if (atEnd || s[pos] == ' ') {
            lineEnd = pos;
        }

        // If we've hit lineLength or the end of the string: add a new result line
        if (atEnd || pos - lineBegin == maxLineLength) {
            if (lineEnd == lineBegin) {
                // The line contains a single word, which is too long. Split mid-word.
                lineEnd = pos;
            }

            // Add line without trailing whitespace.
            // find_last_not_of returns npos if the line is all whitespace;
            // npos + 1 wraps around to 0, which clears the whole line.
            std::string line(s, lineBegin, lineEnd - lineBegin);
            line.erase(line.find_last_not_of(" \t\n\v\f\r") + 1);
            lines.push_back(line);

            // Resume after the last line, skipping spaces
            pos = lineEnd;
            while (pos != length && s[pos] == ' ') ++pos;

            // Nothing left to wrap: stop here instead of stepping beyond the end
            if (pos == length) break;
            lineBegin = lineEnd = pos;
        }
        ++pos;
    }
    return lines;
}
vector<string> wrapString(const string& s, int lineLength) {
vector<string> lines;
for (string paragraph : splitIntoLines(s)) {
auto paragraphLines = wrapSingleLineString(paragraph, lineLength);
copy(paragraphLines.cbegin(), paragraphLines.cend(), back_inserter(lines));
}
return lines;
}

13
src/string_tools.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef RHUBARB_LIP_SYNC_STRING_TOOLS_H
#define RHUBARB_LIP_SYNC_STRING_TOOLS_H
#include <string>
#include <vector>
// Splits s into lines at every '\n' character. Leading and trailing '\r'
// characters are removed from each line. The result always contains at least
// one element; an empty string yields a single empty line.
std::vector<std::string> splitIntoLines(const std::string& s);
// Wraps a single line of text into lines of at most lineLength characters,
// breaking at spaces where possible and mid-word where a word is too long.
// Throws std::invalid_argument if lineLength is <= 0 or if s contains tabs
// or line breaks.
std::vector<std::string> wrapSingleLineString(const std::string& s, int lineLength);
// Splits s into lines using splitIntoLines, wraps each of them using
// wrapSingleLineString, and returns all resulting lines in order.
std::vector<std::string> wrapString(const std::string& s, int lineLength);
#endif //RHUBARB_LIP_SYNC_STRING_TOOLS_H

View File

@ -0,0 +1,73 @@
#include <gmock/gmock.h>
#include "string_tools.h"
using namespace testing;
// splitIntoLines
// Both "\n" and "\r\n" line breaks separate lines; no '\r' remains in the result.
TEST(splitIntoLines, splitsOnLineBreaks) {
    const auto lines = splitIntoLines("this\nis\r\na\r\ntest");
    EXPECT_THAT(lines, ElementsAre("this", "is", "a", "test"));
}
// Empty lines are preserved, including those before the first and after the
// last line break. An empty string counts as one empty line.
TEST(splitIntoLines, handlesEmptyElements) {
    const auto mixedLines = splitIntoLines("\n1\n\n\n2\n");
    EXPECT_THAT(mixedLines, ElementsAre("", "1", "", "", "2", ""));

    const auto singleLineBreak = splitIntoLines("\n");
    EXPECT_THAT(singleLineBreak, ElementsAre("", ""));

    const auto emptyString = splitIntoLines("");
    EXPECT_THAT(emptyString, ElementsAre(""));
}
// wrapSingleLineString
// A typical sentence is wrapped at word boundaries without exceeding the line length.
TEST(wrapSingleLineString, basic) {
    const char* text = "Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua.";
    const auto wrapped = wrapSingleLineString(text, 30);
    EXPECT_THAT(wrapped, ElementsAre(
        "Lorem ipsum dolor sit amet,",
        "consectetur adipisici elit,",
        "sed eiusmod tempor incidunt ut",
        "labore et dolore magna aliqua."));
}
// Checks the exact wrap position for every line length from the full input
// length down to a single character.
TEST(wrapSingleLineString, preciseWrapPosition) {
    const char* input = "a b c";

    // Everything fits into one line
    EXPECT_THAT(wrapSingleLineString(input, 5), ElementsAre("a b c"));

    // Two words fit into a line
    EXPECT_THAT(wrapSingleLineString(input, 4), ElementsAre("a b", "c"));
    EXPECT_THAT(wrapSingleLineString(input, 3), ElementsAre("a b", "c"));

    // Only one word fits into a line
    EXPECT_THAT(wrapSingleLineString(input, 2), ElementsAre("a", "b", "c"));
    EXPECT_THAT(wrapSingleLineString(input, 1), ElementsAre("a", "b", "c"));
}
// Words that are longer than the line length are split mid-word.
TEST(wrapSingleLineString, overlongLines) {
    const auto overlongInMiddle = wrapSingleLineString("aaa bbbb ccc", 3);
    EXPECT_THAT(overlongInMiddle, ElementsAre("aaa", "bbb", "b", "ccc"));

    const auto remainderJoinsNextWord = wrapSingleLineString("aaa bbbb c", 3);
    EXPECT_THAT(remainderJoinsNextWord, ElementsAre("aaa", "bbb", "b c"));

    const auto noSplitNeeded = wrapSingleLineString("a bbbb c", 5);
    EXPECT_THAT(noSplitNeeded, ElementsAre("a", "bbbb", "c"));

    const auto singleCharacterLines = wrapSingleLineString("aa b", 1);
    EXPECT_THAT(singleCharacterLines, ElementsAre("a", "a", "b"));
}
// Spaces at wrap positions and at the end of the input are discarded, while
// leading spaces of the input are kept.
// NOTE(review): the expectations below only seem consistent with an input that
// contains runs of several spaces; verify that the string literals have not
// been whitespace-collapsed.
TEST(wrapSingleLineString, discardingSpacesAtWrapPositionsAndEnd) {
    const char* spacedInput = " a b c ";

    EXPECT_THAT(wrapSingleLineString(spacedInput, 20), ElementsAre(" a b c"));
    EXPECT_THAT(wrapSingleLineString(spacedInput, 6), ElementsAre(" a b", "c"));
    EXPECT_THAT(wrapSingleLineString(spacedInput, 5), ElementsAre(" a", "b c"));
    EXPECT_THAT(wrapSingleLineString(spacedInput, 4), ElementsAre(" a", "b c"));
    EXPECT_THAT(wrapSingleLineString(spacedInput, 3), ElementsAre(" a", "b", "c"));
    EXPECT_THAT(wrapSingleLineString(spacedInput, 2), ElementsAre("", "a", "b", "c"));
}
// Valid arguments must be accepted; invalid line lengths and unsupported
// characters must be rejected with an exception.
TEST(wrapSingleLineString, errorHandling) {
    // The smallest valid line length works for non-empty and empty input
    EXPECT_NO_THROW(wrapSingleLineString("test", 1));
    EXPECT_NO_THROW(wrapSingleLineString("", 1));

    // Throw if lineLength < 1
    for (int invalidLength : {0, -1}) {
        EXPECT_ANY_THROW(wrapSingleLineString("test", invalidLength));
        EXPECT_ANY_THROW(wrapSingleLineString("", invalidLength));
    }

    // Throw if string contains tabs
    EXPECT_ANY_THROW(wrapSingleLineString("a\tb", 10));

    // Throw if string contains line breaks
    EXPECT_ANY_THROW(wrapSingleLineString("a\nb", 10));
}
// wrapString
// Every input line is wrapped on its own; empty lines are preserved.
TEST(wrapString, basic) {
    const auto wrapped = wrapString("\n\nLine no 3\n\nLine no 4\n", 8);
    EXPECT_THAT(wrapped, ElementsAre("", "", "Line no", "3", "", "Line no", "4", ""));
}