2015-12-29 15:26:01 +00:00
|
|
|
#include "stringTools.h"
|
2015-12-29 10:47:22 +00:00
|
|
|
#include <boost/algorithm/string/trim.hpp>
|
2016-06-03 19:07:49 +00:00
|
|
|
#include <codecvt>
|
2015-12-29 10:47:22 +00:00
|
|
|
|
|
|
|
using std::string;
|
2016-06-02 20:21:37 +00:00
|
|
|
using std::wstring;
|
2016-06-02 18:09:37 +00:00
|
|
|
using std::u32string;
|
2015-12-29 10:47:22 +00:00
|
|
|
using std::vector;
|
2016-06-02 18:09:37 +00:00
|
|
|
using boost::optional;
|
2015-12-29 10:47:22 +00:00
|
|
|
|
|
|
|
// Splits s at every '\n' and returns the resulting lines in order.
// The text after the last '\n' (possibly empty) always forms a final line, so
// an empty input yields exactly one empty line.
// All '\r' characters at the beginning and at the end of each line are removed.
std::vector<std::string> splitIntoLines(const std::string& s) {
    std::vector<std::string> result;
    const std::string::size_type length = s.size();
    std::string::size_type start = 0;

    // pos deliberately runs up to and including length: reaching the end of
    // the string terminates the last line just like a '\n' does.
    for (std::string::size_type pos = 0; pos <= length; ++pos) {
        const bool atLineEnd = pos == length || s[pos] == '\n';
        if (!atLineEnd) continue;

        std::string current = s.substr(start, pos - start);

        // Strip trailing, then leading, \r characters
        const std::string::size_type lastKept = current.find_last_not_of('\r');
        current.erase(lastKept == std::string::npos ? 0 : lastKept + 1);
        current.erase(0, current.find_first_not_of('\r'));

        result.push_back(current);
        start = pos + 1;
    }

    return result;
}
|
|
|
|
|
2016-04-12 19:23:15 +00:00
|
|
|
// Wraps a single-line string into lines of at most lineLength characters.
// The first line is not indented; every following line is prefixed with
// hangingIndent spaces, which count towards lineLength.
// Lines are broken at spaces or directly before a '|' character. A word that
// does not fit on a line by itself is split mid-word. Trailing whitespace is
// removed from every line, and spaces following a line break are skipped.
// An empty input yields a single empty line.
// Throws std::invalid_argument if lineLength <= 0, if hangingIndent is negative
// or not smaller than lineLength, or if s contains a tab or a line break.
std::vector<std::string> wrapSingleLineString(const std::string& s, int lineLength, int hangingIndent) {
    if (lineLength <= 0) {
        throw std::invalid_argument("lineLength must be > 0.");
    }
    if (hangingIndent < 0) {
        throw std::invalid_argument("hangingIndent must be >= 0.");
    }
    if (hangingIndent >= lineLength) {
        throw std::invalid_argument("hangingIndent must be < lineLength.");
    }
    if (s.find('\t') != std::string::npos) {
        throw std::invalid_argument("s must not contain tabs.");
    }
    if (s.find('\n') != std::string::npos) {
        throw std::invalid_argument("s must not contain line breaks.");
    }

    std::vector<std::string> result;
    const std::size_t length = s.size();
    std::size_t start = 0;     // first character of the line being built
    std::size_t breakPos = 0;  // last word boundary seen on the current line
    std::size_t pos = 0;

    // pos deliberately runs up to and including length so that the end of the
    // string flushes the final line.
    while (pos <= length) {
        const bool atEnd = pos == length;

        // Remember the most recent word boundary
        if (atEnd || s[pos] == ' ' || s[pos] == '|') {
            breakPos = pos;
        }

        // Only lines after the first one carry the hanging indent
        const int indent = result.empty() ? 0 : hangingIndent;
        const bool lineFull = (pos - start) == static_cast<std::size_t>(lineLength - indent);

        if (atEnd || lineFull) {
            // No boundary on this line: a single over-long word. Split mid-word.
            if (breakPos == start) {
                breakPos = pos;
            }

            // Cut out the line and drop trailing whitespace
            std::string text = s.substr(start, breakPos - start);
            while (!text.empty() && std::isspace(text.back(), std::locale())) {
                text.pop_back();
            }
            result.push_back(std::string(indent, ' ') + text);

            // Continue behind the emitted line, skipping the spaces at the break
            pos = breakPos;
            while (pos != length && s[pos] == ' ') {
                ++pos;
            }
            start = breakPos = pos;
        }

        ++pos;
    }

    return result;
}
|
|
|
|
|
2016-04-12 19:23:15 +00:00
|
|
|
vector<string> wrapString(const string& s, int lineLength, int hangingIndent) {
|
2015-12-29 10:47:22 +00:00
|
|
|
vector<string> lines;
|
|
|
|
for (string paragraph : splitIntoLines(s)) {
|
2016-04-12 19:23:15 +00:00
|
|
|
auto paragraphLines = wrapSingleLineString(paragraph, lineLength, hangingIndent);
|
2015-12-29 10:47:22 +00:00
|
|
|
copy(paragraphLines.cbegin(), paragraphLines.cend(), back_inserter(lines));
|
|
|
|
}
|
|
|
|
|
|
|
|
return lines;
|
|
|
|
}
|
|
|
|
|
2016-06-02 20:21:37 +00:00
|
|
|
// Converts a string to a wide string by widening every byte individually.
// Each byte is interpreted as an unsigned value (0..255) and becomes one wide
// character with that same numeric value.
std::wstring latin1ToWide(const std::string& s) {
    std::wstring wide;
    for (std::string::size_type i = 0; i < s.size(); ++i) {
        // Go through unsigned char so that bytes >= 0x80 are not sign-extended
        const unsigned char byte = static_cast<unsigned char>(s[i]);
        wide.push_back(static_cast<wchar_t>(byte));
    }
    return wide;
}
|
|
|
|
|
2016-11-16 10:49:25 +00:00
|
|
|
// Returns an ASCII character for the given code point, or an empty optional if
// there is none.
// The case labels of the switch come from the included file asciiCases.cpp,
// which is not visible here -- presumably it maps individual non-ASCII code
// points to ASCII replacements (TODO confirm).
// Any code point not handled there is returned unchanged if it is below 0x80;
// otherwise the result is empty.
boost::optional<char> toAscii(char32_t ch) {
    switch (ch) {
#include "asciiCases.cpp"
    default:
        if (ch < 0x80) {
            return static_cast<char>(ch);
        }
        return boost::optional<char>();
    }
}
|
|
|
|
|
2016-11-16 10:49:25 +00:00
|
|
|
string toAscii(const u32string& s) {
|
2016-06-02 18:09:37 +00:00
|
|
|
string result;
|
|
|
|
for (char32_t ch : s) {
|
2016-11-16 10:49:25 +00:00
|
|
|
optional<char> ascii = toAscii(ch);
|
2016-06-02 18:09:37 +00:00
|
|
|
if (ascii) result.append(1, *ascii);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
2016-06-03 19:07:49 +00:00
|
|
|
|
2016-12-20 19:04:43 +00:00
|
|
|
string toAscii(const wstring& s) {
|
|
|
|
string result;
|
|
|
|
for (wchar_t ch : s) {
|
|
|
|
optional<char> ascii = toAscii(ch);
|
|
|
|
if (ascii) result.append(1, *ascii);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2016-06-03 19:07:49 +00:00
|
|
|
// Converts a UTF-8 encoded string to UTF-32.
// The complete input is converted, including any embedded U+0000 characters.
// std::wstring_convert::from_bytes throws std::range_error if s is not valid
// UTF-8; that exception propagates to the caller.
std::u32string utf8ToUtf32(const std::string& s) {
#if defined(_MSC_VER) && _MSC_VER <= 1900
    // Workaround for Visual Studio 2015
    // See https://connect.microsoft.com/VisualStudio/feedback/details/1403302/unresolved-external-when-using-codecvt-utf8
    std::wstring_convert<std::codecvt_utf8<uint32_t>, uint32_t> convert;
    const std::basic_string<uint32_t> converted = convert.from_bytes(s);
    // Build the result from the full range rather than from a C-string pointer:
    // the pointer-only constructor stops at the first U+0000 and would silently
    // truncate the result.
    return std::u32string(converted.begin(), converted.end());
#else
    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> convert;
    return convert.from_bytes(s);
#endif
}
|