rhubarb-lip-sync/lib/utf8proc-2a2f97e1/test/graphemetest.c

#include "tests.h"

/*
 * Grapheme-break test driver.
 *
 * Usage: graphemetest <path to GraphemeBreakTest.txt>
 *
 * Reads the file named by argv[1] one line at a time. Lines starting with
 * '#' are skipped. Every other line is scanned as a sequence of hex-encoded
 * codepoints separated by '/' (grapheme break) or '+' (no break), optionally
 * followed by a '#' comment. From it the expected string `src` is built:
 * the encoded codepoints with a '/' at each break. The same bytes without
 * the '/' markers are passed to utf8proc_map() with UTF8PROC_CHARBOUND, the
 * 0xff bytes in the result are rewritten to '/', and the result must be
 * equal to `src`.
 *
 * check(), skipspaces(), encode() and lineno are not defined in this file;
 * presumably they are provided by tests.h, and a failed check() presumably
 * terminates the program -- confirm there.
 *
 * Returns 0 after the whole file has been processed.
 */
int main(int argc, char **argv)
{
    char *buf = NULL;    /* line buffer, (re)allocated by getline() */
    size_t bufsize = 0;
    FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
    /* NOTE(review): nothing below bounds-checks writes into src; this relies
       on every test line encoding to fewer than 1024 bytes. */
    utf8proc_uint8_t src[1024];
    int len;

    check(f != NULL, "error opening GraphemeBreakTest.txt");
    while (getline(&buf, &bufsize, f) > 0) {
        size_t bi = 0, si = 0; /* read index into buf, write index into src */
        lineno += 1;

        if (lineno % 100 == 0)
            printf("checking line %zd...\n", lineno);

        if (buf[0] == '#') continue; /* whole-line comment */

        while (buf[bi]) {
            bi = skipspaces(buf, bi);
            if (buf[bi] == '/') { /* grapheme break */
                src[si++] = '/';
                bi++;
            }
            else if (buf[bi] == '+') { /* no break */
                bi++;
            }
            else if (buf[bi] == '#') { /* start of comments */
                break;
            }
            else { /* hex-encoded codepoint */
                /* presumably encode() returns the number of bytes it
                   consumed including the terminating character, hence
                   the -1 -- verify against its definition */
                len = encode((char*) (src + si), buf + bi) - 1;
                while (src[si]) ++si; /* advance to NUL termination */
                bi += len;
            }
        }
        if (si && src[si-1] == '/')
            --si; /* no break after final grapheme */
        src[si] = 0; /* NUL-terminate */

        if (si) {
            utf8proc_uint8_t utf8[1024]; /* copy of src with the '/' break markers removed */
            size_t i = 0, j = 0;
            utf8proc_ssize_t glen;
            /* utf8proc_map grapheme results; starts as NULL so the free()
               below is well-defined even if the call leaves it untouched */
            utf8proc_uint8_t *g = NULL;
            while (i < si) {
                if (src[i] != '/')
                    utf8[j++] = src[i++];
                else
                    i++;
            }
            glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
            if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
                /* the test file contains surrogate codepoints, which are only for UTF-16 */
                printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
            }
            else {
                check(glen >= 0, "utf8proc_map error = %s",
                      utf8proc_errmsg(glen));
                /* glen is non-negative past the check above, so the
                   conversion to size_t is value-preserving */
                for (i = 0; i <= (size_t)glen; ++i)
                    if (g[i] == 0xff)
                        g[i] = '/'; /* easier-to-read output (/ is not in test strings) */
                check(!strcmp((char*)g, (char*)src),
                      "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
            }
            free(g);
        }
    }
    fclose(f);
    free(buf); /* release the buffer getline() allocated */
    printf("Passed tests after %zd lines!\n", lineno);
    return 0;
}
Added utf8proc library 2017-08-01 17:10:03 +00:00			`#include "tests.h"`

			`int main(int argc, char **argv)`
			`{`
			`char *buf = NULL;`
			`size_t bufsize = 0;`
			`FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;`
			`utf8proc_uint8_t src[1024];`
			`int len;`

			`check(f != NULL, "error opening GraphemeBreakTest.txt");`
			`while (getline(&buf, &bufsize, f) > 0) {`
			`size_t bi = 0, si = 0;`
			`lineno += 1;`

			`if (lineno % 100 == 0)`
			`printf("checking line %zd...\n", lineno);`

			`if (buf[0] == '#') continue;`

			`while (buf[bi]) {`
			`bi = skipspaces(buf, bi);`
			`if (buf[bi] == '/') { /* grapheme break */`
			`src[si++] = '/';`
			`bi++;`
			`}`
			`else if (buf[bi] == '+') { /* no break */`
			`bi++;`
			`}`
			`else if (buf[bi] == '#') { /* start of comments */`
			`break;`
			`}`
			`else { /* hex-encoded codepoint */`
			`len = encode((char*) (src + si), buf + bi) - 1;`
			`while (src[si]) ++si; /* advance to NUL termination */`
			`bi += len;`
			`}`
			`}`
			`if (si && src[si-1] == '/')`
			`--si; /* no break after final grapheme */`
			`src[si] = 0; /* NUL-terminate */`

			`if (si) {`
			`utf8proc_uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */`
			`size_t i = 0, j = 0;`
			`utf8proc_ssize_t glen;`
			`utf8proc_uint8_t *g; /* utf8proc_map grapheme results */`
			`while (i < si) {`
			`if (src[i] != '/')`
			`utf8[j++] = src[i++];`
			`else`
			`i++;`
			`}`
			`glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);`
			`if (glen == UTF8PROC_ERROR_INVALIDUTF8) {`
			`/* the test file contains surrogate codepoints, which are only for UTF-16 */`
			`printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);`
			`}`
			`else {`
			`check(glen >= 0, "utf8proc_map error = %s",`
			`utf8proc_errmsg(glen));`
			`for (i = 0; i <= glen; ++i)`
			`if (g[i] == 0xff)`
			`g[i] = '/'; /* easier-to-read output (/ is not in test strings) */`
			`check(!strcmp((char*)g, (char*)src),`
			`"grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);`
			`}`
			`free(g);`
			`}`
			`}`
			`fclose(f);`
			`printf("Passed tests after %zd lines!\n", lineno);`
			`return 0;`
			`}`