/*
** SOUNDS.C
**
** Sound Change Applier
**
** Copyright (C) 2000 by Mark Rosenfelder.
** This program may be freely used and modified for non-commercial purposes.
** See http://www.zompist.com/sounds.htm for documentation.
*/

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TRUE  1
#define FALSE 0

/* Output options, set from the command line in main() */
static int printRules = 0;   /* -p: report every rule application          */
static int bracketOut = 0;   /* -b: print "output [input]"                 */
static int printSourc = 1;   /* cleared by -l: print the output word only  */
static int toScreen   = 1;   /* cleared by -f: write to the file only      */

#define MAXRULE 200
#define MAXCAT   50

/* Buffer sizes; every one of them includes room for the terminating NUL */
#define MAXLINE 129          /* one line of a rules file                   */
#define MAXWORD 256          /* one word, before or after transformation   */
#define MAXNAME  65          /* a project name typed by the user           */
#define MAXPATH  84          /* a project name plus its extension          */
#define MAXPART  20          /* the source or target part of a rule        */
#define MAXENV   50          /* the environment part of a rule             */

static int   nRule = 0;
static char *Rule[MAXRULE];

static int   nCat = 0;
static char *Cat[MAXCAT];

/*
** ReadLine
**
** Read one line of text from f into buffer (of the given size).
** The trailing newline, and a carriage return before it, are removed.
** If the line does not fit, the part that fits is kept and the rest of
** the line is discarded, so the next call starts on a fresh line.
**
** Returns FALSE at end of file (or on a read error), TRUE otherwise.
*/
static int ReadLine( FILE *f, char *buffer, size_t size )
{
    size_t len;

    if (!fgets( buffer, (int) size, f ))
        return(FALSE);

    len = strlen( buffer );

    if (len && buffer[len - 1] == '\n')
        buffer[--len] = '\0';
    else if (len == size - 1) {
        /* The buffer filled up before the newline: skip the remainder */
        int ch;

        while ((ch = getc( f )) != EOF && ch != '\n')
            ;
    }

    if (len && buffer[len - 1] == '\r')
        buffer[--len] = '\0';

    return(TRUE);
} /*ReadLine*/

/*
** CopyString
**
** Return a freshly allocated copy of s, or NULL if memory runs out.
** The caller owns the copy and releases it with free().
*/
static char *CopyString( const char *s )
{
    size_t size = strlen( s ) + 1;
    char  *copy = malloc( size );

    if (copy)
        memcpy( copy, s, size );

    return(copy);
} /*CopyString*/

/*
** MakeFileName
**
** Build the file name base+ext in dest (of the given size).
** If the name does not fit, print a message and return FALSE.
*/
static int MakeFileName( char *dest, size_t size, const char *base, const char *ext )
{
    int len = snprintf( dest, size, "%s%s", base, ext );

    if (len < 0 || (size_t) len >= size) {
        printf( "File name %s%s is too long.\n\n", base, ext );
        return(FALSE);
    }

    return(TRUE);
} /*MakeFileName*/

/*
** ReadRules
**
** Read in the rules file *.sc for a given project.
**
** There are two types of rules: sound changes and category definitions.
** The former are stored in Rule[], the latter in Cat[].
** A line containing '/' is a sound change; any other line containing '='
** is a category definition.  Lines starting with '*' are comments, and
** lines that are neither are ignored.
**
** Any rules left over from a previous call are released first.
** At most MAXRULE rules and MAXCAT categories are kept; further ones are
** ignored with a warning.
**
** Returns the number of rules read, or FALSE (0) if the file could not
** be read.
**
** The format of these rules is given under Transform().
*/
int ReadRules( char *filestart )
{
    char  filename[MAXPATH];
    char  buffer[MAXLINE];
    int   n;
    int   ok      = TRUE;
    int   dropped = 0;
    FILE *f;

    /* Forget whatever an earlier project loaded */
    for (n = 0; n < nRule; n++) {
        free( Rule[n] );
        Rule[n] = NULL;
    }
    for (n = 0; n < nCat; n++) {
        free( Cat[n] );
        Cat[n] = NULL;
    }
    nRule = 0;
    nCat  = 0;

    /* Open the file */
    if (!MakeFileName( filename, sizeof filename, filestart, ".sc" ))
        return(FALSE);

    f = fopen( filename, "r" );
    if (!f) {
        printf( "File %s could not be read in.\n\n", filename );
        return(FALSE);
    }

    while (ok && ReadLine( f, buffer, sizeof buffer )) {
        char **table;
        int   *count;
        int    limit;

        if (buffer[0] == '*')
            continue;

        if (strchr( buffer, '/' )) {
            table = Rule;
            count = &nRule;
            limit = MAXRULE;
        } else if (strchr( buffer, '=' )) {
            table = Cat;
            count = &nCat;
            limit = MAXCAT;
        } else
            continue;

        if (*count >= limit) {
            dropped++;
            continue;
        }

        /* Only lines that are actually kept get a heap copy */
        table[*count] = CopyString( buffer );
        if (table[*count])
            (*count)++;
        else
            ok = FALSE;
    }

    fclose( f );

    if (!ok) {
        printf( "Out of memory while reading %s.\n\n", filename );
        return(FALSE);
    }

    if (dropped)
        printf( "Warning: %i line%s ignored; the limits are %i rules and %i categories.\n",
                dropped, dropped == 1 ? "" : "s", MAXRULE, MAXCAT );

    if (nCat) {
        printf( "%i categories found\n", nCat );

#ifdef PRINT_RULES
        for (n = 0; n < nCat; n++)
            printf( "%s\n", Cat[n] );
        printf( "\n" );
#endif
    } else
        printf( "No categories were found.\n\n" );

    if (nRule) {
        printf( "%i rules found\n", nRule );

#ifdef PRINT_RULES
        for (n = 0; n < nRule; n++)
            printf( "%s\n", Rule[n] );
        printf( "\n" );
#endif
    } else
        printf( "No rules were found.\n\n" );

    return( nRule );
} /*ReadRules*/

/*
** Divide
**
** Divide a rule into source and target phoneme(s) and environment.
** That is, for a rule s1/s2/env
** create the three null-terminated strings s1, s2, and env.
**
** The strings live in static buffers that are overwritten by the next
** call.  s1 must be 1 to 19 characters long, s2 0 to 19, and env at
** most 49; env is everything after the second slash.
**
** If this cannot be done (a slash is missing, or a part has the wrong
** length), return FALSE.
*/
int Divide( char *Rule, char **s1, char **s2, char **env )
{
    static char s1_str[MAXPART];
    static char s2_str[MAXPART];
    static char ev_str[MAXENV];
    size_t      len;

    /* Source: everything up to the first slash, which must be there */
    len = strcspn( Rule, "/" );
    if (len == 0 || len >= sizeof s1_str || Rule[len] != '/')
        return(FALSE);
    memcpy( s1_str, Rule, len );
    s1_str[len] = '\0';
    Rule += len + 1;

    /* Target: may be empty (a deletion), but the slash is still needed */
    len = strcspn( Rule, "/" );
    if (len >= sizeof s2_str || Rule[len] != '/')
        return(FALSE);
    memcpy( s2_str, Rule, len );
    s2_str[len] = '\0';
    Rule += len + 1;

    /* Environment: the rest of the rule */
    len = strlen( Rule );
    if (len >= sizeof ev_str)
        return(FALSE);
    memcpy( ev_str, Rule, len + 1 );

    *s1  = s1_str;
    *s2  = s2_str;
    *env = ev_str;

    return(TRUE);
} /*Divide*/

/*
** TryCat
**
** See if a particular phoneme sequence is part of any category.
** (We try all the categories.)
**
** Only the first character of env (the category name) and the first
** character of word are looked at.
**
** For instance, if we have 'a' in the source word and 'V' in the
** structural description, and a category V=aeiou, TryCat returns TRUE,
** sets *n to the number of characters to skip (1), and sets *catLoc to
** the index of the matched member within the category line (2 for 'a'
** in "V=aeiou").
**
** If we had 'b' instead, TryCat would return FALSE instead.
** FALSE is also returned at the end of the word.
**
** If no category with the given identification (env) can be found,
** we return TRUE (continue looking), but set *n to 0; *catLoc is then
** left untouched.
**
** Warning: For now, we don't have a way to handle digraphs.
*/
int TryCat( char *env, char *word, int *n, int *catLoc )
{
    int c;

    if (*word == '\0')
        return(FALSE);

    for (c = 0; c < nCat; c++) {
        char *members;
        char *hit;

        if (*env != *Cat[c])
            continue;

        members = strchr( Cat[c], '=' );
        if (!members)
            continue;   /* not a usable definition; keep looking */
        members++;

        /* Search the member list only, never the name or the '=' */
        hit = strchr( members, word[0] );
        if (!hit)
            return(FALSE);

        *n      = 1;
        *catLoc = (int) (hit - Cat[c]);
        return(TRUE);
    }

    *n = 0;
    return(TRUE);
} /*TryCat*/

/*
** TryRule
**
** See if a rule s1->s2/env applies at position i in the given word.
** The environment is matched starting at word[i], so for an environment
** such as V_ the source s1 is found one character further on.
**
** If it does, we pass back the index where s1 was found in the
** word (*n), as well as s1 and s2, and return TRUE.  When s1 matched as
** a category and s2 names a category too, *varRep receives the member of
** the s2 category at the same position; otherwise *varRep is '\0'.
**
** Otherwise, we return FALSE, and pass garbage in the output variables.
**
** s1, s2 point into Divide()'s static buffers.
*/
int TryRule( char *word, int i, char *Rule, int *n, char **s1, char **s2, char *varRep )
{
    int   j, m, cont;
    int   catLoc   = 0;
    int   optional = FALSE;
    int   wordLen, envLen, srcLen;
    char *env;

    *varRep = '\0';

    if (!Divide( Rule, s1, s2, &env ) || !strchr( env, '_' ))
        return(FALSE);

    wordLen = (int) strlen( word );
    envLen  = (int) strlen( env );
    srcLen  = (int) strlen( *s1 );

    if (i < 0 || i > wordLen)
        return(FALSE);

    for (j = 0, cont = TRUE; cont && j < envLen; j++) {
        switch (env[j]) {
        case '(':
            optional = TRUE;
            break;

        case ')':
            optional = FALSE;
            break;

        case '#':
            /* Word boundary: the end of the word, unless it comes first */
            cont = j ? (i == wordLen) : (i == 0);
            break;

        case '_':
            if (!strncmp( &word[i], *s1, (size_t) srcLen )) {
                /* The source matches literally */
                *n = i;
                i += srcLen;
            } else {
                /* Otherwise the source may name a category */
                cont = TryCat( *s1, &word[i], &m, &catLoc );
                if (cont && m) {
                    int c;

                    *n = i;
                    i += m;

                    /* Pick the parallel member of the target category */
                    for (c = 0; c < nCat; c++)
                        if ((*s2)[0] == Cat[c][0]
                         && (size_t) catLoc < strlen( Cat[c] ))
                            *varRep = Cat[c][catLoc];
                } else
                    cont = FALSE;
            }
            break;

        default:
            cont = TryCat( &env[j], &word[i], &m, &catLoc );
            if (cont && !m) {
                /* no category applied */
                cont = i < wordLen && word[i] == env[j];
                m = 1;
            }
            if (cont)
                i += m;
            if (!cont && optional)
                cont = TRUE;
        }
    }

    if (cont && printRules)
        printf( " %s->%s /%s applies to %s at %i\n", *s1, *s2, env, word, *n );

    return(cont);
} /*TryRule*/

/*
** Append
**
** Copy count characters from src to dest[used...], never going past
** MAXWORD - 1 characters in all.  Returns the new number of characters
** in dest.  No terminating NUL is written; the caller adds it.
*/
static size_t Append( char *dest, size_t used, const char *src, size_t count )
{
    size_t room = (MAXWORD - 1) - used;

    if (count > room)
        count = room;
    memcpy( dest + used, src, count );

    return(used + count);
} /*Append*/

/*
** Transform
**
** Apply the rules to a single word and return the result.
**
** The rules are stated in the form string1/string2/environment, e.g.
** f/h/#_V
** which states that f changes to h at the beginning of a word before a
** vowel.
**
** The result is held in a static buffer which is overwritten by the
** next call.  Words (input or output) are limited to MAXWORD - 1
** characters; anything beyond that is dropped.  If no rules are loaded
** the word is returned unchanged.
*/
char *Transform( char *input )
{
    char        inword[MAXWORD];
    static char outword[MAXWORD];
    char       *s1, *s2;
    int         r;

    snprintf( inword, sizeof inword, "%s", input );

    /* With no rules at all, the word passes through as it is */
    strcpy( outword, inword );

    /* Try to apply each rule in turn */
    for (r = 0; r < nRule; r++) {
        size_t inLen  = strlen( inword );
        size_t outLen = 0;
        size_t i      = 0;

        /* Check each position of the input word in turn */
        while (i < inLen) {
            char varRep = 0;
            int  n      = 0;

            if (TryRule( inword, (int) i, Rule[r], &n, &s1, &s2, &varRep )) {
                /* Rule applies at inword[n]; keep what precedes it */
                outLen = Append( outword, outLen, &inword[i], (size_t) n - i );

                if (varRep)
                    outLen = Append( outword, outLen, &varRep, 1 );
                else
                    outLen = Append( outword, outLen, s2, strlen( s2 ) );

                i = (size_t) n + strlen( s1 );
            } else {
                /* Rule doesn't apply at this location */
                outLen = Append( outword, outLen, &inword[i], 1 );
                i++;
            }
        }

        outword[outLen] = '\0';

        /* Output of one rule is input to next one */
        memcpy( inword, outword, outLen + 1 );
    }

    /* Return the output of the last rule */
    return(outword);
} /*Transform*/

/*
** DoWords
**
** Read in each word in turn from the input file (lexname.lex),
** transform it according to the rules,
** and output it to the output file (outname.out), and to the screen
** too unless that has been turned off.
**
** This algorithm ensures that word files of any size can be processed.
** Every line of the input file counts as one word.
*/
void DoWords( char *lexname, char *outname )
{
    char  filename[MAXPATH];
    char  inword[MAXWORD];
    int   n = 0;
    FILE *f, *g;
    char *outword;

    if (!MakeFileName( filename, sizeof filename, lexname, ".lex" ))
        return;

    f = fopen( filename, "r" );
    if (!f) {
        printf( "File %s could not be read in.\n\n", filename );
        return;
    }

    if (!MakeFileName( filename, sizeof filename, outname, ".out" )) {
        fclose( f );
        return;
    }

    g = fopen( filename, "w" );
    if (!g) {
        printf( "File %s could not be created.\n\n", filename );
        fclose( f );
        return;
    }

    while (ReadLine( f, inword, sizeof inword )) {
        n++;

        outword = Transform( inword );

        if (!printSourc) {
            if (toScreen)
                printf( "%s\n", outword );
            fprintf( g, "%s\n", outword );
        } else if (bracketOut) {
            if (toScreen)
                printf( "%s \t[%s]\n", outword, inword );
            fprintf( g, "%s \t[%s]\n", outword, inword );
        } else {
            if (toScreen)
                printf( "%s --> %s\n", inword, outword );
            fprintf( g, "%s --> %s\n", inword, outword );
        }
    }

    fclose( f );

    /* Closing flushes the output, so this is where a full disk shows up */
    if (fclose( g ) != 0)
        printf( "File %s could not be written completely.\n\n", filename );

    printf( "%i word%s processed.\n", n, n == 1 ? "" : "s" );
} /*DoWords*/

/*
** MAIN ROUTINE
**
** Ask for name of project
** Read in rules and input words
** Apply transformations
** Output words
**
** Usage: sounds [-p] [-b] [-l] [-f] [lexicon rules]
** If both names are given the program runs once; otherwise it asks for
** names until q is entered or the input ends.
**
** Define SOUNDS_NO_MAIN to compile the routines above without this
** driver, e.g. to link them into a test program.
*/
#ifndef SOUNDS_NO_MAIN
int main( int argc, char **argv )
{
    int  once = FALSE;
    char lexicon[MAXNAME] = "\0";
    char rules[MAXNAME]   = "\0";
    int  i;

    /* Read command line arguments */
    for (i = 1; i < argc; i++) {
        if (argv[i][0] == '-' && strlen( argv[i] ) > 1) {
            switch (argv[i][1]) {
            case 'p': case 'P': printRules = 1; break;
            case 'b': case 'B': bracketOut = 1; break;
            case 'l': case 'L': printSourc = 0; break;
            case 'f': case 'F': toScreen   = 0; break;
            default:                            break;
            }
        } else if (!lexicon[0])
            snprintf( lexicon, sizeof lexicon, "%s", argv[i] );
        else
            snprintf( rules, sizeof rules, "%s", argv[i] );
    }

    once = lexicon[0] && rules[0];

    printf( "\nSOUND CHANGE APPLIER\n(C) 1992,2000 by Mark Rosenfelder\nFor more information see www.zompist.com\n\n" );

    if (once) {
        printf( "Applying %s.sc to %s.lex\n\n", rules, lexicon );

        if (ReadRules( rules ))
            DoWords( lexicon, rules );
    } else {
        int done = FALSE;

        while (!done) {
            printf( "\nEnter the name of a LEXICON.\n\n" );
            printf( "For example, enter latin to specify latin.lex.\nEnter q to quit the program.\n-->" );

            /* End of input counts as a request to quit */
            if (!ReadLine( stdin, lexicon, sizeof lexicon )
             || !strcmp( lexicon, "q" ))
                done = TRUE;
            else {
                printf( "Enter the name of a RULES FILE.\n\n" );
                printf( "For example, enter french to specify french.sc.\n" );
                printf( "The output words would be stored in french.out.\n-->" );

                if (!ReadLine( stdin, rules, sizeof rules ))
                    done = TRUE;
                else if (ReadRules( rules ))
                    DoWords( lexicon, rules );
            }
        }
    }

    printf( "\nThank you for using the SOUND CHANGE APPLIER!\n" );

    return(0);
} /*main*/
#endif /* SOUNDS_NO_MAIN */