/*
** SOUNDS.C
**
** Sound Change Applier
**
** Copyright (C) 2000 by Mark Rosenfelder.
** This program may be freely used and modified for non-commercial purposes.
** See http://www.zompist.com/sounds.htm for documentation.
*/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>

/* Boolean values used for flags and return codes throughout this file. */
#define TRUE 1
#define FALSE 0

/*
** Run-time options.  main() changes them from the command line.
*/
static int printRules = 0;   /* -p: TryRule() reports each rule that applies    */
static int bracketOut = 0;   /* -b: DoWords() writes "output [input]"           */
static int printSourc = 1;   /* -l clears it: DoWords() writes the output only  */
static int toScreen = 1;     /* -f clears it: DoWords() writes to the file only */

/* Capacity of the Rule[] and Cat[] tables below. */
#define MAXRULE 200
#define MAXCAT 50

/* Sound changes (lines of the rules file that contain '/'). */
static int nRule = 0;
static char *Rule[MAXRULE];

/* Category definitions (lines of the rules file that contain '='). */
static int nCat = 0;
static char *Cat[MAXCAT];
/*
|
||
|
** ReadRules
|
||
|
**
|
||
|
** Read in the rules file *.sc for a given project.
|
||
|
**
|
||
|
** There are two types of rules: sound changes and category definitions.
|
||
|
** The former are stored in Rule[], the latter in Cat[].
|
||
|
**
|
||
|
** The format of these rules is given under Transform().
|
||
|
*/
|
||
|
int ReadRules( char *filestart )
|
||
|
{
|
||
|
char filename[84];
|
||
|
char buffer[129];
|
||
|
char *s;
|
||
|
int n;
|
||
|
FILE *f;
|
||
|
|
||
|
nRule = 0;
|
||
|
nCat = 0;
|
||
|
|
||
|
/* Open the file */
|
||
|
|
||
|
sprintf( filename, "%s.sc", filestart );
|
||
|
|
||
|
f = fopen( filename, "r" );
|
||
|
if (!f)
|
||
|
{
|
||
|
printf( "File %s could not be read in.\n\n", filename );
|
||
|
return(FALSE);
|
||
|
}
|
||
|
|
||
|
while (fgets( buffer, 129, f))
|
||
|
{
|
||
|
if (strlen(buffer))
|
||
|
buffer[strlen(buffer)-1] = '\0';
|
||
|
|
||
|
s = malloc( strlen(buffer) + 1);
|
||
|
if (s)
|
||
|
strcpy( s, buffer );
|
||
|
|
||
|
if (buffer[0] != '*')
|
||
|
{
|
||
|
if (strchr( buffer, '/' ))
|
||
|
Rule[nRule++] = s;
|
||
|
else if (strchr( buffer, '='))
|
||
|
Cat[ nCat++] = s;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fclose(f);
|
||
|
|
||
|
if (nCat)
|
||
|
{
|
||
|
printf( "%i categories found\n", nCat );
|
||
|
|
||
|
#ifdef PRINT_RULES
|
||
|
for (n = 0; n < nCat; n++)
|
||
|
printf( "%s\n", Cat[n] );
|
||
|
|
||
|
printf( "\n" );
|
||
|
#endif
|
||
|
}
|
||
|
else
|
||
|
printf( "No rules were found.\n\n" );
|
||
|
|
||
|
if (nRule)
|
||
|
{
|
||
|
printf( "%i rules found\n", nRule );
|
||
|
|
||
|
#ifdef PRINT_RULES
|
||
|
for (n = 0; n < nRule; n++)
|
||
|
printf( "%s\n", Rule[n] );
|
||
|
|
||
|
printf( "\n" );
|
||
|
#endif
|
||
|
}
|
||
|
else
|
||
|
printf( "No rules were found.\n\n" );
|
||
|
|
||
|
return( nRule );
|
||
|
|
||
|
} /*ReadRules*/
|
||
|
|
||
|
|
||
|
/*
** Divide
**
** Divide a rule into source and target phoneme(s) and environment.
** That is, for a rule s1/s2/env
** create the three null-terminated strings s1, s2, and env.
**
** s1 must be 1 to 19 characters long, s2 0 to 19 characters, and env
** at most 49 characters; both '/' separators must be present.
**
** If this cannot be done, return FALSE and leave *s1, *s2 and *env
** untouched.  On success return TRUE; the three pointers then refer to
** static buffers that the next call to Divide overwrites.
*/
int Divide( char *Rule, char **s1, char **s2, char **env )
{
    static char s1_str[20];
    static char s2_str[20];
    static char ev_str[50];
    size_t i;

    /* Source: non-empty, fits the buffer, and is followed by a '/' */
    i = strcspn( Rule, "/" );
    if (i == 0 || i >= sizeof s1_str || Rule[i] != '/')
        return(FALSE);

    memcpy( s1_str, Rule, i );
    s1_str[i] = '\0';
    Rule += i + 1;

    /* Target: may be empty, but its closing '/' is required; stepping
    ** over a missing one would run past the end of the string. */
    i = strcspn( Rule, "/" );
    if (i >= sizeof s2_str || Rule[i] != '/')
        return(FALSE);

    memcpy( s2_str, Rule, i );
    s2_str[i] = '\0';
    Rule += i + 1;

    /* Environment: the remainder of the rule, if it fits */
    if (strlen( Rule ) >= sizeof ev_str)
        return(FALSE);

    strcpy( ev_str, Rule );

    *s1 = s1_str;
    *s2 = s2_str;
    *env = ev_str;

    return(TRUE);

} /*Divide*/
/*
|
||
|
** TryCat
|
||
|
**
|
||
|
** See if a particular phoneme sequence is part of any category.
|
||
|
** (We try all the categories.)
|
||
|
**
|
||
|
** For instance, if we have 'a' in the source word and 'V' in the
|
||
|
** structural description, and a category V=aeiou, TryCat returns TRUE,
|
||
|
** and sets *n to the number of characters to skip.
|
||
|
**
|
||
|
** If we had 'b' instead, TryCat would return FALSE instead.
|
||
|
**
|
||
|
** If no category with the given identification (env) can be found,
|
||
|
** we return TRUE (continue looking), but set *n to 0.
|
||
|
**
|
||
|
** Warning: For now, we don't have a way to handle digraphs.
|
||
|
**
|
||
|
** We also return TRUE if
|
||
|
*/
|
||
|
int TryCat( char *env, char *word, int *n, int *catLoc )
|
||
|
{
|
||
|
int c;
|
||
|
char *catdef;
|
||
|
|
||
|
if (*word == '\0')
|
||
|
return(FALSE);
|
||
|
|
||
|
for (c = 0; c < nCat; c++)
|
||
|
{
|
||
|
if (*env == *Cat[c])
|
||
|
{
|
||
|
catdef = strchr( Cat[c], '=' );
|
||
|
|
||
|
if (strchr( catdef + 1, word[0] ))
|
||
|
{
|
||
|
*n = 1;
|
||
|
*catLoc = strchr( Cat[c], word[0] ) - Cat[c];
|
||
|
return(TRUE);
|
||
|
}
|
||
|
else
|
||
|
return(FALSE);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*n = 0;
|
||
|
return(TRUE);
|
||
|
|
||
|
} /*TryCat*/
|
||
|
|
||
|
/*
|
||
|
** TryRule
|
||
|
**
|
||
|
** See if a rule s1->s2/env applies at position i in the given word.
|
||
|
**
|
||
|
** If it does, we pass back the index where s1 was found in the
|
||
|
** word, as well as s1 and s2, and return TRUE.
|
||
|
**
|
||
|
** Otherwise, we return FALSE, and pass garbage in the output variables.
|
||
|
*/
|
||
|
int TryRule( char *word, int i, char *Rule, int *n, char **s1, char **s2, char *varRep )
|
||
|
{
|
||
|
int j, m, cont = 0;
|
||
|
int catLoc;
|
||
|
char *env;
|
||
|
int optional = FALSE;
|
||
|
*varRep = '\0';
|
||
|
|
||
|
if (!Divide( Rule, s1, s2, &env ) || !strchr( env, '_' ))
|
||
|
return(FALSE);
|
||
|
|
||
|
for (j = 0, cont = TRUE; cont && j < strlen(env); j++)
|
||
|
{
|
||
|
switch( env[j] )
|
||
|
{
|
||
|
case '(':
|
||
|
optional = TRUE;
|
||
|
break;
|
||
|
|
||
|
case ')':
|
||
|
optional = FALSE;
|
||
|
break;
|
||
|
|
||
|
case '#':
|
||
|
cont = j ? (i == strlen(word)) : (i == 0);
|
||
|
break;
|
||
|
|
||
|
case '_':
|
||
|
cont = !strncmp( &word[i], *s1, strlen(*s1) );
|
||
|
if (cont)
|
||
|
{
|
||
|
*n = i;
|
||
|
i += strlen(*s1);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cont = TryCat( *s1, &word[i], &m, &catLoc );
|
||
|
if (cont && m)
|
||
|
{
|
||
|
int c;
|
||
|
*n = i;
|
||
|
i += m;
|
||
|
|
||
|
for (c = 0; c < nCat; c++)
|
||
|
if ((*s2)[0] == Cat[c][0] && catLoc < strlen(Cat[c]))
|
||
|
*varRep = Cat[c][catLoc];
|
||
|
}
|
||
|
else if (cont)
|
||
|
cont = FALSE;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
cont = TryCat( &env[j], &word[i], &m, &catLoc );
|
||
|
|
||
|
if (cont && !m)
|
||
|
{
|
||
|
/* no category applied */
|
||
|
|
||
|
cont = i < strlen(word) && word[i] == env[j];
|
||
|
|
||
|
m = 1;
|
||
|
}
|
||
|
if (cont)
|
||
|
i += m;
|
||
|
|
||
|
if (!cont && optional)
|
||
|
cont = TRUE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (cont && printRules)
|
||
|
printf( " %s->%s /%s applies to %s at %i\n",
|
||
|
*s1, *s2, env, word, *n );
|
||
|
|
||
|
return(cont);
|
||
|
|
||
|
} /*TryRule*/
|
||
|
|
||
|
/*
|
||
|
** Transform
|
||
|
**
|
||
|
** Apply the rules to a single word and return the result.
|
||
|
**
|
||
|
** The rules are stated in the form string1/string2/environment, e.g.
|
||
|
** f/h/#_V
|
||
|
** which states that f changes to h at the beginning of a word before a
|
||
|
** vowel.
|
||
|
*/
|
||
|
char *Transform( char *input )
|
||
|
{
|
||
|
char inword[80];
|
||
|
static char outword[80];
|
||
|
|
||
|
char instr[10];
|
||
|
char *s1, *s2;
|
||
|
int i;
|
||
|
int r;
|
||
|
int n;
|
||
|
|
||
|
strcpy( inword, input );
|
||
|
|
||
|
/* Try to apply each rule in turn */
|
||
|
|
||
|
for (r = 0; r < nRule; r++)
|
||
|
{
|
||
|
/* Initialize output of this rule to null */
|
||
|
|
||
|
memset( outword, 0, 80 );
|
||
|
|
||
|
/* Check each position of the input word in turn */
|
||
|
|
||
|
i = 0;
|
||
|
while (i < strlen(inword))
|
||
|
{
|
||
|
char varRep = 0;
|
||
|
|
||
|
if (TryRule( inword, i, Rule[r], &n, &s1, &s2, &varRep ))
|
||
|
{
|
||
|
/* Rule applies at inword[n] */
|
||
|
|
||
|
if (n)
|
||
|
strncat( outword, &inword[i], n - i );
|
||
|
|
||
|
if (varRep)
|
||
|
outword[strlen(outword)] = varRep;
|
||
|
else if (strlen(s2))
|
||
|
strcat( outword, s2 );
|
||
|
|
||
|
i = n + strlen(s1);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/* Rule doesn't apply at this location */
|
||
|
|
||
|
outword[strlen(outword)] = inword[i++];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Output of one rule is input to next one */
|
||
|
|
||
|
strcpy( inword, outword );
|
||
|
}
|
||
|
|
||
|
/* Return the output of the last rule */
|
||
|
|
||
|
return(outword);
|
||
|
|
||
|
} /*Transform*/
|
||
|
|
||
|
/*
|
||
|
** DoWords
|
||
|
**
|
||
|
** Read in each word in turn from the input file,
|
||
|
** transform it according to the rules,
|
||
|
** and output it to the output file.
|
||
|
**
|
||
|
** This algorithm ensures that word files of any size can be processed.
|
||
|
*/
|
||
|
void DoWords( char *lexname, char *outname )
|
||
|
{
|
||
|
char filename[84];
|
||
|
char inword[84];
|
||
|
int n = 0;
|
||
|
FILE *f, *g;
|
||
|
char *outword;
|
||
|
|
||
|
sprintf( filename, "%s.lex", lexname );
|
||
|
|
||
|
f = fopen( filename, "r" );
|
||
|
if (!f)
|
||
|
{
|
||
|
printf( "File %s could not be read in.\n\n", filename );
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
sprintf( filename, "%s.out", outname );
|
||
|
|
||
|
g = fopen( filename, "w" );
|
||
|
if (!g)
|
||
|
{
|
||
|
printf( "File %s could not be created.\n\n", filename );
|
||
|
fclose(f);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
while (fgets( inword, 129, f))
|
||
|
{
|
||
|
n++;
|
||
|
if (strlen(inword))
|
||
|
inword[strlen(inword) - 1] = '\0';
|
||
|
|
||
|
outword = Transform(inword);
|
||
|
|
||
|
if (!printSourc)
|
||
|
{
|
||
|
if (toScreen)
|
||
|
printf( "%s\n", outword );
|
||
|
fprintf( g, "%s\n", outword );
|
||
|
}
|
||
|
else if (bracketOut)
|
||
|
{
|
||
|
if (toScreen)
|
||
|
printf( "%s \t[%s]\n", outword, inword );
|
||
|
fprintf( g, "%s \t[%s]\n", outword, inword );
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (toScreen)
|
||
|
printf( "%s --> %s\n", inword, outword );
|
||
|
fprintf( g, "%s --> %s\n", inword, outword );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fclose(f);
|
||
|
fclose(g);
|
||
|
|
||
|
printf( "%i word%s processed.\n", n, n == 1 ? "" : "s" );
|
||
|
|
||
|
} /*DoWords*/
|
||
|
|
||
|
/*
|
||
|
** MAIN ROUTINE
|
||
|
**
|
||
|
** Ask for name of project
|
||
|
** Read in rules and input words
|
||
|
** Apply transformations
|
||
|
** Output words
|
||
|
**
|
||
|
*/
|
||
|
main( int argc, char **argv )
|
||
|
{
|
||
|
int once = FALSE;
|
||
|
char lexicon[65] = "\0";
|
||
|
char rules[65] = "\0";
|
||
|
|
||
|
/* Read command line arguments */
|
||
|
int i;
|
||
|
for (i = 1; i < argc; i++)
|
||
|
{
|
||
|
if (argv[i][0] == '-' && strlen(argv[i]) > 1)
|
||
|
{
|
||
|
switch (argv[i][1])
|
||
|
{
|
||
|
case 'p': case 'P': printRules = 1; break;
|
||
|
case 'b': case 'B': bracketOut = 1; break;
|
||
|
case 'l': case 'L': printSourc = 0; break;
|
||
|
case 'f': case 'F': toScreen = 0; break;
|
||
|
}
|
||
|
}
|
||
|
else if (!lexicon[0])
|
||
|
strcpy( lexicon, argv[i] );
|
||
|
else
|
||
|
strcpy( rules, argv[i] );
|
||
|
}
|
||
|
|
||
|
once = lexicon[0] && rules[0];
|
||
|
|
||
|
printf( "\nSOUND CHANGE APPLIER\n(C) 1992,2000 by Mark Rosenfelder\nFor more information see www.zompist.com\n\n" );
|
||
|
|
||
|
if (once)
|
||
|
{
|
||
|
printf( "Applying %s.sc to %s.lex\n\n", lexicon, rules );
|
||
|
|
||
|
if (ReadRules( rules ))
|
||
|
DoWords( lexicon, rules );
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
int done = FALSE;
|
||
|
while (!done)
|
||
|
{
|
||
|
printf( "\nEnter the name of a LEXICON.\n\n" );
|
||
|
printf( "For example, enter latin to specify latin.lex.\nEnter q to quit the program.\n-->" );
|
||
|
|
||
|
fgets( lexicon, 65, stdin );
|
||
|
|
||
|
if (strlen(lexicon))
|
||
|
lexicon[strlen(lexicon) - 1] = '\0';
|
||
|
|
||
|
if (!strcmp( lexicon, "q" ))
|
||
|
done = TRUE;
|
||
|
else
|
||
|
{
|
||
|
printf( "Enter the name of a RULES FILE.\n\n" );
|
||
|
printf( "For example, enter french to specify french.sc.\n" );
|
||
|
printf( "The output words would be stored in french.out.\n-->" );
|
||
|
|
||
|
fgets( rules, 65, stdin );
|
||
|
|
||
|
if (strlen(rules))
|
||
|
rules[strlen(rules) - 1] = '\0';
|
||
|
|
||
|
if (ReadRules( rules ))
|
||
|
DoWords( lexicon, rules );
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
printf( "\nThank you for using the SOUND CHANGE APPLIER!\n" );
|
||
|
|
||
|
} /*main*/
|