/* classifyWord classifies words from a list of words

   This utility includes simple pattern matching, similar to
   regular expressions, to find the words of a specified class.
   The following metacharacters are used for the pattern matching:
   *     matches zero or more characters
   .     matches exactly one character
   [...] specifies a character class
         matches any character in the class
   ^     matches the beginning of the line
   $     matches the end of the line

   In addition, the standard C escape sequences are understood:
   \a, \b, \f, \t, \v

   copyright (c) by rae, 2000
*/

#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Maximum string lengths, not counting the terminating NUL. main()
 * allocates the corresponding buffers with one extra byte and hands the
 * SIZE_OF_... limits to setValue()/addValue() as the longest permitted value.
 * COMMAND_LENGTH is the limit for one line read from the control file.
 */
#define COMMAND_LENGTH         1024
#define SIZE_OF_GROUP            80
#define SIZE_OF_BEGIN            80
#define SIZE_OF_VARIABLE_PART    80
#define SIZE_OF_ENDING           80
#define SIZE_OF_BASERULE         80
#define SIZE_OF_BASEWORD         80
#define SIZE_OF_ENDINGS        1024
#define SIZE_OF_PREFIXES       1024
#define SIZE_OF_SEARCH_PATTERN   80
/* Number of slots in the list of groups that findGroupNumber() scans. */
#define MAX_GROUPS              256

        /*
         * Codes for pattern meta characters. prepareSearchString() replaces
         * the meta characters of a pattern with these codes:
         *   STAR      '*'
         *   QM        '?'
         *   BEGCLASS  '['
         *   ENDCLASS  ']'
         *   ANCHOR    '^' as the first character of the pattern
         *   ENDOFLINE '$' as the last character of the pattern
         */
#define STAR        1
#define QM          2
#define BEGCLASS    3
#define ENDCLASS    4
#define ANCHOR      5
#define ENDOFLINE   6

        /* Other codes and definitions. */
        /* EOS is the string terminator. */
#define EOS '\0'

/*
 * Node type with three links to other nodes and an untyped payload.
 * NOTE(review): the type is not referenced in the visible part of this
 * file. The roles noted below are presumed from the field names and from
 * the diagram in the comment that follows -- confirm before relying on them.
 */
typedef struct _WordList
{
  struct _WordList *next;       /* presumably the following word of the list */
  struct _WordList *previous;   /* presumably the preceding word of the list */
  struct _WordList *neighbour;  /* presumably the first neighbour of this word */
         void      *content;    /* payload; its actual type is not visible here */
} WordList;

/*
 * the list of classified words looks like
 *
 *   1. word - 1. neighbour - 2. neighbour
 *      |          |
 *      |      alternate
 *      |
 *   2. word - ...
 *      |
 *      .
 *      .
 *      .
 *      |
 *   n. word - ...
 *
 */

/* Forward declarations for the functions of this program. */

/* control file handling */
char  *parseCommandLine(char *commandLine, char **command, char **argument1, char **argument2);
long  exactFindWord(char **list, char *word, long numberOfWords, long startPosition);
char  *setValue(char *variable, char *value, size_t size, char *name, char *comment);
char  *addValue(char *variable, char *value, size_t size, char *name, char *comment);
/* classification */
short extract(char **mainList, long numberOfWords, char **listOfGroups, char **wordList, char *group, char *begin, char *variablePart, char *ending, char *baseRule, char *prefixes, char *endings, bool allEndings, bool withBasewordOnly, long kind);
long  findGroupNumber(char **listOfGroups, char *group);
/* pattern matching */
long  patternFindWord(char **list, char *pattern, long numberOfWords, long startPosition);
void  prepareSearchString (char *pattern, char *buffer);
bool  patternMatch (char *pattern, char *string);
/* word transformation and group lists */
char  *transformWord(char *word, char *rule);
void  appendListToGroup(char **mainList, long numberOfWords, char **listOfGroups, long groupNumber, char **wordList, char *baseWord, char *endings, bool allEndings, bool withBasewordOnly, long kindOfRule);
char  *appendListOfWords(char **mainList, long numberOfWords, char *baseWord, char *endings, bool allEndings, bool withBasewordOnly, long kind);

int main(int argc, char *argv[])
{
    char     **mainList,
             **wordList,
             *listOfGroups[256];
    char     *nameOfControlfile     = NULL,
             *nameOfSource      = NULL,
             *nameOfDestination       = NULL,
             *word            = (char *) malloc(1025),
             *commandLine     = (char *) malloc(COMMAND_LENGTH + 1),
             *group           = (char *) malloc(SIZE_OF_GROUP + 1),
             *begin           = (char *) malloc(SIZE_OF_BEGIN + 1),
             *variablePart    = (char *) malloc(SIZE_OF_VARIABLE_PART + 1),
             *ending          = (char *) malloc(SIZE_OF_ENDING + 1),
             *baseRule        = (char *) malloc(SIZE_OF_BASERULE + 1),
             *baseWord        = (char *) malloc(SIZE_OF_BASEWORD + 1),
             *endings         = (char *) malloc(SIZE_OF_ENDINGS + 1),
             *prefixes        = (char *) malloc(SIZE_OF_PREFIXES + 1),
             *command,
             *argument1,
             *argument2,
             *buffer,
             *tmpPtr;
    FILE     *fileOfSource       = stdin,
             *fileOfDestination        = stdout,
             *fileOfControl      = 0;
    int      result           = 0;
    bool     allEndings,
             withBasewordOnly,
             stop             = false,
             activeWord       = false;
    long     position,
             counter,
             kindOfRule,
             i,
             j,
             numberOfWords = 0;
    size_t   length,
             readBytes;
    

    if (argc < 2 || 5 < argc)
    {
        printf("Utility to classify of single words based on a list of words\n");
        printf("Usage    : %s <nameOfControlfile> [<nameOfSource> [<nameOfDestination>]]\n", argv[0]);
        printf("Parameter: <nameOfControlfile> File with rules to classificate the words\n");
        printf("           <nameOfSource>      file with the list of words. if no name given\n");
        printf("                               the input will expect from 'stdin'\n");
        printf("           <nameOfDestination> file with the classified words. If no name is given,\n");
        printf("                               the words will be written to 'stdout'\n");
        result = 1;
    }
    else
    {
        if (word == 0)
        {
            fprintf(stderr, "%s: not enoug memory available\n", argv[0]);
            result = 1;
        }
        else
        {
            nameOfControlfile = argv[1];
            if (argc != 2)
                nameOfSource = argv[2];
            if (argc == 4)
                nameOfDestination = argv[3];

            fileOfControl = fopen(nameOfControlfile, "r");
            if (nameOfSource != NULL)
            {
              fileOfSource = fopen(nameOfSource, "rb");
            }
            if (fileOfSource == 0)
            {
              fprintf(stderr, "%s: Can't open %s to read => error %d\n", argv[0], nameOfSource, errno);
              result = 1;
            }
            else
            {
              fseek(fileOfSource, 0, SEEK_END);
              length = ftell(fileOfSource);
              fseek(fileOfSource, 0, SEEK_SET);
              printf("size of file '%s': %d bytes\n", nameOfSource, length);
              buffer = (char *) malloc(length + 2);
//              printf("address of buffer: %lx bytes\n", buffer);

/** at this time the function 'fread()' reads in normal mode less
 *  characters then the length, because the length includes '\r\n',
 *  which will be converted to '\n'. Opening in binary mode reads all,
 *  but on a DOS-System, the character '\r' must be convertet explizit.
 *  So the check is not active yet. Maybe, I must detect the file length
 *  in an other way
 */
    
              if ((readBytes = fread(buffer, 1, length, fileOfSource)) != length)
              {
                fprintf(stderr, "%s: Can't read the complete file '%s' --- could read only %d bytes\n", argv[0], nameOfSource, readBytes);
                result = 1;
              }
              else
              {
/**/              fread(buffer, 1, length, fileOfSource);
                tmpPtr = buffer;
                activeWord = false;
                for (i = 0; i < (long) length; i++)
                {
                  if (*tmpPtr == '\n' || *tmpPtr == '\r')
                  {
                    activeWord = false;
                  }
                  else
                  {
                    if (activeWord == false)
                    {
                      activeWord = true;
                      numberOfWords++;
                    }
                  }
                  tmpPtr++;
                }
                printf("number of lines in file '%s': %ld\n", nameOfSource, numberOfWords);
                if ((mainList = (char **) malloc(numberOfWords * sizeof(*mainList))) == (char **) NULL)
                {
                  fprintf(stderr, "%s: Not enough memory for main wordlist available\n", argv[0]);
                  result = 1;
                }
                else
                {
                  if ((wordList = (char **) malloc(numberOfWords * sizeof(*wordList))) == (char **) NULL)
                  {
                    fprintf(stderr, "%s: Not enough memory for wordlist available\n", argv[0]);
                    result = 1;
                  }
                  else
                  {
                    tmpPtr = buffer;
                    activeWord = false;
                    i = 0;

                    while (i < numberOfWords)
                    {
                      if (*tmpPtr == '\n' || *tmpPtr == '\r')
                      {
                        activeWord = false;
                        *tmpPtr    = '\0';
                      }
                      else
                      {
                        if (activeWord == false)
                        {
                          activeWord = true;
                          wordList[i] = (char *) NULL;
                          mainList[i] = tmpPtr;
                          i++;
                        }
                      }
                      tmpPtr++;
                    }
                    if ((fileOfControl = fopen(nameOfControlfile, "r")) == NULL)
                    {
                      fprintf(stderr, "%s: Can't open %s to read => error %d\n", argv[0], nameOfControlfile, errno);
                      result = 1;
                    }
                    else
                    {
                      for (i = 0; i < 256; i++)
                        listOfGroups[i] = (char *) NULL;
                        
                      while (!feof(fileOfControl) && !stop)
                      {
                        fgets(commandLine, COMMAND_LENGTH, fileOfControl);
                        parseCommandLine(commandLine, &command, &argument1, &argument2);
                        if (command != NULL)
                        {
                          if (strcmp(command, "delete") == 0)
                          {
                            if (argument1 == NULL)
                            {
                              printf("%s: invalid argument of command '%s'\n", nameOfControlfile, command);
                            }
                            else
                            {
                              if ((position = exactFindWord(mainList, argument1, numberOfWords, 0)) == -1)
                              {
//                                printf("%s: word '%s' not available in the main list\n", nameOfControlfile, argument1);
                              }
                              else
                              {
                                mainList[position] = NULL;
                                printf("%s: word '%s' deleted on position %ld\n", nameOfControlfile, argument1, position);
                              }
                            }
                          }
                          else if (strcmp(command, "set") == 0)
                          {
                            if (argument1 == NULL || argument2 == NULL)
                            {
                              printf("%s: invalid argument of command '%s'\n", nameOfControlfile, command);
                            }
                            else
                            {
                              if (strcmp(argument1, "counter") == 0)
                              {
                                counter = atoi(argument2);
                                printf("'%s' set to '%ld'\n", argument1, counter);
                              }
                              if (strcmp(argument1, "kindOfRule") == 0)
                              {
                                kindOfRule = atoi(argument2);
                                printf("'%s' set to '%ld'\n", argument1, kindOfRule);
                              }
                              else if (strcmp(argument1, "group") == 0)
                              {
                                setValue(group, argument2, SIZE_OF_GROUP, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "begin") == 0)
                              {
                                setValue(begin, argument2, SIZE_OF_BEGIN, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "variablePart") == 0)
                              {
                                setValue(variablePart, argument2, SIZE_OF_VARIABLE_PART, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "ending") == 0)
                              {
                                setValue(ending, argument2, SIZE_OF_ENDING, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "baseRule") == 0)
                              {
                                setValue(baseRule, argument2, SIZE_OF_BASERULE, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "baseWord") == 0)
                              {
                                setValue(baseWord, argument2, SIZE_OF_BASEWORD, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "endings") == 0)
                              {
                                setValue(endings, argument2, SIZE_OF_ENDINGS, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "prefixes") == 0)
                              {
                                setValue(prefixes, argument2, SIZE_OF_PREFIXES, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "allEndings") == 0)
                              {
                                allEndings = (strcmp(argument2, "true") == 0);
                                printf("'%s' set to '%d'\n", argument1, allEndings);
                              }
                              else if (strcmp(argument1, "withBasewordOnly") == 0)
                              {
                                withBasewordOnly = (strcmp(argument2, "true") == 0);
                                printf("'%s' set to '%d'\n", argument1, withBasewordOnly);
                              }
                              else
                              {
                                printf("%s: unknown variable %s\n", nameOfControlfile, argument1);
                              }
                            }
                          }
                          else if (strcmp(command, "add") == 0)
                          {
                            if (argument1 == NULL || argument2 == NULL)
                            {
                              printf("%s: invalid argument of command '%s'\n", nameOfControlfile, command);
                            }
                            else
                            {
                              if (strcmp(argument1, "kindOfRule") == 0)
                              {
                                kindOfRule += atoi(argument2);
                                printf("'%s' set to '%ld'\n", argument1, kindOfRule);
                              }
                              if (strcmp(argument1, "counter") == 0)
                              {
                                counter += atoi(argument2);
                                printf("'%s' set to '%ld'\n", argument1, counter);
                              }
                              else if (strcmp(argument1, "group") == 0)
                              {
                                addValue(group, argument2, SIZE_OF_GROUP, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "begin") == 0)
                              {
                                addValue(begin, argument2, SIZE_OF_BEGIN, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "variablePart") == 0)
                              {
                                addValue(variablePart, argument2, SIZE_OF_VARIABLE_PART, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "ending") == 0)
                              {
                                addValue(ending, argument2, SIZE_OF_ENDING, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "baseRule") == 0)
                              {
                                addValue(baseRule, argument2, SIZE_OF_BASERULE, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "baseWord") == 0)
                              {
                                addValue(baseWord, argument2, SIZE_OF_BASEWORD, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "endings") == 0)
                              {
                                addValue(endings, argument2, SIZE_OF_ENDINGS, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "prefixes") == 0)
                              {
                                addValue(prefixes, argument2, SIZE_OF_PREFIXES, argument1, nameOfControlfile);
                              }
                              else if (strcmp(argument1, "allEndings") == 0)
                              {
                                printf("%s: invalid command '%s' for variable '%s'\n", nameOfControlfile, command, argument1);
                              }
                              else if (strcmp(argument1, "withBasewordOnly") == 0)
                              {
                                printf("%s: invalid command '%s' for variable '%s'\n", nameOfControlfile, command, argument1);
                              }
                              else
                              {
                                printf("%s: unknown variable %s\n", nameOfControlfile, argument1);
                              }
                            }
                          }
                          else if (strcmp(command, "extract") == 0)
                          {
                            extract(mainList, numberOfWords, listOfGroups, wordList, group, begin, variablePart, ending, baseRule, prefixes, endings, allEndings, withBasewordOnly, kindOfRule);
                            j = 0;
                            for (i = 0; i < numberOfWords; i++)
                            {
                              if (mainList[i] != (char *) NULL)
                              {
                                mainList[j++] = mainList[i];
                              }
                            }
                            printf("list compressed from %ld to %ld words\n", numberOfWords, j);
                            numberOfWords = j;
                            
//                            stop = true;
                          }
                          else if (strcmp(command, "append") == 0)
                          {
                            long groupNumber;
                            
                            if ((groupNumber = findGroupNumber(listOfGroups, group)) != -1)
                              appendListToGroup(mainList, numberOfWords, listOfGroups, groupNumber, wordList, baseWord, endings, allEndings, withBasewordOnly, kindOfRule);
                          }
                          else if (strcmp(command, "compress") == 0)
                          {
                            j = 0;
                            for (i = 0; i < numberOfWords; i++)
                            {
                              if (mainList[i] != (char *) NULL)
                              {
                                mainList[j++] = mainList[i];
                              }
                            }
                            printf("list compressed from %ld to %ld words\n", numberOfWords, j);
                            numberOfWords = j;
                          }
                          else if (strcmp(command, "message") == 0)
                          {
                            if (argument1 == NULL)
                            {
                              printf("%s: invalid argument of command '%s'\n", nameOfControlfile, command);
                            }
                            else
                            {
                              printf("*** %s ***\n", argument1);
                            }
                          }
                          else
                            printf("%s: unknown command '%s'\n", nameOfControlfile, command);
                        }
                      }
                    }
                  }
                  printf("store list of words in file '%s'\n", nameOfDestination);
                  if (nameOfDestination != NULL)
                  {
                      fileOfDestination = fopen(nameOfDestination, "w");
                  }
                
                  if (fileOfDestination == 0)
                  {
                      fprintf(stderr, "%s: Can't open %s to write => error %d\n", argv[0], nameOfDestination, errno);
                    result = 1;
                }
                else
                {
                  for (i = 0; i < numberOfWords; i++)
                  {
                    if (mainList[i] != NULL)
                    {
                      fputs(mainList[i], fileOfDestination);
                      fputs("\n", fileOfDestination);
                    }
                  }
                  printf("words saved\n");
                  i = 0;
                  while (listOfGroups[i] != (char *) NULL)
                  {
                    printf("save group '%s'\n", listOfGroups[i]);
                    fputs("\n[", fileOfDestination);
                    fputs(listOfGroups[i], fileOfDestination);
                    fputs("]\n", fileOfDestination);
                    for (j = 0; j < numberOfWords; j++)
                    {
                      if (wordList[j] != (char *) NULL)
                      {
                        if (*wordList[j] == i)
                        {
                          fputs(wordList[j] + 1, fileOfDestination);
                          fputs("\n", fileOfDestination);
                        }
                      }
                    }
                    i++;
                  }
                  if (nameOfDestination != NULL)
                  {
                    fclose(fileOfDestination);
                  }
                }
/**/              }
              if (nameOfSource != NULL)
              {
                fclose(fileOfSource);
              }
              printf("freeing wordList\n");
              if (wordList != (char **) NULL)
              {
                for (i = numberOfWords - 1; 0 <= i; i--)
                  if (wordList[i] != (char *) NULL)
                    free(wordList[i]);
                free(wordList);
              }
              printf("freeing mainList\n");
              if (mainList != (char **) NULL)
                free(mainList);
              printf("freeing buffer\n");
              if (buffer != (char *) NULL)
                free(buffer);
            }
            }
        }
    }
    printf("freeing prefixes\n");
    if (prefixes != (char *) NULL)
      free(prefixes);
    printf("freeing endings\n");
    if (endings != (char *) NULL)
      free(endings);
    printf("freeing baseRule\n");
    if (baseRule != (char *) NULL)
      free(baseRule);
    printf("freeing ending\n");
    if (ending != (char *) NULL)
      free(ending);
    printf("freeing variablePart\n");
    if (variablePart != (char *) NULL)
      free(variablePart);
    printf("freeing begin\n");
    if (begin  != (char *) NULL)
      free(begin);
    printf("freeing group\n");
    if (group  != (char *) NULL)
      free(group);
    printf("freeing commandLine\n");
    if (commandLine != (char *) NULL)
      free(commandLine);
    printf("freeing word\n");
    if (word != (char *) NULL)
      free(word);

    exit (result);
}

/* True when 'c' may be part of an unquoted token: a letter, or with
   allowDigits also a digit. The cast keeps <ctype.h> defined for bytes
   above 127 on platforms where plain char is signed. */
static bool isTokenChar(char c, bool allowDigits)
{
  unsigned char uc = (unsigned char) c;

  return allowDigits ? (isalnum(uc) != 0) : (isalpha(uc) != 0);
}

/* Advance *cursor to the start of the next token. Returns false, with
   *cursor left on the ';' or the terminating NUL, when a comment or the
   end of the line comes first. A '"' counts as token start only when
   allowQuote is set. */
static bool seekToken(char **cursor, bool allowDigits, bool allowQuote)
{
  char *p     = *cursor;
  bool  found = false;

  for (;;)
  {
    if (isTokenChar(*p, allowDigits) || (allowQuote && *p == '"'))
    {
      found = true;
      break;
    }
    if (*p == ';' || *p == '\0')
      break;
    p++;
  }

  *cursor = p;
  return(found);
}

/* Cut the token that starts at *cursor out of the line: it is terminated
   in place with NUL and *cursor is moved behind it. A token that starts
   with '"' runs up to the closing '"'. *more is cleared when the line is
   exhausted or when an unquoted token is directly followed by ';'. */
static char *cutToken(char **cursor, bool allowDigits, bool *more)
{
  char *p      = *cursor,
       *token;
  bool  quoted = (*p == '"');

  if (quoted)
  {
    token = ++p;
    while (*p != '"' && *p != '\0')
      p++;
  }
  else
  {
    token = p;
    while (isTokenChar(*p, allowDigits))
      p++;
  }

  if (*p == '\0')
    *more = false;
  else
  {
    /* the comment character must not be lost by the termination below */
    if (!quoted && *p == ';')
      *more = false;
    *p++ = '\0';
  }

  *cursor = p;
  return(token);
}

/*
 * Split one line of the control file into a command and up to two arguments.
 *
 *   commandLine  line to parse; it is modified, every token found is
 *                terminated in place with NUL
 *   command      receives the first word made of letters, or NULL
 *   argument1    receives the next token, or NULL: a word made of letters
 *                or a text enclosed in double quotes
 *   argument2    receives the next token, or NULL: a word made of letters
 *                and digits or a text enclosed in double quotes
 *
 * Everything from a ';' outside of double quotes to the end of the line is
 * a comment. The tokens point into commandLine. The return value is the
 * position in commandLine at which parsing stopped.
 */
char *parseCommandLine(char *commandLine, char **command, char **argument1, char **argument2)
{
  bool more;

  *command   = (char *) NULL;
  *argument1 = (char *) NULL;
  *argument2 = (char *) NULL;

  more = seekToken(&commandLine, false, false);
  if (more)
    *command = cutToken(&commandLine, false, &more);

  if (more)
    more = seekToken(&commandLine, false, true);
  if (more)
    *argument1 = cutToken(&commandLine, false, &more);

  if (more)
    more = seekToken(&commandLine, true, true);
  if (more)
    *argument2 = cutToken(&commandLine, true, &more);

  return(commandLine);
}

/*
 * Search 'list' for an entry that is equal to 'word'.
 *
 * The entries startPosition .. numberOfWords - 1 are compared in ascending
 * order; NULL entries are skipped. Returns the index of the first equal
 * entry or -1 when there is none.
 */
long exactFindWord(char **list, char *word, long numberOfWords, long startPosition)
{
  long  idx = startPosition;
  char *candidate;

  while (idx < numberOfWords)
  {
    candidate = list[idx];
    if (candidate != NULL && strcmp(candidate, word) == 0)
      return(idx);
    idx++;
  }

  return(-1);
}

/*
 * Replace the content of 'variable' with 'value'.
 *
 *   variable  destination buffer; must have room for size + 1 characters
 *   value     new content
 *   size      longest value accepted, not counting the terminating NUL
 *   name      name of the variable, used in the messages
 *   comment   prefix of the error message (the callers pass the name of
 *             the control file)
 *
 * A value longer than 'size' is rejected with a message on stdout and
 * 'variable' stays unchanged. Returns 'variable' in both cases.
 */
char *setValue(char *variable, char *value, size_t size, char *name, char *comment)
{
  if (size < strlen(value))
  {
    /* size is a size_t, so it has to be printed with %zu */
    printf("%s: value '%s' of %s too long; max size: %zu\n", comment, value, name, size);
  }
  else
  {
    strcpy(variable, value);
    printf("'%s' set to '%s'\n", name, variable);
  }

  return(variable);
}

/*
 * Append 'value' to the current content of 'variable'.
 *
 *   variable  destination buffer holding a NUL-terminated string; must
 *             have room for size + 1 characters
 *   value     text to append
 *   size      longest resulting content accepted, not counting the
 *             terminating NUL
 *   name      name of the variable, used in the messages
 *   comment   prefix of the error message (the callers pass the name of
 *             the control file)
 *
 * If the combined text would be longer than 'size' the request is rejected
 * with a message on stdout and 'variable' stays unchanged. Returns
 * 'variable' in both cases.
 */
char *addValue(char *variable, char *value, size_t size, char *name, char *comment)
{
  if (size < strlen(value) + strlen(variable))
  {
    /* size is a size_t, so it has to be printed with %zu */
    printf("%s: value '%s' of %s too long; max size: %zu\n", comment, value, name, size);
  }
  else
  {
    strcat(variable, value);
    printf("'%s' set to '%s'\n", name, variable);
  }

  return(variable);
}

/*
 * Classify the words of the main list that match the current rule.
 *
 * The search pattern "^<begin>*<variablePart><ending>$" is built and every
 * word of mainList matching it is turned into a base word with
 * transformWord(word, baseRule). For each prefix in 'prefixes' that is
 * followed by a comma, transformWord(baseWord, prefix) is handed to
 * appendListToGroup() for the group named 'group'. Leading white space of
 * a prefix is skipped; text behind the last comma is not used.
 *
 * 'prefixes' is modified temporarily while it is split, but it is restored
 * before the function returns.
 *
 * Returns 0 when the rule was processed, or 1 when the search pattern could
 * not be allocated or no group number was available.
 *
 * NOTE(review): the strings returned by transformWord() are not released
 * here; whether they have to be depends on transformWord(), which is not
 * visible from this function -- confirm.
 */
short extract(char **mainList, long numberOfWords, char **listOfGroups, char **wordList, char *group, char *begin, char *variablePart, char *ending, char *baseRule, char *prefixes, char *endings, bool allEndings, bool withBasewordOnly, long kindOfRule)
{
  short  result      = 1;
  long   groupNumber = -1,
         position    = -1;
  size_t patternLength;
  char   *searchPattern,
         *baseWord,
         *tmpPrefixes,
         *endOfPrefix,
         *extendedBaseWord;

  /* The buffer is sized from the parts; the 3 extra characters are
     '^', '*' and '$'. */
  patternLength = strlen(begin) + strlen(variablePart) + strlen(ending) + 3;
  searchPattern = (char *) malloc(patternLength + 1);
  if (searchPattern == (char *) NULL)
  {
    fprintf(stderr, "not enough memory for the search pattern of group '%s'\n", group);
    return(result);
  }
  sprintf(searchPattern, "^%s*%s%s$", begin, variablePart, ending);
  printf("classify words for '%s' with '%s' and create base word with '%s' and deliverated words with '%s'\n", group, searchPattern, baseRule, endings);

  if ((groupNumber = findGroupNumber(listOfGroups, group)) != -1)
  {
    result = 0;
    while ((position = patternFindWord(mainList, searchPattern, numberOfWords, position + 1)) != -1)
    {
      baseWord = transformWord(mainList[position], baseRule);
      tmpPrefixes = prefixes;
      while ((endOfPrefix = strchr(tmpPrefixes, ',')) != (char *) NULL)
      {
        *endOfPrefix = '\0';
        while (*tmpPrefixes != '\0' && isspace((unsigned char) *tmpPrefixes))
          tmpPrefixes++;
        extendedBaseWord = transformWord(baseWord, tmpPrefixes);
        appendListToGroup(mainList, numberOfWords, listOfGroups, groupNumber, wordList, extendedBaseWord, endings, allEndings, withBasewordOnly, kindOfRule);
        *endOfPrefix++ = ',';      /* restore the delimiter */
        tmpPrefixes = endOfPrefix; /* and continue with the next prefix */
      }
    }
  }

  free(searchPattern);
  return(result);
}

/*
 * Return the number of the group named 'group', registering it if needed.
 *
 * listOfGroups has MAX_GROUPS slots and the used slots come first. When
 * 'group' is not found before the first NULL slot, a copy of the name is
 * stored in that slot (allocated with malloc(), so it can be released with
 * free()) and the slot index becomes the new group number.
 *
 * Returns the group number, or -1 with a message on stderr when all slots
 * are taken by other groups or the copy cannot be allocated.
 */
long findGroupNumber(char **listOfGroups, char *group)
{
  long   i;
  size_t bytes;
  char   *copy;

  for (i = 0; i < MAX_GROUPS; i++)
  {
    if (listOfGroups[i] == (char *) NULL)
    {
      /* first free slot: the group is not known yet, so register it */
      bytes = strlen(group) + 1;
      copy  = (char *) malloc(bytes);
      if (copy == (char *) NULL)
      {
        fprintf(stderr, "not enough memory to add group '%s'\n", group);
        return(-1);
      }
      memcpy(copy, group, bytes);
      listOfGroups[i] = copy;
      printf("new group '%s' added. The number is %ld\n", group, i);
      return(i);
    }
    if (strcmp(listOfGroups[i], group) == 0)
      return(i);
  }

  /* every slot is in use and none of them holds this group */
  fprintf(stderr, "list of groups can only handle %d groups\n", MAX_GROUPS);
  return(-1);
}

/*
 * Search 'list' for an entry that matches 'pattern'.
 *
 * The pattern is converted once with prepareSearchString(); then the
 * entries startPosition .. numberOfWords - 1 are tested in ascending order
 * with patternMatch(). NULL entries are skipped.
 *
 * Returns the index of the first matching entry, or -1 when nothing
 * matches or the converted pattern cannot be allocated.
 */
long patternFindWord(char **list, char *pattern, long numberOfWords, long startPosition)
{
  long result = -1,
       i;
  /* the converted pattern is never longer than the original one */
  char *patternBuffer = (char *) malloc(strlen(pattern) + 1);

  if (patternBuffer == (char *) NULL)
  {
    fprintf(stderr, "not enough memory to search for pattern '%s'\n", pattern);
    return(result);
  }

  prepareSearchString (pattern, patternBuffer);
  for (i = startPosition; i < numberOfWords && result == -1; i++)
  {
    if (list[i] != (char *) NULL)
    {
      if (patternMatch (patternBuffer, list[i]))
        result = i;
    }
  }

  free(patternBuffer);
  return(result);
}

/*
 * Copy 'pattern' to 'buffer' with the meta characters replaced by their
 * internal codes.
 *
 *   '^' as first character  ->  ANCHOR
 *   '*'                     ->  STAR; a '*' that directly follows a STAR
 *                               code is copied as a literal '*'
 *   '?'                     ->  QM
 *   '[' and ']'             ->  BEGCLASS and ENDCLASS
 *   '$' as last character   ->  ENDOFLINE
 *   '\' followed by x       ->  \a \b \f \t \v give the matching control
 *                               character, any other x is copied literally
 *
 * A backslash at the very end of the pattern is copied literally. The
 * result is never longer than the pattern, so 'buffer' needs room for
 * strlen(pattern) + 1 characters.
 */
void prepareSearchString (char *pattern, char *buffer)
{
  int c;
  int i = 0;

  if (*pattern == '^')
  {
    buffer [i++] = ANCHOR;
    ++pattern;
  }

  while (*pattern != '\0')
  {
    switch (c = *pattern++)
    {
      case '*' : /* i == 0 has no previous character to look at */
                 if (i == 0 || buffer [i - 1] != STAR)
                   c = STAR;
                 break;
      case '?' : c = QM;
                 break;
      case '[' : c = BEGCLASS;
                 break;
      case ']' : c = ENDCLASS;
                 break;
      case '$' : if (*pattern == '\0')
                   c = ENDOFLINE;
                 break;
      case '\\': /* nothing follows: keep the backslash and stay inside
                    the pattern instead of stepping over its terminator */
                 if (*pattern == '\0')
                   break;
                 switch (c = *pattern++)
                 {
                   case  'a': c = '\a'; break;
                   case  'b': c = '\b'; break;
                   case  'f': c = '\f'; break;
                   case  't': c = '\t'; break;
                   case  'v': c = '\v'; break;
                   default  : break;   /* escaped character stands for itself */
                 }
                 break;
      default  : break;
    }
    buffer [i++] = (char) c;
  }

  buffer [i] = '\0';
}

/*
 * patternMatch
 *
 * Returns 'true' if the compiled 'pattern' (see prepareSearchString())
 * matches 'string', 'false' otherwise.
 *
 * If the pattern starts with ANCHOR it has to match at the beginning of
 * 'string'; otherwise every start position of 'string' is tried in turn.
 * A newline in 'string' is treated like the end of the string.
 *
 * Pattern codes:
 *   STAR       matches zero or more characters
 *   QM         matches exactly one character
 *   BEGCLASS   starts a character class that is closed by ENDCLASS; a
 *              '^' as first class member inverts the class
 *   ENDOFLINE  matches only at the end of 'string'
 * A pattern that is used up counts as a match, whatever follows in
 * 'string'. An unterminated character class never matches.
 */
bool patternMatch (char *pattern, char *string)
{
  char pc, sc;
  char *pat;
  char *start;          /* position where the current attempt began */
  bool anchored;
  bool inverse;
  bool clmatch;

  anchored = (*pattern == ANCHOR);
  if (anchored)
    ++pattern;

  /* One pass of the outer loop per start position in string. */
  for (start = string; ; ++start)
  {
    string = start;
    pat    = pattern;

    for (;;)
    {
      pc = *pat;
      sc = *string;

      if (sc == '\n' || sc == EOS)
      {
        /* At end of line or end of text. */
        if (pc == EOS || pc == ENDOFLINE)
          return (true);
        if (pc != STAR)
          return (false);
        /* A STAR may match nothing: skip it. */
        ++pat;
        continue;
      }

      if (pc == ENDOFLINE)
        break;                  /* not at the end yet: this attempt fails */

      if (pc == sc || pc == QM)
      {
        ++pat;
        ++string;
        continue;
      }

      if (pc == EOS)
        return (true);

      if (pc == STAR)
      {
        /* Try the rest of the pattern here, else let STAR eat a char. */
        if (patternMatch (pat + 1, string))
          return (true);
        ++string;
        continue;
      }

      if (pc == BEGCLASS)
      {
        /*
         * If the first character is a '^', the class lists the
         * characters which must not match.
         */
        inverse = (*(pat + 1) == '^');
        clmatch = inverse;
        if (inverse)
          pat++;

        while (*++pat != ENDCLASS && *pat != EOS)
        {
          if (*pat == sc)
            clmatch = !inverse;
        }
        if (*pat == EOS)
          return (false);       /* malformed pattern: class is not closed */

        if (clmatch)
        {
          ++pat;
          ++string;
          continue;
        }
      }

      break;                    /* mismatch at this start position */
    }

    if (anchored)
      return (false);
  }
}

/*
 * transformWord
 *
 * Builds a new word from 'word' according to the transformation 'rule'
 * and returns it in a freshly allocated string. The caller has to
 * free() the result. NULL is returned if no memory is available.
 *
 * A rule consists of the parts
 * <begin><digit><-><digit><end>
 *
 * <begin>: text the resulting word starts with
 * <digit>: number of letters (a single digit) that are removed from the
 *          beginning or from the end of 'word', depending on whether
 *          the digit stands before or behind the '-'
 * <->    : placeholder for the remaining part of 'word'
 * <end>  : text the resulting word ends with
 *
 * Every part may be missing. If the element <-> is missing, the result
 * is a plain copy of the rule. If more letters are to be removed than
 * 'word' has, nothing of 'word' remains in the result.
 *
 */
char *transformWord(char *word, char *rule)
{
  size_t wordLength   = strlen(word);
  size_t ruleLength   = strlen(rule);
  size_t prefixLength;
  size_t skipFront    = 0;
  size_t skipBack     = 0;
  size_t keep;
  char   *result      = (char *) malloc(wordLength + ruleLength + 1);
  char   *dash;
  char   *suffix;

  if (result == (char *) NULL)
    return(result);

  *result = '\0';
  if ((dash = strchr(rule, '-')) == (char *) NULL)
  {
    strcpy(result, rule);
    return(result);
  }

  /* Text before the '-', without a directly preceding delete count. */
  prefixLength = (size_t) (dash - rule);
  if (prefixLength > 0 && isdigit((unsigned char) *(dash - 1)))
  {
    skipFront = (size_t) (*(dash - 1) - '0');
    prefixLength--;
  }

  /* Text behind the '-', without a directly following delete count. */
  suffix = dash + 1;
  if (isdigit((unsigned char) *suffix))
  {
    skipBack = (size_t) (*suffix - '0');
    suffix++;
  }

  /* Never remove more letters than the word provides. */
  if (skipFront > wordLength)
    skipFront = wordLength;
  keep = wordLength - skipFront;
  if (skipBack > keep)
    skipBack = keep;
  keep -= skipBack;

  strncat(result, rule, prefixLength);
  strncat(result, word + skipFront, keep);
  strcat(result, suffix);

  return(result);
}

     
/*
 * appendListToGroup
 *
 * Builds a new entry for 'baseWord' and its 'endings' with
 * appendListOfWords() (which also removes the words it finds from
 * 'mainList'), stores 'groupNumber' in the first character of the entry
 * and puts the entry into the first free (NULL) slot of 'wordList'.
 *
 * 'wordList' is searched over 'numberOfWords' slots. If it has no free
 * slot, the entry is released again and a message is written to stderr.
 * Nothing happens if appendListOfWords() delivers no entry.
 *
 * 'listOfGroups' is not used by this function.
 */
void appendListToGroup(char **mainList, long numberOfWords, char **listOfGroups, long groupNumber, char **wordList, char *baseWord, char *endings, bool allEndings, bool withBasewordOnly, long kindOfRule)
{
  char *newEntry;
  long i;

  newEntry = appendListOfWords(mainList, numberOfWords, baseWord, endings, allEndings, withBasewordOnly, kindOfRule);
  if (newEntry == (char *) NULL)
    return;

  /* The entry starts with a placeholder character for the group number. */
  *newEntry = (char) groupNumber;

  /* Look for the first free slot. */
  for (i = 0; i < numberOfWords && wordList[i] != (char *) NULL; i++)
    ;

  if (i != numberOfWords)
  {
    wordList[i] = newEntry;
  }
  else
  {
    /* Nobody else references the entry, so do not leak it. */
    fprintf(stderr, "no free position in the word list for '%s'\n", baseWord);
    free(newEntry);
  }
}
  
/*
 * appendListOfWords
 *
 * Creates the entry " <baseWord>, <kindOfRule>[, <ending>]..." for a
 * base word and its endings and returns it in a freshly allocated
 * string, which the caller owns. The entry starts with a blank, which
 * appendListToGroup() overwrites with the group number.
 *
 * mainList         : list of words; every word that is found is removed
 *                    from it by setting its slot to NULL
 * numberOfWords    : number of slots in 'mainList'
 * baseWord         : the base word of the entry
 * endings          : transformation rules for transformWord(); each rule
 *                    has to be terminated by a ',' (text behind the last
 *                    ',' is ignored). Leading white space of a rule is
 *                    skipped. The string is modified temporarily while
 *                    it is parsed, so it has to be writable.
 * allEndings       : if true, every ending is added to the entry; if
 *                    false, only endings whose transformed word exists
 *                    in 'mainList'
 * withBasewordOnly : if true, no entry is created unless 'baseWord'
 *                    exists in 'mainList'
 * kindOfRule       : number that is stored behind the base word
 *
 * Returns the new entry, or NULL if no entry was created or no memory
 * is available.
 */
char *appendListOfWords(char **mainList, long numberOfWords, char *baseWord, char *endings, bool allEndings, bool withBasewordOnly, long kindOfRule)
{
  char *result = (char *) NULL,
       *comma,
       *tmpWord;
  long position = exactFindWord(mainList, baseWord, numberOfWords, 0);

  if (position == -1 && withBasewordOnly)
    return(result);

  /*
   * Every ending costs its own length plus the separator ", ", but the
   * source string reserves only one byte (the ',') per ending. So twice
   * the length of 'endings' is needed; 32 bytes cover the blank, the
   * separator, the number and the terminating NUL.
   */
  result = (char *) malloc(strlen(baseWord) + 2 * strlen(endings) + 32);
  if (result == (char *) NULL)
  {
    fprintf(stderr, "not enough memory to build the entry for '%s'\n", baseWord);
    return(result);
  }

  sprintf(result, " %s, %ld", baseWord, kindOfRule);
  if (position != -1)
  {
    mainList[position] = NULL;
    printf("word '%s' deleted on position %ld\n", baseWord, position);
  }

  while ((comma = strchr(endings, ',')) != (char *) NULL)
  {
    /* Cut the current rule out of the list for the time being. */
    *comma = '\0';
    while (*endings != '\0' && isspace((unsigned char) *endings))
      endings++;

    tmpWord  = transformWord(baseWord, endings);
    position = -1;
    if (tmpWord != (char *) NULL)
      position = exactFindWord(mainList, tmpWord, numberOfWords, 0);

    if (position != -1 || allEndings)
    {
      strcat(result, ", ");
      strcat(result, endings);
      if (position != -1)
      {
        mainList[position] = NULL;
        printf("word '%s' deleted on position %ld\n", tmpWord, position);
      }
    }

    /* Restore the separator and go on behind it. */
    *comma  = ',';
    endings = comma + 1;
    free(tmpWord);
  }

  return(result);
}
