/*
 *  fb_parser.c
 *  Player
 *
 *  Created by Perette Barella on 2012-03-06.
 *  Copyright 2012-2014 Devious Fish. All rights reserved.
 *
 *	This file contains functions for adding/identifying command lines.
 *
 *	fb_create_parser creates an empty parser.  fb_parser_add_statements then accepts
 *	an array of strings/return values (FB_PARSE_DEFINITION) to add statements to
 *	the parser.  Multiple arrays of definitions can be added to a single parser.
 *
 *	Statement formats can be composed of:
 *		keyword		-- a bare word.
 *		{value}		-- accepts any value in that position
 *      {#value}    -- Accepts decimal numeric value in that position
 *      {#value:3-5}-- Accepts numeric range in that position.  See range note below.
 *		<one|two>	-- accepts any 1 of the required words in that position
 *		[optional]	-- accepts an optional word in the current position
 *      [four|five] -- accepts 0 or 1 optional words in the current position
 *		[{value}]	-- accepts an optional value, only as the final word
 *		...			-- allows 0 or more additional parameters
 *
 * Ranges:
 *      If either min or max in range have a decimal point, values are treated as
 *      doubles.  Otherwise, they are considered integers.  If either min or max
 *      has a leading zero, it enables radix detection: 0 -> octal, 0x -> hex, otherwise decimal.
 *      If neither max nor min has a leading 0, only base 10 is accepted.
 *
 *	Examples:
 *		{ CREATE_USER,	"create <admin|guest|user> {username} {password}" }
 *					-- Recognizes the 3 types of users and requires username & password parameter, nothing more.
 *		{ CREATE_USER_EXTENDED, "create <admin|guest|user> {username} {password} {additional} ..." }
 *					-- Recognizes the 3 types of users and requires username, password, & at least 1 additional parameter.
 *		{ CREATE_USER_OPTIONAL_EXTENDED, "create <admin|guest|user> {user} {password} ..." }
 *					-- Recognizes the 3 types of users, requires username and password, allows additional parameters.
 *					   This definition is incompatible with CREATE_USER above--the parser would error when adding this
 *					   definition.  (There is no way to delineate that command and this command with 0 parameters.)
 *
 *		A completely different approach to the above would be:
 *
 *		{ CREATE_ADMIN_USER,	"create admin {username} {password} ..." },
 *		{ CREATE_STANDARD_USER,	"create standard {username} {password} ..." },
 *		{ CREATE_GUEST_USER,	"create guest {username} {password} ..." }
 *					-- Returns different values for the different user types, so you can determine that
 *					   from the command's enumeration instead of having to further parse. As written, these
 *					   would each allow additional optional parameters beyond username and password.
 *
 * Since the parse tree is assumed to be hard-coded in your application, this module simply assert()s if it
 * finds problems in the parse definitions.  NDEBUG should not be defined in the development environment to
 * ensure these are caught; since statements are tested for validity in development, removal of the assertions
 * is acceptable for release.
 *
 */

#include <config.h>

#ifndef __FreeBSD__
#define _BSD_SOURCE /* snprintf() */
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <ctype.h>
#include <assert.h>
#include <search.h>
#include <sys/types.h>

#include "fb_public.h"
#include "fb_service.h"

/* Any given token can represent... */
typedef enum fb_parsecomponent_t {
	FB_PARSER_UNDETERMINED,
	FB_PARSER_KEYWORD,
	FB_PARSER_VALUE,
	FB_PARSER_OPENEND
} FB_PARSER_TYPE;

/* How the word supplied for a {fill-in} is validated when a command is
   interpreted. */
typedef enum fb_valuetype_t {
    /* {value}: the word is accepted without inspection. */
    FB_VALUE_STRING,
    /* {#value}: an optional leading '-' followed by decimal digits. */
    FB_VALUE_NUMERIC,
    /* {#value:min-max} without a decimal point: converted with strtol()
       and compared against min_integer/max_integer. */
    FB_VALUE_RANGED_INTEGER,
    /* {#value:min-max} with a decimal point: converted with strtod()
       and compared against min_double/max_double. */
    FB_VALUE_RANGED_REAL
} FB_VALUETYPE;
/* Structure that will hold the parse tree.
   Each node stands for one position in a statement; the root node returned
   by fb_create_parser() stands for the position before the first word. */
struct fb_parser_t {
	/* What kind of word is expected after this node. */
	FB_PARSER_TYPE type;
	/* Validation applied to the next word when type is FB_PARSER_VALUE. */
	FB_VALUETYPE value_type;
	/* Heap copy of the keyword or fill-in text that leads to this node;
	   released when the parser is destroyed.  Left NULL by fb_create_parser()
	   for the root node. */
	char *word;
	/* Value returned when a command ends at this node.  Zero is treated
	   as "no statement ends here". */
	int response;
	/* Number of entries of subcomponents that are in use. */
	size_t subcomponent_count;
	/* Capacity bookkeeping for subcomponents; maintained by fb_expandcalloc()
	   for keyword lists and set to 1 for fill-ins. */
	size_t subcomponents_size;
	/* Child nodes: the keyword alternatives, or the single node that
	   follows a fill-in value. */
	struct fb_parser_t *subcomponents;
	/* Originally described as "the response value"; not referenced by the
	   code visible in this file. */
	int result; /* The response value */
	/* Inclusive limits used for FB_VALUE_RANGED_INTEGER. */
    long min_integer;
    long max_integer;
	/* Inclusive limits used for FB_VALUE_RANGED_REAL. */
    double min_double;
    double max_double;
	/* Base handed to strtol() for ranged integers: 10, or 0 to let strtol()
	   detect octal/hexadecimal prefixes. */
    int radix;
};



/* Create an argv-style array from a command line.

   Words are separated by whitespace.  A word that begins with a double
   quote runs to the next double quote that is followed by whitespace or by
   the end of the line, so it may contain whitespace; the enclosing quotes
   are removed.  Quotes anywhere else in a word are kept literally.

   commandline: text to split; it is not modified.
   result:      receives the NULL-terminated vector, or NULL on failure.

   Returns the number of populated things in the argv array.
   A command line with nothing on it results in 0 and a pointer to an array
   with one null.  A failure results in a negative number and a null pointer.

   All words point into a single duplicated buffer that starts at the first
   word; release the vector with fb_destroy_argv(). */
int fb_create_argv (const char *commandline, char ***result) {
	/* ***result: Anything can be solved with sufficient indirection */
	/* Skip leading whitespace.  The <ctype.h> functions are only defined for
	   values representable as unsigned char, hence the casts throughout. */
	while (isspace ((unsigned char) *commandline)) {
		commandline++;
	}
	/* Make a copy of the command line to scratch up */
	char *command = strdup (commandline);
	if (command == NULL) {
		perror ("fb_create_argv:strdup");
		*result = NULL;
		return -1;
	}
	/* First get a quick count of the words.  Quoting can only merge words,
	   so this is an upper bound on the number actually produced below. */
	int wordcount = 0;
	char *c = command;
	while (*c) {
		wordcount++;
		while (*c && !isspace ((unsigned char) *c)) {
			c++;
		}
		while (isspace ((unsigned char) *c)) {
			c++;
		}
	}
	/* Allocate a vector for the pointers and populate it */
	char **argv = calloc ((size_t) wordcount + 1, sizeof *argv);
	if (argv == NULL) {
		free (command);
		perror ("fb_create_argv:calloc");
		*result = NULL;
		return -1;
	}
	c = command;
	wordcount = 0;
	while (*c) {
		argv [wordcount++] = c;
		if (*c == '\"') {
			/* Quoted string.
			   The opening quote can't be skipped by advancing the pointer:
			   the first word's address is the one later handed to free().
			   Shift the remaining text left by one byte instead.  The regions
			   overlap, so this must be memmove(), not strcpy()/memcpy();
			   strlen (c) bytes starting at c + 1 include the terminator. */
			memmove (c, c + 1, strlen (c));
			/* Find quote AT END OF WORD.  Ignores quotes in words. */
			while (*c) {
				if (*c == '\"' && (c [1] == '\0' || isspace ((unsigned char) c [1]))) {
					break;
				}
				c++;
			}
		} else {
			/* Plain word */
			while (*c && !isspace ((unsigned char) *c)) {
				c++;
			}
		}
		if (*c) { /* Null terminate if needed */
			*(c++) = '\0';
		}
		while (isspace ((unsigned char) *c)) { /* Skip whitespace */
			c++;
		}
	}
	argv [wordcount] = NULL;
	if (wordcount == 0) {
		/* No word points into the buffer, so nobody would ever free it. */
		free (command);
	}
	*result = argv;
	return wordcount;
}

		   
/* Release a vector built by fb_create_argv().  A NULL vector is ignored. */
void fb_destroy_argv (char **argv) {
	if (argv == NULL) {
		return;
	}
	/* Every word is a slice of one buffer whose start is the first entry
	   (NULL when the line held no words), so a single free covers the text. */
	char *text = argv [0];
	free (text);
	/* Then the pointer vector itself. */
	free (argv);
}



/* Allocate a new parser that holds no statements yet.
   Returns the zero-filled root node, or NULL after reporting the failure
   with perror() if memory could not be allocated. */
FB_PARSER *fb_create_parser (void) {
	FB_PARSER *root = calloc (1, sizeof *root);
	if (root == NULL) {
		perror ("fb_create_parser:calloc");
		return NULL;
	}
	root->type = FB_PARSER_UNDETERMINED;
	return root;
}


/* This function is a qsort() etc compliant callback */
static int fb_parse_compare (const void *elem1, const void *elem2) {
	return strcasecmp (((FB_PARSER *) elem1)->word, ((FB_PARSER *) elem2)->word);
}



static bool fb_parser_add (FB_PARSER *parser, const int response, char **argv);

/* Attach one keyword beneath a node, reusing the existing child if the same
   word (compared without regard to case) is already there, and then add the
   rest of the statement beneath that child.
   parser:   node whose keyword list receives the word.
   response: value for the statement being added; passed on unchanged.
   argv:     points at the statement word being handled; the remainder of
             the statement starts at argv + 1.
   keyword:  the word to attach; it is copied, not retained.
   Returns true on success, false if memory ran out or the rest of the
   statement could not be added. */
static bool fb_parser_add_keyword (FB_PARSER *parser, const int response, char **argv, char *keyword) {
	/* Expand the array to accommodate one more element if required. */
	bool have_room = fb_expandcalloc ((void **) &parser->subcomponents, &parser->subcomponents_size,
	                                  parser->subcomponent_count + 1, sizeof (FB_PARSER));
	if (!have_room) {
		perror ("fb_parser_add_keyword:fb_expandcalloc");
		return false;
	}

	/* Borrow the first unused slot as the search key.  The search covers
	   only the slots in use, so the key never matches itself. */
	FB_PARSER *spare = parser->subcomponents + parser->subcomponent_count;
	spare->word = keyword;
	FB_PARSER *child = lfind (spare, parser->subcomponents, &parser->subcomponent_count,
	                          sizeof (FB_PARSER), fb_parse_compare);
	if (!child) {
		/* New word: the spare slot becomes its node and owns a private copy. */
		char *owned = strdup (keyword);
		spare->word = owned;
		if (!owned) {
			perror ("fb_parser_add_keyword:strdup");
			return false;
		}
		spare->type = FB_PARSER_UNDETERMINED;
		parser->subcomponent_count++;
		child = spare;
	}
	/* Recurse on the next term */
	return fb_parser_add (child, response, argv + 1);
}


/* Add a fill-in-the-blank to the parse tree.

   parser:    node after which the value is expected.
   response:  value for the statement being added; passed on unchanged.
   argv:      points at the statement word being handled; the remainder of
              the statement starts at argv + 1.
   blankname: the blank as written: "{name}", "{#name}" or "{#name:min-max}".
              It is only read.  The caller's word must survive intact because
              the same statement words are processed again for every branch
              of an <alternation> or [optional] earlier in the statement.

   A range containing a decimal point is parsed with strtod(); otherwise it
   is parsed with strtol().  Integer ranges accept only base 10 unless the
   minimum or maximum is written with a leading zero (a bare "0" does not
   count), in which case base 0 is used so strtol() detects the radix.

   If the node already expects a value, the new blank must agree with it
   in type and limits (asserted).  Keywords and fill-ins cannot share a
   position.

   Returns true on success, false on a definition error or if memory ran out. */
static bool fb_parser_add_fill_in (FB_PARSER *parser, const int response, char **argv, char *blankname) {
	long min_integer = 0, max_integer = 0;
	double min_double = 0, max_double = 0;
	int radix = 10;
	FB_VALUETYPE value_type = FB_VALUE_STRING;

	if (blankname [1] == '#') {
		value_type = FB_VALUE_NUMERIC;
		const char *range = strchr (blankname, ':');
		if (range) {
			range++;
			/* Only a decimal point in the range itself selects real numbers;
			   one in the blank's name is irrelevant. */
			value_type = (strchr (range, '.') ? FB_VALUE_RANGED_REAL : FB_VALUE_RANGED_INTEGER);
			/* The minimum might be negative, so start looking for the
			   separator after its sign. */
			const char *separator = strchr (*range == '-' ? range + 1 : range, '-');
			if (!separator) {
				fprintf (stderr, "Range in %s must be written as min-max.\n", blankname);
				assert (0);
				return false;
			}
			const char *upper = separator + 1;
			char *end;
			if (value_type == FB_VALUE_RANGED_REAL) {
				min_double = strtod (range, &end);
				assert (end && end == separator);
				max_double = strtod (upper, &end);
				assert (end && *end == '}');
			} else {
				/* A leading zero on either limit enables radix detection. */
				if ((range [0] == '0' && range [1] != '-') ||
				    (range [0] == '-' && range [1] == '0') ||
				    (upper [0] == '0' && upper [1] != '}') ||
				    (upper [0] == '-' && upper [1] == '0')) {
					radix = 0;
				}
				min_integer = strtol (range, &end, radix);
				assert (end && end == separator);
				max_integer = strtol (upper, &end, radix);
				assert (end && *end == '}');
			}
		}
	}

	if (parser->type == FB_PARSER_VALUE) {
		/* A value is already expected here; the definitions must agree. */
		assert (parser->subcomponents_size == 1);
		assert (parser->subcomponent_count == 1);
		assert (parser->value_type == value_type);
		assert (parser->value_type != FB_VALUE_RANGED_REAL || (parser->min_double == min_double && parser->max_double == max_double));
		assert (parser->value_type != FB_VALUE_RANGED_INTEGER || (parser->min_integer == min_integer && parser->max_integer == max_integer));
	} else if (parser->type == FB_PARSER_UNDETERMINED) {
		/* Build the follow-on node completely before touching the parent,
		   so a failed allocation leaves the tree as it was. */
		FB_PARSER *child = fb_create_parser ();
		if (!child) {
			return false;
		}
		if (!(child->word = strdup (blankname))) {
			perror ("fb_parser_add_fill_in:strdup");
			free (child);
			return false;
		}
		parser->type = FB_PARSER_VALUE;
		parser->subcomponents = child;
		parser->subcomponents_size = 1;
		parser->subcomponent_count = 1;
		parser->value_type = value_type;
		parser->min_integer = min_integer;
		parser->max_integer = max_integer;
		parser->min_double = min_double;
		parser->max_double = max_double;
		parser->radix = radix;
	} else {
		/* Print before asserting so the reason is visible in builds where
		   the assertion aborts. */
		fprintf (stderr, "Can not use {} alongside other term types.\n");
		assert (0);
		return false;
	}
	/* Recurse on the next term */
	return fb_parser_add (parser->subcomponents, response, argv + 1);
}



/* Add a new command to the parse tree.
   parser:   node at which the remaining words of the statement are attached.
   response: value recorded on every node at which the statement may end.
   argv:     NULL-terminated remaining words of the statement definition.
   Returns true if the statement (in all of its expansions) was added,
   false otherwise.

   The first remaining word decides what happens:
     (none)      the statement ends here; the node's response is set.
     [{value}]   optional fill-in; must be the last word.
     ...         the node becomes open-ended.
     {value}     handed to fb_parser_add_fill_in().
     <a|b>       each alternative is added as a keyword.
     [a|b]       as <a|b>, and additionally the rest of the statement is
                 added directly at this node (the "word omitted" case).
     other       added as a single keyword.

   Definition errors hit assert (0) first; the message and the false return
   that follow are only reached when assertions are compiled out (NDEBUG). */
static bool fb_parser_add (FB_PARSER *parser, const int response, char **argv) {
	assert (parser);
	assert (parser->subcomponents_size >= parser->subcomponent_count);
	
	char *word = *argv;
	size_t wordlen = word ? strlen (word) : 0;
	
	/* Handle end of command or optional fill-in-the-blank at end of command */
	if (!word ||
		(word [0] == '[' && word [1] == '{' && word [wordlen - 2] == '}' && word [wordlen - 1] == ']')) {
		if (word && *(argv+1)) {
			assert (0);
			fprintf (stderr, "Optional fill-in-the-blank must be the last word.\n");
			return (false);
		}
		/* A nonzero response means some statement already ends at this node. */
		if (parser->response) {
			assert (0);
			fprintf (stderr, "Statement redefined.\n");
			return false;
		}
		parser->response = response;
		bool result = true;
		if (word) {
			/* Take the optional-brackets off when we pass to fill-in subroutine:
			   the ']' is overwritten and word + 1 skips the '['. */
			word [wordlen - 1] = '\0';
			result = fb_parser_add_fill_in (parser, response, argv, word + 1);
			/* Put back in case we're invoked again via <alternation> or [optional]. */
			word [wordlen - 1] = ']';
		}
		return result;
	}

	/* Deal with open-ended commands (ellipsis) */
	if (strcmp (word, "...") == 0) {
		if (parser->type != FB_PARSER_UNDETERMINED || parser->response) {
			assert (0);
			fprintf (stderr, "Can not use ... alongside other term types.\n");
			return false;
		}
		/* Words after "..." in the definition are not examined. */
		parser->type = FB_PARSER_OPENEND;
		parser->response = response;
		return true;
	}

	/* Handle fill-in-the-blanks */
	if (word [0] == '{' && word[wordlen - 1] == '}') {
		/* Recurse on the next term */
		return fb_parser_add_fill_in (parser, response, argv, word);
	}

	/* If none of the others fit, it must be a keyword match */
	if (parser->type == FB_PARSER_UNDETERMINED) {
		parser->type = FB_PARSER_KEYWORD;
	} else if (parser->type != FB_PARSER_KEYWORD) {
		assert (0);
		fprintf (stderr, "Can not use keyword alongside other types except full stop.\n");
		return false;
	}
	
	/* Keyword list/alternation, including optional ones */
	if ((word [0] == '<' && word[wordlen - 1] == '>') ||
        (word [0] == '[' && word[wordlen - 1] == ']')) {
		bool ok = true;
        /* Deal with the [optional] case: add the rest of the statement at
           this same node, as if the optional word were not there. */
        if (word [0] == '[') {
            ok = fb_parser_add (parser, response, argv + 1);
            if (!ok) {
                fprintf (stderr, "Previous errors refers to when optional word(s) %s omitted\n", word);
            }
        }
        /* Now do it while adding the keyword list.  Work on a copy of the
           word, without its opening bracket, so it can be chopped up. */
		char *commandcopy = strdup ((word)+1);
		if (!commandcopy) {
			perror ("fb_parser_add:strdup");
			return false;
		}
		/* Drop the closing bracket. */
		commandcopy [strlen (commandcopy) - 1] = '\0';
		char *this_word = commandcopy;
		/* Repeat for each word in the alternation */
		while (*this_word) {
			char *endword = this_word;
			while (*endword && *endword != '|') {
				endword++;
			}
			/* Work out where the next alternative starts before the '|'
			   is overwritten with a terminator. */
			char *nextword = *endword ? endword + 1 : endword;
			*endword = '\0';
			if (this_word != endword) { /* Don't allow zero-length words */
				/* Keep adding the remaining alternatives even after a failure. */
				ok = fb_parser_add_keyword (parser, response, argv, this_word) && ok;
			}
			this_word = nextword;
		}
		free (commandcopy);
		return (ok);
	}
	return fb_parser_add_keyword (parser, response, argv, word);
}



/* Walk the whole parse tree and put the children of every node in
   fb_parse_compare() order, so that lookups can use bsearch(3) instead of
   a linear search. */
static void fb_sort_all (FB_PARSER *parser) {
	const size_t count = parser->subcomponent_count;
	/* Zero or one child is already in order. */
	if (count >= 2) {
		qsort (parser->subcomponents, count, sizeof (FB_PARSER), fb_parse_compare);
	}
	/* Then descend into each child in turn. */
	size_t index = 0;
	while (index < count) {
		fb_sort_all (parser->subcomponents + index);
		index++;
	}
}

						  
/* Add statements to the parser.
   parser: parser created by fb_create_parser().
   def:    array of statement definitions (statement text and the response
           value to return for it).
   count:  number of entries in def.
   Returns true (1) on success, false (0) on failure.  Processing stops at
   the first statement that cannot be added; statements before it remain in
   the parser. */
bool fb_parser_add_statements (FB_PARSER *parser, const FB_PARSE_DEFINITION def[], const size_t count) {
	bool ok = true;
	/* For each command line in the array... */
	for (size_t i = 0; ok && i < count; i++) {
		char **argv;
		if (fb_create_argv (def[i].statement, &argv) < 0) {
			ok = false;
		} else {
			ok = fb_parser_add (parser, def[i].response, argv);
			fb_destroy_argv (argv);
		}
	}
	/* Sort even after a failure: lookups use bsearch(), which requires every
	   keyword list to be in order, including lists extended by the
	   statements that were added before the failing one. */
	fb_sort_all (parser);
	return ok;
}




/* Interpret one token of a command in an argv array.  If the command is complete,
   return that number; if it continues, recurse; if it is invalid, return the
   appropriate problem type code.
   parser:    node describing what may come next.
   argv:      remaining words of the command, NULL-terminated.
   errorterm: on failure, receives the offending word, or (when the command
              is incomplete) the word of the node that needed more input.
   Returns the statement's response value, or FB_PARSE_EXTRA_TERMS,
   FB_PARSE_INCOMPLETE, FB_PARSE_NUMERIC, FB_PARSE_RANGE or
   FB_PARSE_INVALID_KEYWORD. */
static int fb_interpret_recurse (const FB_PARSER *parser, char *const *argv, char **errorterm) {
	char *token = *argv;
	/* Out of words at a node where a statement may end: success. */
	if (token == NULL && parser->response) {
		return parser->response;
	}
	switch (parser->type) {
	case FB_PARSER_UNDETERMINED:
		/* Nothing is allowed to follow this node. */
		*errorterm = token;
		return FB_PARSE_EXTRA_TERMS;
	case FB_PARSER_VALUE:
		/* Running out of words must be caught before validation, which
		   would otherwise hand a NULL pointer to strtol()/strtod(). */
		if (token == NULL) {
			*errorterm = parser->word;
			return FB_PARSE_INCOMPLETE;
		}
		switch (parser->value_type) {
		case FB_VALUE_RANGED_INTEGER: {
			char *end;
			long value = strtol (token, &end, parser->radix);
			/* end == token means no digits were converted (e.g. ""). */
			if (end == token || *end) {
				*errorterm = token;
				return FB_PARSE_NUMERIC;
			}
			if (value < parser->min_integer || value > parser->max_integer) {
				*errorterm = token;
				return FB_PARSE_RANGE;
			}
			break;
		}
		case FB_VALUE_RANGED_REAL: {
			char *end;
			double value = strtod (token, &end);
			if (end == token || *end) {
				*errorterm = token;
				return FB_PARSE_NUMERIC;
			}
			if (value < parser->min_double || value > parser->max_double) {
				*errorterm = token;
				return FB_PARSE_RANGE;
			}
			break;
		}
		case FB_VALUE_NUMERIC: {
			/* Verify the value is numeric: an optional '-' and then at
			   least one decimal digit, nothing else. */
			const char *c = token;
			if (*c == '-') {
				c++;
			}
			if (*c == '\0') {
				*errorterm = token;
				return FB_PARSE_NUMERIC;
			}
			for (; *c; c++) {
				if (!isdigit ((unsigned char) *c)) {
					*errorterm = token;
					return FB_PARSE_NUMERIC;
				}
			}
			break;
		}
		case FB_VALUE_STRING:
			break;
		}
		return fb_interpret_recurse (parser->subcomponents, argv + 1, errorterm);
	case FB_PARSER_OPENEND:
		/* Whatever remains is accepted. */
		return parser->response;
	case FB_PARSER_KEYWORD: {
		if (token == NULL) {
			*errorterm = parser->word;
			return FB_PARSE_INCOMPLETE;
		}
		/* Only the word of the key is examined by the comparison callback. */
		FB_PARSER matchthis;
		matchthis.word = token;
		const FB_PARSER *found = bsearch (&matchthis, parser->subcomponents, parser->subcomponent_count,
		                                  sizeof (FB_PARSER), fb_parse_compare);
		if (found == NULL) {
			*errorterm = token;
			return FB_PARSE_INVALID_KEYWORD;
		}
		return fb_interpret_recurse (found, argv + 1, errorterm);
	}
	}
	/* Unknown node type: the tree is corrupt. */
	assert (0);
	return FB_PARSE_FAILURE;
}

/* Look up the command held in an argv array.
   parser:    parser whose statements the command is matched against.
   argv:      NULL-terminated words of the command.
   errorterm: set to NULL first; the recursive matcher stores a word here
              when it rejects the command.
   Returns the command number, or one of the FB_PARSE_* values to indicate
   the nature of why the command is invalid. */
int fb_interpret (const FB_PARSER *parser, char *const *argv, char **errorterm) {
	*errorterm = NULL;
	const int outcome = fb_interpret_recurse (parser, argv, errorterm);
	return outcome;
}


/* Release everything a node owns: the contents of each child (recursively),
   the node's word and its child array.  The node's own storage is left to
   the caller, since children live inside their parent's array. */
static void fb_parser_destroy_recurse (FB_PARSER *parser) {
	size_t remaining = parser->subcomponent_count;
	FB_PARSER *child = parser->subcomponents;
	while (remaining > 0) {
		fb_parser_destroy_recurse (child);
		child++;
		remaining--;
	}
	free (parser->word);
	free (parser->subcomponents);
}


/* Destroy a parser and free up its resources.
   parser: root node returned by fb_create_parser().  NULL is accepted and
           ignored, matching free() and fb_destroy_argv(). */
void fb_parser_destroy (FB_PARSER *parser) {
	if (parser == NULL) {
		return;
	}
	fb_parser_destroy_recurse (parser);
	/* The root is the only node allocated on its own rather than inside a
	   parent's array, so it is released here. */
	free (parser);
}
