#! /usr/bin/env python

import argparse
import json
import re

# The types defined in vcsn::dyn that we wrap.  The list is emitted
# as is under the `dyn_types` key of the generated JSON.
dyn_types = [
    'automaton',
    'context',
    'expansion',
    'expression',
    'label',
    'polynomial',
    'weight',
]

# A regex to "parse" a function declaration in `vcsn/dyn/algos.hh`.
#
# Named groups:
#   doc      the run of `///` comment lines preceding the declaration
#            (possibly empty),
#   result   the return type; only word characters and `:` are accepted,
#   dynfun   the name of the function,
#   formals  the raw text between the parentheses, matched lazily up to
#            the first `);`.
#
# re.VERBOSE makes the line breaks inside the pattern insignificant,
# re.MULTILINE lets `^` match at the start of every line, and re.DOTALL
# lets `formals` span several lines.
function_re = re.compile(r'''(?P<doc>(?:^\s*///[^\n]*\n)*)?
^\s*(?P<result>[:\w]+)
\s+(?P<dynfun>\w+)\s*\((?P<formals>.*?)\);''',
                    flags=re.DOTALL | re.MULTILINE | re.VERBOSE)

# A regex to parse the components of a C++ formal argument such
# as `const std::string& algo = "default"`.
#
# Named groups:
#   type     the whole type, including the optional `const` and `&`,
#   const    the `const` qualifier with its trailing whitespace, if any,
#   class    the bare type name (word characters, `:`, `<` and `>`),
#   arg      the name of the argument,
#   default  the text after `=`, if a default value is given.
formal_re = re.compile(r'^(?P<type>(?P<const>const\s+)?(?P<class>[:\w<>]+)&?)'
                       r'\s+'
                       r'(?P<arg>\w+)'
                       r'(\s*=\s*(?P<default>.*))?$',
                       flags=re.MULTILINE)


def formals_list(formals):
    """Split the text of a C++ formal parameter list into its formals.

    `formals` is the raw text found between the parentheses of a
    declaration.  Return the list of formals, each one stripped of its
    surrounding whitespace.  An empty (or blank) parameter list yields
    an empty list.

    Commas inside string or character literals do not separate
    formals.  A literal ends at the same quote character that opened
    it, and a backslash inside a literal escapes the next character.
    """
    # Using split(',\s*') is tempting, but will break default values
    # that contain quoted commas, for example
    # const std::string& param = "+, ., *=.2, w., .w, length=10"

    # A declaration such as `f()` has no formals at all: do not report
    # a single empty one.
    if not formals.strip():
        return []

    res = []
    current = []
    # The quote character that opened the literal we are in, if any.
    quote = None
    # Whether the previous character, inside a literal, was a backslash.
    escaped = False
    for c in formals:
        if quote is not None:
            current.append(c)
            if escaped:
                escaped = False
            elif c == '\\':
                escaped = True
            elif c == quote:
                quote = None
        elif c == ',':
            res.append(''.join(current).strip())
            current = []
        else:
            if c in ('"', "'"):
                quote = c
            current.append(c)
    res.append(''.join(current).strip())
    return res

def process_doc(doc):
    r"""Turn the `///` comment preceding a declaration into a dict.

    `doc` is the raw comment text, one `///` line per line; blank lines
    are ignored.  The result has four entries:

    - 'desc': the lines that come before the first `\command ` line,
    - 'params': one {'name': ..., 'desc': [...]} dict per `\param`,
    - 'pre': the lines of the `\pre` block,
    - 'returns': the lines of the `\returns` block.
    """
    comment_re = re.compile(r'\s*/// ?(?P<doc>.*)')
    command_re = re.compile(r'\s*\\\S* ')

    # Skip the blank lines, and strip the `/// ` prefix of the others.
    lines = []
    for raw in doc.split('\n'):
        if raw.strip() == '':
            continue
        lines.append(comment_re.match(raw).group('doc'))
    # A trailing empty line is added for convenience: when there is no
    # command at all, the description stops right before it.
    lines.append('')

    # The short description runs up to the first line opening a command.
    cut = len(lines) - 1
    for pos, text in enumerate(lines):
        if command_re.match(text):
            cut = pos
            break
    desc, body = lines[:cut], lines[cut:]

    def find_block(name):
        """The lines of `body` that form the `\\name` block."""
        tag_re = re.compile(r'\\{}'.format(name))
        first = None
        for pos, text in enumerate(body):
            if tag_re.search(text):
                if first is None:
                    first = pos
                continue
            if first is None:
                # The block has not started yet.
                continue
            # Empty and indented lines continue the block, anything
            # else ends it.
            if text and not text.startswith(' '):
                return body[first:pos]
        if first is None:
            return []
        return body[first:]

    def split_params(params):
        """Group the lines of the `\\param` block per parameter."""
        param_re = re.compile(r'\\param\s+(?P<name>\S+)(?P<desc>.*)')
        collected = []
        current = None
        for text in params:
            found = param_re.search(text)
            if found:
                # A new parameter starts here.
                current = {'name': found.group('name'),
                           'desc': [found.group('desc')]}
                collected.append(current)
            elif current is not None:
                # One more line for the parameter being described.
                current['desc'].append(text)
        return collected

    # Parameters are special: their block is further split and parsed.
    return {
        'desc': desc,
        'params': split_params(find_block('param')),
        'pre': find_block('pre'),
        'returns': find_block('returns'),
    }

def formal_to_dict(formal):
    """Parse one formal with `formal_re` and return its named groups.

    Raise RuntimeError when `formal` does not match `formal_re`.
    """
    parsed = formal_re.match(formal)
    if not parsed:
        raise RuntimeError("invalid formal parameter {}".format(formal))
    return parsed.groupdict()

def process_function(fun):
    """Refine, in place, the raw groups matched for one declaration.

    `fun['formals']` becomes a list with one dict per formal, and
    `fun['doc']` becomes the dict built by `process_doc`.  Return `fun`.
    """
    # From the text of the formals, to a list of texts...
    fun['formals'] = formals_list(fun['formals'])
    # ... and from there, to a list of dicts.
    fun['formals'] = list(map(formal_to_dict, fun['formals']))
    fun['doc'] = process_doc(fun['doc'])
    return fun


def process_header(header, out):
    '''Parse the declarations of a header and dump them as JSON.

    `header` is the path of the file to read (`dyn/algos.hh`), `out` is
    the file object the JSON is written to.  The JSON object holds the
    processed declarations under `algos`, and `dyn_types`.
    '''
    with open(header, 'r') as f:
        declarations = function_re.finditer(f.read())
        algos = [process_function(decl.groupdict())
                 for decl in declarations]
    json.dump({'algos':algos, 'dyn_types':dyn_types},
              out,
              sort_keys=True,
              indent=4)


# Command line: `[--output FILE] input`.
parser = argparse.ArgumentParser(
    description='Generate `dyn.json` file.',
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# Where the JSON goes; argparse's FileType turns the default `-`
# into the standard output.
parser.add_argument(
    '--output',
    type=argparse.FileType('w'),
    default='-',
    help='json file to generate',
)
# The header to read.
parser.add_argument(
    'input',
    type=str,
    default=None,
    help='''header to process (`dyn/algos.hh`).''',
)
args = parser.parse_args()


# Run the conversion.
process_header(args.input, args.output)
