/* tags.c -- recognize HTML tags

  (c) 1998-2000 (W3C) MIT, INRIA, Keio University
  (c) 2001 eGenix.com Software GmbH, Langenfeld
  See tidy.c for the copyright notice.


  The HTML tag names are stored as 8 bit ASCII strings.
  Use FindTag() to find the tag for a node given its element name
  (no lookupw() for wide char strings is defined in this file).
*/


/* Include HTML Tidy Header */   /* platform independent stuff */
#include "htmltidy.h"       /* to pull in definition of nodes */

/* number of buckets in the taghash[] table declared below */
#define HASHSIZE 357

/* declared here, but the code in this file does not reference it;
   FindTag() reads lexer->config->XmlTags instead */
extern Bool XmlTags;

/*
  Shortcuts to the dictionary entries of individual tags.
  InitTags() fills each one in with a lookup by tag name. FreeTags()
  frees the entries they point to but does not reset these pointers.
*/
Dict *tag_html;
Dict *tag_head;
Dict *tag_title;
Dict *tag_base;
Dict *tag_meta;
Dict *tag_body;
Dict *tag_frameset;
Dict *tag_frame;
Dict *tag_noframes;
Dict *tag_hr;
Dict *tag_h1;
Dict *tag_h2;
Dict *tag_pre;
Dict *tag_listing;
Dict *tag_p;
Dict *tag_ul;
Dict *tag_ol;
Dict *tag_dl;
Dict *tag_dir;
Dict *tag_li;
Dict *tag_dt;
Dict *tag_dd;
Dict *tag_td;
Dict *tag_th;
Dict *tag_tr;
Dict *tag_col;
Dict *tag_br;
Dict *tag_a;
Dict *tag_link;
Dict *tag_b;
Dict *tag_i;
Dict *tag_strong;
Dict *tag_em;
Dict *tag_big;
Dict *tag_small;
Dict *tag_param;
Dict *tag_option;
Dict *tag_optgroup;
Dict *tag_img;
Dict *tag_map;
Dict *tag_area;
Dict *tag_nobr;
Dict *tag_wbr;
Dict *tag_font;
Dict *tag_layer;
Dict *tag_spacer;
Dict *tag_center;
Dict *tag_style;
Dict *tag_script;
Dict *tag_noscript;
Dict *tag_table;
Dict *tag_caption;
Dict *tag_form;
Dict *tag_textarea;
Dict *tag_blockquote;
Dict *tag_applet;
Dict *tag_object;
Dict *tag_div;
Dict *tag_span;

/* single dummy entry that FindTag() assigns to every node when the
   XmlTags option is set; allocated in InitTags() */
Dict *xml_tags;  /* dummy for xml tags */

/* hash table of tag entries: each bucket is a list linked through
   Dict.next and is indexed by hash() of the tag name */
static Dict *taghash[HASHSIZE];

static struct tag
{
    char *name;
    unsigned versions;
    unsigned model;
    Parser *parser;
    CheckAttribs *chkattrs;
} tags[] =
{
    {"html",       (VERS_ALL|VERS_FRAMES),     (CM_HTML|CM_OPT|CM_OMITST),  ParseHTML, CheckHTML},

    {"head",       (VERS_ALL|VERS_FRAMES),     (CM_HTML|CM_OPT|CM_OMITST), ParseHead, NULL},

    {"title",      (VERS_ALL|VERS_FRAMES),     CM_HEAD, ParseTitle, NULL},
    {"base",       (VERS_ALL|VERS_FRAMES),     (CM_HEAD|CM_EMPTY), NULL, NULL},
    {"link",       (VERS_ALL|VERS_FRAMES),     (CM_HEAD|CM_EMPTY), NULL, CheckLINK},
    {"meta",       (VERS_ALL|VERS_FRAMES),     (CM_HEAD|CM_EMPTY), NULL, NULL},
    {"style",      (VERS_FROM32|VERS_FRAMES),  CM_HEAD, ParseScript, CheckSTYLE},
    {"script",     (VERS_FROM32|VERS_FRAMES),  (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), ParseScript, CheckSCRIPT},
    {"server",     VERS_NETSCAPE,  (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), ParseScript, NULL},

    {"body",       VERS_ALL,     (CM_HTML|CM_OPT|CM_OMITST), ParseBody, NULL},
    {"frameset",   VERS_FRAMES,  (CM_HTML|CM_FRAMES), ParseFrameSet, NULL},

    {"p",          VERS_ALL,     (CM_BLOCK|CM_OPT), ParseInline, NULL},
    {"h1",         VERS_ALL,     (CM_BLOCK|CM_HEADING), ParseInline, NULL},
    {"h2",         VERS_ALL,     (CM_BLOCK|CM_HEADING), ParseInline, NULL},
    {"h3",         VERS_ALL,     (CM_BLOCK|CM_HEADING), ParseInline, NULL},
    {"h4",         VERS_ALL,     (CM_BLOCK|CM_HEADING), ParseInline, NULL},
    {"h5",         VERS_ALL,     (CM_BLOCK|CM_HEADING), ParseInline, NULL},
    {"h6",         VERS_ALL,     (CM_BLOCK|CM_HEADING), ParseInline, NULL},
    {"ul",         VERS_ALL,     CM_BLOCK, ParseList, NULL},
    {"ol",         VERS_ALL,     CM_BLOCK, ParseList, NULL},
    {"dl",         VERS_ALL,     CM_BLOCK, ParseDefList, NULL},
    {"dir",        VERS_LOOSE,   (CM_BLOCK|CM_OBSOLETE), ParseList, NULL},
    {"menu",       VERS_LOOSE,   (CM_BLOCK|CM_OBSOLETE), ParseList, NULL},
    {"pre",        VERS_ALL,     CM_BLOCK, ParsePre, NULL},
    {"listing",    VERS_ALL,     (CM_BLOCK|CM_OBSOLETE), ParsePre, NULL},
    {"xmp",        VERS_ALL,     (CM_BLOCK|CM_OBSOLETE), ParsePre, NULL},
    {"plaintext",  VERS_ALL,     (CM_BLOCK|CM_OBSOLETE), ParsePre, NULL},
    {"address",    VERS_ALL,     CM_BLOCK, ParseBlock, NULL},
    {"blockquote", VERS_ALL,     CM_BLOCK, ParseBlock, NULL},
    {"form",       VERS_ALL,     CM_BLOCK, ParseBlock, NULL},
    {"isindex",    VERS_LOOSE,   (CM_BLOCK|CM_EMPTY), NULL, NULL},
    {"fieldset",   VERS_HTML40,  CM_BLOCK, ParseBlock, NULL},
    {"table",      VERS_FROM32,  CM_BLOCK, ParseTableTag, CheckTABLE},
    {"hr",         VERS_ALL,     (CM_BLOCK|CM_EMPTY),  NULL, CheckHR},
    {"div",        VERS_FROM32,  CM_BLOCK, ParseBlock, NULL},
    {"multicol",   VERS_NETSCAPE,  CM_BLOCK, ParseBlock, NULL},
    {"nosave",     VERS_NETSCAPE, CM_BLOCK, ParseBlock, NULL},
    {"layer",      VERS_NETSCAPE, CM_BLOCK, ParseBlock, NULL},
    {"ilayer",     VERS_NETSCAPE, CM_INLINE, ParseInline, NULL},
    {"nolayer",    VERS_NETSCAPE, (CM_BLOCK|CM_INLINE|CM_MIXED), ParseBlock, NULL},
    {"align",      VERS_NETSCAPE, CM_BLOCK, ParseBlock, NULL},
    {"center",     VERS_LOOSE,   CM_BLOCK, ParseBlock, NULL},
    {"ins",        VERS_HTML40,  (CM_INLINE|CM_BLOCK|CM_MIXED), ParseInline, NULL},
    {"del",        VERS_HTML40,  (CM_INLINE|CM_BLOCK|CM_MIXED), ParseInline, NULL},

    {"li",         VERS_ALL,     (CM_LIST|CM_OPT|CM_NO_INDENT), ParseBlock, NULL},
    {"dt",         VERS_ALL,     (CM_DEFLIST|CM_OPT|CM_NO_INDENT), ParseInline, NULL},
    {"dd",         VERS_ALL,     (CM_DEFLIST|CM_OPT|CM_NO_INDENT), ParseBlock, NULL},

    {"caption",    VERS_FROM32,  CM_TABLE, ParseInline, CheckCaption},
    {"colgroup",   VERS_HTML40,  (CM_TABLE|CM_OPT), ParseColGroup, NULL},
    {"col",        VERS_HTML40,  (CM_TABLE|CM_EMPTY),  NULL, NULL},
    {"thead",      VERS_HTML40,  (CM_TABLE|CM_ROWGRP|CM_OPT), ParseRowGroup, NULL},
    {"tfoot",      VERS_HTML40,  (CM_TABLE|CM_ROWGRP|CM_OPT), ParseRowGroup, NULL},
    {"tbody",      VERS_HTML40,  (CM_TABLE|CM_ROWGRP|CM_OPT), ParseRowGroup, NULL},
    {"tr",         VERS_FROM32,  (CM_TABLE|CM_OPT), ParseRow, NULL},
    {"td",         VERS_FROM32,  (CM_ROW|CM_OPT|CM_NO_INDENT), ParseBlock, CheckTableCell},
    {"th",         VERS_FROM32,  (CM_ROW|CM_OPT|CM_NO_INDENT), ParseBlock, CheckTableCell},

    {"q",          VERS_HTML40,  CM_INLINE, ParseInline, NULL},
    {"a",          VERS_ALL,     CM_INLINE, ParseInline, CheckAnchor},
    {"br",         VERS_ALL,     (CM_INLINE|CM_EMPTY), NULL, NULL},
    {"img",        VERS_ALL,     (CM_INLINE|CM_IMG|CM_EMPTY), NULL, CheckIMG},
    {"object",     VERS_HTML40,  (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), ParseBlock, NULL},
    {"applet",     VERS_LOOSE,   (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), ParseBlock, NULL},
    {"servlet",    VERS_SUN,     (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), ParseBlock, NULL},
    {"param",      VERS_FROM32,  (CM_INLINE|CM_EMPTY), NULL, NULL},
    {"embed",      VERS_NETSCAPE, (CM_INLINE|CM_IMG|CM_EMPTY), NULL, NULL},
    {"noembed",    VERS_NETSCAPE, CM_INLINE, ParseInline, NULL},
    {"iframe",     VERS_HTML40_LOOSE, CM_INLINE, ParseBlock, NULL},
    {"frame",      VERS_FRAMES,  (CM_FRAMES|CM_EMPTY), NULL, NULL},
    {"noframes",   VERS_IFRAMES, (CM_BLOCK|CM_FRAMES), ParseNoFrames,  NULL},
    {"noscript",   (VERS_FRAMES|VERS_HTML40),  (CM_BLOCK|CM_INLINE|CM_MIXED), ParseBlock, NULL},
    {"b",          VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"i",          VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"u",          VERS_LOOSE,   CM_INLINE, ParseInline, NULL},
    {"tt",         VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"s",          VERS_LOOSE,   CM_INLINE, ParseInline, NULL},
    {"strike",     VERS_LOOSE,   CM_INLINE, ParseInline, NULL},
    {"big",        VERS_FROM32,  CM_INLINE, ParseInline, NULL},
    {"small",      VERS_FROM32,  CM_INLINE, ParseInline, NULL},
    {"sub",        VERS_FROM32,  CM_INLINE, ParseInline, NULL},
    {"sup",        VERS_FROM32,  CM_INLINE, ParseInline, NULL},
    {"em",         VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"strong",     VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"dfn",        VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"code",       VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"samp",       VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"kbd",        VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"var",        VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"cite",       VERS_ALL,     CM_INLINE, ParseInline, NULL},
    {"abbr",       VERS_HTML40,  CM_INLINE, ParseInline, NULL},
    {"acronym",    VERS_HTML40,  CM_INLINE, ParseInline, NULL},
    {"span",       VERS_FROM32,  CM_INLINE, ParseInline, NULL},
    {"blink",      VERS_PROPRIETARY, CM_INLINE, ParseInline, NULL},
    {"nobr",       VERS_PROPRIETARY, CM_INLINE, ParseInline, NULL},
    {"wbr",        VERS_PROPRIETARY, (CM_INLINE|CM_EMPTY), NULL, NULL},
    {"marquee",    VERS_MICROSOFT, (CM_INLINE|CM_OPT), ParseInline, NULL},
    {"bgsound",    VERS_MICROSOFT, (CM_HEAD|CM_EMPTY), NULL, NULL},
    {"comment",    VERS_MICROSOFT, CM_INLINE, ParseInline, NULL},
    {"spacer",     VERS_NETSCAPE, (CM_INLINE|CM_EMPTY), NULL, NULL},
    {"keygen",     VERS_NETSCAPE, (CM_INLINE|CM_EMPTY), NULL, NULL},
    {"nolayer",    VERS_NETSCAPE, (CM_BLOCK|CM_INLINE|CM_MIXED), ParseBlock, NULL},
    {"ilayer",     VERS_NETSCAPE, CM_INLINE, ParseInline, NULL},
    {"map",        VERS_FROM32,  CM_INLINE, ParseBlock, CheckMap},
    {"area",       VERS_ALL,     (CM_BLOCK|CM_EMPTY), NULL, CheckAREA},
    {"input",      VERS_ALL,     (CM_INLINE|CM_IMG|CM_EMPTY), NULL, NULL},
    {"select",     VERS_ALL,     (CM_INLINE|CM_FIELD), ParseSelect, NULL},
    {"option",     VERS_ALL,     (CM_FIELD|CM_OPT), ParseText, NULL},
    {"optgroup",   VERS_HTML40,  (CM_FIELD|CM_OPT), ParseOptGroup, NULL},
    {"textarea",   VERS_ALL,     (CM_INLINE|CM_FIELD), ParseText, NULL},
    {"label",      VERS_HTML40,  CM_INLINE, ParseInline, NULL},
    {"legend",     VERS_HTML40,  CM_INLINE, ParseInline, NULL},
    {"button",     VERS_HTML40,  CM_INLINE, ParseInline, NULL},
    {"basefont",   VERS_LOOSE,   (CM_INLINE|CM_EMPTY), NULL, NULL},
    {"font",       VERS_LOOSE,   CM_INLINE, ParseInline, NULL},
    {"bdo",        VERS_HTML40,  CM_INLINE, ParseInline, NULL},

  /* this must be the final entry */
    {NULL,         0,            0,          0,       0}
};

/*
  Choose what version to use for a new doctype.

  Tests the version bits in lexer->versions in a fixed order
  (VERS_HTML20, VERS_HTML32, VERS_HTML40_STRICT, VERS_HTML40_LOOSE,
  VERS_FRAMES) and returns the first one that is set, or
  VERS_UNKNOWN when none of them is.
*/
int HTMLVersion(Lexer *lexer)
{
    const unsigned int allowed = lexer->versions;
    int chosen;

    if (allowed & VERS_HTML20)
        chosen = VERS_HTML20;
    else if (allowed & VERS_HTML32)
        chosen = VERS_HTML32;
    else if (allowed & VERS_HTML40_STRICT)
        chosen = VERS_HTML40_STRICT;
    else if (allowed & VERS_HTML40_LOOSE)
        chosen = VERS_HTML40_LOOSE;
    else if (allowed & VERS_FRAMES)
        chosen = VERS_FRAMES;
    else
        chosen = VERS_UNKNOWN;

    return chosen;
}

/*
  Map a NUL-terminated tag name to a bucket index in the range
  0 .. HASHSIZE-1.

  Every byte is folded in as hashval = byte + 31*hashval using
  unsigned arithmetic. The byte is read as unsigned char so that
  bytes >= 0x80 contribute the same value whether plain char is
  signed or unsigned on the target platform.
*/
static unsigned hash(const char *s)
{
    unsigned hashval = 0;

    for (; *s != '\0'; s++)
        hashval = (unsigned char)*s + 31*hashval;

    return hashval % HASHSIZE;
}

/*
  Find the dictionary entry whose name compares equal to s
  (wstrcmp() returning 0). Only the bucket selected by hash(s)
  is searched. Returns the entry, or NULL when there is none.
*/
static Dict *lookup(char *s)
{
    Dict *entry = taghash[hash(s)];

    /* walk the bucket's chain until a match or the end of the list */
    while (entry != NULL && wstrcmp(s, entry->name) != 0)
        entry = entry->next;

    return entry;
}

/*
  Add a tag to the dictionary, or update the entry that already
  exists under the same name.

    name      tag name; a copy made with wstrdup() is stored
    versions  VERS_* bit mask, replaces any previous value
    model     CM_* bit mask, ORed into any previous value
    parser    replaces any previous value
    chkattrs  replaces any previous value

  Returns the entry, or NULL when a new entry was needed and
  either allocation failed. In that case the table is unchanged.
*/
static Dict *install(char *name, unsigned int versions, unsigned int model,
                     Parser *parser, CheckAttribs *chkattrs)
{
    Dict *np;
    unsigned hashval;

    if ((np = lookup(name)) == NULL)
    {
        np = (Dict *)MemAlloc(sizeof(*np));

        if (np == NULL)
            return NULL;

        if ((np->name = wstrdup(name)) == NULL)
        {
            /* release the half-built entry instead of leaking it */
            MemFree(np);
            return NULL;
        }

        /* link the new entry in at the head of its bucket */
        hashval = hash(name);
        np->next = taghash[hashval];
        np->model = 0;   /* so that the |= below starts from no bits */
        taghash[hashval] = np;
    }

    np->versions = versions;
    np->model |= model;
    np->parser = parser;
    np->chkattrs = chkattrs;
    return np;
}

/*
  Public interface for finding a tag by name.

  When lexer->config->XmlTags is set, node->tag is set to the shared
  xml_tags entry and yes is returned without looking at the name.
  Otherwise node->element is looked up in the dictionary: on a match
  node->tag is set to the entry and yes is returned; if the element
  name is missing or unknown, node->tag is left untouched and no is
  returned.
*/
Bool FindTag(Lexer *lexer, Node *node)
{
    Dict *entry;

    if (lexer->config->XmlTags)
    {
        node->tag = xml_tags;
        return yes;
    }

    if (!node->element)
        return no;

    entry = lookup(node->element);

    if (!entry)
        return no;

    node->tag = entry;
    return yes;
}

/*
  Return the parser registered for node->element, or NULL when the
  node has no element name or the name is not in the dictionary.
  (The result is also NULL for entries installed with a NULL parser.)
*/
Parser *FindParser(Node *node)
{
    Dict *entry = NULL;

    if (node->element)
        entry = lookup(node->element);

    return entry ? entry->parser : NULL;
}

/*
  Register name as a proprietary (VERS_PROPRIETARY) tag with content
  model CM_EMPTY|CM_NO_INDENT|CM_NEW, parser ParseBlock and no
  attribute checker. The result of install() is not reported.
*/
void DefineEmptyTag(char *name)
{
    const unsigned int model = CM_EMPTY | CM_NO_INDENT | CM_NEW;

    (void)install(name, VERS_PROPRIETARY, model, ParseBlock, NULL);
}

/*
  Register name as a proprietary (VERS_PROPRIETARY) tag with content
  model CM_INLINE|CM_NO_INDENT|CM_NEW, parser ParseBlock and no
  attribute checker. The result of install() is not reported.

  NOTE(review): most CM_INLINE entries in the tags[] table use
  ParseInline rather than ParseBlock -- confirm that ParseBlock is
  intended here and not a copy of DefineBlockTag().
*/
void DefineInlineTag(char *name)
{
    const unsigned int model = CM_INLINE | CM_NO_INDENT | CM_NEW;

    (void)install(name, VERS_PROPRIETARY, model, ParseBlock, NULL);
}

/*
  Register name as a proprietary (VERS_PROPRIETARY) tag with content
  model CM_BLOCK|CM_NO_INDENT|CM_NEW, parser ParseBlock and no
  attribute checker. The result of install() is not reported.
*/
void DefineBlockTag(char *name)
{
    const unsigned int model = CM_BLOCK | CM_NO_INDENT | CM_NEW;

    (void)install(name, VERS_PROPRIETARY, model, ParseBlock, NULL);
}

/*
  Register name as a proprietary (VERS_PROPRIETARY) tag with content
  model CM_BLOCK|CM_NO_INDENT|CM_NEW, parser ParsePre and no
  attribute checker. The result of install() is not reported.
*/
void DefinePreTag(char *name)
{
    const unsigned int model = CM_BLOCK | CM_NO_INDENT | CM_NEW;

    (void)install(name, VERS_PROPRIETARY, model, ParsePre, NULL);
}

/*
  Build the tag dictionary.

  1. Installs every row of the tags[] table (up to the NULL name).
  2. Caches the entries of individual tags in the tag_* globals;
     a pointer is NULL if its tag could not be installed.
  3. Allocates xml_tags, the dummy entry FindTag() assigns to every
     node when the XmlTags option is set. If that allocation fails
     xml_tags is left NULL.
*/
void InitTags(void)
{
    struct tag *tp;

    for (tp = tags; tp->name != NULL; ++tp)
        install(tp->name, tp->versions, tp->model, tp->parser, tp->chkattrs);

    tag_html = lookup("html");
    tag_head = lookup("head");
    tag_body = lookup("body");
    tag_frameset = lookup("frameset");
    tag_frame = lookup("frame");
    tag_noframes = lookup("noframes");
    tag_meta = lookup("meta");
    tag_title = lookup("title");
    tag_base = lookup("base");
    tag_hr = lookup("hr");
    tag_pre = lookup("pre");
    tag_listing = lookup("listing");
    tag_h1 = lookup("h1");
    tag_h2 = lookup("h2");
    tag_p  = lookup("p");
    tag_ul = lookup("ul");
    tag_ol = lookup("ol");
    tag_dir = lookup("dir");
    tag_li = lookup("li");
    tag_dl = lookup("dl");
    tag_dt = lookup("dt");
    tag_dd = lookup("dd");
    tag_td = lookup("td");
    tag_th = lookup("th");
    tag_tr = lookup("tr");
    tag_col = lookup("col");
    tag_br = lookup("br");
    tag_a = lookup("a");
    tag_link = lookup("link");
    tag_b = lookup("b");
    tag_i = lookup("i");
    tag_strong = lookup("strong");
    tag_em = lookup("em");
    tag_big = lookup("big");
    tag_small = lookup("small");
    tag_param = lookup("param");
    tag_option = lookup("option");
    tag_optgroup = lookup("optgroup");
    tag_img = lookup("img");
    tag_map = lookup("map");
    tag_area = lookup("area");
    tag_nobr = lookup("nobr");
    tag_wbr = lookup("wbr");
    tag_font = lookup("font");
    tag_spacer = lookup("spacer");
    tag_layer = lookup("layer");
    tag_center = lookup("center");
    tag_style = lookup("style");
    tag_script = lookup("script");
    tag_noscript = lookup("noscript");
    tag_table = lookup("table");
    tag_caption = lookup("caption");
    tag_form = lookup("form");
    tag_textarea = lookup("textarea");
    tag_blockquote = lookup("blockquote");
    tag_applet = lookup("applet");
    tag_object = lookup("object");
    tag_div = lookup("div");
    tag_span = lookup("span");

    /* create dummy entry for all xml tags */
    xml_tags = (Dict *)MemAlloc(sizeof(*xml_tags));

    /* install() allows for MemAlloc() returning NULL, so do the same here */
    if (xml_tags != NULL)
    {
        xml_tags->next = NULL;   /* not part of any hash chain */
        xml_tags->name = NULL;
        xml_tags->versions = VERS_ALL;
        xml_tags->model = CM_BLOCK;
        xml_tags->parser = NULL;
        xml_tags->chkattrs = NULL;
    }
}

/*
  Release everything allocated by InitTags() and the Define*Tag()
  functions: the xml_tags dummy entry and every entry (with its
  name string) in the hash table.

  xml_tags and all buckets are reset to NULL afterwards, so calling
  this function again is harmless. The tag_* shortcut pointers are
  NOT reset here; they point at freed entries until InitTags() runs
  again.
*/
void FreeTags(void)
{
    Dict *entry, *following;
    int i;

    if (xml_tags != NULL)
    {
        MemFree(xml_tags);
        xml_tags = NULL;   /* avoid a dangling pointer / double free */
    }

    for (i = 0; i < HASHSIZE; ++i)
    {
        entry = taghash[i];

        while (entry != NULL)
        {
            /* remember the successor before the entry is freed */
            following = entry->next;
            MemFree(entry->name);
            MemFree(entry);
            entry = following;
        }

        taghash[i] = NULL;
    }
}
