/* retawq/parser.c - content parsing (HTML, ...)
   This file is part of retawq (<http://retawq.sourceforge.net/>), a network
   client created by Arne Thomassen; retawq is basically released under certain
   versions of the GNU General Public License and WITHOUT ANY WARRANTY.
   Read the file COPYING for license details, README for program information.
   Copyright (C) 2001-2005 Arne Thomassen <arne@arne-thomassen.de>
*/

#include "stuff.h"
#include "parser.h"

declare_local_i18n_buffer
/* Debugging aids; they only exist if the program is built with CONFIG_DEBUG.
   NOTE(review): fd_parsertest is presumably the descriptor of a parser test
   log file which is opened elsewhere - confirm. */
#if CONFIG_DEBUG
static const_after_init int fd_parsertest;
static char debugstrbuf[STRBUF_SIZE];
/* prsdbg(): hands <msg> to my_write_str() together with fd_parsertest */
#define prsdbg(msg) my_write_str(fd_parsertest, msg)
#endif

/* String constants which are referenced by name in the tables and functions
   of this file (tag names, attribute names, entity names). */
static const char strCommentTag[] = "!--", strSingleQuote[] = "'",
  strPipe[] = "|", strHref[] = "href", strType[] = "type",
  strName[] = "name", strValue[] = "value", strDisabled[] = "disabled",
  strTitle[] = "title", strStyle[] = "style", strReadonly[] = "readonly",
  strAlt[] = "alt", strLabel[] = "label", strSize[] = "size",
  strCenter[] = "center", strP[] = "p", strPi[] = "pi", strOtimes[] = "otimes";
/* "times" is the tail of "otimes", so it simply points one character into
   strOtimes instead of occupying storage of its own */
#define strTimes (strOtimes + 1)

/* names which are used both as character entity names and as entries of the
   entity suffix list (strSup: also a tag name and the "sup" entity prefix) */
static const char strAcute[] = "acute", strCedil[] = "cedil",
  strCirc[] = "circ", strTilde[] = "tilde", strUml[] = "uml", strSup[] = "sup";

/* Per-tag property flags; each value is a distinct bit, and the entries of
   tagdata[] combine them with "|". The flags are queried with htk_has_flag()
   and its wrappers (e.g. htk_forbids_endtag() tests htfForbidEndtag,
   htk_is_block() tests htfBlock).
   NOTE(review): judging by the names, the three ...Endtag flags say whether a
   closing tag is required/optional/forbidden for the element - confirm
   against the code which evaluates them. */
my_enum1 enum
{ htfNone = 0, htfRequireEndtag = 0x01, htfAllowEndtag = 0x02,
  htfForbidEndtag = 0x04, htfForbidPre = 0x08, htfBlock = 0x10,
  htfPar = 0x20, htfSoakUpText = 0x40
} my_enum2(unsigned char) tHtmlTagFlags;

/* begin-autogenerated */
#define TAGOFFSET (1)
#define NUM_TAGDATA (66)
static const struct
{ const char* name; /* (sorted in alphabetical order) */
  tHtmlTagFlags flags;
} tagdata[NUM_TAGDATA] =
{ { strA, htfRequireEndtag },
  { "address", htfRequireEndtag | htfBlock | htfPar },
  { "area", htfForbidEndtag },
  { "b", htfRequireEndtag },
  { "big", htfRequireEndtag },
  { "blockquote", htfRequireEndtag | htfBlock | htfPar },
  { "body", htfAllowEndtag },
  { "br", htfForbidEndtag },
  { strButton, htfRequireEndtag },
  { "caption", htfRequireEndtag | htfPar },
  { strCenter, htfRequireEndtag | htfBlock | htfPar }, /* deprecated */
  { "cite", htfRequireEndtag },
  { "dd", htfAllowEndtag | htfBlock },
  { "del", htfRequireEndtag },
  { "dfn", htfRequireEndtag },
  { "dir", htfRequireEndtag | htfBlock | htfPar }, /* deprecated */
  { "div", htfRequireEndtag | htfBlock | htfPar },
  { "dl", htfRequireEndtag | htfBlock | htfPar },
  { "dt", htfAllowEndtag | htfBlock },
  { "em", htfRequireEndtag },
  { "fieldset", htfRequireEndtag | htfBlock | htfPar },
  { "font", htfRequireEndtag }, /* deprecated */
  { "form", htfRequireEndtag | htfBlock | htfPar },
  { "frame", htfForbidEndtag | htfBlock },
  { "frameset", htfRequireEndtag | htfBlock | htfPar },
  { "h1", htfRequireEndtag | htfBlock | htfPar },
  { "h2", htfRequireEndtag | htfBlock | htfPar },
  { "h3", htfRequireEndtag | htfBlock | htfPar },
  { "h4", htfRequireEndtag | htfBlock | htfPar },
  { "h5", htfRequireEndtag | htfBlock | htfPar },
  { "h6", htfRequireEndtag | htfBlock | htfPar },
  { "head", htfAllowEndtag | htfForbidPre },
  { "hr", htfForbidEndtag | htfBlock },
  { strHtml, htfAllowEndtag },
  { "i", htfRequireEndtag },
  { "iframe", htfForbidEndtag | htfBlock },
  { "img", htfForbidEndtag },
  { "input", htfForbidEndtag },
  { "li", htfAllowEndtag | htfBlock },
  { "menu", htfRequireEndtag | htfBlock | htfPar }, /* deprecated */
  { "meta", htfForbidEndtag },
  { "noframes", htfRequireEndtag | htfBlock | htfPar },
  { "noscript", htfRequireEndtag | htfBlock | htfPar },
  { "object", htfRequireEndtag },
  { "ol", htfRequireEndtag | htfBlock | htfPar },
  { "optgroup", htfRequireEndtag },
  { "option", htfAllowEndtag | htfSoakUpText | htfForbidPre },
  { strP, htfAllowEndtag | htfBlock | htfPar },
  { "pre", htfRequireEndtag | htfBlock | htfPar },
  { "q", htfRequireEndtag }, /* stolen from J.B. & the next generation :-) */
  { "s", htfRequireEndtag }, /* deprecated */
  { "script", htfRequireEndtag | htfSoakUpText },
  { strSelect, htfRequireEndtag },
  { "strike", htfRequireEndtag }, /* deprecated */
  { "strong", htfRequireEndtag },
  { strStyle, htfRequireEndtag | htfSoakUpText },
  { "sub", htfRequireEndtag },
  { strSup, htfRequireEndtag },
  { "table", htfRequireEndtag | htfBlock | htfPar },
  { "td", htfAllowEndtag },
  { "textarea", htfRequireEndtag | htfSoakUpText },
  { "th", htfAllowEndtag },
  { strTitle, htfRequireEndtag | htfSoakUpText | htfForbidPre },
  { "tr", htfAllowEndtag | htfPar },
  { "u", htfRequireEndtag }, /* deprecated */
  { "ul", htfRequireEndtag | htfBlock | htfPar }
};

#if OPTION_CED > 0
#define NUM_CHENT (141)
#define CED01(ced0, ced1) ced1
#define DECLCED1(name, ced1) { name, ced1 },
#define CODE01(index0, index1) index1
#else
#define NUM_CHENT (89)
#define CED01(ced0, ced1) ced0
#define DECLCED1(name, ced1)
#define CODE01(index0, index1) index0
static const char chentcode_rerouter[] = "AaCcEeIiNnOoUuYy";
#endif

#define MAXLEN_CHENT (6)
static
#if OPTION_CED < 2
  const
#else
  const_after_init
#endif
  struct
{ const char* name; /* (sorted in strcmp() order) */
  const char* result;
} chent[NUM_CHENT] =
{ DECLCED1("AElig", "") /*0*/
  DECLCED1("Aacute", "")
  DECLCED1("Acirc", "")
  DECLCED1("Agrave", "")
  DECLCED1("Aring", "")
  DECLCED1("Atilde", "") /*5*/
  { "Auml", CED01("Ae", "") },
  DECLCED1("Ccedil", "")
  { "Dagger", strHm },
  { "ETH", CED01("DH", "") },
  DECLCED1("Eacute", "") /*10*/
  DECLCED1("Ecirc", "")
  DECLCED1("Egrave", "")
  DECLCED1("Euml", "")
  DECLCED1("Iacute", "")
  DECLCED1("Icirc", "") /*15*/
  DECLCED1("Igrave", "")
  DECLCED1("Iuml", "")
  DECLCED1("Ntilde", "")
  DECLCED1("Oacute", "")
  DECLCED1("Ocirc", "") /*20*/
  DECLCED1("Ograve", "")
  DECLCED1("Oslash", "")
  DECLCED1("Otilde", "")
  { "Ouml", CED01("Oe", "") },
  { "Prime", "''" }, /*25*/
  { "THORN", CED01("P", "") },
  DECLCED1("Uacute", "")
  DECLCED1("Ucirc", "")
  DECLCED1("Ugrave", "")
  { "Uuml", CED01("Ue", "") }, /*30*/
  DECLCED1("Yacute", "")
  DECLCED1("aacute", "")
  DECLCED1("acirc", "")
  { strAcute, CED01(strSingleQuote, "") },
  DECLCED1("aelig", "") /*35*/
  DECLCED1("agrave", "")
  { "amp", "&" },
  { "apos", strSingleQuote },
  DECLCED1("aring", "")
  DECLCED1("atilde", "") /*40*/
  { "auml", CED01("ae", "") },
  { "bdquo", strDoubleQuote },
  DECLCED1("beta", "")
  { "boxv", strPipe },
  { "brkbar", strPipe }, /*45*/
  { "brvbar", strPipe },
  { "bull", "o" },
  DECLCED1("ccedil", "")
  { strCedil, CED01(",", "") },
  { "cent", CED01("-c-", "") }, /*50*/
  { strCirc, "^" },
  { "copy", CED01("(C)", "") },
  { "curren", CED01("CUR", "") },
  { "darr", "v" },
  { "deg", CED01("DEG", "") }, /*55*/
  { "divide", CED01("/", "") },
  DECLCED1("eacute", "")
  DECLCED1("ecirc", "")
  DECLCED1("egrave", "")
  { "eth", CED01("dh", "") }, /*60*/
  DECLCED1("euml", "")
  { "euro", "EUR" }, /* inconsequent, but ISO */
  { "frac12", CED01(" 1/2", "") },
  { "frac14", CED01(" 1/4", "") },
  { "frac34", CED01(" 3/4", "") }, /*65*/
  { "frasl", "/" },
  { "ge", ">=" },
  { "gt", strGt },
  { "hArr", "<=>" },
  { "harr", "<->" }, /*70*/
  DECLCED1("iacute", "")
  DECLCED1("icirc", "")
  { "iexcl", CED01("!", "") },
  DECLCED1("igrave", "")
  { "iquest", CED01(strQm, "") }, /*75*/
  DECLCED1("iuml", "")
  { "lArr", "<=" },
  { "lang", strLt },
  { "laquo", strDoubleQuote },
  { "larr", "<-" }, /*80*/
  { "ldquo", strDoubleQuote },
  { "le", "<=" },
  { "lsaquo", strSingleQuote },
  { "lsquo", strSingleQuote },
  { "lt", strLt }, /*85*/
  { "mdash", strMinus },
  { "middot", CED01(".", "") },
  { "minus", strMinus },
  { "nbsp", strSpace }, /* FIXME! */
  { "ndash", strMinus }, /*90*/
  { "ne", "!=" }, /* CHECKME! */
  { "not", CED01("NOT", "") }, /* CHECKME! */
  DECLCED1("ntilde", "")
  DECLCED1("oacute", "")
  DECLCED1("ocirc", "") /*95*/
  DECLCED1("ograve", "")
  { "oplus", "+" },
  { "ordf", CED01("-a", "") },
  { "ordm", CED01("-o", "") },
  DECLCED1("oslash", "") /*100*/
  DECLCED1("otilde", "")
  { strOtimes, strAsterisk }, /* CHECKME! */
  { "ouml", CED01("oe", "") },
  { "para", CED01("par.", "") }, /* CHECKME! */
  { "permil", "o/oo" }, /*105*/
  { strPi, strPi },
  { "plusmn", CED01("+/-", "") },
  { "pound", CED01("-L-", "") },
  { "prime", strSingleQuote },
  { "quot", strDoubleQuote }, /*110*/
  { "rArr", "=>" },
  { "rang", strGt },
  { "raquo", strDoubleQuote },
  { "rarr", "->" },
  { "rdquo", strDoubleQuote }, /*115*/
  { "reg", CED01("(R)", "") },
  { "rsaquo", strSingleQuote },
  { "rsquo", strSingleQuote },
  { "sbquo", strSingleQuote },
  { "sdot", CED01(strAsterisk, "") }, /*120*/ /* CHECKME! */
  { "sect", CED01("sect.", "") }, /* CHECKME! */
  { "shy", strEmpty },
  { "sim", "~" },
  { "sup1", CED01("^1", "") },
  { "sup2", CED01("^2", "") }, /*125*/
  { "sup3", CED01("^3", "") },
  { "szlig", CED01("ss", "") },
  { "thorn", CED01(strP, "") },
  { strTilde, "~" },
  { strTimes, CED01(strAsterisk, "") }, /*130*/
  { "trade", "(tm)" },
  DECLCED1("uacute", "")
  { "uarr", "^" },
  DECLCED1("ucirc", "")
  DECLCED1("ugrave", "") /*135*/
  { strUml, CED01(strSpace, "") },
  { "uuml", CED01("ue", "") },
  DECLCED1("yacute", "")
  { "yen", CED01("-Y-", "") },
  DECLCED1("yuml", "")
};

typedef unsigned short tEntityCode;
#define NUM_CHENTCODE (139)
static const struct
{ tEntityCode code; /* (sorted in numerical order) */
  signed short index; /* chent[] or chentcode_rerouter[] index */
} chentcode[NUM_CHENTCODE] =
{ { 34, CODE01(63, 110) },
  { 38, CODE01(8, 37) },
  { 39, CODE01(9, 38) },
  { 60, CODE01(44, 85) },
  { 62, CODE01(31, 68) },
  { 160, CODE01(48, 89) },
  { 161, CODE01(34, 73) },
  { 162, CODE01(17, 50) },
  { 163, CODE01(61, 108) },
  { 164, CODE01(20, 53) },
  { 165, CODE01(88, 139) },
  { 166, CODE01(14, 46) },
  { 167, CODE01(74, 121) },
  { 168, CODE01(86, 136) },
  { 169, CODE01(19, 52) },
  { 170, CODE01(53, 98) },
  { 171, CODE01(38, 79) },
  { 172, CODE01(51, 92) },
  { 173, CODE01(75, 122) },
  { 174, CODE01(69, 116) },
  { 176, CODE01(22, 55) },
  { 177, CODE01(60, 107) },
  { 178, CODE01(78, 125) },
  { 179, CODE01(79, 126) },
  { 180, CODE01(7, 34) },
  { 182, CODE01(57, 104) },
  { 183, CODE01(46, 87) },
  { 184, CODE01(16, 49) },
  { 185, CODE01(77, 124) },
  { 186, CODE01(54, 99) },
  { 187, CODE01(66, 113) },
  { 188, CODE01(27, 64) },
  { 189, CODE01(26, 63) },
  { 190, CODE01(28, 65) },
  { 191, CODE01(35, 75) },
  { 192, CODE01(-1, 3) },
  { 193, CODE01(-1, 1) },
  { 194, CODE01(-1, 2) },
  { 195, CODE01(-1, 5) },
  { 196, CODE01(0, 6) },
  { 197, CODE01(-1, 4) },
  { 198, CODE01(-1, 0) },
  { 199, CODE01(-3, 7) },
  { 200, CODE01(-5, 12) },
  { 201, CODE01(-5, 10) },
  { 202, CODE01(-5, 11) },
  { 203, CODE01(-5, 13) },
  { 204, CODE01(-7, 16) },
  { 205, CODE01(-7, 14) },
  { 206, CODE01(-7, 15) },
  { 207, CODE01(-7, 17) },
  { 208, CODE01(2, 9) },
  { 209, CODE01(-9, 18) },
  { 210, CODE01(-11, 21) },
  { 211, CODE01(-11, 19) },
  { 212, CODE01(-11, 20) },
  { 213, CODE01(-11, 23) },
  { 214, CODE01(3, 24) },
  { 215, CODE01(83, 130) },
  { 216, CODE01(-11, 22) },
  { 217, CODE01(-13, 29) },
  { 218, CODE01(-13, 27) },
  { 219, CODE01(-13, 28) },
  { 220, CODE01(6, 30) },
  { 221, CODE01(-15, 31) },
  { 222, CODE01(5, 26) },
  { 223, CODE01(80, 127) },
  { 224, CODE01(-2, 36) },
  { 225, CODE01(-2, 32) },
  { 226, CODE01(-2, 33) },
  { 227, CODE01(-2, 40) },
  { 228, CODE01(10, 41) },
  { 229, CODE01(-2, 39) },
  { 230, CODE01(-2, 35) },
  { 231, CODE01(-4, 48) },
  { 232, CODE01(-6, 59) },
  { 233, CODE01(-6, 57) },
  { 234, CODE01(-6, 58) },
  { 235, CODE01(-6, 61) },
  { 236, CODE01(-8, 74) },
  { 237, CODE01(-8, 71) },
  { 238, CODE01(-8, 72) },
  { 239, CODE01(-8, 76) },
  { 240, CODE01(24, 60) },
  { 241, CODE01(-10, 93) },
  { 242, CODE01(-12, 96) },
  { 243, CODE01(-12, 94) },
  { 244, CODE01(-12, 95) },
  { 245, CODE01(-12, 101) },
  { 246, CODE01(56, 103) },
  { 247, CODE01(23, 56) },
  { 248, CODE01(-12, 100) },
  { 249, CODE01(-14, 135) },
  { 250, CODE01(-14, 132) },
  { 251, CODE01(-14, 134) },
  { 252, CODE01(87, 137) },
  { 253, CODE01(-16, 138) },
  { 254, CODE01(81, 128) },
  { 255, CODE01(-16, 140) },
  { 710, CODE01(18, 51) },
  { 732, CODE01(82, 129) },
  { 960, CODE01(59, 106) },
  { 8211, CODE01(49, 90) },
  { 8212, CODE01(45, 86) },
  { 8216, CODE01(43, 84) },
  { 8217, CODE01(71, 118) },
  { 8218, CODE01(72, 119) },
  { 8220, CODE01(40, 81) },
  { 8221, CODE01(68, 115) },
  { 8222, CODE01(11, 42) },
  { 8225, CODE01(1, 8) },
  { 8226, CODE01(15, 47) },
  { 8240, CODE01(58, 105) },
  { 8242, CODE01(62, 109) },
  { 8243, CODE01(4, 25) },
  { 8249, CODE01(42, 83) },
  { 8250, CODE01(70, 117) },
  { 8260, CODE01(29, 66) },
  { 8364, CODE01(25, 62) },
  { 8482, CODE01(84, 131) },
  { 8592, CODE01(39, 80) },
  { 8593, CODE01(85, 133) },
  { 8594, CODE01(67, 114) },
  { 8595, CODE01(21, 54) },
  { 8596, CODE01(33, 70) },
  { 8656, CODE01(36, 77) },
  { 8658, CODE01(64, 111) },
  { 8660, CODE01(32, 69) },
  { 8722, CODE01(47, 88) },
  { 8764, CODE01(76, 123) },
  { 8800, CODE01(50, 91) },
  { 8804, CODE01(41, 82) },
  { 8805, CODE01(30, 67) },
  { 8853, CODE01(52, 97) },
  { 8855, CODE01(55, 102) },
  { 8901, CODE01(73, 120) },
  { 9001, CODE01(37, 78) },
  { 9002, CODE01(65, 112) },
  { 9474, CODE01(12, 44) }
};

/* character entity suffix handling */
enum { csiMax=13, csiDash=9, csiSpace=10 }; /* "csi": chentsuffix[] index */
static const char* const chentsuffix[csiMax + 1] =
{ strAcute, strCedil, strCirc, "grave", "lig", "ring", "slash", strTilde,
  strUml, "dash", "sp", "caron", "comma", "cy"
};
static const unsigned char chentsuffixlen[csiMax + 1] =
{ 5, 5, 4, 5, 3, 4, 5, 5, 3, 4, 2, 5, 5, 2 };

static const struct
{ const char* str; /* (sorted in alphabetical order) */
  tAttributeName an;
} attrdata[] =
{ { "action", anAction },
  { "align", anAlign },
  { strAlt, anAlt },
  { "checked", anChecked },
  { "class", anClass },
  { "color", anColor },
  { "content", anContent },
  { "declare", anDeclare },
  { strDisabled, anDisabled },
  { "enctype", anEnctype },
  { "face", anFace },
  { strHref, anHref },
  { "http-equiv", anHttpEquiv },
  { "id", anId },
  { strLabel, anLabel },
  { "language", anLanguage },
  { "maxlength", anMaxlength },
  { "media", anMedia },
  { "method", anMethod },
  { "multiple", anMultiple },
  { strName, anName },
  { strReadonly, anReadonly },
  { "selected", anSelected },
  { strSize, anSize },
  { "src", anSrc },
  { strStyle, anStyle },
  { strTitle, anTitle },
  { strType, anType },
  { strValue, anValue },
  { "width", anWidth }
};

/* which HTML attributes need value conversion (bitfield) */
static const unsigned char attrvalueconv[5] = { 106, 12, 145, 251, 1 };
/* end-autogenerated */

/* States of the HTML parser ("hps"). The values are consecutive, starting at
   0, so that they can be used as indices into hps_name[]; MAX_HPS must always
   equal the highest state value. */
enum
{ hpsText = 0, hpsTag = 1, hpsAttrName = 2, hpsEquals = 3, hpsAttrValue = 4,
  hpsDone = 5, hpsComment1 = 6, hpsComment2 = 7, hpsComment3 = 8
};
typedef unsigned char tHtmlParserState;
#define MAX_HPS (8)

#if CONFIG_DEBUG
/* human-readable state names for debugging output, indexed by state value */
static const char* const hps_name[MAX_HPS + 1] =
{ strText, "tag", "attrname", "=", "attrvalue", "done", "comment1", "comment2",
  "comment3"
};
#endif

/* File-scope parser state; these variables are shared by the functions of
   this file. */

static tHtmlParserState state; /* current parser state (a hps... value) */
static tAttributeName current_attr_name;
static tHtmlTagKind current_tagkind;
static char* current_unknown_tagname;
static tAttribute* current_attributes; /* list which the fada() macro searches */

/* number of used resp. allocated bytes of <buf>; see buf_append() */
static unsigned int bufsize, maxbufsize;

static tBoolean is_endtag, tagblock_ends;
static unsigned char attrvalue_quotes; /* 0=none, 1=single-, 2=double-quotes */

static tCantent* current_cantent;
/* (inside_select is set by use_curraebase() when it handles a <select> tag) */
static tBoolean is_current_node_valid, is_parsing_done, inside_select;
static tHtmlNode *current_node, *current_node_in_tree, *previous_node_in_tree,
  *delayed_node, *select_node;
static const char* dataptr;
static char* buf; /* text buffer which is grown on demand by buf_append() */
static tContentblock *current_block, *lhpp_content;
static size_t current_block_sizeleft, lhpp_byte;
/* curraebase is filled in by use_curraebase(); select_aebase receives a copy
   of it when a <select> tag is handled there */
static tActiveElementBase curraebase, select_aebase, *aebase;
static tActiveElementNumber aenum, aemax;

/* non-zero if <ch> is a space, tab, newline or carriage return; note that the
   argument is evaluated up to four times, so it must not have side effects */
#define IS_WHITESPACE(ch) \
  ( ((ch) == ' ') || ((ch) == '\t') || ((ch) == '\n') || ((ch) == '\r') )

static void buf_append(const char ch)
/* stores <ch> at the end of the text buffer <buf>; if the buffer is already
   full, it is enlarged by 1024 bytes first */
{ if (bufsize >= maxbufsize) /* no room left */
  { maxbufsize = maxbufsize + 1024;
    buf = memory_reallocate(buf, maxbufsize, mapString);
  }
  buf[bufsize] = ch;
  bufsize++;
}

static tHtmlInputLength attr2htmlinputlength(const tAttribute* attr,
  tHtmlInputLength _default)
/* converts the value of the attribute <attr> to an input length which is
   clamped to the range 1..MAX_HTML_INPUT_LENGTH; returns <_default> if there
   is no attribute or if the attribute has no value */
{ const char* text;
  int number;
  if (attr == NULL) return(_default);
  text = attr->value;
  if (text == NULL) return(_default);
  my_atoi(text, &number, NULL, MAX_HTML_INPUT_LENGTH + 1);
  if (number > MAX_HTML_INPUT_LENGTH) number = MAX_HTML_INPUT_LENGTH;
  else if (number < 1) number = 1;
  return((tHtmlInputLength) number);
}

static one_caller tMbsIndex do_lookup_tagkind(void)
/* searches the alphabetically sorted tagdata[] table for the tag name which is
   currently stored in <buf> (compared with strcmp(), so <buf> must be
   NUL-terminated) and returns the table index of the match.
   NOTE(review): the variable <idx> and the result for "not found" come from
   the my_binary_search() macro, which isn't visible here; the caller treats
   any negative result as "not found". */
{ my_binary_search(0, NUM_TAGDATA - 1, strcmp(buf, tagdata[idx].name),
    return(idx))
}

static one_caller tHtmlTagKind lookup_tagkind(void)
/* maps the tag name which is stored in <buf> to its tag kind number; the
   result is htkInvalid if the name isn't listed in the tag table */
{ const tMbsIndex pos = do_lookup_tagkind();
  if (pos < 0) return(htkInvalid); /* unknown tag name */
  return(((tHtmlTagKind) pos) + TAGOFFSET);
}

/* States of interpret_character_entities(): ceisCopy - copying ordinary text;
   ceisKind - an '&' was seen, now deciding between a named and a numeric
   entity; ceisNumkind - "&#" was seen, now deciding between decimal and
   hexadecimal; ceisInside - collecting the entity name/number up to its
   end-point. */
enum { ceisCopy = 0, ceisKind = 1, ceisNumkind = 2, ceisInside = 3 };
typedef unsigned char tCharacterEntityInterpreterState; /* (-: */

/* Kinds of character entities: named ("&amp;"), decimal ("&#38;") and
   hexadecimal ("&#x26;"). */
enum { ekString = 0, ekDecNumber = 1, ekHexNumber = 2 };
typedef unsigned char tEntityKind;

static one_caller tMbsIndex do_lookup_entity_string(const char* str,size_t len)
/* searches the sorted chent[] table for an entry whose name begins with the
   first <len> characters of <str> (<str> need not be NUL-terminated after
   these characters). Because only <len> characters are compared, the entry
   found may have a longer name than <len>; the caller has to check that.
   NOTE(review): <idx> and the "not found" result are provided by the
   my_binary_search() macro, which isn't visible here. */
{ my_binary_search(0, NUM_CHENT - 1, strncmp(str, chent[idx].name, len),
    return(idx))
}

static one_caller tMbsIndex lookup_entity_string(const char* str, size_t len)
/* returns the chent[] index of the entity whose name consists of exactly the
   first <len> characters of <str>; returns a negative value if there is no
   such entity */
{ const tMbsIndex candidate = do_lookup_entity_string(str, len);
  if (candidate < 0) return(candidate); /* not even a prefix match */
  /* The search only compared <len> characters, so the candidate might have a
     longer name; accept it only if the lengths agree. */
  if (strlen(chent[candidate].name) == len) return(candidate);
  return(INVALID_INDEX);
}

#define cec(code1, code2) my_numcmp(code1, code2) /* compare entity codes */

static one_caller tMbsIndex lookup_entity_code(tEntityCode code)
/* searches the numerically sorted chentcode[] table for the character code
   <code> and returns the chentcode[] index of the match.
   NOTE(review): <idx> and the "not found" result are provided by the
   my_binary_search() macro, which isn't visible here; the caller treats any
   negative result as "not found". */
{ my_binary_search(0, NUM_CHENTCODE - 1, cec(code, chentcode[idx].code),
    return(idx))
}

static one_caller tBoolean guessed_entity(const char* str, size_t len,
  char** _dest)
/* makes an educated guess for an unknown character entity name (<str>, <len>
   characters long): for names of the form "sup..." (except "supe"), the part
   after "sup" is used; for names which end with one of the known suffixes
   (chentsuffix[]), the part before the suffix is used - or a minus resp. space
   character for the "dash" and "sp" suffixes. The replacement text is written
   to <*_dest> and <*_dest> is advanced behind it. Returns whether a guess was
   possible; if not, <*_dest> is left untouched. */
{ const char* from = str; /* start of the replacement text */
  size_t count = len; /* length of the replacement text */
  tBoolean found = falsE;
  if (len < 4) return(falsE); /* can't do anything here */
  if ( (strncmp(str, strSup, 3) == 0) && ( (len != 4) || (str[3] != 'e') ) )
  { /* prefix "sup", and it isn't "&supe;": use the rest of the name */
    from = str + 3; count = len - 3; found = truE;
  }
  else /* look for a known suffix */
  { const char* const end = str + len;
    unsigned short idx = 0;
    while ( (!found) && (idx <= csiMax) )
    { const size_t sufflen = (size_t) chentsuffixlen[idx];
      if ( (len > sufflen) &&
           (strncmp(end - sufflen, chentsuffix[idx], sufflen) == 0) )
      { if (idx == csiDash) { from = strMinus; count = 1; }
        else if (idx == csiSpace) { from = strSpace; count = 1; }
        else count = len - sufflen; /* use the text in front of the suffix */
        found = truE;
      }
      idx++;
    }
  }
  if (found)
  { char* dest = *_dest;
    while (count-- > 0) *dest++ = *from++;
    *_dest = dest;
  }
  return(found);
}

static one_caller tBoolean shall_interpret_chents_in_attrvalue(void)
/* returns whether character entities shall be interpreted in the value of the
   current attribute (current_attr_name) of the current tag (current_tagkind);
   the attrvalueconv[] bitfield says so per attribute, with tag-specific
   exceptions */
{ if (current_attr_name >= NUM_ATTRNAMES) return(falsE);
  if (!(my_bit_test(attrvalueconv, current_attr_name))) return(falsE);
  /* The bitfield says "yes", but some tag/attribute combinations are special
     cases. Let's thank the htmlspec writers for this bogosity... */
  if ( ( (current_tagkind == htkMeta) && (current_attr_name == anName) )
#if 0
    /* These aren't yet implemented. */
    || ( (current_tagkind == htkLi) && (current_attr_name == anValue) )
    || ( (current_tagkind == htkSelect) && (current_attr_name == anSize) )
#endif
     )
    return(falsE);
  return(truE);
}

static one_caller void interpret_character_entities(char* origdest,
  const char* src, tBoolean may_trim)
/* copies the NUL-terminated text <src> to <origdest> and replaces character
   entity references on the way: named ("&amp;"), decimal ("&#38;") and
   hexadecimal ("&#x26;") ones. Text which looks like a reference but can't be
   resolved is copied literally. If <may_trim> is set, leading and trailing
   whitespace is removed from the result.
   A reference ends at ';', ' ', '&', at the end of the text or when the length
   limit (MAXLEN_CHENT name characters resp. 4 digits) is reached. Only a ';'
   belongs to the reference itself; any other terminating character is ordinary
   text and is processed as such afterwards - so a space after "&copy" is
   kept, and an '&' may directly start the next reference.
   NOTE(review): <origdest> must be provided by the caller and presumably has
   to be at least as large as <src> including the NUL byte - confirm. */
{ const char *start SHUT_UP_COMPILER(NULL), *start0 SHUT_UP_COMPILER(NULL);
    /* start0: position of the '&'; start: first character of name/number */
  char* dest = origdest;
  tCharacterEntityInterpreterState ceis = ceisCopy;
  tEntityKind kind SHUT_UP_COMPILER(ekString);
  unsigned char lenleft SHUT_UP_COMPILER(0); /* length limit countdown */
  if (may_trim) { while (IS_WHITESPACE(*src)) src++; }
  while (1)
  { char ch = *src;
    unsigned int _code;
    switch (ceis)
    {case ceisCopy: /* the most likely case */
      if (ch == '&') { start0 = src; ceis = ceisKind; }
      else { *dest++ = ch; if (ch == '\0') goto out; }
      break;
     case ceisKind: /* find out the "kind" of the entity */
      if (ch == '#') ceis = ceisNumkind; /* it's some numeric kind */
      else /* it's a string */
      { kind = ekString; start = src; lenleft = MAXLEN_CHENT + 1;
        ceis = ceisInside; goto inside;
      }
      break;
     case ceisNumkind: /* find out whether it's decimal or hex */
      lenleft = 4 + 1; ceis = ceisInside;
      if ( (ch == 'x') || (ch == 'X') ) { kind = ekHexNumber; start = src + 1;}
      else { kind = ekDecNumber; start = src; goto inside; }
      break;
     case ceisInside: /* "inside" the entity */
      inside:
      lenleft--;
      if ( (ch == '\0') || (ch == ';') || (ch == ' ') || (ch == '&') ||
           (!lenleft) )
      { /* found an end-point */
        if (src <= start + 1) /* can't have found anything useful - CHECKME! */
        { postcopy:
          /* Copy the unresolved text literally. A terminating ';' is part of
             that text; any other terminator is handled below. */
          while (start0 < src) *dest++ = *start0++;
          if (ch == ';') *dest++ = ch;
        }
        else if (kind == ekString)
        { size_t len = src - start;
          tMbsIndex idx = lookup_entity_string(start, len);
          if (idx >= 0) /* found in list */
          { const char* temp = chent[idx].result;
            while (*temp) *dest++ = *temp++;
          }
          else if (!guessed_entity(start, len, &dest))
            goto postcopy; /* no idea */
        }
        else if (kind == ekDecNumber)
        { const char* temp = start;
          tMbsIndex idx;
          _code = 0;
          while (temp < src)
          { char c = *temp++;
            if (my_isdigit(c)) _code = 10 * _code + (c - '0');
            else goto postcopy; /* not a decimal number */
          }
          handle_code: /* (also entered from the hexadecimal branch) */
          if (_code > chentcode[NUM_CHENTCODE - 1].code) goto postcopy;
          idx = lookup_entity_code((tEntityCode) _code);
          if (idx < 0)
          { if ( (_code >= 32) &&
#if OPTION_CED == 0
                 (_code < 127)
#else
                 (_code <= 255) && (_code != 127)
#endif
              )
            { *dest++ = (char) _code; } /* interpreted as ASCII code */
            else goto postcopy;
          }
          else
          { const signed short i = chentcode[idx].index;
#if OPTION_CED == 0
            if (i < 0) *dest++ = chentcode_rerouter[-1 - i];
            else
#endif
            { temp = chent[i].result;
              while (*temp) *dest++ = *temp++;
            }
          }
        }
        else if (kind == ekHexNumber)
        { const char* temp = start;
          _code = 0;
          while (temp < src)
          { char c = *temp++;
            unsigned int add;
            if (my_isdigit(c))
            { add = c - '0';
              calc:
              _code = 16 * _code + add;
            }
            else if ( (c >= 'a') && (c <= 'f') )
            { add = c - 'a' + 10; goto calc; }
            else if ( (c >= 'A') && (c <= 'F') )
            { add = c - 'A' + 10; goto calc; }
            else goto postcopy; /* not a hexadecimal number */
          }
          goto handle_code;
        }
        ceis = ceisCopy;
        /* A terminator other than ';' isn't part of the entity, so it must
           not be skipped: look at the same character again, now as ordinary
           text. (This always makes progress because the ceisCopy state
           consumes the character.) The end of the text is handled below. */
        if ( (ch != ';') && (ch != '\0') ) continue;
      }
      break;
    }
    if (ch == '\0') { *dest = '\0'; goto out; }
    else src++;
  }
  out:
  if (may_trim) /* remove trailing whitespace */
  { /* (Working with a length instead of a pointer avoids forming a pointer in
        front of the buffer when the result is empty.) */
    size_t len = strlen(origdest);
    while ( (len > 0) && (IS_WHITESPACE(origdest[len - 1])) )
      origdest[--len] = '\0';
  }
}

static tAttribute* find_and_detach_attribute(tAttribute** list,
  tAttributeName name)
/* looks for the first attribute with the given <name> in the <list>; if there
   is one, it is unlinked from the list (its "next" pointer is cleared) and
   returned; otherwise the list stays as it is and NULL is returned */
{ tAttribute** link = list; /* the pointer which refers to the current entry */
  tAttribute* entry;
  while ( (entry = *link) != NULL )
  { if (entry->name == name)
    { *link = entry->next; /* bypass the entry */
      entry->next = NULL;
      return(entry);
    }
    link = &(entry->next);
  }
  return(NULL);
}

static __my_inline tAttribute* _find_and_detach_attribute(void** list,
  tAttributeName name)
/* find_and_detach_attribute() for callers which only have the list as an
   untyped pointer */
{ tAttribute** const typed_list = (tAttribute**) list; /* nasty casting rubbish */
  return(find_and_detach_attribute(typed_list, name));
}

/* "fada": find and detach attribute. __fada() takes the list variable itself
   (not its address); fada() always works on the attribute list of the tag
   which is currently being parsed (current_attributes). Both yield the
   detached attribute or NULL. */
#define __fada(list, name) find_and_detach_attribute(&list, name)
#define fada(name) __fada(current_attributes, name)

#if CONFIG_JAVASCRIPT
static one_caller tAttribute* fada_js(void** __list)
/* like find_and_detach_attribute(), but looks for the first attribute whose
   name is Javascript-related (is_an_for_javascript()) instead of looking for
   one specific name; returns the detached attribute or NULL */
{ tAttribute** link = (tAttribute**) __list; /* nasty casting rubbish */
  tAttribute* entry;
  while ( (entry = *link) != NULL )
  { if (is_an_for_javascript(entry->name))
    { *link = entry->next; /* bypass the entry */
      entry->next = NULL;
      return(entry);
    }
    link = &(entry->next);
  }
  return(NULL);
}
#endif

/* prepare curraebase: clear the whole structure, then store the element kind.
   Because of the clearing, this must be used _before_ anything else is stored
   in curraebase (e.g. with the moav...() macros below). */
#define set_caeb(_kind) \
  do { my_memclr_var(curraebase); curraebase.kind = _kind; } while (0)

/* move attribute value: curraebase.<dest> takes over the value string of
   <attr>, and the attribute forgets it, so that a later deattr(attr) doesn't
   deallocate the string. <attr> must not be NULL. */
#define __moav(dest, attr) \
  do { curraebase.dest = attr->value; attr->value = NULL; } while (0)
#define moavd(attr) __moav(data, attr)
#define moavr(attr) __moav(render, attr)

/* deallocate an attribute and all associated data; __deattr() requires a
   non-NULL attribute, deattr() silently accepts NULL. The argument is
   evaluated more than once, so it should be a plain variable. */
#define __deattr(a) do { __dealloc(a->value); memory_deallocate(a); } while (0)
#define deattr(a) do { if (a != NULL) __deattr(a); } while (0)

/* Maps the values of the "type" attribute of <input> tags to active-element
   kinds. The table is looked up with a binary search
   (do_lookup_input_type()), so the entries must stay sorted by name and
   NUM_INPUT_TYPE must match the number of entries. */
#define NUM_INPUT_TYPE (10)
static const struct
{ const char* name; /* (sorted in alphabetical order) */
  tActiveElementKind kind;
} input_type[NUM_INPUT_TYPE] =
{ { strButton, aekFormButton },
  { strCheckbox, aekFormCheckbox },
  { strFile, aekFormFile },
  { "hidden", aekFormHidden },
  { strImage, aekFormImage },
  { "password", aekFormPassword },
  { "radio", aekFormRadio },
  { strReset, aekFormReset },
  { strSubmit, aekFormSubmit },
  { strText, aekFormText }
};

static one_caller tMbsIndex do_lookup_input_type(const char* str)
/* searches the sorted input_type[] table for the type name <str> and returns
   the table index of the match.
   NOTE(review): the comparison is done by streqcase3(), presumably a
   case-insensitive three-way comparison; <idx> and the "not found" result are
   provided by the my_binary_search() macro. Neither is visible here; the
   caller treats any negative result as "not found". */
{ my_binary_search(0, NUM_INPUT_TYPE - 1, streqcase3(str,
    input_type[idx].name), return(idx))
}

static one_caller tActiveElementKind lookup_input_type(const tAttribute* attr)
/* determines the active-element kind for an <input> tag from its "type"
   attribute <attr>; a missing attribute or an absent/empty value results in
   aekFormText, an unrecognized type name in aekUnknown */
{ const char* av;
  tMbsIndex idx;
  if (attr == NULL) return(aekFormText); /* htmlspec default */
  av = attr->value;
  if ( (av == NULL) || (*av == '\0') ) return(aekFormText); /* ditto */
  idx = do_lookup_input_type(av);
  if (idx < 0) return(aekUnknown);
  return(input_type[idx].kind);
}

#if CONFIG_JAVASCRIPT
static one_caller tMbsIndex lookup_javascript_event(void)
/* checks whether the name which is stored in <buf> is a Javascript event
   handler attribute name ("on..."): the name must have at least
   JAVASCRIPT_MIN_EVENT_NAME_LENGTH characters and begin with "on" (in any
   letter case); the rest of the name is then searched in strJek[] with a
   case-sensitive comparison. Returns the strJek[] index of the match, or
   INVALID_INDEX if the name is too short or doesn't begin with "on".
   NOTE(review): "bufsize - 1" presumably excludes a trailing NUL byte which
   is counted in bufsize - confirm; <idx> and the "not found" result of the
   search are provided by the my_binary_search() macro, which isn't visible
   here. */
{ const char* str;
  size_t len = bufsize - 1;
  if ( (len < JAVASCRIPT_MIN_EVENT_NAME_LENGTH) ||
       (my_tolower(buf[0]) != 'o') || (my_tolower(buf[1]) != 'n') )
    return(INVALID_INDEX);
  str = buf + 2; /* skip the "on" prefix */
  my_binary_search(0, JAVASCRIPT_MAX_EVENT_CODE, strcmp(str, strJek[idx]),
    return(idx))
}
#endif

static one_caller tBoolean use_curraebase(void)
/* checks whether an active-element base should be created for the current node
   and prepares that if so: depending on the current tag kind, the relevant
   attributes are detached from current_attributes, their values are moved into
   curraebase where appropriate, and the detached attributes are deallocated.
   Returns truE if curraebase has been set up for a new active element.
   For <select> tags, curraebase is copied to select_aebase and inside_select
   is set, but the return value is falsE.
   Note: set_caeb() clears curraebase, so in every case it must be called
   before any attribute value is moved into curraebase. */
{ tBoolean retval = falsE; /* the most likely result */
  switch (current_tagkind)
  { case htkA: case htkArea:
    { tAttribute* h = fada(anHref);
      if ( (h != NULL) && (h->value != NULL) && (h->value[0] != '\0') )
      { set_caeb(aekLink); moavd(h); retval = truE; }
      deattr(h);
    }
    break;
    case htkFrame: case htkIframe:
    { tAttribute *s = fada(anSrc), *t = fada(anTitle);
      if ( (s != NULL) && (s->value != NULL) && (s->value[0] != '\0') )
      { char* tv;
        set_caeb(aekLink); moavd(s); retval = truE;
        /* use the title as link text if there is one */
        if ( (t != NULL) && ( (tv = t->value) != NULL ) && (*tv != '\0') )
          t->value = NULL; /* detach */
        else tv = my_strdup(_("[a frame]"));
        curraebase.render = tv;
      }
      deattr(s); deattr(t);
    }
    break;
    case htkInput:
    { tAttribute *t = fada(anType), *n = fada(anName), *v = fada(anValue),
        *s = fada(anSize), *m = fada(anMaxlength), *a = fada(anAlt),
        *ch = fada(anChecked), *di = fada(anDisabled), *re = fada(anReadonly);
      tActiveElementKind kind = lookup_input_type(t);
      if (kind != aekUnknown)
      { tActiveElementFlags flags = aefNone;
        const char* render;
        set_caeb(kind);
        if ( (n != NULL) && (n->value != NULL) && (n->value[0] != '\0') )
          moavd(n);
        switch (kind)
        { case aekFormSubmit:
           render = _("Submit");
           handle_render: /* (shared by all button-like kinds) */
           if ( (v != NULL) && (v->value != NULL) && (v->value[0] != '\0') )
           { render = v->value; v->value = NULL; } /* explicit value given */
           else render = my_strdup(render);
           curraebase.render = render; break;
          case aekFormReset:
           render = _("Reset"); goto handle_render; /*@notreached@*/ break;
          case aekFormButton:
           render = _("[a push button]"); goto handle_render; /*@notreached@*/
           break;
          case aekFormImage:
           if ( (a != NULL) && (a->value != NULL) && (a->value[0] != '\0') )
           { curraebase.render = a->value; a->value = NULL; }
           else curraebase.render = my_strdup(_("[a form image]"));
           break;
          case aekFormText: case aekFormPassword: case aekFormRadio:
          case aekFormHidden: /* case aekFormFile: */
           /* Privacy Note: for aekFormFile, we don't store the default value
              because storing it might lead to an unwanted transmission of
              local file contents (if the user submits the form without
              recognizing that there is an aekFormFile element in it). See e.g.
              RFC1867, 8. */
           if ( (v != NULL) && (v->value != NULL) && (v->value[0] != '\0') )
             moavr(v); /* default text */
           break;
        }

        if (has_input_length(kind))
        { tHtmlInputLength max = attr2htmlinputlength(m,MAX_HTML_INPUT_LENGTH),
            size = attr2htmlinputlength(s, 20);
          if (size > max) size = max;
          curraebase.size = size; curraebase.maxlength = max;
        }

        /* (For these attributes, only the presence matters, not the value.) */
        if (ch != NULL) flags |= aefCheckedSelected;
        if (di != NULL) flags |= aefDisabled;
        if (re != NULL) flags |= aefReadonly;
        curraebase.flags = flags; retval = truE;
      }
      deattr(t); deattr(n); deattr(v); deattr(s); deattr(m); deattr(a);
      deattr(ch); deattr(di); deattr(re);
    }
    break;
    case htkTextarea:
    { tAttribute *n = fada(anName), *di = fada(anDisabled),
        *re = fada(anReadonly);
      tActiveElementFlags flags = aefNone;
      set_caeb(aekFormText);
      if ((n != NULL) && (n->value != NULL) && (n->value[0] != '\0')) moavd(n);
      if (di != NULL) flags |= aefDisabled;
      if (re != NULL) flags |= aefReadonly;
      curraebase.flags = flags; curraebase.size = 20; retval = truE;
      deattr(n); deattr(di); deattr(re);
    }
    break;
    case htkButton:
    { tAttribute *t = fada(anType), *n = fada(anName), *v = fada(anValue),
        *di = fada(anDisabled);
      tActiveElementKind kind = aekFormSubmit; /* htmlspec default */
      const char* temp;
      if ( (t != NULL) && ( (temp = t->value) != NULL ) )
      { if (streqcase(temp, strButton)) kind = aekFormButton;
        else if (streqcase(temp, strReset)) kind = aekFormReset;
        else if (!streqcase(temp, strSubmit)) kind = aekUnknown;
      }
      if (kind != aekUnknown)
      { tActiveElementFlags flags = aefButtonTag;
        if (di != NULL) flags |= aefDisabled;
        set_caeb(kind);
        /* The name must be stored _after_ set_caeb() because that clears
           curraebase; storing it earlier would lose (and leak) the name. For
           an unknown button type, the name simply stays with its attribute
           and is deallocated below. */
        if ( (n != NULL) && (n->value != NULL) && (n->value[0] != '\0') )
          moavd(n);
        if (v != NULL) moavr(v);
        curraebase.flags = flags; retval = truE;
      }
      deattr(t); deattr(n); deattr(v); deattr(di);
    }
    break;
    case htkSelect:
    { tAttribute *n = fada(anName), *mu = fada(anMultiple),
        *di = fada(anDisabled);
      tActiveElementFlags flags = aefNone;
      set_caeb(aekFormSelect);
      if ((n != NULL) && (n->value != NULL) && (n->value[0] != '\0')) moavd(n);
      if (mu != NULL) flags |= aefMultiple;
      if (di != NULL) flags |= aefDisabled;
      curraebase.flags = flags;
      select_aebase = curraebase; inside_select = truE;
      /* retval = falsE; -- yes, not "truE" here; this one is special... */
      deattr(n); deattr(mu); deattr(di);
    }
    break;
  }
  return(retval);
}

static my_inline __sallocator tAttribute* __callocator
  create_attribute(const tAttributeName name)
/* allocates a new attribute record via memory_allocate() and labels it with
   the given <name>; no other field is touched here */
{ tAttribute* attr;
  attr = (tAttribute*) memory_allocate(sizeof(*attr), mapOther);
  attr->name = name;
  return(attr);
}

static my_inline void deallocate_attributes(const tAttribute* a)
/* walks the attribute list starting at <a> and releases every element */
{ const tAttribute* following;
  for (; a != NULL; a = following)
  { following = a->next; /* fetch the link before the element goes away */
    __deattr(a);
  }
}

void deallocate_html_node(const tHtmlNode* node)
/* releases the <node> together with its payload; the payload is an attribute
   list for every node kind except htkText, where it is a plain string */
{ if (node->kind != htkText)
  { deallocate_attributes((const tAttribute*) (node->data)); }
  else
  { __dealloc((const char*) (node->data)); }
  memory_deallocate(node);
}

void deallocate_one_aebase(const tActiveElementBase* aeb)
/* releases everything the active-element base <aeb> refers to (but not the
   <aeb> structure itself) */
{ __dealloc(aeb->data);
  if (aeb->kind == aekFormSelect)
  { /* for a <select>, "render" is really the head of the option list */
    const tHtmlOption *opt = (const tHtmlOption*) aeb->render, *following;
    for (; opt != NULL; opt = following)
    { following = opt->next;
      __dealloc(opt->value);
      __dealloc(opt->render);
      memory_deallocate(opt);
    }
  }
  else
  { __dealloc(aeb->render); } /* the simple case */
#if CONFIG_JAVASCRIPT
  javascript_remove_ehs(aeb->eh);
#endif
}

static my_inline tBoolean htk_has_flag(const tHtmlTagKind kind,
  const tHtmlTagFlags flag)
/* tells whether the tagdata[] entry belonging to <kind> has the <flag> set;
   kinds which have no tagdata[] entry never have any flag */
{ if ( (kind < TAGOFFSET) || (kind >= TAGOFFSET + NUM_TAGDATA) )
    return(falsE); /* outside of the table */
  return(cond2boolean(tagdata[kind - TAGOFFSET].flags & flag));
}

static __my_inline tBoolean htk_soaks_up_text(const tHtmlTagKind kind)
/* tells whether the tag <kind> is marked with htfSoakUpText */
{ const tBoolean soaks = htk_has_flag(kind, htfSoakUpText);
  return(soaks);
}

__my_inline tBoolean htk_forbids_endtag(const tHtmlTagKind kind)
/* tells whether the tag <kind> is marked with htfForbidEndtag */
{ const tBoolean forbids = htk_has_flag(kind, htfForbidEndtag);
  return(forbids);
}

__my_inline tBoolean htk_forbids_pre(const tHtmlTagKind kind)
/* tells whether the tag <kind> is marked with htfForbidPre */
{ const tBoolean forbids = htk_has_flag(kind, htfForbidPre);
  return(forbids);
}

__my_inline tBoolean htk_is_block(const tHtmlTagKind kind)
/* tells whether the tag <kind> is marked with htfBlock */
{ const tBoolean is_block = htk_has_flag(kind, htfBlock);
  return(is_block);
}

__my_inline tBoolean htk_is_par(const tHtmlTagKind kind)
/* tells whether the tag <kind> is marked with htfPar */
{ const tBoolean is_par = htk_has_flag(kind, htfPar);
  return(is_par);
}

static one_caller void create_html_form(const char* action,
  tHtmlFormFlags flags)
/* appends a new form record to the form array of the current cantent; the
   record keeps the <action> pointer itself (no copy is made) and starts out
   without any active elements */
{ const tHtmlFormNumber slot = current_cantent->hfnum;
  tHtmlFormNumber capacity = current_cantent->hfmax;
  tHtmlForm* form;

  if (slot >= capacity) /* array is full, enlarge it first */
  { if (capacity >= 9) capacity += 10;
    else capacity += 3;
    current_cantent->hfmax = capacity;
    current_cantent->form = memory_reallocate(current_cantent->form,
      capacity * sizeof(tHtmlForm), mapOther);
  }

  form = &(current_cantent->form[slot]);
  form->action_uri = action;
  form->flags = flags;
  form->last_ae = INVALID_AE;
  form->first_ae = form->last_ae;
  current_cantent->hfnum = slot + 1;
#if CONFIG_DEBUG
  sprint_safe(debugstrbuf,
    "create_html_form(): num=%d, max=%d, action=*%s*, flags=%d\n",
    slot, capacity, action, flags);
  debugmsg(debugstrbuf);
#endif
}

static void append_attribute_name(const tAttributeName name)
/* puts an attribute of the given <name> at the head of current_attributes;
   if such an attribute is already in the list, that one is re-used (after
   dropping its old value) so that no duplicates arise */
{ tAttribute* attr = fada(name);
  if (attr == NULL) attr = create_attribute(name);
  else { dealloc(attr->value); } /* the earlier value is obsolete now */
  attr->next = current_attributes;
  current_attributes = attr;
}

static __my_inline void set_current_node(tHtmlNode* node)
/* makes <node> the node which parser_html_next() will hand out next */
{ is_current_node_valid = truE;
  current_node = node;
}

static void store_html_node(tHtmlNode* node, tBoolean do_skip_char)
/* stores the <node> in the tree, updates the lhpp_.... information and creates
   an active-element base if appropriate; while inside_select is set, the
   <node> is deallocated instead of being stored. <do_skip_char> is added (via
   boolean2bool()) to the byte offset which is recorded in lhpp_byte. On normal
   return, the <node> has become the current node. */
{ const tHtmlTagKind htk = node->kind;
  if (inside_select) /* don't store anything */
  { deallocate_html_node(node); return; }

  /* append the <node> to the end of the tree's node chain */
  if (previous_node_in_tree != NULL)
  { previous_node_in_tree->next = node; previous_node_in_tree = node; }
  else
  { /* The <node> is the first one for the tree of the current resource: */
    current_cantent->tree = previous_node_in_tree = node;
  }
  node->flags |= hnfStoredInTree;

  /* remember the content block and the byte offset within it */
  lhpp_content = current_block;
  if (current_block == NULL)
  { /* This can e.g. happen if an HTML document ends with an opening <title>
       tag (incomplete document or just not yet completely received); in this
       case we have the call chain "parser_html_next() -> change_state(hpsDone)
       -> finish_delayed_node() -> store_html_node()". */
    lhpp_byte = 0;
  }
  else
  { lhpp_byte = current_block->used - current_block_sizeleft +
      boolean2bool(do_skip_char);
  }

  if (htk == htkTitle)
  { /* a non-empty title text becomes the cantent's major_html_title; the
       string changes its owner, it isn't copied */
    tAttribute* t = _find_and_detach_attribute(&(node->data), anInternalText);
    const char* tv;
    if ( (t != NULL) && ( (tv = t->value) != NULL ) && (*tv != '\0') )
    { __dealloc(current_cantent->major_html_title);
      current_cantent->major_html_title = tv; t->value = NULL;
    }
    deattr(t);
  }

  if (node->flags & hnfHasAeBase)
  { /* the node has an active-element base: append curraebase to aebase[] */
    if (aenum >= aemax) /* need to allocate more memory */
    { aemax += aenum_incvalue(aemax);
      current_cantent->aebase = aebase = memory_reallocate(aebase, aemax *
        sizeof(tActiveElementBase), mapOther);
    }
#if CONFIG_JAVASCRIPT
    { /* extract event handlers */
      /* NOTE(review): the attribute <a> returned by fada_js() isn't released
         anywhere in this loop; whether that's a leak depends on the ownership
         rules of fada_js() and javascript_compile(), which can't be seen from
         here - TODO confirm */
      const tJavascriptEventHandler* javascript_ehs = NULL;
      const tAttribute* a;
      while ( (a = fada_js(&(node->data))) != NULL )
      { const char* v = a->value;
        const tJavascriptCode* code;
        if ( (v != NULL) && (*v != '\0') &&
             ( (code = javascript_compile(v)) != NULL ) )
        { /* prepend the new handler to the list built so far */
          tJavascriptEventHandler* eh =
            javascript_create_eh(a->name - anJavascriptBegin, code);
          eh->next = javascript_ehs; javascript_ehs = eh;
        }
      }
      curraebase.eh = javascript_ehs;
    }
#endif
    if (htk == htkTextarea)
    { /* non-empty text which was collected for the <textarea> is handed over
         with moavr() before the base is copied into aebase[] */
      tAttribute* t = _find_and_detach_attribute(&(node->data),anInternalText);
      if ( (t != NULL) && (t->value != NULL) && (t->value[0] != '\0') )
        moavr(t);
      deattr(t);
    }
    aebase[aenum++] = curraebase;
  }
  set_current_node(node);
#if CONFIG_EXDEBUG
  sprint_safe(debugstrbuf, "stored node %p, %d, %d\n", node, htk, node->flags);
  prsdbg(debugstrbuf);
#endif
}

static __sallocator char* __callocator prepare_text(tBoolean may_trim)
/* returns a newly allocated string (bufsize bytes) which holds the contents
   of buf after processing by interpret_character_entities(); <may_trim> is
   passed through to that function */
{ char* result;
  result = (char*) __memory_allocate(bufsize, mapString);
  interpret_character_entities(result, buf, may_trim);
  return(result);
}

static one_caller void finish_delayed_node(tBoolean got_text,
  tBoolean do_skip_char)
/* completes the delayed_node (a node which was held back in order to soak up
   the text following its tag): if <got_text>, the text in buf is attached as
   an anInternalText attribute (except for script and style nodes); then the
   node either becomes an option of the current <select>, or is stored in the
   tree, or - if parsing is already done - is dropped. <do_skip_char> is simply
   passed on to store_html_node(). Afterwards delayed_node is NULL. */
{ tHtmlTagKind htk = delayed_node->kind;
  if ( (got_text) && (htk != htkScript) && (htk != htkStyle) )
  { /* add the text as an internal attribute */
    tAttribute* a = create_attribute(anInternalText);
    tBoolean may_trim = cond2boolean((htk == htkOption) || (htk == htkTitle));
    a->value = prepare_text(may_trim); a->next = delayed_node->data;
    delayed_node->data = a;
  }
  if ( (inside_select) && (htk == htkOption) )
  { /* add this option to the current <select> data */
    /* (first pull the interesting attributes out of the node's list...) */
    tAttribute *list = (tAttribute*) delayed_node->data,
      *v = __fada(list, anValue), *l = __fada(list, anLabel),
      *t = __fada(list, anInternalText),
      *se = __fada(list, anSelected), *di = __fada(list, anDisabled);
    tHtmlOption *option = __memory_allocate(sizeof(tHtmlOption), mapOther),
      *o = (tHtmlOption*) select_aebase.render;
    tHtmlOptionFlags hof = hofNone;
    char *value, *render;

    /* (...then give the remaining list back to the node; the node itself
       isn't needed any longer) */
    delayed_node->data = (void*) list; deallocate_html_node(delayed_node);

    /* What to render? */
    /* (preference: the label, then the text, then a fixed placeholder; the
       text is only copied here because it might be needed as value below) */
    if ( (l != NULL) && (l->value != NULL) && (l->value[0] != '\0') )
    { render = l->value; l->value = NULL; }
    else if ( (t != NULL) && (t->value != NULL) && (t->value[0] != '\0') )
    { render = my_strdup(t->value); }
    else render = my_strdup(_("[an option]"));

    /* What to submit? */
    /* (preference: the value attribute, then the text, else nothing; the
       chosen string changes its owner) */
    if ( (v != NULL) && (v->value != NULL) && (v->value[0] != '\0') )
    { value = v->value; v->value = NULL; }
    else if ( (t != NULL) && (t->value != NULL) && (t->value[0] != '\0') )
    { value = t->value; t->value = NULL; }
    else value = NULL;

    if (se != NULL) hof |= hofSelected;
    if (di != NULL) hof |= hofDisabled;
    option->next = NULL; option->value = value; option->render = render;
    option->flags = hof;
    /* append the option to the end of the list which hangs off
       select_aebase.render */
    if (o == NULL) select_aebase.render = (char*) option;
    else
    { while (o->next != NULL) o = o->next; /* IMPROVEME? */
      o->next = option;
    }
    select_aebase.maxlength++; /* option counter */
    deattr(v); deattr(l); deattr(t); deattr(se); deattr(di);
  }
  else if (!is_parsing_done) store_html_node(delayed_node, do_skip_char);
  else deallocate_html_node(delayed_node); /* just forget it */
  delayed_node = NULL; /* done with this one */
}

static my_inline void deallocate_current_attributes(void)
/* releases the list of pending attributes (if any) and marks it empty */
{ if (current_attributes == NULL) return; /* nothing pending */
  deallocate_attributes(current_attributes);
  current_attributes = NULL;
}

static one_caller void handle_meta_tag(void)
/* evaluates the attributes of a <meta> tag; the only thing recognized is a
   "refresh" entry, whose target becomes the redirection of the current
   cantent unless a redirection is already set. The name, http-equiv and
   content attributes are removed from current_attributes in any case. */
{ const tAttribute *name_attr = fada(anName), *equiv_attr = fada(anHttpEquiv),
    *content_attr = fada(anContent), *key;
  const char* pos;

  /* without content there's nothing to evaluate */
  if (content_attr == NULL) goto cleanup;
  pos = content_attr->value;
  if (pos == NULL) goto cleanup;

  /* "name" is preferred over "http-equiv" if both are present */
  key = ( (name_attr == NULL) ? equiv_attr : name_attr );
  if (key == NULL) goto cleanup;
  if (key->value == NULL) goto cleanup;
  if (!streqcase(key->value, "refresh")) goto cleanup; /* unknown name */

  /* don't override HTTP redirection - the dynamic server usually knows better
     than the static HTML document does */
  if (current_cantent->redirection != NULL) goto cleanup;

  /* The refresh content isn't "really" standardized, so let's be careful: */
  while (IS_WHITESPACE(*pos)) pos++;
  if (my_isdigit(*pos))
  { /* The number of seconds is parsed only in order to get past it; the
       redirection is always done immediately. (The delay is often 0 anyway,
       and users can get back to the original document easily with a single
       keyboard command at any later time.) */
    int ignored_seconds;
    my_atoi(pos, &ignored_seconds, &pos, 9);
    while (IS_WHITESPACE(*pos)) pos++;
  }
  if ( (*pos == ',') || (*pos == ';') ) pos++; /* optional separator */
  while (IS_WHITESPACE(*pos)) pos++;
  if (strneqcase(pos, "url=", 4)) /* optional prefix */
  { pos += 4;
    while (IS_WHITESPACE(*pos)) pos++;
  }
  if (*pos == '\0') goto cleanup; /* no target given */

  current_cantent->redirection = my_strdup(pos);
  current_cantent->caf |= cafHtmlRedirection;
#if CONFIG_DEBUG
  { char* spfbuf;
    my_spf(debugstrbuf, STRBUF_SIZE, &spfbuf, "HTML redirection: *%s*\n",pos);
    debugmsg(spfbuf); my_spf_cleanup(debugstrbuf, spfbuf);
  }
#endif

  cleanup:
  deattr(name_attr); deattr(equiv_attr); deattr(content_attr);
}

static void build_html_node(void)
/* Builds an HTML node for the tag or text run which has just been completed
   (described by current_tagkind, is_endtag, tagblock_ends, buf and
   current_attributes) and decides what happens to it:
   - start tags which soak up text become the delayed_node;
   - a <select> start tag is remembered in select_node and only stored when
     the matching end tag arrives;
   - everything else is stored in the tree immediately, unless parsing is
     already done (then a text node still becomes the current node, any other
     node is dropped).
   Every node which is neither stored nor remembered is deallocated here,
   including the node built for a </select> end tag. On return,
   current_attributes is empty. */
{ tHtmlNode* node = memory_allocate(sizeof(tHtmlNode), mapHtmlNode);
  const tBoolean is_textual = cond2boolean(current_tagkind == htkText),
    ucb = ( (is_endtag || is_textual || is_parsing_done) ? falsE
      : use_curraebase() );
  tHtmlNodeFlags hnf = hnfNone;
  if (is_endtag) hnf |= hnfIsEndtag;
  if (tagblock_ends) hnf |= hnfTagblockEnds;

  /* Build the node: */
  if (is_textual) node->data = prepare_text(falsE);
  else
  { if (current_tagkind == htkInvalid)
    { /* keep the name of the unknown tag as an internal attribute; the string
         changes its owner */
      append_attribute_name(anInternalTagname);
      current_attributes->value = current_unknown_tagname;
        /* CHECKME: that's unclean! */
      current_unknown_tagname = NULL;
    }
    else if ( (!is_endtag) && (!is_parsing_done) )
    { if (current_tagkind == htkCenter) hnf |= hnfAlignCenter;
      else if (current_tagkind == htkMeta) handle_meta_tag();
      else if (current_tagkind == htkForm)
      { tAttribute *a = fada(anAction), *m = fada(anMethod),
          *e = fada(anEnctype);
        if ( (a != NULL) && (a->value != NULL) && (a->value[0] != '\0') )
        { /* seems we have an "action" attribute, let's create a form */
          tHtmlFormFlags ff = hffNone;
          hnf |= hnfGoodForm;
          if ((m != NULL) && (m->value != NULL) && streqcase(m->value, "post"))
            ff |= hffMethodPost;
          if ( (e != NULL) && (e->value != NULL) &&
               (streqcase(e->value, "multipart/form-data")) )
            ff |= hffEncodingMultipart;
          create_html_form(a->value, ff);
          a->value = NULL; /* (the form owns the string now) */
        }
        deattr(a); deattr(m); deattr(e);
      }
    }

    /* convert some general attributes to a more efficient representation */
    { const tAttribute* a = fada(anAlign);
      const char* av;
      if ( (a == NULL) /* most likely */ || (current_tagkind == htkTable) ||
           (current_tagkind == htkTh) || (current_tagkind == htkTd) )
        goto dont_align; /* CHECKME! */
      if ( ( (av = a->value) != NULL ) && (*av != '\0') )
      { if (streqcase(av, "left")) hnf |= hnfAlignLeft;
        else if (streqcase(av, strCenter)) hnf |= hnfAlignCenter;
        else if (streqcase(av, "right")) hnf |= hnfAlignRight;
      }
      dont_align: {}
      deattr(a);
    }
    node->data = current_attributes; current_attributes = NULL; /* detach */
  }
  node->kind = current_tagkind; node->flags = hnf; node->next = NULL;

  /* Now decide what to do with the node: */
  if ( (!is_endtag) && (htk_soaks_up_text(current_tagkind)) )
  { if (current_tagkind == htkTextarea) node->flags |= hnfHasAeBase;
    delayed_node = node; /* We wanna soak up any immediately following text. */
  }
  else if (current_tagkind == htkSelect)
  { if (!is_endtag)
    { /* A select_node which is still pending here was never stored in the
         tree (it would have been reset to NULL otherwise), so nothing else
         refers to it; release it instead of losing track of it. */
      if (select_node != NULL) deallocate_html_node(select_node);
      select_node = node;
    }
    else
    { if (inside_select) /* finish and store the <select> info */
      { tHtmlOption *o, *o0;
        tBoolean found_selected_option = falsE,
          is_multiple = cond2boolean(select_aebase.flags & aefMultiple);
        inside_select = falsE; curraebase = select_aebase;
        select_node->flags |= hnfHasAeBase;
        /* Make sure one option is selected (and _only_ one if non-multiple) */
        o = o0 = (tHtmlOption*) curraebase.render;
        while (o != NULL)
        { if (o->flags & hofSelected)
          { if (found_selected_option)
            { /* more than one option selected in a non-multiple <select> */
              o->flags &= ~hofSelected;
            }
            else
            { found_selected_option = truE;
              if (is_multiple) break; /* don't care about further selections */
            }
          }
          o = o->next;
        }
        if ( (!found_selected_option) && (o0 != NULL) )
          o0->flags |= hofSelected;
        /* Store the <select> node */
        store_html_node(select_node, truE); select_node = NULL;
      }
      /* The node built for the end tag itself is never stored (only the
         select_node is), so it must be released here. */
      deallocate_html_node(node);
    }
  }
  else if (!is_parsing_done) /* the most likely case */
  { tBoolean do_skip_char = cond2boolean(!is_textual);
    if (ucb) node->flags |= hnfHasAeBase;
    store_html_node(node, do_skip_char);
  }
  else if (is_textual)
  { /* Just imagine the HTML document currently ends with a longish text - the
       user should see it while receiving. Or imagine that a web page author
       simply forgot something like "</body></html>" after the last text run in
       a document... */
    set_current_node(node);
  }
  else deallocate_html_node(node); /* just forget it */
  deallocate_current_attributes();
}

static one_caller tMbsIndex do_lookup_attrname(void)
/* looks up the attribute name currently held in buf within attrdata[] by
   means of the my_binary_search() macro, comparing with strcmp(); on a match
   the index of the matching entry is returned. (The result for "no match" is
   produced inside the macro; the caller treats any negative value as "not
   found".) */
{ my_binary_search(0, ARRAY_ELEMNUM(attrdata) - 1,
    strcmp(buf, attrdata[idx].str), return(idx))
}

static one_caller tAttributeName lookup_attrname(void)
/* translates the attribute name held in buf to its tAttributeName; results
   in anUnknown if the name isn't listed in attrdata[] */
{ const tMbsIndex found = do_lookup_attrname();
  if (found >= 0) return(attrdata[found].an);
  return(anUnknown);
}

#define aai(cond) if (cond) current_attr_name = an /* "accept <an> if" */

static void change_state(tHtmlParserState new_state)
/* Leaves the current parser state and enters <new_state>. Before switching,
   the characters collected in buf are evaluated according to the state which
   is being left (text run, tag name, attribute name or attribute value);
   afterwards buf is emptied. When the new state is hpsText (i.e. a tag was
   completed), a node is built for the tag if it is a known one or has at
   least one accepted attribute. <new_state> == hpsDone marks the end of
   parsing. */
{ tAttributeName an;
  buf_append('\0'); /* (for simplicity) */
  /* (Due to the terminator, "bufsize > 1" below means "buf isn't empty".) */
  if (new_state == hpsDone) is_parsing_done = truE;

  switch (state)
  {case hpsText: /* reached the end of a text run */
    if (delayed_node != NULL)
      finish_delayed_node(cond2boolean(bufsize > 1), falsE);
    else if (bufsize > 1) /* got some text for a "normal" text node */
    { current_tagkind = htkText; build_html_node(); }
    break;
   case hpsTag:
    /* We store only tags we can make sense of. (I.e. we'll store htkInvalid
       tags only if they have some of the "general" attributes.) */
    current_tagkind = htkInvalid; /* default */
    if (bufsize > 1)
    { if (!strcmp(buf, strCommentTag)) new_state = hpsComment1;
      else
      { /* remember the name of an unknown tag; any older remembered name is
           released first */
        current_tagkind = lookup_tagkind(); __dealloc(current_unknown_tagname);
        current_unknown_tagname = ( (current_tagkind != htkInvalid) ? NULL :
          my_strdup(buf) );
      }
    }
    break;
   case hpsAttrName:
    /* We store only attributes we can make sense of. */
    current_attr_name = anUnknown; /* default */
    if (is_endtag) goto an_ignore; /* no attributes allowed */
    an = lookup_attrname();
    if (an == anUnknown) goto an_ignore;

    /* general attributes (allowed for almost all tags) */
    /* (The aai() invocations and the "else" parts form one single if-else
       chain, the length of which depends on the configuration.) */
    aai( (an == anAlign) || (an == anId) );
#if CONFIG_CSS
    else aai( (an == anClass) || (an == anStyle) );
#endif
#if CONFIG_JAVASCRIPT
    else
    { tMbsIndex idx = lookup_javascript_event();
      if (idx >= 0)
        current_attr_name = ((tAttributeName) idx) + anJavascriptBegin;
    }
#endif
    if (current_attr_name != anUnknown) goto an_append;

    /* specific attributes */
    /* (accepted only in combination with the tags listed here) */
    if (current_tagkind == htkInvalid) goto an_ignore;
    switch (current_tagkind)
    {case htkA: aai( (an == anHref) || (an == anName) ); break;
     case htkArea: aai( (an == anHref) || (an == anAlt) ); break;
     case htkButton:
      aai( (an == anType) || (an == anName) || (an == anValue) ||
           (an == anDisabled) );
      break;
     case htkForm:
      aai( (an == anAction) || (an == anMethod) || (an == anEnctype) ); break;
     case htkFrame: case htkIframe:
      aai( (an == anSrc) || (an == anTitle) ); break;
     case htkHr: aai(an == anWidth); break;
     case htkImg: aai( (an == anAlt) || (an == anSrc) ); break;
     case htkInput:
      aai( (an == anType) || (an == anName) || (an == anValue) || (an == anAlt)
           || (an == anSize) || (an == anMaxlength) || (an == anChecked) ||
           (an == anDisabled) || (an == anReadonly) );
      break;
     case htkObject: aai( (an == anType) || (an == anDeclare) ); break;
     case htkOptgroup: aai( (an == anLabel) || (an == anDisabled) ); break;
     case htkOption:
      aai( (an == anValue) || (an == anLabel) || (an == anSelected) ||
           (an == anDisabled) );
      break;
     case htkSelect:
      aai( (an == anName) || (an == anMultiple) || (an == anDisabled) ); break;
     case htkTextarea:
      aai( (an == anName) || (an == anDisabled) || (an == anReadonly) ); break;
     case htkMeta:
      aai( (an == anName) || (an == anHttpEquiv) || (an == anContent) ); break;
     case htkFont:
      aai(an == anColor);
#if TGC_IS_GRAPHICS
      else aai( (an == anSize) || (an == anFace) );
#endif
      break;
#if CONFIG_CSS
     case htkStyle: aai(an == anMedia); break;
#endif
    }

    /* (The label an_append is also reached by the goto for general
       attributes above.) */
    if (current_attr_name != anUnknown)
    { an_append: append_attribute_name(current_attr_name); }
    an_ignore: {}
    break;
   case hpsAttrValue:
    /* attach a non-empty value to the attribute which was appended most
       recently, provided that this attribute was accepted */
    if ( (bufsize > 1) && (current_attr_name != anUnknown) &&
         (current_attributes != NULL) &&
         (current_attributes->name == current_attr_name) )
    { /* (The two latter tests "should" be unnecessary.) */
      current_attributes->value = (shall_interpret_chents_in_attrvalue() ?
        prepare_text(truE) : my_strdup(buf));
    }
    break;
  }

#if CONFIG_DEBUG
  /* log what was collected for the old state (empty text runs and empty
     attribute names aren't logged) */
  if (! ( ( (state == hpsText) || (state == hpsAttrName) ) && (bufsize < 2) ) )
  { char* spfbuf;
    my_spf(debugstrbuf, STRBUF_SIZE, &spfbuf, "%s: *%s%s*\n", hps_name[state],
      ( ( (state == hpsTag) && (is_endtag) ) ? (strSlash) : (strEmpty) ),
      ( (bufsize > 1) ? (buf) : ("(nothing)") ));
    prsdbg(spfbuf); my_spf_cleanup(debugstrbuf, spfbuf);
  }
#endif

  /* actually switch over and start with an empty buffer */
  state = new_state; bufsize = 0;
  if (state == hpsAttrValue) attrvalue_quotes = 0;
  else if (state == hpsText) /* reached the end of an HTML tag */
  { if ( (current_tagkind != htkInvalid) || (current_attributes != NULL) )
    { /* reached the end of 1. a _known_ HTML tag or 2. an htkInvalid tag with
         "general" attributes */
      build_html_node();
    }
  }
}

#undef aai

void parser_html_start(tCantent* cantent)
/* prepares the parser for parsing the <cantent>: nodes which already exist in
   the cantent's tree will be handed out first; actual parsing (if it becomes
   necessary at all) resumes at the position recorded in the cantent's
   lhpp_content/lhpp_byte */
{ size_t parsedsize;

  current_cantent = cantent;
  current_node_in_tree = (tHtmlNode*) cantent->tree;
  previous_node_in_tree = NULL;
  select_node = NULL;
  delayed_node = NULL;
  current_node = NULL;

  /* If we haven't yet parsed the whole content, we might reach a point where
     we actually have to parse something, so we setup the parser here: */
  lhpp_content = cantent->lhpp_content;
  current_block = lhpp_content;
  lhpp_byte = cantent->lhpp_byte;
  parsedsize = lhpp_byte;
  if (current_block == NULL) /* no content block to read from */
  { dataptr = NULL;
    current_block_sizeleft = 0;
  }
  else
  { const size_t usedsize = current_block->used;
    dataptr = current_block->data + parsedsize;
    current_block_sizeleft =
      ( (parsedsize < usedsize) ? (usedsize - parsedsize) : 0 );
  }
  aebase = cantent->aebase;
  aenum = cantent->aenum;
  aemax = cantent->aemax;

  /* start in text mode with an empty buffer and without any pending tag or
     attribute information */
  state = hpsText;
  current_unknown_tagname = NULL;
  buf = NULL;
  bufsize = 0;
  maxbufsize = 0;

  current_tagkind = htkInvalid;
  current_attr_name = anUnknown;
  current_attributes = NULL;
  inside_select = falsE;
  is_parsing_done = falsE;
  is_current_node_valid = falsE;

#if CONFIG_DEBUG
  sprint_safe(debugstrbuf, "\nparser_html_start(): %p,%p,%p,%d,%d\n",
    cantent, current_block, dataptr, parsedsize, current_block_sizeleft);
  prsdbg(debugstrbuf);
#endif
}

const tHtmlNode* parser_html_next(tBoolean inside_pre)
/* returns the next node (or NULL) for the currently parsed resource; nodes
   which already exist in the tree are handed out first, after that the
   content is parsed character by character until a node has been completed
   or the end of the content has been reached. If <inside_pre> is false, each
   run of whitespace within text is collapsed into a single space character;
   otherwise text is taken over unchanged. */
{ tBoolean found_whitespace = falsE;
  char ch;
  if (current_node_in_tree != NULL)
  { /* need not actually parse something, found a node inside the tree
       (generated during earlier passes) */
    tHtmlNode* retval = previous_node_in_tree = current_node_in_tree;
    current_node_in_tree = current_node_in_tree->next;
    return(retval);
  }

  /* Reached the end of the already generated tree, have to parse: */
  loop:
  if (is_current_node_valid)
  { /* A node has been completed. Once parsing is done, the valid-flag stays
       set and current_node is cleared, so that all later calls result in
       NULL. */
    if (is_parsing_done)
    { tHtmlNode* retval = current_node; current_node = NULL; return(retval); }
    else { is_current_node_valid = falsE; return(current_node); }
  }
  if (current_block_sizeleft <= 0)
  { /* the current content block is exhausted, proceed to the next one */
    if (current_block != NULL) current_block = current_block->next;
    if (current_block != NULL)
    { dataptr = current_block->data;
      current_block_sizeleft = current_block->used;
    }
    else /* reached the end of the last content block in the list */
    { change_state(hpsDone);
      if (!is_current_node_valid)
      { current_node = NULL; is_current_node_valid = truE; } /* (extra care) */
    }
    goto loop;
  }
  ch = *dataptr++;
  switch (state)
  {case hpsText:
    if (ch == '<')
    { if (found_whitespace) { buf_append(' '); found_whitespace = falsE; }
      change_state(hpsTag); is_endtag = falsE;
    }
    else if (!inside_pre)
    { /* only note whitespace here; a single space is appended as soon as
         something else follows */
      if (IS_WHITESPACE(ch)) found_whitespace = truE;
      else
      { if (found_whitespace) { buf_append(' '); found_whitespace = falsE; }
        buf_append(ch);
      }
    }
    else buf_append(ch);
    break;
   case hpsTag:
    /* collecting the tag name; a '/' in front of the name marks an end tag,
       a '/' behind the name sets tagblock_ends */
    if (IS_WHITESPACE(ch))
    { if (bufsize > 0) { change_state(hpsAttrName); tagblock_ends = falsE; } }
    else if (ch == '>') change_state(hpsText);
    else if (ch == '/')
    { if (bufsize == 0) is_endtag = truE;
      else { change_state(hpsAttrName); tagblock_ends = truE; }
    }
    else
    { buf_append(my_tolower(ch)); /* case-insensitivity: htmlspec, 3.2.1 */
      /* switch to comment mode as soon as the name reads "!--" */
      if ( (bufsize == 3) && (!strncmp(buf, strCommentTag, 3)) )
        change_state(hpsComment1);
    }
    break;
   case hpsAttrName:
    if (IS_WHITESPACE(ch)) { if (bufsize > 0) change_state(hpsEquals); }
    else if (ch == '=') change_state(hpsAttrValue);
    else if (ch == '>') change_state(hpsText);
    else if (ch == '/') tagblock_ends = truE; /* CHECKME: chg..(hpsAttrName)?*/
    else
    { attr_name_append: /* (also entered from the hpsEquals case below) */
      buf_append(my_tolower(ch)); /* case-insensitivity: htmlspec, 3.2.2 */
    }
    break;
   case hpsEquals:
    /* an attribute name is complete; wait for '=' or for whatever else */
    if (IS_WHITESPACE(ch)) { /* nothing */ }
    else if (ch == '=') change_state(hpsAttrValue);
    else if (ch == '>') change_state(hpsText);
    else { change_state(hpsAttrName); goto attr_name_append; } /* no value */
    break;
   case hpsAttrValue:
    /* attrvalue_quotes: 0 = value isn't quoted, 1 = value is enclosed in
       single quotes, 2 = value is enclosed in double quotes */
    if (IS_WHITESPACE(ch))
    { if (attrvalue_quotes != 0) buf_append(ch);
      else if (bufsize > 0) change_state(hpsAttrName);
    }
    else if (ch == '"')
    { if ( (attrvalue_quotes == 0) && (bufsize == 0) ) attrvalue_quotes = 2;
      else if (attrvalue_quotes == 2) change_state(hpsAttrName); /* value end*/
      else buf_append(ch);
    }
    else if (ch == '\'')
    { if ( (attrvalue_quotes == 0) && (bufsize == 0) ) attrvalue_quotes = 1;
      else if (attrvalue_quotes == 1) change_state(hpsAttrName); /* value end*/
      else buf_append(ch);
    }
    else if ( (ch == '>') && (attrvalue_quotes == 0) ) change_state(hpsText);
    else buf_append(ch);
    break;
   /* For most of the hpsComment states, we need not call change_state()
      because these are rather some kind of "sub-states": */
   case hpsComment1: /* inside the comment, waiting for the first '-' */
    if (ch == '-') state = hpsComment2;
    break;
   case hpsComment2: /* got one '-', need a second one right away */
    if (ch == '-') state = hpsComment3;
    else state = hpsComment1;
    break;
   case hpsComment3: /* got "--", waiting for the closing '>' */
    if (ch == '>') change_state(hpsText);
    else if ( (!IS_WHITESPACE(ch)) && (ch != '-') ) state = hpsComment1;
    /* "else": stick to hpsComment3! htmlspec, 3.2.4: "White space is not per-
       mitted between the markup declaration open delimiter("<!") and the com-
       ment open delimiter ("--"), but is permitted between the comment close
       delimiter ("--") and the markup declaration close delimiter (">")."
       Additionally, we leniently allow extra "-" characters because a web page
       author might accidentally write e.g. "--->" instead of "-->"... */
    break;
  }
  current_block_sizeleft--; /* (the character has been consumed) */
  goto loop;
}

void parser_html_finish(void)
/* finishes the parsing of the current cantent */
{ if (inside_select)
  { deallocate_one_aebase(&select_aebase);
    if (select_node != NULL) deallocate_html_node(select_node);
  }
  if (delayed_node != NULL) deallocate_html_node(delayed_node);
  deallocate_attributes(current_attributes);
  current_cantent->lhpp_content = lhpp_content;
  current_cantent->lhpp_byte = lhpp_byte;
  current_cantent->aebase = aebase;
  current_cantent->aenum = aenum; current_cantent->aemax = aemax;
  __dealloc(buf);
  i18n_cleanup /* FIXME on interface change: only do this if parser usedepth is
    zero! */
}

one_caller void __init parser_initialize(void)
/* one-time setup of the parser; there's only something to do in debugging
   builds, where the parser's debugging file is created (any failure to do so
   is reported via fatal_error()) and gets its headline */
{
#if CONFIG_DEBUG
  static const char headline[] = "retawq " RETAWQ_VERSION
    " HTML parser debugging file (<http://retawq.sourceforge.net/>)\n";
  const int fd = my_create("htmldebug.txt", O_CREAT | O_TRUNC | O_WRONLY,
    S_IRUSR | S_IWUSR);
  if (fd < 0)
  { fatal_error(errno, "can't create HTML parser debugging file"); }
  fd_parsertest = fd;
  make_fd_cloexec(fd_parsertest);
  prsdbg(headline);
#endif
}
