/* rbmake.c
 *
 * The main() routine for the rbmake utility.  This builds a .rb file for
 * use by the Rocket eBook.
 */
/* This software is copyrighted as detailed in the LICENSE file. */

#include <config.h>
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#include <ctype.h>
#include <time.h>
#include <rbmake/rbmake.h>

/* Edge-enhancement level applied to images unless -E or an
 * Image-Edge-Enhancement option-file line overrides it. */
#define DEFAULT_EDGE_ENHANCE 7

/* Various options */

/* webPageArgs:       -w; parseFileArg() prefixes "http://" to relative args.
 * outputInfoFile:    -W; finishInfoPage() writes "<rbName>.info".
 * noExtraMatchRules: -M; main() then skips adding a per-input "dir/*" rule.
 * unjoin:            -u; passed to the RbFetch_* calls for .rb inputs. */
static bool webPageArgs, outputInfoFile, noExtraMatchRules, unjoin;
/* promptForBookInfo: -p; nonInteractive: -n; dumpOptions: -D (or -d). */
static bool promptForBookInfo, nonInteractive, dumpOptions;
/* The cover-image argument as given, kept so outputOptions() can echo it. */
static char *coverImage;
/* Wildcard lists: -m/-M, -x and -s values.  enhanceUrls holds pairs: a
 * pattern pointer followed by its enhancement level (see the -E handling). */
static MArray *allowedUrls, *excludedUrls, *substRuleUrls, *enhanceUrls;
/* Enhancement level used when no enhanceUrls pattern matches an image. */
static int edgeEnhance = DEFAULT_EDGE_ENHANCE;

/* Some file-scoped state */

/* -L values; option files refer to them as $1, $2, ... */
static MArray *argsForLoadOptions;
/* infoFile: -I / Import-Info-From; rbName: -o / Book-Filename ("" after -O). */
static char *infoFile, *rbName;
/* Munged output name; set to NULL by main() when it starts with '_', in
 * which case finishInfoPage() fills it in. */
static const char *rbFile;
/* The book being built; also consulted by the error handler. */
static RbMake *rb_out;
/* Substitution-rule data found after the dash line of an option file. */
static MBuf *stdinBuf;
/* True when a "-" substitution-rule file has to come from real stdin. */
static bool readSubstFromStdin;
/* -N limit on non-binary pages and the TOC size derived from it (0 = unset). */
static int maxPageCnt, maxTocCnt;

/* getopt() option string; a trailing ':' marks options taking an argument. */
static char opts[] = "a:bc:dDeE:f:g:hiI:jJ:kl:L:m:M:nN:o:OpPRs:t:T:uU:vVwWx:z";

/* Our local functions */

static char *parseFileArg(const char *arg);
void parseMenuArg(char *arg);
static void outputOptions(int argc, char *argv[], int xargc, char *xargv[]);
static char **loadOptions(const char *url, int *cntp);
static char *findNextLine(char *bp, bool nullTerminate);
static bool shouldAllowURL(RbPage *pg, const char *url, int pt);
static void scheduleURL(RbMake *rb, RbPage *pg, const char *url, int pt);
static const char *askForAuthInfo(const char *url, const char *realm);
static void fetchStatus(RbMake *rb, const char *tocname, const char *url,
			const char *errstr, int joinedCnt);
static void manipulateImage(RbImage *img, const char *url);
static void finishInfoPage(RbPage *pg, RbInfoHash *infoHash);
static const char *maybePlural(unsigned int val);
static void myFatalError(const char *msg, va_list ap);
static void myWarning(const char *msg, va_list ap);
static void usage(void);

/* Program entry point.
 *
 * Parses the command line (and any option file named with -l), merges the
 * book-info data, loads the substitution rules, queues every input file or
 * URL, and then runs the fetch loop until the .rb file has been written.
 *
 * Returns 0 on success.  Exits with status 1 if RbMake_create() fails, and
 * with status 0 right after handling -D, -P or -V.
 */
int
main(int argc, char *argv[])
{
    RbInfoHash *infoHash;
    MArray *argHold = MArray_new(argc, 0);
    char *cp, *urlInfo, **extraArgv = NULL;
    int ch, htmlDropCnt, binaryDropCnt, extraArgc = 0;
    const char *arg;
    bool firstArg;

    allowedUrls = MArray_new(16, 4096);
    excludedUrls = MArray_new(16, 4096);
    substRuleUrls = MArray_new(16, 4096);
    enhanceUrls = MArray_new(16, 4096);
    argsForLoadOptions = MArray_new(16, 4096);

    RbError_init(myFatalError, myWarning);
    rb_out = RbMake_new(&shouldAllowURL, &scheduleURL, &finishInfoPage);
    RbMake_setGenerator(rb_out, "rbmake v" RBMAKE_VERSION);
    infoHash = RbMake_getInfoHash(rb_out);

    while ((ch = getopt(argc, argv, opts)) != EOF) {
	switch (ch) {
	  case 'a':
	    RbInfoHash_store(infoHash, "AUTHOR", optarg);
	    break;
	  case 'b':
	    /* Each -b deepens the book-paragraph setting by one. */
	    RbMake_setBookParagraphDepth(rb_out, RbMake_getBookParagraphDepth(rb_out) + 1);
	    break;
	  case 'c':
	    RbMake_setCoverImage(rb_out, parseFileArg(coverImage = optarg));
	    break;
	  case 'D':
	  case 'd': /* Remove soon */
	    dumpOptions = true;
	    break;
	  case 'e':
	    RbMake_setEnhancePunctuation(rb_out, true);
	    break;
	  case 'E':
	    /* "-E N" sets the default level; "-E N=PATTERN" adds a
	     * per-URL level stored as a (pattern, level) pair. */
	    arg = optarg;
	    ch = atoi(arg);
	    if ((!ISDIGIT(*arg) && *arg != 'n') || ch < 0 || ch > 9)
		RbError_exit("-E must be followed by a digit from 0 - 9 or \"none\"\n");
	    while (*arg && !ISSPACE(*arg) && *arg != '=' && *arg != '(') arg++;
	    while (ISSPACE(*arg)) arg++;
	    if (*arg == '=') {
		do arg++; while (ISSPACE(*arg));
		MArray_appendPtr(enhanceUrls, arg);
		MArray_append(enhanceUrls, ch);
	    }
	    else
		edgeEnhance = ch;
	    break;
	  case 'f':
	    /* A number is a link depth; otherwise n/N means 0 and
	     * anything else means -1. */
	    if (ISDIGIT(*optarg))
		RbMake_setFollowLinks(rb_out, atoi(optarg));
	    else
		RbMake_setFollowLinks(rb_out, strchr("nN", *optarg)? 0 : -1);
	    break;
	  case 'g':
	    parseMenuArg(optarg);
	    break;
	  case 'i':
	    RbMake_setIncludeImages(rb_out, RB_INCLUDE_YES);
	    RbMake_setIncludeAudio(rb_out, RB_INCLUDE_YES);
	    break;
	  case 'I':
	    infoFile = optarg;
	    break;
	  case 'j':
	    RbMake_setPageJoining(rb_out, 1);
	    break;
	  case 'J':
	    RbMake_setPageJoining(rb_out, atoi(optarg));
	    break;
	  case 'k':
	    RbMake_setCreateHkeyFile(rb_out, true);
	    break;
	  case 'l':
	    extraArgv = loadOptions(parseFileArg(optarg), &extraArgc);
	    break;
	  case 'L':
	    MArray_appendPtr(argsForLoadOptions, optarg);
	    break;
	  case 'M':
	    noExtraMatchRules = true;
	    /* FALL THROUGH */
	  case 'm':
	    MArray_appendPtr(allowedUrls, rbUrlUnescape(optarg));
	    /* A match rule is pointless without link following, so
	     * turn following on if it is currently off. */
	    if (!RbMake_getFollowLinks(rb_out))
		RbMake_setFollowLinks(rb_out, -1);
	    break;
	  case 'n':
	    nonInteractive = true;
	    break;
	  case 'N':
	    maxPageCnt = atoi(optarg);
	    maxTocCnt = maxPageCnt * 2 + 7;
	    break;
	  case 'o':
	    rbName = optarg;
	    break;
	  case 'O':
	    rbName = "";
	    break;
	  case 'p':
	    promptForBookInfo = true;
	    break;
	  case 'P':
	    /* Prompt for credentials, print the encoded result, quit. */
	    if ((arg = askForAuthInfo(NULL, NULL)) != NULL)
		printf("%s\n", arg);
	    exit(0);
	  case 'R':
	    rbName = "";
	    infoFile = ".";
	    break;
	  case 's':
	    MArray_appendPtr(substRuleUrls, optarg);
	    if (strEQ(optarg, "-"))
		readSubstFromStdin = true;
	    break;
	  case 't':
	    RbInfoHash_store(infoHash, "TITLE", optarg);
	    break;
	  case 'T':
	    if (ISDIGIT(*optarg))
		RbMake_setTextConversion(rb_out, atoi(optarg));
	    else if ((ch = RbMake_findTextConvOpt(optarg,1)) >= 0)
		RbMake_setTextConversion(rb_out, ch);
	    else
		RbError_exit("Unknown text-conversion type: -T %s\n", optarg);
	    break;
	  case 'u':
	    unjoin = true;
	    break;
	  case 'U':
	    /* Drop a trailing .rb suffix and make sure the value
	     * carries the "ebook:" prefix before storing it. */
	    cp = rbGetFileSuffix(optarg);
	    if (cp && rbIsRbSuf(cp))
		cp[-1] = '\0';
	    if (strnNE("ebook:", optarg, 6)) {
		MBuf *tmp = MBuf_new(32, 0);
		MBuf_vwrite(tmp, "ebook:", 6, optarg, -1, NULL);
		optarg = MBuf_toBuffer(tmp, NULL);
	    }
	    RbInfoHash_store(infoHash, "URL", optarg);
	    break;
	  case 'v':
	    RbMake_setVerboseOutput(rb_out, true);
	    break;
	  case 'V':
	    RbError_warn("%s\n", RbMake_getGenerator(rb_out));
	    exit(0);
	  case 'w':
	    webPageArgs = true;
	    break;
	  case 'W':
	    outputInfoFile = true;
	    break;
	  case 'x':
	    MArray_appendPtr(excludedUrls, optarg);
	    break;
	  case 'z':
	    RbMake_setAllowHRSize0PageBreaks(rb_out, true);
	    break;
	  case 'h':
	  default:
	    usage();
	}
    }
    argc -= optind;
    argv += optind;

    if ((RbMake_getIncludeImages(rb_out) != RB_INCLUDE_NO
      || RbMake_getCoverImage(rb_out) != NULL)
      && !rbLibHasImageSupport()) {
	printf("No images will be included since no image support was compiled into rbmake.\n");
	RbMake_setIncludeImages(rb_out, RB_INCLUDE_NO);
	RbMake_setCoverImage(rb_out, NULL);
    }

    /* From here on optarg is reused to hold the first input name. */
    if (argc)
	optarg = *argv;
    else if (extraArgc)
	optarg = *extraArgv;
    else if (!dumpOptions) {
	RbError_exit("You didn't specify any filenames or web pages.  "
		     "Use the -h option to get help.\n");
    }

    if (infoFile) {
	if (strEQ(infoFile, "-")) {
	    if (readSubstFromStdin)
		RbError_exit("You can't read both the info data and the substitution rules from stdin.\n");
	    RbInfoHash_mergeFromFile(infoHash, infoFile);
	}
	else if (dumpOptions)
	    ;
	else if (strEQ(infoFile, ".")) {
	    /* "." means: take the info from the first input, either
	     * from the .rb file itself or from a sibling .info file. */
	    cp = rbGetFileSuffix(optarg);
	    if (rbIsRbSuf(cp)) {
		RbFile *rbf = RbFetch_cachedOpenUrl(parseFileArg(optarg), 0);
		if (rbf)
		    RbInfoHash_mergeFromRbFile(infoHash, rbf);
	    }
	    else {
		/* Room for the name, ".info" and the terminator. */
		char *fn = Mem_alloc(strlen(optarg) + 5 + 1);
		strcpy(fn, optarg);
		cp = rbGetFileSuffix(fn);
		if (rbIsRbSuf(cp) || rbIsHtmlSuf(cp) || rbIsTextSuf(cp))
		    cp[-1] = '\0';
		strcat(fn, ".info");
		RbInfoHash_mergeFromFile(infoHash, fn);
	    }
	}
	else if (rbIsRbSuf(rbGetFileSuffix(infoFile))) {
	    RbFile *rbf = RbFetch_cachedOpenUrl(parseFileArg(infoFile), 0);
	    if (rbf)
		RbInfoHash_mergeFromRbFile(infoHash, rbf);
	}
	else
	    RbInfoHash_mergeFromFile(infoHash, parseFileArg(infoFile));
    }

    if (dumpOptions) {
	outputOptions(argc, argv, extraArgc, extraArgv);
	exit(0);
    }

    /* Invent a book URL from the time, a random value and the pid
     * when none was supplied. */
    if ((urlInfo = RbInfoHash_fetch(infoHash, "URL")) == NULL) {
	char buf[64];
	srand(time(NULL));
	sprintf(buf, "ebook:rbmk-%08lx%04x%04lx", (long)time(NULL),
		(int)(rand()&0xFFFF), (long)getpid());
	urlInfo = Mem_strdup(buf);
	RbInfoHash_store(infoHash, "URL", urlInfo);
    }

    /* Pick the base name for the output file; "urlInfo + 6" skips the
     * 6 leading characters of the URL value (the "ebook:" prefix). */
    if (rbName) {
	if (strEQ(rbName, "."))
	    rbName = optarg;
	else if (!*rbName) {
	    if (!infoFile)
		rbName = urlInfo + 6;
	    else if (strEQ(infoFile, "."))
		rbName = optarg;
	    else
		rbName = infoFile;
	}
    }
    else
	rbName = urlInfo + 6;

    rbFile = RbMake_mungeBookName(rb_out, rbName);
    rbName = Mem_strdup(rbFile);
    /* strlen(rbName+3) equals strlen(rbName)-3, so this chops the last
     * three characters off the copy. */
    rbName[strlen(rbName+3)] = '\0';

    if (MArray_itemCnt(substRuleUrls)) {
	MBuf *mb;
	const char *msg;
	while ((arg = MArray_fetchPtr(substRuleUrls)) != NULL) {
	    if (strEQ(arg, "-")) {
		/* Prefer rule data that loadOptions() captured from
		 * the option file over reading real stdin. */
		if (stdinBuf) {
		    mb = stdinBuf;
		    stdinBuf = NULL;
		}
		else
		    mb = GrabUrl_read("-", NULL);
	    }
	    else
		mb = GrabUrl_read(parseFileArg(arg), NULL);
	    if (!mb)
		RbError_exit("Unable to read substitute-rule file: %s\n", arg);
	    msg = RbMake_addSubstRules(rb_out, mb);
	    if (msg)
		RbError_exit("%s\n", msg);
	    MBuf_delete(mb);
	}
    }

    RbMake_init();
    RbFetch_init(&fetchStatus);
    RbImage_init(&manipulateImage);
    GrabUrl_init(&askForAuthInfo);

    /* First pass: register every input, command-line ones first and
     * then those from the option file, saving each URL in argHold. */
    firstArg = true;
    while (1) {
	if (argc)
	    arg = *argv++, argc--;
	else if (extraArgc)
	    arg = *extraArgv++, extraArgc--;
	else
	    break;
	arg = parseFileArg(arg);
	if (rbIsRbSuf(rbGetFileSuffix(arg)))
	    RbFetch_prepareForRbContents(rb_out, arg, unjoin, firstArg);
	else {
	    if (!RbMake_addPageName(rb_out, arg, RB_PAGETYPE_UNKNOWN)) {
		/* The handler prints the message verbatim, so it
		 * needs its own trailing newline. */
		RbError_warn("Filename `%s' was rejected.\n", rbStripCwd(arg));
		continue;
	    }
	    if (!noExtraMatchRules && RbMake_getFollowLinks(rb_out)) {
		/* Auto-accept everything below this input's
		 * directory: keep the URL up to its last '/' (or
		 * ':' if there is no slash) and append '*'. */
		char *uarg = rbUrlUnescape(arg);
		MBuf *mb = Wild_escapeWildcards(uarg);
		char *url = MBuf_dataPtr(mb, NULL);
		if ((cp = strrchr(url, '/')) == NULL)
		    cp = strrchr(url, ':');
		MBuf_truncate(mb, cp - url + 1);
		MBuf_putc(mb, '*');
		MArray_appendPtr(allowedUrls, MBuf_toBuffer(mb, NULL));
		Mem_free(uarg);
	    }
	}
	MArray_appendPtr(argHold, arg);
	firstArg = false;
    }

    if (!RbMake_create(rb_out, rbFile, maxTocCnt))
	exit(1);
    /* A name starting with '_' is treated as not final yet;
     * finishInfoPage() announces the real name later. */
    if (*rbFile == '_')
	rbFile = NULL;
    else
	printf("Creating %s:\n", rbFile);

    /* Second pass: start fetching everything that was registered. */
    while ((arg = MArray_fetchPtr(argHold)) != NULL) {
	if (rbIsRbSuf(rbGetFileSuffix(arg)))
	    RbFetch_getRbContents(rb_out, arg, unjoin);
	else
	    RbFetch_getURL(rb_out, NULL, arg, RB_PAGETYPE_UNKNOWN);
    }

    RbFetch_loop();

    /* Report pages that had to be left out, if any. */
    htmlDropCnt = RbMake_getHtmlDropCount(rb_out);
    binaryDropCnt = RbMake_getBinaryDropCount(rb_out);
    if (htmlDropCnt | binaryDropCnt) {
	fputs("Had to omit", stdout);
	if (htmlDropCnt) {
	    printf(" %d html page%s", htmlDropCnt, maybePlural(htmlDropCnt));
	    if (binaryDropCnt)
		fputs(" and", stdout);
	}
	if (binaryDropCnt) {
	    printf(" %d binary page%s", binaryDropCnt,
		   maybePlural(binaryDropCnt));
	}
	fputs(".\n", stdout);
    }
    printf("Finishing up %s\n", RbMake_getFileName(rb_out));
    RbMake_finish(rb_out);

    RbFetch_cleanup();
    RbMake_cleanup();

    return 0;
}

/* Turn a command-line or option-file argument into a URL.
 *
 *   "-"            is passed to rbBuildURL() with an empty second argument;
 *   "letters:..."  is treated as already having a scheme (except, with
 *                  DOS_FILENAMES, a single-letter prefix, which is handled
 *                  as a filename);
 *   anything else  becomes "http://ARG" when -w is in effect and ARG does
 *                  not start with '/' or '\\', otherwise it is URL-escaped
 *                  and treated as a filename.
 *
 * The result must be released with rbFreeURL().
 */
static char *
parseFileArg(const char *arg)
{
    const char *scan = arg;
    char *escaped, *result;
    bool forceFile = false;

    if (strEQ(arg, "-"))
	return rbBuildURL(arg, "");

    /* Find the end of any leading run of letters. */
    while (ISALPHA(*scan))
	scan++;

    if (*scan == ':') {
#ifdef DOS_FILENAMES
	/* "C:..." is a drive letter, not a URL scheme. */
	forceFile = (scan - arg == 1);
	if (!forceFile)
#endif
	    return rbBuildURL(arg, NULL);
    }

    if (!forceFile && webPageArgs && *arg != '/' && *arg != '\\') {
	MBuf *web = MBuf_new(32, 0);
	MBuf_vwrite(web, "http://",7, arg,-1, NULL);
	result = rbBuildURL(MBuf_dataPtr(web, NULL), NULL);
	MBuf_delete(web);
	return result;
    }

    /* Plain filename: escape it before building the URL. */
    escaped = rbUrlEscape(arg);
    result = rbBuildURL(escaped, NULL);
    Mem_free(escaped);
    return result;
}

/* Register a menu item given as "NAME=TARGET".
 *
 * The string is split at the first '=' only for the duration of the call;
 * the '=' is put back before returning.  Arguments without an '=' are
 * ignored.
 */
void
parseMenuArg(char *arg)
{
    char *sep = strchr(arg, '=');
    char *target;

    if (sep == NULL)
	return;

    *sep = '\0';		/* arg is now just NAME */
    target = parseFileArg(sep + 1);
    if (target != NULL)
	RbMake_addMenuItem(rb_out, arg, target);
    *sep = '=';			/* restore the caller's string */

    rbFreeURL(target);
}

/* Print one "NAME: VALUE  (COMMENT)" option line to stdout.
 *
 * name     option name, printed before the colon.
 * value    option value or NULL; when NULL only "NAME:" is printed.  Every
 *          '$' in the value is written as "$$".
 * comment  optional hint printed in parentheses, or NULL.
 */
static void
outputStrOpt(const char *name, const char *value, const char *comment)
{
    printf("%s:", name);
    if (value) {
	const char *cp, *dollar;
	putchar(' ');
	for (cp = value; (dollar = strchr(cp, '$')) != NULL; cp = dollar+1) {
	    /* The precision for "%.*s" must be an int; the pointer
	     * difference is a ptrdiff_t, so convert it explicitly.
	     * The chunk printed includes the '$' itself. */
	    printf("%.*s$", (int)(dollar - cp + 1), cp);
	}
	fputs(cp, stdout);
    }
    if (comment)
	printf("  (%s)", comment);
    putchar('\n');
}

/* Print "NAME: NUMBER" to stdout, followed by "  (COMMENT)" when a comment
 * is supplied, and end the line. */
static void
outputNumOpt(const char *name, int value, const char *comment)
{
    printf("%s: %d", name, value);
    if (comment != NULL)
	printf("  (%s)", comment);
    fputc('\n', stdout);
}

/* Print a boolean option line to stdout: "NAME: yes" or "NAME: no". */
static void
outputBoolOpt(const char *name, bool value)
{
    const char *text = "no";

    if (value)
	text = "yes";
    printf("%s: ", name);
    puts(text);			/* puts() supplies the newline */
}

/* Print an enumerated option to stdout as
 *     NAME: CURRENT  (CHOICE0, CHOICE1, ...)
 *
 * name   option name.
 * value  index of the current setting; func(value) gives its text.
 * cnt    number of choices; func(0) .. func(cnt-1) are listed.
 * func   maps a choice index to its display name.
 */
static void
outputListOpt(const char *name, int value, int cnt, char *(*func)(int))
{
    int idx;

    printf("%s: %s  ", name, func(value));

    for (idx = 0; idx < cnt; idx++) {
	/* The first choice opens the parenthesis, the rest are
	 * separated by commas. */
	fputs(idx == 0 ? "(" : ", ", stdout);
	printf("%s", func(idx));
    }

    fputs(")\n", stdout);
}

/* Write the current settings to stdout in option-file syntax (the -D
 * feature), i.e. as "Name: value" lines that loadOptions() understands.
 *
 * argc/argv    remaining command-line inputs.
 * xargc/xargv  inputs that came from an option file.
 *
 * If a substitution-rule file of "-" is configured, the rule data (from
 * stdinBuf if set, otherwise from stdin) is appended after a dash line.
 */
static void
outputOptions(int argc, char *argv[], int xargc, char *xargv[])
{
    RbInfoHash *infoHash;
    MArray *menuItems;
    bool appendStdin = false;
    char *ifn, *rbn, *arg, buf[2048];
    char *opt, *msg;
    int x, len;
    MBuf *mb;

    /* An info file of "-" (stdin) is not echoed as Import-Info-From. */
    if ((ifn = infoFile) != NULL && strEQ(ifn, "-"))
	ifn = NULL;
    /* Work out the Book-Filename value: the given name, or the info-file
     * name when the name is empty, with a known suffix removed. */
    if ((rbn = rbName) != NULL) {
	if (*rbn)
	    rbn = Mem_strdup(rbn);
	else if (ifn)
	    rbn = Mem_strdup(ifn);
	else
	    rbn = NULL;
	if (rbn) {
	    char *cp = rbGetFileSuffix(rbn);
	    if (rbIsRbSuf(cp) || rbIsHtmlSuf(cp) || rbIsTextSuf(cp)
	     || rbIsInfoSuf(cp))
		cp[-1] = '\0';
	}
    }

    printf("#\
 All comments in the options section must occupy a full line.  If the\n#\
 Substitution-Rule-File option has a value of \"-\" AND there is a dash-\n#\
 divider line present (to indicate where the data begins), then the rules\n#\
 will be read from this file rather than stdin.\n\n");

    outputStrOpt("Book-Filename", rbn, NULL);
    putchar('\n');

    /* Book info: one Set-Info line per line of the serialized info hash,
     * or commented-out placeholders when there is nothing to show. */
    outputStrOpt("Import-Info-From", ifn, NULL);
    infoHash = RbMake_getInfoHash(rb_out);
    mb = RbInfoHash_toMBuf(infoHash);
    if (MBuf_getLength(mb)) {
	while ((len = MBuf_gets(mb, buf, sizeof buf)) > 0)
	    printf("Set-Info: %s", buf);
    }
    else if (!ifn)
	printf("#Set-Info: TITLE=\n#Set-Info: AUTHOR=\n");
    putchar('\n');

    outputStrOpt("Cover-Image", coverImage, NULL);
    /* With no inputs at all, still run the loop once; the entry fetched
     * from argv is then printed as an "Input-File" line.
     * NOTE(review): presumably that entry is argv's NULL terminator,
     * giving an empty "Input-File:" line -- confirm. */
    if (!argc && !xargc)
	argc = 1;
    while (1) {
	if (argc)
	    arg = *argv++, argc--;
	else if (xargc)
	    arg = *xargv++, xargc--;
	else
	    break;
	outputStrOpt("Input-File", arg, NULL);
    }
    outputBoolOpt("Input-Files-Default-To-Web-Pages", webPageArgs);
    putchar('\n');

    /* Follow-Links: negative prints "yes", zero "no", positive the depth. */
    opt = "Follow-Links";
    msg = "no, # of links deep, yes";
    if (RbMake_getFollowLinks(rb_out) <= 0) {
	outputStrOpt(opt, RbMake_getFollowLinks(rb_out) < 0? "yes" : "no",
		     msg);
    }
    else
	outputNumOpt(opt, RbMake_getFollowLinks(rb_out), msg);
    outputListOpt("Include-Images", RbMake_getIncludeImages(rb_out),
		  RB_INCLUDE_OPT_CNT, RbMake_getIncludeOptName);
    outputListOpt("Include-Audio", RbMake_getIncludeAudio(rb_out),
		  RB_INCLUDE_OPT_CNT, RbMake_getIncludeOptName);
    putchar('\n');

    /* Each list below prints one line per entry, or a single empty
     * line for the option when the list has no entries. */
    if (MArray_itemCnt(allowedUrls)) {
	MArray_setFetchPos(allowedUrls, 0);
	while ((arg = MArray_fetchPtr(allowedUrls)) != NULL)
	    outputStrOpt("Accept-URLs-Matching", arg, NULL);
    }
    else
	outputStrOpt("Accept-URLs-Matching", NULL, NULL);
    outputBoolOpt("Auto-Accept-Input-File-Dirs", !noExtraMatchRules);
    putchar('\n');

    if (MArray_itemCnt(excludedUrls)) {
	MArray_setFetchPos(excludedUrls, 0);
	while ((arg = MArray_fetchPtr(excludedUrls)) != NULL)
	    outputStrOpt("Exclude-URLs-Matching", arg, NULL);
    }
    else
	outputStrOpt("Exclude-URLs-Matching", NULL, NULL);
    putchar('\n');

    menuItems = RbMake_getMenuItems(rb_out);
    if (MArray_itemCnt(menuItems)) {
	MArray_setFetchPos(menuItems, 0);
	while ((arg = MArray_fetchPtr(menuItems)) != NULL)
	    outputStrOpt("Menu-Item", arg, NULL);
    }
    else
	outputStrOpt("Menu-Item", NULL, NULL);
    putchar('\n');

    /* Depth 0 prints "no", depth 1 "yes", anything larger the number. */
    opt = "Use-Book-Paragraphs";
    msg = "yes, no, DEPTH";
    if ((x = RbMake_getBookParagraphDepth(rb_out)) <= 1)
	outputStrOpt(opt, x? "yes" : "no", msg);
    else
	outputNumOpt(opt, x, msg);
    /* All flags or none print "yes"/"no"; a partial set is spelled out
     * as the punctuation characters that are enabled. */
    opt = "Enhance-Punctuation";
    msg = "yes, no, ['\"-]";
    if ((x = RbMake_getEnhancePunctuation(rb_out)) == RB_ENHANCE_ALL || x == 0)
	outputStrOpt(opt, x? "yes" : "no", msg);
    else {
	*buf = '\0';
	if (x & RB_ENHANCE_SQUOTES)
	    strcat(buf, "'");
	if (x & RB_ENHANCE_DQUOTES)
	    strcat(buf, "\"");
	if (x & RB_ENHANCE_EMDASHES)
	    strcat(buf, "-");
	if (x & RB_ENHANCE_ELLIPSES)
	    strcat(buf, ".");
	outputStrOpt(opt, buf, msg);
    }
    outputBoolOpt("Allow-Old-Style-Page-Breaks",
		  RbMake_getAllowHRSize0PageBreaks(rb_out));
    /* 0 prints "none", 1 prints "all", larger values print the count. */
    opt = "Page-Joining";
    msg = "none, all, # HTML pages per joined page";
    if (RbMake_getPageJoining(rb_out) > 1)
	outputNumOpt(opt, RbMake_getPageJoining(rb_out), msg);
    else
	outputStrOpt(opt, RbMake_getPageJoining(rb_out)?"all":"none", msg);
    outputListOpt("Text-Conversion", RbMake_getTextConversion(rb_out),
		  RB_TEXTCONV_OPT_CNT, RbMake_getTextConvOptName);
    /* Per-URL levels come first ("LEVEL = PATTERN", read back as
     * pattern/level pairs), then the default level. */
    opt = "Image-Edge-Enhancement";
    msg = "none, 1 - 9";
    if (MArray_itemCnt(enhanceUrls)) {
	MArray_setFetchPos(enhanceUrls, 0);
	while ((arg = MArray_fetchPtr(enhanceUrls)) != NULL) {
	    sprintf(buf, "%d", MArray_fetch(enhanceUrls));
	    printf("%s: %s = %s\n", opt, buf, arg);
	}
    }
    if (edgeEnhance > 0)
	outputNumOpt(opt, edgeEnhance, msg);
    else
	outputStrOpt(opt, "none", msg);
    putchar('\n');

    outputBoolOpt("Non-Interactive", nonInteractive);
    outputBoolOpt("Prompt-For-Book-Info", promptForBookInfo);
    outputBoolOpt("Make-Dictionary-Index", RbMake_getCreateHkeyFile(rb_out));
    outputBoolOpt("Unjoin-Rb-Files", unjoin);
    outputBoolOpt("Verbose-Output", RbMake_getVerboseOutput(rb_out));
    putchar('\n');

    /* Note whether any rule file is "-" so its data can be appended. */
    if (MArray_itemCnt(substRuleUrls)) {
	while ((arg = MArray_fetchPtr(substRuleUrls)) != NULL) {
	    outputStrOpt("Substitution-Rule-File", arg, NULL);
	    if (strEQ(arg, "-"))
		appendStdin = true;
	}
    }
    else
	outputStrOpt("Substitution-Rule-File", NULL, NULL);

    if (appendStdin) {
	fputs("\n--- Substitution-Rule Data Follows ---\n", stdout);
	if (stdinBuf) {
	    /* Rule data already captured from an option file. */
	    fwrite(MBuf_dataPtrAt(stdinBuf, 0, NULL), 1,
		   MBuf_getLength(stdinBuf), stdout);
	}
	else {
	    /* Otherwise copy stdin through unchanged. */
	    while (fgets(buf, sizeof buf, stdin))
		fputs(buf, stdout);
	}
    }
}

/* Expand and post-process the value of one option-file line.
 *
 * "$N" and "${N}" are replaced by the N-th entry (counting from 1) of
 * argsForLoadOptions, or by nothing when that entry is missing; "$$" yields
 * a single '$'.  Trailing whitespace is then removed and, when optSubstList
 * is given and the text is non-empty, it is passed through Subst_runRules()
 * under the option's name.
 *
 * name          option name, handed to Subst_runRules().
 * str           raw value text; NULL gives NULL.
 * optSubstList  parsed substitution rules, or NULL for none.
 *
 * Returns the buffer obtained from MBuf_toBuffer() (callers in this file
 * release it with Mem_free()), or NULL when the result is empty.
 */
static char *
argExpandAndRewrite(const char *name, const char *str, MArray *optSubstList)
{
    MBuf *mb;
    int len;

    if (!str)
	return NULL;

    mb = MBuf_new(1024, 0);
    while (*str) {
	if (*str == '$' && *++str != '$') {
	    char *arg;
	    int i;
	    if (*str == '{') {
		i = atoi(str+1) - 1;
		while (*++str && *str != '}') {}
		/* Consume the closing brace as well; otherwise it would
		 * be copied to the output as a literal '}'. */
		if (*str == '}')
		    str++;
	    }
	    else {
		i = atoi(str) - 1;
		while (ISDIGIT(*str)) str++;
	    }
	    if ((arg = MArray_fetchPtrAt(argsForLoadOptions, i)) != NULL)
		MBuf_puts(mb, arg);
	    continue;
	}
	/* Ordinary character, or the second '$' of a "$$" pair. */
	MBuf_putc(mb, *str++);
    }

    /* Strip trailing whitespace. */
    len = MBuf_getLength(mb);
    if (len) {
	char *cp = MBuf_dataPtr(mb, NULL) + len - 1;
	while (len && ISSPACE(*cp)) cp--, len--;
	MBuf_truncate(mb, len);
    }
    if (optSubstList && len) {
	mb = Subst_runRules(optSubstList, name, mb);
	len = MBuf_getLength(mb);
    }
    if (!len) {
	MBuf_delete(mb);
	return NULL;
    }
    return MBuf_toBuffer(mb, NULL);
}

static char **
loadOptions(const char *url, int *cntp)
{
    MArray *argArray = MArray_new(16, 0);
    MArray *optSubstList = NULL;
    MBuf *mb = GrabUrl_read(url, NULL);
    char *line, *nl, *colon, *arg;
    bool sawSubstDash = false;

    if (!mb)
	RbError_exit("Unable to read option file: %s\n", url);

    for (line = MBuf_dataPtr(mb, NULL); line && *line; line = nl) {
	if (*line == '-')
	    break;
	nl = findNextLine(line, true);
	while (ISSPACE(*line)) line++;
	if (*line == '#' || *line == '\0')
	    continue;
	if (*line == '"' || *line == '/' || *line == 'm') {
	    if (!optSubstList)
		optSubstList = MArray_new(16, 0);
	    if (!nl[-1])
		nl[-1] = '\n';
	    else
		nl[-2] = '\r';
	    while (nl && *nl != '-')
		nl = findNextLine(nl, false);
	    if (!nl) {
		RbError_exit("Didn't find dash line following subst section in option file: %s\n",
			     url);
	    }
	    *nl = '\0';
	    arg = Subst_parseRules(optSubstList, line);
	    if (arg)
		RbError_exit("%s\n", arg);
	    nl = findNextLine(nl+1, false);
	    continue;
	}
	if (!(colon = strchr(line, ':')))
	    RbError_exit("Invalid syntax in option file %s:\n%s\n", url, line);
	*colon = '\0';
	for (arg = colon+1; ISSPACE(*arg); arg++) {}
	arg = argExpandAndRewrite(line, arg, optSubstList);
	if (strcaseEQ(line, "Accept-URLs-Matching")) {
	    if (arg) {
		MArray_appendPtr(allowedUrls, rbUrlUnescape(arg));
		Mem_free(arg);
	    }
	}
	else if (strcaseEQ(line, "Allow-Old-Style-Page-Breaks")) {
	    RbMake_setAllowHRSize0PageBreaks(rb_out, arg && strncaseEQ(arg,"y",1));
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "Auth-Info")) {
	    if (arg) {
		char *realm, *auth = NULL;
		if ((realm = strchr(arg, '|')) == NULL
		 || (auth = strchr(realm+1, '|')) == NULL)
		    RbError_exit("Invalid Auth-Info syntax\n");
		*realm++ = '\0';
		*auth++ = '\0';
		if (strchr(auth, ':') != NULL) {
		    char buf[384];
		    int len = strlen(auth);
		    if (RB_BASE64_LENGTH(len) >= sizeof buf)
			RbError_exit("Auth-Info has too long of a username + password.\n");
		    strcpy(buf, auth);
		    rbBase64Encode(buf, len);
		    GrabUrl_setAuthInfo(arg, realm, buf);
		}
		else
		    GrabUrl_setAuthInfo(arg, realm, auth);
		Mem_free(arg);
	    }
	}
	else if (strcaseEQ(line, "Auto-Accept-Input-File-Dirs")) {
	    noExtraMatchRules = arg && strncaseNE(arg, "y", 1);
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "Book-Filename")) {
	    rbName = arg;
	    if (rbName && *rbName == ':') {
		Mem_free(rbName);
		rbName = "";
	    }
	}
	else if (strcaseEQ(line, "Cover-Image")) {
	    if ((coverImage = arg) != NULL)
		RbMake_setCoverImage(rb_out, parseFileArg(coverImage));
	    else
		RbMake_setCoverImage(rb_out, NULL);
	}
	else if (strcaseEQ(line, "Enhance-Punctuation")) {
	    if (arg) {
		if (strncaseEQ(arg, "y", 1))
		    RbMake_setEnhancePunctuation(rb_out, RB_ENHANCE_ALL);
		else if (strncaseEQ(arg, "n", 1))
		    RbMake_setEnhancePunctuation(rb_out, 0);
		else {
		    int x = 0;
		    char *cp;
		    for (cp = arg; *cp; cp++) {
			switch (*cp) {
			  case '\'':
			    x |= RB_ENHANCE_SQUOTES;
			    break;
			  case '"':
			    x |= RB_ENHANCE_DQUOTES;
			    break;
			  case '-':
			    x |= RB_ENHANCE_EMDASHES;
			    break;
			  case '.':
			    x |= RB_ENHANCE_ELLIPSES;
			    break;
			}
		    }
		    RbMake_setEnhancePunctuation(rb_out, x);
		}
		Mem_free(arg);
	    }
	    else
		RbMake_setEnhancePunctuation(rb_out, 0);
	}
	else if (strcaseEQ(line, "Exclude-URLs-Matching")) {
	    if (arg) {
		MArray_appendPtr(excludedUrls, rbUrlUnescape(arg));
		Mem_free(arg);
	    }
	}
	else if (strcaseEQ(line, "Follow-Links")) {
	    if (!arg || strncaseEQ(arg, "n", 1))
		RbMake_setFollowLinks(rb_out, 0);
	    else if (ISDIGIT(*arg))
		RbMake_setFollowLinks(rb_out, atoi(arg));
	    else
		RbMake_setFollowLinks(rb_out, -1);
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "HTTP-Header")) {
	    if (!arg || GrabUrl_setHttpHeader(arg) < 0) {
		*colon = ':';
		RbError_exit("Invalid HTTP-Header option in file %s:\n%s\n",
			     url, line);
	    }
	    Mem_free(arg);
	}
	else if (strcaseEQ(line, "Image-Edge-Enhancement")
	      || strcaseEQ(line, "Image-Edge-Enhancment")) {
	    if (arg) {
		int e = *arg != '('? atoi(arg) : DEFAULT_EDGE_ENHANCE;
		char *cp = arg;
		if (e < 0 || e > 9)
		    RbError_exit("Image-Edge-Enhancement must be 0 - 9 or \"none\"\n");
		while (*cp && !ISSPACE(*cp) && *cp != '=' && *cp != '(') cp++;
		while (ISSPACE(*cp)) cp++;
		if (*cp == '=') {
		    do cp++; while (ISSPACE(*cp));
		    MArray_appendPtr(enhanceUrls, cp);
		    MArray_append(enhanceUrls, e);
		}
		else {
		    edgeEnhance = e;
		    Mem_free(arg);
		}
	    }
	    else
		edgeEnhance = DEFAULT_EDGE_ENHANCE;
	}
	else if (strcaseEQ(line, "Import-Info-From")) {
	    infoFile = arg;
	}
	else if (strcaseEQ(line, "Include-Audio")) {
	    if (arg) {
		int num;
		if ((num = RbMake_findIncludeOpt(arg, 1)) >= 0)
		    RbMake_setIncludeAudio(rb_out, num);
		else
		    RbMake_setIncludeAudio(rb_out, RB_INCLUDE_NO);
		Mem_free(arg);
	    }
	    else
		RbMake_setIncludeAudio(rb_out, RB_INCLUDE_NO);
	}
	else if (strcaseEQ(line, "Include-Images")) {
	    if (arg) {
		int num;
		if ((num = RbMake_findIncludeOpt(arg, 1)) >= 0)
		    RbMake_setIncludeImages(rb_out, num);
		else
		    RbMake_setIncludeImages(rb_out, RB_INCLUDE_NO);
		Mem_free(arg);
	    }
	    else
		RbMake_setIncludeImages(rb_out, RB_INCLUDE_NO);
	}
	else if (strcaseEQ(line, "Input-File")) {
	    if (arg)
		MArray_appendPtr(argArray, arg);
	}
	else if (strcaseEQ(line, "Input-Files-Default-To-Web-Pages")) {
	    webPageArgs = arg && strncaseEQ(arg, "y", 1);
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "Make-Dictionary-Index")) {
	    RbMake_setCreateHkeyFile(rb_out, arg && strncaseEQ(arg, "y", 1));
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "Menu-Item")) {
	    if (arg) {
		parseMenuArg(arg);
		Mem_free(arg);
	    }
	}
	else if (strcaseEQ(line, "Non-Interactive")) {
	    nonInteractive = arg && strncaseEQ(arg, "y", 1);
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "Page-Joining")
	      || strcaseEQ(line, "Page-Joining-Mode")) {
	    if (arg) {
		if (ISDIGIT(*arg))
		    RbMake_setPageJoining(rb_out, atoi(arg));
		else
		    RbMake_setPageJoining(rb_out, (*arg == 'a'));
		Mem_free(arg);
	    }
	    else
		RbMake_setPageJoining(rb_out, 0);
	}
	else if (strcaseEQ(line, "Prompt-For-Book-Info")
	      || strcaseEQ(line, "Prompt-For-Information")) {
	    promptForBookInfo = arg && strncaseEQ(arg, "y", 1);
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "Set-Info")) {
	    if (arg) {
		char *cp = strchr(arg, '=');
		if (!cp) {
		    *colon = ':';
		    RbError_exit("Invalid Set-Info option in file %s:\n%s\n",
				 url, line);
		}
		*cp = '\0';
		RbInfoHash_store(RbMake_getInfoHash(rb_out), arg, cp+1);
		Mem_free(arg);
	    }
	}
	else if (strcaseEQ(line, "Substitution-Rule-File")) {
	    if (arg) {
		MArray_appendPtr(substRuleUrls, arg);
		if (strEQ(arg, "-"))
		    sawSubstDash = true;
	    }
	}
	else if (strcaseEQ(line, "Text-Conversion")
	      || strcaseEQ(line, "Text-Conversion-Mode")) {
	    if (arg) {
		int num;
		if ((num = RbMake_findTextConvOpt(arg, 1)) >= 0)
		    RbMake_setTextConversion(rb_out, num);
		else if (*arg == '(')
		    RbMake_setTextConversion(rb_out, RB_TEXTCONV_PRE);
		else
		    RbError_exit("Unknown Text-Conversion type: %s\n", arg);
		Mem_free(arg);
	    }
	    else
		RbMake_setTextConversion(rb_out, RB_TEXTCONV_NONE);
	}
	else if (strcaseEQ(line, "Use-Book-Paragraphs")) {
	    if (!arg || strncaseEQ(arg, "n", 1))
		RbMake_setBookParagraphDepth(rb_out, 0);
	    else if (ISDIGIT(*arg))
		RbMake_setBookParagraphDepth(rb_out, atoi(arg));
	    else
		RbMake_setBookParagraphDepth(rb_out, 1);
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "Unjoin-Rb-Files")) {
	    unjoin = arg && strncaseEQ(arg, "y", 1);
	    if (arg)
		Mem_free(arg);
	}
	else if (strcaseEQ(line, "Verbose-Output")) {
	    RbMake_setVerboseOutput(rb_out, arg && strncaseEQ(arg, "y", 1));
	    if (arg)
		Mem_free(arg);
	}
	else {
	    *colon = ':';
	    RbError_exit("Unknown options line in file %s:\n%s\n", url, line);
	}
    }

    if (sawSubstDash) {
	if (line && *line == '-' && (nl = findNextLine(line,false)) != NULL) {
	    int pos = nl - MBuf_dataPtr(mb, NULL);
	    int len = MBuf_getLength(mb) - pos;
	    MBuf_memcpy(mb, 0, pos, len);
	    MBuf_truncate(mb, len);
	    stdinBuf = mb;
	}
	else {
	    readSubstFromStdin = true;
	    MBuf_delete(mb);
	}
    }
    else
	MBuf_delete(mb);

    MArray_truncate(argsForLoadOptions, 0);

    *cntp = MArray_itemCnt(argArray);
    return (char**)MBuf_toBuffer(argArray, NULL);
}

/* Locate the start of the line following the one that begins at bp.
 *
 * A line ends at "\n", "\r\n" or a lone "\r".
 *
 * bp             start of the current line (NUL-terminated buffer).
 * nullTerminate  when true, the first line-ending character is overwritten
 *                with '\0' so the current line becomes its own string.
 *
 * Returns a pointer just past the line ending, or NULL when the buffer
 * ends before any line ending is found.
 */
static char *
findNextLine(char *bp, bool nullTerminate)
{
    char *nl;

    for (nl = bp; *nl; nl++) {
	if (*nl == '\r') {
	    if (nullTerminate)
		*nl = '\0';
	    /* Treat "\r\n" as a single line ending. */
	    if (nl[1] == '\n')
		nl++;
	    return nl + 1;
	}
	if (*nl == '\n') {
	    if (nullTerminate)
		*nl = '\0';
	    return nl + 1;
	}
    }

    /* Hit the end of the buffer: there is no next line.  Callers test
     * for NULL, so it must be returned as-is rather than offset by one. */
    return NULL;
}

/* This function is a call-back from the HTML parser.
 *
 * Decides whether a URL referenced from page pg should be included.
 * The checks run in this order:
 *   1. a URL matching any excludedUrls pattern is refused;
 *   2. images (and audio) are accepted outright unless the matching
 *      include setting is RB_INCLUDE_MATCH;
 *   3. once maxPageCnt (if set) non-binary pages exist, the URL is refused;
 *   4. otherwise it is accepted only if it matches an allowedUrls pattern.
 * Patterns are compared against the unescaped form of the URL.
 *
 * pg   page the URL was found on.
 * url  the URL to check.
 * pt   its page type (RB_PAGETYPE_*).
 */
static bool
shouldAllowURL(RbPage *pg, const char *url, int pt)
{
    const char *wild;
    char *uurl = rbUrlUnescape(url);
    RbMake *rb = RbPage_getRbMake(pg);
    bool allow = false;

    MArray_setFetchPos(excludedUrls, 0);
    while ((wild = MArray_fetchPtr(excludedUrls)) != NULL) {
	if (Wild_EQ(wild, uurl))
	    goto done;
    }
    if ((pt == RB_PAGETYPE_IMAGE && RbMake_getIncludeImages(rb) != RB_INCLUDE_MATCH)
     || (pt == RB_PAGETYPE_AUDIO && RbMake_getIncludeAudio(rb) != RB_INCLUDE_MATCH)) {
	allow = true;
	goto done;
    }
    if (maxPageCnt
     && RbMake_getPageCount(rb) - RbMake_getBinaryCount(rb) >= maxPageCnt)
	goto done;
    MArray_setFetchPos(allowedUrls, 0);
    while ((wild = MArray_fetchPtr(allowedUrls)) != NULL) {
	if (Wild_EQ(wild, uurl)) {
	    allow = true;
	    break;
	}
    }

  done:
    /* Single exit so the unescaped copy is released on every path. */
    Mem_free(uurl);
    return allow;
}

/* This function is a call-back from the HTML parser.
 *
 * Forwards the request unchanged to RbFetch_getURL(): rb is the book, pg
 * and url are the page and URL passed in by the parser, pt is the page
 * type. */
static void
scheduleURL(RbMake *rb, RbPage *pg, const char *url, int pt)
{
    RbFetch_getURL(rb, pg, url, pt);
}

/* Prompt the user for a username and password and turn them into a
 * base64-encoded "user:password" string.
 *
 * Input is read from /dev/tty when it can be opened, otherwise from stdin;
 * the prompts go to stderr.  With HAVE_GETPASS the password is read through
 * getpass() instead.
 *
 * url    URL that needs authorization, or NULL to just produce the encoded
 *        string (the -P option).
 * realm  authorization realm shown in the prompt; only used when url is set.
 *
 * Returns NULL in non-interactive mode or when input fails.  With a url
 * the result of GrabUrl_setAuthInfo() is returned; without one, a
 * Mem_strdup()ed copy of the encoded string.  Exits via RbError_exit() if
 * the credentials are too long.
 */
static const char *
askForAuthInfo(const char *url, const char *realm)
{
    char buf[384], *cp;
    int len, ulen, plen;
#ifdef HAVE_GETPASS
    char *pass;
#else
    char pass[128];
#endif
    FILE *fp;
    bool gotInput;

    if (nonInteractive)
	return NULL;

    fp = fopen("/dev/tty", "r");
    if (!fp)
	fp = stdin;
    if (url) {
	fprintf(stderr, "Authorization required for realm `%s' to access:\n%s\n",
		realm, url);
    }
    fprintf(stderr, "\nUsername: ");
    /* Only a third of buf may hold the username, leaving room for the
     * password and the growth caused by the base64 encoding. */
    gotInput = fgets(buf, (sizeof buf) / 3, fp) != NULL;
    if (gotInput) {
	if ((cp = strchr(buf, '\n')) != NULL)
	    *cp = '\0';

#ifdef HAVE_GETPASS
	gotInput = (pass = getpass("Password: ")) != NULL;
#else
	fprintf(stderr, "Password: ");
	gotInput = fgets(pass, sizeof pass, fp) != NULL;
	if (gotInput && (cp = strchr(pass, '\n')) != NULL)
	    *cp = '\0';
#endif
    }

    /* All reading is done: release the terminal stream if we opened one,
     * so repeated prompts do not leak a FILE each time. */
    if (fp != stdin)
	fclose(fp);

    putchar('\n');
    if (!gotInput)
	return NULL;

    ulen = strlen(buf);
    plen = strlen(pass);
    len = ulen + 1 + plen;
    if (RB_BASE64_LENGTH(len) >= sizeof buf)
	RbError_exit("Username + password is too long.\n");
    sprintf(buf + ulen, ":%s", pass);
    /* Wipe the plain-text password now that it has been copied. */
    for (cp = pass + plen; cp != pass; ) *--cp = '\0';
    rbBase64Encode(buf, len);
    buf[RB_BASE64_LENGTH(len)] = '\0';

    return url? GrabUrl_setAuthInfo(url, realm, buf) : Mem_strdup(buf);
}

/* Call-back registered through RbMake_new(): finalize the book's info.
 *
 * - If the output name was not announced yet (rbFile is NULL), fetch the
 *   real file name, print it and recompute rbName from it.
 * - With -p, ask for a title and author; an empty answer keeps the value
 *   shown in brackets.  Answers are read from /dev/tty when it can be
 *   opened, otherwise from stdin.  Without -p the title and author are
 *   just printed.
 * - With -W, write the info data to "<rbName>.info".
 *
 * pg        page used to look up the owning RbMake object.
 * infoHash  the book's info data; updated with any answers given.
 */
static void
finishInfoPage(RbPage *pg, RbInfoHash *infoHash)
{
    char *cp, buf[1024];
    RbMake *rb = RbPage_getRbMake(pg);
    char *title = RbInfoHash_asciiFetch(infoHash, "TITLE");
    char *author = RbInfoHash_asciiFetch(infoHash, "AUTHOR");

    if (!rbFile) {
	rbFile = RbMake_getFileName(rb);
	printf("Creating %s:\n", rbFile);
	Mem_free(rbName);
	rbName = Mem_strdup(rbFile);
	/* strlen(rbName+3) == strlen(rbName)-3: drop the last 3 chars. */
	rbName[strlen(rbName+3)] = '\0';
    }
    if (promptForBookInfo) {
	FILE *fp = fopen("/dev/tty", "r");
	if (!fp)
	    fp = stdin;
	/* Read the answers from fp, not stdin: stdin may already have
	 * been used up for info data or substitution rules. */
	fprintf(stderr, "\nTitle?  [%s] ", title);
	if (fgets(buf, sizeof buf, fp) && *buf != '\n') {
	    if ((cp = strchr(buf, '\n')) != NULL)
		*cp = '\0';
	    RbInfoHash_store(infoHash, "TITLE", buf);
	}
	fprintf(stderr, "Author?  [%s] ", author);
	if (fgets(buf, sizeof buf, fp) && *buf != '\n') {
	    if ((cp = strchr(buf, '\n')) != NULL)
		*cp = '\0';
	    RbInfoHash_store(infoHash, "AUTHOR", buf);
	}
	if (fp != stdin)
	    fclose(fp);
	putchar('\n');
    }
    else {
	printf("Title: %s\n", title);
	printf("Author: %s\n", author);
    }

    if (outputInfoFile) {
	MBuf *mb = RbInfoHash_toMBuf(infoHash);
	MBuf *fn = MBuf_new(128, 0);
	FILE *fp;
	MBuf_vwrite(fn, rbName,-1, ".info",5, NULL);
	if ((fp = fopen(MBuf_dataPtr(fn, NULL), "w")) != NULL) {
	    /* Write the buffer out piece by piece until MBuf_dataPtr()
	     * reports that nothing is left. */
	    while (1) {
		int cnt = MBuf_getLength(mb);
		char *bp = MBuf_dataPtr(mb, &cnt);
		if (bp == NULL)
		    break;
		fwrite(bp, 1, cnt, fp);
	    }
	    fclose(fp);
	}
	MBuf_delete(fn);
	MBuf_delete(mb);
    }
    Mem_free(title);
    Mem_free(author);
}

/* This function is a call-back from the page-fetching library routines.
 *
 * Prints one progress line to stdout:
 *   - an error line when errstr is set;
 *   - "Wrote N pages as NAME" when joinedCnt is positive;
 *   - otherwise "Wrote NAME (i of n)" for joinedCnt == 0 and
 *     "Got NAME (i of n)" for a negative joinedCnt.
 */
static void
fetchStatus(RbMake *rb, const char *tocname, const char *url,
	    const char *errstr, int joinedCnt)
{
    const char *verb;

    if (errstr != NULL) {
	printf("ERROR: %s: %s\n", errstr, rbStripCwd(url));
	return;
    }
    if (joinedCnt > 0) {
	printf("Wrote %d pages as %s\n", joinedCnt, tocname);
	return;
    }

    verb = (joinedCnt == 0) ? "Wrote" : "Got";
    printf("%s %s (%d of %d)\n", verb,
	tocname, RbMake_getDoneCount(rb) + 1, RbMake_getPageCount(rb));
}

/* Apply gray-scale edge enhancement to img.
 *
 * The enhanceUrls array is read as (wildcard pointer, enhancement value)
 * pairs.  The first wildcard that matches the unescaped form of url selects
 * the value passed to RbImage_enhanceGray(); when nothing matches, the
 * global edgeEnhance setting is used.
 */
static void
manipulateImage(RbImage *img, const char *url)
{
    char *uurl = rbUrlUnescape(url);
    int level = edgeEnhance;

    if (MArray_itemCnt(enhanceUrls)) {
	char *wild;
	MArray_setFetchPos(enhanceUrls, 0);
	while ((wild = MArray_fetchPtr(enhanceUrls)) != NULL) {
	    /* Always consume the paired value so the fetch position stays
	     * aligned on the next wildcard. */
	    int e = MArray_fetch(enhanceUrls);
	    /* Compare with the unescaped URL; that is why it was built. */
	    if (Wild_EQ(wild, uurl)) {
		level = e;
		break;
	    }
	}
    }
    Mem_free(uurl);
    RbImage_enhanceGray(img, level);
}

/* Return the suffix that pluralizes a noun counted by val: an empty string
 * when val is exactly 1, otherwise "s". */
static const char *
maybePlural(unsigned int val)
{
    if (val != 1)
	return "s";
    return "";
}

/* Fatal-error reporter: if an output RbMake exists and reports a new file
 * name, unlink that file (presumably the partially written output --
 * confirm), then format msg with ap onto stderr.  This function does not
 * terminate the program itself. */
static void
myFatalError(const char *msg, va_list ap)
{
    if (rb_out != NULL) {
	const char *newFile = RbMake_getNewFileName(rb_out);

	if (newFile != NULL)
	    unlink(newFile);
    }

    vfprintf(stderr, msg, ap);
}

/* Warning reporter: format msg with the arguments in ap and write the
 * result to stderr.  Nothing else is done. */
static void
myWarning(const char *msg, va_list ap)
{
    FILE *out = stderr;

    vfprintf(out, msg, ap);
}

/* Print the command-line help text on stdout and exit with status 0.
 * This function does not return.
 *
 * NOTE(review): the getopt string near the top of the file also accepts
 * -d, which is not described in this text -- confirm whether that is
 * intentional.
 */
static void
usage(void)
{
    printf("\
Usage: rbmake [-OPTIONS] ARG [...]\n\
\n\
-a* Specify the book's author.  See the -t option for an example.\n\
-b  Book-style paragraphs (instead of web-style).\n\
-c* Cover image: the specified image will be the first thing placed in\n\
    the first page of HTML.  This can be used even without specifying -i.\n\
-D  Dump an option file to stdout and quit without doing any other work.\n\
-e  Enhance the punctuation using improved quotes and dashes.\n\
-E# Edge-enhance images: none or 1 - 9 (9 = lots, default is 7).\n\
-f* Follow links in the HTML documents to find new ones to include.  If * is\n\
    a number, we follow links up to N links away from a user-specified page.\n\
    Otherwise specify \"yes\" for unlimited depth, or '0' for no following.\n\
-g* Add a menu-item to the ReB's \"Go To\" menu.\n\
-h  Print this help message.\n\
-i  Include images and audio (.wav) files.\n\
-I* Specify a .info file to use as the basis for creating the new info page.\n\
    If you specify '-', rbmake will read from stdin.  If you specify '.', the\n\
    first ARG will be used to find the info information.  You can specify the\n\
    name of a .rb file here if you want the .info page read from there.\n\
-j  Join all the pages into a single, unified HTML page.\n\
-J# Join every # pages together into a unified HTML page.\n\
-k  Generate a .hkey (dictionary index) for the root .html page.\n\
-l* Load the indicated option file.  See -D for how to easily create one.\n\
-L* Specify an arg for an option file.  E.g.: rbmake -L arg1 -L arg2 -l foo.opt\n\
-m* Add the URL prefix to the match-list that determines which links we\n\
    follow.  May be repeated as many times as needed.  Implies -f.\n\
-M* This works just like -m, plus it indicates that the -m/-M options are the\n\
    only match-items that should be used (normally each ARG item adds its\n\
    path to the match-list).\n\
-n  Non-interactive mode (e.g. avoid username/password prompting).\n\
-N# Specify the maximum # of pages that we should fetch.\n\
-o* Specify the output name of the .rb file (default: the URL-name).\n\
    If you specify '.', the name portion of the first ARG will be used.\n\
-O  The output name for the .rb file will be the name from the -I option.\n\
-p  Prompt for the title and author information (not affected by -n).\n\
-P  Prompt for a username and password and output them Base-64 encoded.\n\
-R  Rewrite a .rb file (which must be the first argument).  Using \"-R foo.rb\"\n\
    is short for \"-OI foo.rb foo.rb\".  Note: images are stripped without -i.\n\
-s* Read text-substitution rules from the indicated file/url (\"-\" = stdin).\n\
-t* Specify the book's title.  You'll probably need to quote the string.  For\n\
    example:  rbmake -pib -t 'War and Peace' -a 'Tolstoy, Leo' wp.html\n\
-T* Specify the .txt translation mode.  p=preformatted (default), s=simple\n\
    paragraph translation (i.e. 2 newlines indicate a new paragraph).\n\
-u  Unjoin the pages from inside an rbmake-joined .rb file.\n\
-U* Specify the value of the (.info) URL-name (default: a unique value).\n\
-v  Output verbosely about problems found in the HTML.\n\
-V  Output the version of rbmake.\n\
-w  ARG names default to web pages (http://) rather than files (file:$CWD).\n\
-W  Write the created .info page using the .rb file's name & a .info suffix.\n\
-x* Exclude the matching URLs from being included (both links and images).\n\
    May be repeated as many times as needed.\n\
-z  Allow <HR SIZE=0> to specify a page break (in addition to <HR NEW-PAGE>).\n\
\n\
Note that ARG can be an existing .rb file, in which case its contents are\n\
included in the .rb file being created.\n\
");
    exit(0);
}
