/* rbfetch.c
 *
 * These routines provide a simple mechanism for reading and/or fetching
 * files for inclusion in a .rb file.  The code can either use the nano
 * HTTP accessors from libxml or the fetching support from libwww.
 * Someone wishing to add other access methods could easily supersede
 * these routines and directly build the RbPage objects.
 *
 * This file uses some user-supplied call-back routines (see rbfetch.h).
 */
/* This software is copyrighted as detailed in the LICENSE file. */

#include <config.h>
#include <rbmake/rbmake.h>
#include <ctype.h>
#ifdef USE_LIBWWW
#include <WWWLib.h>
#include <WWWInit.h>
#include <HTAncMan.h>
#include <HTStream.h>
#else
#include <libxml/nanohttp.h>
#endif
#include "rbmake.h"
#include "rbfile.h"
#include "rbpage.h"
#include "mbuf.h"

/* Queues of pages waiting to be fetched by loopUntilDone(), which drains
 * textItems completely before it touches binaryItems.  Both arrays are
 * created by RbFetch_init() and deleted by RbFetch_cleanup().  When built
 * with USE_LIBWWW, text pages are requested immediately by RbFetch_getURL()
 * instead of being queued in textItems. */
static MArray *textItems, *binaryItems;
/* Caller-supplied callback that finishFetch() invokes to report the outcome
 * of each page; stored by RbFetch_init(). */
static RbFetchStatusFunc fetchStatus;

/* Forward declarations of the file-local helpers defined further down. */
static void getURL(RbPage *pg);
static void loopUntilDone(void);
static void finishFetch(RbPage *pg, const char *errstr);
#ifdef USE_LIBWWW
/* libwww-only pieces: the after-filter registered in RbFetch_init() and the
 * constructor for the stream that feeds received data into an RbPage. */
static int finishRequest(HTRequest *req, HTResponse *response, void *param, int status);
static HTStream *PageStream_new(RbPage *pageObj);
#endif

/* Prepare the fetching layer and remember statFunc as the status callback.
 * The page queues and the HTTP library are set up only on the first call
 * (i.e. while textItems is still NULL); any later call merely replaces the
 * stored callback. */
void
RbFetch_init(RbFetchStatusFunc statFunc)
{
    fetchStatus = statFunc;

    /* Already initialized: nothing more to do. */
    if (textItems)
	return;

    textItems = MArray_new(32, 4096);
    binaryItems = MArray_new(32, 4096);

#ifndef USE_LIBWWW
    xmlNanoHTTPInit();
#else
    HTEventInit();
    HTProfile_newNoCacheClient("rbmake", "1.0");
    /* finishRequest() runs as the last after-filter of every request. */
    HTNet_addAfter(finishRequest, NULL, NULL, HT_ALL, HT_FILTER_LAST);
    HTAlert_setInteractive(NO);
    HTHost_setEventTimeout(15000); /* 15 seconds */
#endif
}

/* Undo RbFetch_init(): delete both page queues and shut down the HTTP
 * library.  Does nothing if the module is not currently initialized
 * (textItems is NULL), so calling it twice is harmless. */
void
RbFetch_cleanup()
{
    if (textItems) {
	MArray_delete(textItems);
	MArray_delete(binaryItems);
	/* Clear both pointers so neither one is left dangling; textItems
	 * doubles as the "initialized" flag tested by RbFetch_init(). */
	textItems = NULL;
	binaryItems = NULL;

#ifdef USE_LIBWWW
	HTEventTerminate();
	HTProfile_delete();
#else
	xmlNanoHTTPCleanup();
#endif
    }
}

/* Drive the fetching: with libwww this enters its event loop, otherwise the
 * queued pages are fetched by loopUntilDone().  Afterwards the .rb file
 * cached by RbFetch_cachedOpenUrl() is closed by passing it a NULL url. */
void
RbFetch_loop()
{
#ifndef USE_LIBWWW
    loopUntilDone();
#else
    HTEventList_newLoop();
#endif

    RbFetch_cachedOpenUrl(NULL, 0);
}

/* Fetch every queued page: first everything in textItems (re-checking the
 * queue after each fetch), then everything in binaryItems.  Each queue is
 * considered empty once MArray_fetchPtr() returns NULL. */
static void
loopUntilDone(void)
{
    RbPage *next;

    for (next = MArray_fetchPtr(textItems); next != NULL;
	 next = MArray_fetchPtr(textItems)) {
	getURL(next);
    }

    for (next = MArray_fetchPtr(binaryItems); next != NULL;
	 next = MArray_fetchPtr(binaryItems)) {
	getURL(next);
    }
}

/* Schedule the fetching of url on behalf of fromPage.
 *
 * A pt of RB_PAGETYPE_UNKNOWN is first resolved with rbUrlToPageType().
 * A .rb file is expanded at once via RbFetch_getRbContents().  For anything
 * else an RbPage is created (nothing happens if RbPage_new() returns NULL)
 * and then, depending on the page's type:
 *   - image, cover-image and audio pages are queued in binaryItems;
 *   - html/text style pages are queued in textItems, or requested right
 *     away when built with USE_LIBWWW;
 *   - pages of any other type are dropped. */
void
RbFetch_getURL(RbMake *rb, RbPage *fromPage, const char *url, int pt)
{
    RbPage *page;
    int pageType = pt;

    if (pageType == RB_PAGETYPE_UNKNOWN)
	pageType = rbUrlToPageType(url);

    if (pageType == RB_PAGETYPE_RB) {
	RbFetch_getRbContents(rb, url, false);
	return;
    }

    page = RbPage_new(rb, fromPage, url, pageType);
    if (page == NULL)
	return;

    if (page->type == RB_PAGETYPE_IMAGE
     || page->type == RB_PAGETYPE_COVER_IMAGE
     || page->type == RB_PAGETYPE_AUDIO) {
	MArray_appendPtr(binaryItems, page);
    }
    else if (page->type == RB_PAGETYPE_HTML
	  || page->type == RB_PAGETYPE_TEXT
	  || page->type == RB_PAGETYPE_MAYBE_HTML
	  || page->type == RB_PAGETYPE_RAW_TEXT) {
#ifdef USE_LIBWWW
	getURL(page);
#else
	MArray_appendPtr(textItems, page);
#endif
    }
    else
	RbPage_drop(page);
}

/* Fetch the content of one page and report the result via finishFetch().
 *
 * With libwww the request is only started here; the data arrives through
 * the PageStream and finishRequest() completes the page.
 *
 * Without libwww the fetch is synchronous:
 *   - if rbGetUrlFn() yields a file name, the file is read in chunks;
 *   - otherwise the url is fetched with libxml's nano HTTP client, retrying
 *     for as long as GrabUrl_askForAuthInfo() returns true after a non-2xx
 *     response.
 * A page of type RB_PAGETYPE_MAYBE_HTML is retyped from the Content-Type
 * header: "text/html..." becomes HTML, any other "text" or "text/..."
 * becomes TEXT.
 *
 * Every exit path calls finishFetch() exactly once, with an error string on
 * failure and NULL on success. */
static void
getURL(RbPage *pg)
{
#ifdef USE_LIBWWW
    HTRequest *req = HTRequest_new();
    HTRequest_setContext(req, pg);
    HTRequest_setOutputFormat(req, WWW_SOURCE);
    HTLoadToStream(pg->url, PageStream_new(pg), req);
#else
    const char *err, *fn;
    char buf[2048];
    int cnt;

    if ((fn = rbGetUrlFn(pg->url)) != NULL) {
	FILE *fp = fopen(fn, "rb");
	Mem_free((void*)fn);
	if (!fp) {
	    finishFetch(pg, "unable to open file");
	    return;
	}
	while ((cnt = fread(buf, 1, sizeof buf, fp)) > 0) {
	    if ((err = RbPage_appendContent(pg, buf, cnt)) != NULL) {
		fclose(fp);
		finishFetch(pg, err);
		return;
	    }
	}
	/* fread() returns 0 for a read error as well as for EOF; do not
	 * pass a truncated file off as a complete page. */
	if (ferror(fp)) {
	    fclose(fp);
	    finishFetch(pg, "error reading file");
	    return;
	}
	RbPage_finishContent(pg);
	fclose(fp);
    }
    else {
	char *contentType = NULL;
	void *ctxt;
	int code, retry;

	GrabUrl_guessAuthHeader(pg->url);

      try_http:
	ctxt = xmlNanoHTTPMethod(pg->url, NULL, NULL, &contentType,
				 GrabUrl_getHttpHeaders(), 0);
	if (!ctxt) {
	    if (contentType)
		xmlFree(contentType);
	    finishFetch(pg, "unable to fetch web page");
	    return;
	}
	code = xmlNanoHTTPReturnCode(ctxt);
	if (code / 100 != 2) {
	    if (contentType) {
		xmlFree(contentType);
		contentType = NULL;
	    }
	    /* The context is still needed while asking for credentials,
	     * but it must be closed afterwards on both the retry and the
	     * give-up path or the connection is leaked. */
	    retry = GrabUrl_askForAuthInfo(pg->url, ctxt) ? 1 : 0;
	    xmlNanoHTTPClose(ctxt);
	    if (retry)
		goto try_http;
	    switch (code) {
	      case 401:
		finishFetch(pg, "no authorization");
		break;
	      case 407:
		finishFetch(pg, "no proxy authorization");
		break;
	      default:
		finishFetch(pg, "fetch failed");
		break;
	    }
	    return;
	}
	if (pg->type == RB_PAGETYPE_MAYBE_HTML) {
	    if (contentType && strncaseEQ(contentType, "text", 4)
	     && (contentType[4] == '/' || !contentType[4])) {
		if (contentType[4] == '/'
		 && strncaseEQ(contentType+5, "html", 4))
		    RbPage_changeType(pg, RB_PAGETYPE_HTML);
		else
		    RbPage_changeType(pg, RB_PAGETYPE_TEXT);
	    }
	}
	if (contentType)
	    xmlFree(contentType);
	while ((cnt = xmlNanoHTTPRead(ctxt, buf, sizeof buf)) > 0) {
	    if ((err = RbPage_appendContent(pg, buf, cnt)) != NULL) {
		xmlNanoHTTPClose(ctxt);
		finishFetch(pg, err);
		return;
	    }
	}
	RbPage_finishContent(pg);
	xmlNanoHTTPClose(ctxt);
    }
    finishFetch(pg, NULL);
#endif
}

/* Open the .rb file named by url, keeping a one-entry cache of the most
 * recently opened file.
 *
 * - A NULL url closes the cached file (if any) and returns NULL.
 * - If the cached file has the same file name and was opened with the same
 *   openFlags, it is returned as is.
 * - Otherwise any cached file is closed and the requested one is opened;
 *   on failure a warning is issued and NULL is returned.
 *
 * Only urls that rbGetUrlFn() can map to a local file name are supported;
 * anything else is reported through RbError_exit(). */
RbFile *
RbFetch_cachedOpenUrl(const char *url, int openFlags)
{
    static RbFile *cachedFile;
    static int cachedFlags;
    const char *localName;

    if (url == NULL) {
	if (cachedFile != NULL) {
	    RbFile_close(cachedFile);
	    cachedFile = NULL;
	}
	return NULL;
    }

    localName = rbGetUrlFn(url);
    if (localName == NULL) {
	RbError_exit("Unable to open %s:\n"
		     "RbFetch_cachedOpenUrl() only supports local "
		     ".rb files at present.\n", url);
    }

    if (cachedFile != NULL) {
	if (openFlags == cachedFlags
	 && strEQ(localName, cachedFile->fileName)) {
	    /* Cache hit: the file name copy is no longer needed. */
	    Mem_free((void*)localName);
	    return cachedFile;
	}
	RbFile_close(cachedFile);
    }

    cachedFlags = openFlags;
    cachedFile = RbFile_open(localName, openFlags);
    if (cachedFile == NULL)
	RbError_warn("RbFetch_cachedOpenUrl() failed to open: %s\n", url);
    Mem_free((void*)localName);

    return cachedFile;
}

/* Register the names of all pages contained in the .rb file at url.
 *
 * The file is opened through RbFetch_cachedOpenUrl() with flags derived
 * from rb->includeImages, rb->includeAudio and unjoin.  When mergeInfo is
 * set, the file's info is merged into rb->infoHash.  Every table-of-contents
 * entry that is not an info page is then passed to RbMake_addPageName()
 * under the name "rb://<url without scheme and leading slashes>/<toc name>";
 * the walk stops early as soon as RbMake_addPageName() returns false.
 *
 * Returns -1 if the file could not be opened, 0 otherwise. */
int
RbFetch_prepareForRbContents(RbMake *rb, const char *url, bool unjoin,
			     bool mergeInfo)
{
    int openFlags = 0;
    RbFile *rbf;
    MBuf *nameBuf;
    ToC *toc;

    if (rb->includeImages)
	openFlags |= RB_OPENFLAG_INCLUDE_IMAGES;
    if (rb->includeAudio)
	openFlags |= RB_OPENFLAG_INCLUDE_AUDIO;
    if (unjoin)
	openFlags |= RB_OPENFLAG_UNJOIN;

    rbf = RbFetch_cachedOpenUrl(url, openFlags);
    if (rbf == NULL)
	return -1;

    if (mergeInfo)
	RbInfoHash_mergeFromRbFile(rb->infoHash, rbf);

    nameBuf = MBuf_new(64, 0);
    for (toc = rbf->tocHead; toc != NULL; toc = toc->next) {
	const char *rest;

	if (toc->flags & RB_TOCFLAG_INFOPAGE)
	    continue;

	/* Step past the ':' (or the first character when there is no
	 * colon) and then past any slashes that follow it. */
	rest = strchr(url, ':');
	if (rest == NULL)
	    rest = url;
	do {
	    rest++;
	} while (*rest == '/');

	MBuf_truncate(nameBuf, 0);
	MBuf_vwrite(nameBuf, "rb://",5, rest,-1, "/",1, toc->name,-1, NULL);
	if (!RbMake_addPageName(rb, MBuf_dataPtr(nameBuf, NULL), toc->type))
	    break;
    }
    MBuf_delete(nameBuf);

    return 0;
}

/* Read every non-info page out of the .rb file at url and turn each one
 * into an RbPage via RbFetch_getRbPage().
 *
 * The file is opened through RbFetch_cachedOpenUrl() with flags derived
 * from rb->includeImages, rb->includeAudio and unjoin.  Each page is given
 * the url "rb://<url without scheme and leading slashes>/<toc name>".
 * If the file cannot be opened nothing is fetched. */
void
RbFetch_getRbContents(RbMake *rb, const char *url, bool unjoin)
{
    RbFile *rbf = RbFetch_cachedOpenUrl(url,
			      (rb->includeImages? RB_OPENFLAG_INCLUDE_IMAGES : 0)
			    | (rb->includeAudio? RB_OPENFLAG_INCLUDE_AUDIO : 0)
			    | (unjoin? RB_OPENFLAG_UNJOIN : 0));
    const char *cp;
    MBuf *tmpBuf;
    ToC *toc;

    /* RbFetch_cachedOpenUrl() has already warned about an open failure;
     * just avoid dereferencing the NULL file. */
    if (!rbf)
	return;

    tmpBuf = MBuf_new(64, 0);
    for (toc = rbf->tocHead; toc; toc = toc->next) {
	if (toc->flags & RB_TOCFLAG_INFOPAGE)
	    continue;
	/* Step past the ':' (or the first character when there is no
	 * colon) and then past any slashes that follow it. */
	if ((cp = strchr(url, ':')) == NULL)
	    cp = url;
	while (*++cp == '/') {}
	MBuf_truncate(tmpBuf, 0);
	MBuf_vwrite(tmpBuf, "rb://",5, cp,-1, "/",1, toc->name,-1, NULL);
	RbFetch_getRbPage(rb, rbf, toc, MBuf_dataPtr(tmpBuf, NULL));
    }
    /* The scratch buffer is reused for every page name above, so nothing
     * can still be pointing into it; release it. */
    MBuf_delete(tmpBuf);
}

void
RbFetch_getRbPage(RbMake *rb, RbFile *rbf, ToC *toc, const char *url)
{
    RbPage *pg = RbPage_new(rb, NULL, url, toc->type);

    pg->charEncoding = XML_CHAR_ENCODING_8859_1;
    pg->tocFlags = (pg->tocFlags & ~0xFF) | toc->flags
		 | (rbf->tocUnjoin? RB_TOCFLAG_DEMANGLE : 0)
		 | RB_TOCFLAG_HR_SIZE_0_OK;
    pg->convertImage = false;
    RbFile_readPage(rbf, toc, pg,
		    (void (*)(void*,const char*,int))RbPage_appendContent);
    RbPage_finishContent(pg);
    finishFetch(pg, NULL);
}

/* Complete the processing of one page and report it via fetchStatus.
 *
 * If errstr is NULL the page is written with RbPage_write(), whose result
 * becomes the error string (NULL meaning success).  A page that ends with
 * an error is dropped.
 *
 * For a page that is part of a join group (it has a tag tree and a
 * non-zero joinOrd) the part is reported as "<text after '#'>.html" with a
 * count of -1; once the group has nothing left to do (jg->todo == 0) the
 * joined page is reported under the name before the '#' together with
 * jg->pageCount and rb->doneCnt is bumped, otherwise rb->joinCnt is.
 * Any other page is reported under its toc name and bumps rb->doneCnt.
 *
 * With libwww this also fetches the queued items once all html pages are
 * done, and stops the event loop once everything is done. */
static void
finishFetch(RbPage *pg, const char *errstr)
{
    RbMake *rb = pg->rb;
    /* Work on private copies of the names rather than on pg's own strings.
     * NOTE(review): presumably because pg may no longer be usable after
     * RbPage_write() -- confirm against rbpage.c. */
    char *tn = Mem_strdup(pg->tocName);
    char *url = Mem_strdup(pg->url);
    int joinOrd = pg->tagTreeRoot? pg->joinOrd : 0;
    JoinGroup *jg = NULL;

    if (joinOrd)
	jg = pg->joinGroup;

    if (!errstr)
	errstr = RbPage_write(pg);

    if (joinOrd) {
	/* NOTE(review): assumes the toc name of a joined part always
	 * contains a '#'; strchr() returning NULL is not handled here. */
	char *cp = strchr(tn, '#');
	/* Room for the part name, ".html" (5 chars) and the NUL. */
	char *pn = Mem_alloc(strlen(cp+1) + 5 + 1);
	sprintf(pn, "%s.html", cp+1);
	fetchStatus(rb, pn, url, errstr, -1);
	if (errstr)
	    RbPage_drop(pg);
	if (jg->todo == 0) {
	    rb->doneCnt++;
	    /* Temporarily cut the name at the '#' for the report. */
	    *cp = '\0';
	    fetchStatus(rb, tn, NULL, NULL, jg->pageCount);
	    *cp = '#';
	}
	else
	    rb->joinCnt++;
	Mem_free(pn);
    }
    else {
	fetchStatus(rb, tn, url, errstr, 0);
	if (errstr)
	    RbPage_drop(pg);
	rb->doneCnt++;
    }

    /* The copies are not referenced past this point; without these two
     * calls every processed page leaks both strings. */
    Mem_free(tn);
    Mem_free(url);

#ifdef USE_LIBWWW
    if (rb->htmlCnt == rb->doneCnt)
	loopUntilDone();

    if (rb->htmlCnt + rb->binaryCnt == rb->doneCnt)
	HTEventList_stopLoop();
#endif
}

#ifdef USE_LIBWWW
/* libwww after-filter (registered in RbFetch_init()) that runs when a
 * request has ended.  Retrieves the RbPage stored as the request's context,
 * settles the type of a RB_PAGETYPE_MAYBE_HTML page from the response
 * format when the load succeeded, deletes the request and passes the page
 * on to finishFetch() -- with an error string unless status is HT_LOADED.
 * Always returns 1. */
static int
finishRequest(HTRequest *req, HTResponse *response, void *param, int status)
{
    RbPage *pg = (RbPage*)HTRequest_context(req);
    const char *err = NULL;

    if (status == HT_LOADED) {
	if (pg->type == RB_PAGETYPE_MAYBE_HTML) {
	    HTFormat f = HTResponse_format(response);
	    if (f == WWW_HTML)
		RbPage_changeType(pg, RB_PAGETYPE_HTML);
	    else if (f == WWW_PLAINTEXT)
		RbPage_changeType(pg, RB_PAGETYPE_TEXT);
	}
    }
    else
	err = "unable to fetch file";

    /* Delete the request only after the response has been inspected.
     * NOTE(review): libwww appears to release the HTResponse together with
     * its HTRequest, which would make an earlier delete a use-after-free
     * -- confirm against HTReqMan.  It still happens before finishFetch(),
     * which may start new requests or stop the event loop. */
    HTRequest_delete(req);

    finishFetch(pg, err);

    return 1;
}

/* Take data from a stream and give it to the RbPage.
 *
 * A PageStream is handed to libwww cast to HTStream* (see PageStream_new())
 * and cast back in each of the PageStream_*() methods below.
 * NOTE(review): this presumably relies on libwww only ever looking at the
 * leading isa pointer of a stream object -- confirm against HTStream.h. */

typedef struct {
    const HTStreamClass *isa;	/* method table; set to &PageStreamClass */
    RbPage *page;		/* page that receives the incoming data */
} PageStream;

/* Stream method: append a block of len bytes at bp to the page.
 * Returns HT_OK, or HT_ERROR if RbPage_appendContent() reports an error
 * (a non-NULL result), so that a failed append ends the transfer instead
 * of being silently ignored -- the non-libwww fetch path likewise stops on
 * such an error. */
static int
PageStream_putblk(HTStream *s, const char *bp, int len)
{
    PageStream *me = (PageStream*)s;

    if (RbPage_appendContent(me->page, bp, len) != NULL)
	return HT_ERROR;
    return HT_OK;
}

/* Stream method: append the single character ch to the page, expressed as
 * a one-byte block passed to PageStream_putblk(). */
static int
PageStream_putch(HTStream *s, char ch)
{
    return PageStream_putblk(s, &ch, 1);
}

/* Stream method: append the NUL-terminated string str (without its
 * terminator) to the page, expressed as a block of strlen(str) bytes passed
 * to PageStream_putblk(). */
static int
PageStream_putstr(HTStream *s, const char *str)
{
    return PageStream_putblk(s, str, strlen(str));
}

/* Stream method: a flush tells the page that its content is finished.
 * Always returns HT_OK. */
static int
PageStream_flush(HTStream *s)
{
    RbPage_finishContent(((PageStream*)s)->page);
    return HT_OK;
}

/* Stream method: finish the page's content and release the stream object.
 * The object was obtained with HT_CALLOC() in PageStream_new(), so it is
 * released with the matching HT_FREE() rather than with the project's own
 * Mem_free().  Always returns HT_OK. */
static int
PageStream_free(HTStream *s)
{
    PageStream *me = (PageStream*)s;

    RbPage_finishContent(me->page);
    HT_FREE(me);
    return HT_OK;
}

/* Stream method: an aborted transfer is cleaned up exactly like a finished
 * one (see PageStream_free()), but reported as HT_ERROR.  The error list e
 * is not used. */
static int
PageStream_abort(HTStream *me, HTList *e)
{
    (void)e;
    (void)PageStream_free(me);
    return HT_ERROR;
}

/* Method table shared by all PageStream objects (installed as their isa
 * pointer by PageStream_new()).  The initializers are positional, so their
 * order has to match the member order of libwww's HTStreamClass. */
static const HTStreamClass PageStreamClass = {		
    "PageStream",		/* name of the stream class */
    PageStream_flush,
    PageStream_free,
    PageStream_abort,
    PageStream_putch,		/* single character */
    PageStream_putstr,		/* NUL-terminated string */
    PageStream_putblk		/* block of bytes with explicit length */
}; 

/* Create a stream object that delivers everything it receives to pageObj.
 * The object is zero-allocated with HT_CALLOC(); HT_OUTOFMEM() is invoked
 * if the allocation fails. */
static HTStream *
PageStream_new(RbPage *pageObj)
{
    PageStream *stream = (PageStream*)HT_CALLOC(1, sizeof *stream);

    if (stream == NULL)
	HT_OUTOFMEM("newPageStream");

    stream->page = pageObj;
    stream->isa = &PageStreamClass;

    return (HTStream*)stream;
}

#endif /* USE_LIBWWW */
