/*	$Id: nroff_smartypants.c,v 1.15 2017/09/10 00:26:01 kristaps Exp $ */
/*
 * Copyright (c) 2008, Natacha Porté
 * Copyright (c) 2011, Vicent Martí
 * Copyright (c) 2014, Xavier Mendez, Devin Torres and the Hoedown authors
 * Copyright (c) 2016, Kristaps Dzonsons
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#include <sys/queue.h>

#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lowdown.h"
#include "extern.h"

/*
 * Remember which kind of quote we're currently within: whether we've
 * already printed an "opening" single or double quote.
 * Each flag is toggled by sm_quotes() every time it emits a quote of
 * the matching kind, and both start out as zero for every snippet.
 */
struct 	sm_dat {
	int 	 in_squote; /* non-zero: an opening single quote was emitted */
	int 	 in_dquote; /* non-zero: an opening double quote was emitted */
};

/*
 * Handler invoked for a byte that has a non-zero action code.
 * "ob" receives the output, "smrt" carries the open-quote state,
 * "previous_char" is the byte before the trigger (zero at the start of
 * the input), "text" points at the trigger byte and "size" is the
 * number of bytes available from "text" onward.
 * The return value is the number of input bytes consumed in addition
 * to the trigger byte itself.
 */
typedef	size_t (*sm_cb_ptr)(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);

/* Per-character handlers; all of them share the sm_cb_ptr signature. */
static size_t sm_cb_amp(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);
static size_t sm_cb_backtick(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);
static size_t sm_cb_dash(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);
static size_t sm_cb_dot(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);
static size_t sm_cb_dquote(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);
static size_t sm_cb_esc(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);
static size_t sm_cb_number(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);
static size_t sm_cb_parens(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);
static size_t sm_cb_squote(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size);

/*
 * Handlers indexed by the action codes stored in sm_cb_chars.
 * Code zero means "no action", so its slot is never called.
 */
static	sm_cb_ptr sm_cb_ptrs[] = {
	[0]  = NULL,
	[1]  = sm_cb_dash,
	[2]  = sm_cb_parens,
	[3]  = sm_cb_squote,
	[4]  = sm_cb_dquote,
	[5]  = sm_cb_amp,
	[6]  = sm_cb_esc,
	[7]  = sm_cb_number,
	[8]  = sm_cb_dot,
	[9]  = sm_cb_backtick,
	[10] = NULL,
};

/*
 * Maps every possible byte value to an action code (an index into
 * sm_cb_ptrs).  Bytes not listed here map to zero, i.e., they are
 * copied to the output untouched.
 */
static const int sm_cb_chars[UINT8_MAX+1] = {
	['-']  = 1,
	['(']  = 2,
	['\''] = 3,
	['"']  = 4,
	['&']  = 5,
	['\\'] = 6,
	['1']  = 7,
	['3']  = 7,
	['.']  = 8,
	['`']  = 9,
};

/*
 * Returns non-zero if "c" delimits a word: the NUL byte (which callers
 * pass when there is no neighbouring character), white-space, or
 * punctuation.
 */
static int
word_boundary(char c)
{
	/*
	 * The <ctype.h> classifiers are only defined for values
	 * representable as unsigned char (or EOF), so a plain char
	 * holding a byte >= 0x80 must be converted first.
	 */
	const unsigned char	 uc = (unsigned char)c;

	return uc == 0 || isspace(uc) || ispunct(uc);
}

/*
 * Checks whether the "size" bytes at "text" start with one of the
 * recognised spellings of a single quote: the literal apostrophe or
 * one of the entities "&#39;", "&#x27;" and "&apos;".
 * Returns the byte length of the matched spelling, or zero if "text"
 * does not start with any of them.
 */
static size_t
squote_len(const char *text, size_t size)
{
	static const char *const forms[] = {
		"'", "&#39;", "&#x27;", "&apos;"
	};
	size_t		 i, sz;

	for (i = 0; i < sizeof(forms) / sizeof(forms[0]); i++) {
		sz = strlen(forms[i]);
		/* Not enough input left to hold this spelling. */
		if (sz > size)
			continue;
		if (memcmp(text, forms[i], sz) == 0)
			return sz;
	}

	return 0;
}

/*
 * Emits an opening or closing quote escape if the quote sits at the
 * very beginning or end of a word.
 * "quote" selects the kind ('d' for double, 's' for single) and
 * "is_open" points at the flag recording whether a quote of that kind
 * is currently open.
 * A closing quote is only accepted when followed by a word boundary,
 * an opening quote only when preceded by one.
 * Returns 1 after writing the escape and flipping the flag, or 0 if
 * nothing was written.
 */
static int
sm_quotes(hbuf *ob, char previous_char,
	char next_char, char quote, int *is_open)
{
	const int	 closing = *is_open;
	const char	*esc;

	if (closing) {
		if (!word_boundary(next_char))
			return 0;
	} else if (!word_boundary(previous_char))
		return 0;

	assert('d' == quote || 's' == quote);

	if ('d' == quote)
		esc = closing ? "\\(rq" : "\\(lq";
	else
		esc = closing ? "\\(cq" : "\\(oq";

	hbuf_puts(ob, esc);
	*is_open = !closing;
	return 1;
}

/*
 * Converts ' to left or right single quote; but the initial ' might be
 * in different forms, e.g. &apos; or &#39; or &#x27;.
 * 'squote_text' points to the original single quote, and 'squote_size'
 * is its length.  "text" points at the last character of the
 * single-quote, e.g. "'" or ";", and "size" counts the bytes available
 * from "text" onward (so text[size - 1] is the last readable byte).
 * Returns the number of bytes consumed after text[0]: the length of a
 * second single quote when the pair was turned into a double quote,
 * zero otherwise.
 */
static size_t
sm_squote(hbuf *ob, struct sm_dat *smrt, char previous_char,
	const char *text, size_t size,
	const char *squote_text, size_t squote_size)
{
	char	 t1, t2, next_char;
	size_t	 next_squote_len;

	if (size >= 2) {
		/* tolower() is only defined for unsigned char values. */
		t1 = tolower((unsigned char)text[1]);
		next_squote_len = squote_len(text+1, size-1);

		/* Convert '' to an opening or closing double quote. */
		if (next_squote_len > 0) {
			next_char = (size > 1+next_squote_len) ?
				text[1+next_squote_len] : 0;
			if (sm_quotes(ob, previous_char,
			    next_char, 'd', &smrt->in_dquote))
				return next_squote_len;
		}

		/*
		 * Tom's, isn't, I'm, I'd.
		 * The suffix letter is text[1], so the input ends right
		 * after it when size is 2; only look at text[2] if it
		 * exists.
		 */
		if ((t1 == 's' || t1 == 't' || t1 == 'm' ||
		     t1 == 'd') && (size == 2 ||
		    word_boundary(text[2]))) {
			HBUF_PUTSL(ob, "\\(cq");
			return 0;
		}

		/*
		 * you're, you'll, you've.
		 * Same reasoning: with a two-letter suffix the input
		 * ends after text[2] when size is 3.
		 */
		if (size >= 3) {
			t2 = tolower((unsigned char)text[2]);
			if (((t1 == 'r' && t2 == 'e') ||
			     (t1 == 'l' && t2 == 'l') ||
			     (t1 == 'v' && t2 == 'e')) &&
			    (size == 3 || word_boundary(text[3]))) {
				HBUF_PUTSL(ob, "\\(cq");
				return 0;
			}
		}
	}

	/* text[1] only exists if at least two bytes remain. */
	if (sm_quotes(ob, previous_char,
	    size > 1 ? text[1] : 0, 's', &smrt->in_squote))
		return 0;

	/* Not at a word edge: pass the original spelling through. */
	hbuf_put(ob, squote_text, squote_size);
	return 0;
}

/*
 * Handler for a literal apostrophe: defers to sm_squote(), describing
 * the quote as the single byte found at "text".
 */
static size_t
sm_cb_squote(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	const char	*quote = text;

	return sm_squote(ob, smrt, previous_char,
		text, size, quote, 1);
}

/*
 * Converts "(c)", "(r)" and "(tm)", matched case-insensitively, into
 * the escapes \(co, \(rg and \(tm.
 * Returns the number of bytes consumed after the opening parenthesis,
 * or zero when the parenthesis is copied through unchanged.
 */
static size_t
sm_cb_parens(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	char	 t1, t2;

	if (size >= 3) {
		/* tolower() is only defined for unsigned char values. */
		t1 = tolower((unsigned char)text[1]);
		t2 = tolower((unsigned char)text[2]);

		if (t1 == 'c' && t2 == ')') {
			HBUF_PUTSL(ob, "\\(co");
			return 2;
		}

		if (t1 == 'r' && t2 == ')') {
			HBUF_PUTSL(ob, "\\(rg");
			return 2;
		}

		if (size >= 4 && t1 == 't' &&
		    t2 == 'm' && text[3] == ')') {
			HBUF_PUTSL(ob, "\\(tm");
			return 3;
		}
	}

	hbuf_putc(ob, text[0]);
	return 0;
}

/*
 * Converts "---" to an em-dash and "--" to an en-dash; a lone dash is
 * copied through.
 * Returns the number of dashes consumed after the first one.
 */
static size_t
sm_cb_dash(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	size_t	 run;

	/* Length of the dash run starting at text[0], capped at three. */
	for (run = 1; run < size && run < 3; run++)
		if (text[run] != '-')
			break;

	switch (run) {
	case 3:
		HBUF_PUTSL(ob, "\\(em");
		return 2;
	case 2:
		HBUF_PUTSL(ob, "\\(en");
		return 1;
	default:
		break;
	}

	hbuf_putc(ob, text[0]);
	return 0;
}

/*
 * Handles entities: "&quot;" becomes a double quote, the single-quote
 * entities accepted by squote_len() are handed to sm_squote(), and
 * "&#0;" is dropped from the output.  Any other ampersand is copied
 * through.
 * Returns the number of bytes consumed after the ampersand.
 */
static size_t
sm_cb_amp(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	size_t	 qlen, skip;
	char	 after;

	if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
		after = size >= 7 ? text[6] : 0;
		if (sm_quotes(ob, previous_char,
		    after, 'd', &smrt->in_dquote))
			return 5;
	}

	qlen = squote_len(text, size);
	if (qlen > 0) {
		/* sm_squote() wants the last byte of the entity. */
		skip = qlen - 1;
		return skip + sm_squote(ob, smrt, previous_char,
			text + skip, size - skip, text, qlen);
	}

	if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
		return 3;

	hbuf_putc(ob, '&');
	return 0;
}

/*
 * A code span (within \f[C...]) shouldn't be escaped.
 * If "text" begins with "\f[C", everything up to (but excluding) the
 * next "\f[" is copied verbatim; otherwise only the backslash is
 * copied.
 * Returns the number of bytes consumed after the backslash.  After a
 * code span this leaves the caller positioned on the terminating
 * "\f[", which is then processed as a fresh escape.
 */
static size_t
sm_cb_esc(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	const char	*cp;

	/* All four bytes of "\f[C" must exist before comparing. */
	if (size < 4 || 0 != memcmp(text + 1, "f[C", 3)) {
		hbuf_putc(ob, text[0]);
		return 0;
	}

	cp = memmem(text + 3, size - 3, "\\f[", 3);

	/*
	 * No terminating font escape: rather than doing arithmetic on
	 * a NULL pointer, treat the span as running to the end of the
	 * input and copy the remainder untouched.
	 */
	if (NULL == cp) {
		hbuf_put(ob, text, size);
		return size - 1;
	}

	hbuf_put(ob, text, (size_t)(cp - text));
	return (size_t)(cp - text) - 1;
}

/*
 * See if we're in a code block (opened by a ".ft CR" line) and don't
 * smartypants anything inside of that.
 * The opening request is only recognised at the start of a line, i.e.,
 * when "previous_char" is zero or a newline.  Everything up to (but
 * excluding) the next "\n.ft" is copied verbatim; in every other case
 * only the dot is copied.
 * Returns the number of bytes consumed after the dot.
 */
static size_t
sm_cb_dot(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	const char	*cp;

	/* All seven bytes of ".ft CR\n" must exist before comparing. */
	if ((0 != previous_char && '\n' != previous_char) ||
	    size < 7 || 0 != memcmp(text + 1, "ft CR\n", 6)) {
		hbuf_putc(ob, text[0]);
		return 0;
	}

	/*
	 * The search starts on the newline ending the ".ft CR" line.
	 * Only the first four bytes of the needle ("\n.ft") take part
	 * in the match.
	 */
	cp = memmem(text + 6, size - 6, "\n.ft\n", 4);

	/*
	 * No closing request: rather than doing arithmetic on a NULL
	 * pointer, treat the block as running to the end of the input
	 * and copy the remainder untouched.
	 */
	if (NULL == cp) {
		hbuf_put(ob, text, size);
		return size - 1;
	}

	hbuf_put(ob, text, (size_t)(cp - text));
	return (size_t)(cp - text) - 1;
}

/*
 * Converts a pair of backticks to a double quote via sm_quotes().
 * Returns 1 (the second backtick) when the pair was converted;
 * otherwise copies the single backtick through and returns zero.
 */
static size_t
sm_cb_backtick(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	const int	 paired = size >= 2 && '`' == text[1];

	if (paired && sm_quotes(ob, previous_char,
	    size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
		return 1;

	hbuf_putc(ob, text[0]);
	return 0;
}

/*
 * Converts 1/2, 1/4 and 3/4 to the fraction escapes \[12], \[14] and
 * \[34].
 * The fraction must follow a word boundary and must itself be followed
 * by the end of input or a word boundary; "1/4" may also be followed
 * by "th" and "3/4" by "ths" (case-insensitively).
 * Returns 2 (the "/" and the denominator) on conversion; otherwise the
 * digit is copied through and zero is returned.
 */
static size_t
sm_cb_number(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	int	 at_end;

	if (size >= 3 && text[1] == '/' &&
	    word_boundary(previous_char)) {
		/* text[3] is only inspected when it exists. */
		at_end = size == 3 || word_boundary(text[3]);

		/* 1/2 */
		if (text[0] == '1' && text[2] == '2' && at_end) {
			HBUF_PUTSL(ob, "\\[12]");
			return 2;
		}

		/*
		 * 1/4 and 1/4th.
		 * tolower() is only defined for unsigned char values.
		 */
		if (text[0] == '1' && text[2] == '4' &&
		    (at_end ||
		     (size >= 5 &&
		      tolower((unsigned char)text[3]) == 't' &&
		      tolower((unsigned char)text[4]) == 'h'))) {
			HBUF_PUTSL(ob, "\\[14]");
			return 2;
		}

		/* 3/4 and 3/4ths */
		if (text[0] == '3' && text[2] == '4' &&
		    (at_end ||
		     (size >= 6 &&
		      tolower((unsigned char)text[3]) == 't' &&
		      tolower((unsigned char)text[4]) == 'h' &&
		      tolower((unsigned char)text[5]) == 's'))) {
			HBUF_PUTSL(ob, "\\[34]");
			return 2;
		}
	}

	hbuf_putc(ob, text[0]);
	return 0;
}

/*
 * Converts " to left or right double quote.
 * If the quote is not at a word edge (sm_quotes() declines), the
 * neutral double-quote escape \(dq is written instead.
 * Always returns zero: nothing beyond the quote itself is consumed.
 */
static size_t
sm_cb_dquote(hbuf *ob, struct sm_dat *smrt,
	char previous_char, const char *text, size_t size)
{
	/* text[1] only exists if at least two bytes remain. */
	const char	 next_char = size > 1 ? text[1] : 0;

	if ( ! sm_quotes(ob, previous_char,
	    next_char, 'd', &smrt->in_dquote))
		HBUF_PUTSL(ob, "\\(dq");

	return 0;
}

/*
 * Process a nroff snippet using SmartyPants for smart punctuation.
 * Reads the "size" bytes at "text" and appends the converted result
 * to "ob".  Nothing is written if "text" is NULL or "size" is zero.
 * Bytes without an action code in sm_cb_chars are copied in runs;
 * every other byte is dispatched to its handler in sm_cb_ptrs, except
 * for literal quotes on lines starting with a dot (macro lines), which
 * are copied unchanged.
 */
void
lowdown_nroff_smrt(hbuf *ob, const char *text, size_t size)
{
	size_t 		 i, org, bscan;
	struct sm_dat	 smrt;
	int		 action;

	if (NULL == text || 0 == size)
		return;

	memset(&smrt, 0, sizeof(struct sm_dat));

	hbuf_grow(ob, size);

	for (i = 0; i < size; ++i) {
		action = 0;

		/* Copy the run of bytes that need no processing. */

		org = i;
		while (i < size &&
		       (action = sm_cb_chars[(unsigned char)text[i]]) == 0)
			i++;

		if (i > org)
			hbuf_put(ob, text + org, i - org);

		if (i >= size)
			break;

		/* Don't convert quotes on macro lines. */

		if ('"' == text[i] || '\'' == text[i]) {
			/*
			 * Walk back to the first byte of this line.
			 * Testing text[bscan - 1] makes sure a newline
			 * in the very first byte of the input is seen
			 * as well.
			 */
			for (bscan = i; bscan > 0; bscan--)
				if ('\n' == text[bscan - 1])
					break;
			if ('.' == text[bscan]) {
				hbuf_putc(ob, text[i]);
				continue;
			}
		}

		/*
		 * The handler returns how many bytes it consumed beyond
		 * text[i]; the loop increment accounts for text[i].
		 */
		i += sm_cb_ptrs[action](ob,
			&smrt, i ? text[i - 1] : 0,
			text + i, size - i);
	}
}
