/*====================================================================

		     Parallel (coordinate) structure analysis

                                               S.Kurohashi 91. 6.25
                                               S.Kurohashi 93. 5.31

    $Id: para_analysis.c,v 1.34.6.1 2009/10/19 23:34:31 kawahara Exp $
====================================================================*/
#include "knp.h"

/* Cost subtracted in dp_search_scope() for every skip step of the DP
   (the number in the trailing comment is an alternative value kept by
   the original authors). */
#define PENALTY		1 /* 2 */
/* Value returned by calc_starting_bonus_score() and
   calc_ending_bonus_score() when the boundary phrase qualifies. */
#define BONUS   	2 /* 6 */
/* Unit of the level penalty computed by calc_static_level_penalty(). */
#define MINUS   	7
/* Magnitude of the very low score dp_search_scope() stores in cells of
   the first column that cannot start a path. */
#define PENA_MAX   	1000
/* Initial "no candidate yet" value for max_score / pure_score. */
#define ENOUGH_MINUS	-100.0
/* Sentinel stored in prepos_matrix for the cell where a DP path starts. */
#define START_HERE	-1

/* detect_para_scope() promotes status 'n' to 's' when pure_score reaches
   this value. */
#define STRONG_PARA_TH	3.9

/* Work areas shared by dp_search_scope() and its callers. */
int 	score_matrix[BNST_MAX][BNST_MAX];	/* best path score per cell */
int 	prepos_matrix[BNST_MAX][BNST_MAX];	/* back pointer (row) per cell, or START_HERE */
int 	maxpos_array[BNST_MAX];			/* row that produced maxsco_array[i] for the current column */
int 	maxsco_array[BNST_MAX];			/* best decayed score reachable from row i or below, current column */
int 	penalty_table[BNST_MAX];		/* per-phrase level penalty, filled in detect_para_scope() */

/* Divisor used to normalise a path score; indexed by span length
   (jend_pos - start + 1).  The 50 values appear to be n^(2/3) rounded to
   two decimals, with norm[0] set to 1.00.
   NOTE(review): the table has only 50 entries - confirm that the span
   length can never exceed 49 (depends on BNST_MAX). */
float	norm[] = {
    1.00,  1.00,  1.59,  2.08,  2.52,  2.92,  3.30,  3.66,  4.00,  4.33,
    4.64,  4.95,  5.24,  5.53,  5.81,  6.08,  6.35,  6.61,  6.87,  7.12,
    7.37,  7.61,  7.85,  8.09,  8.32,  8.55,  8.78,  9.00,  9.22,  9.44,
    9.65,  9.87, 10.08, 10.29, 10.50, 10.70, 10.90, 11.10, 11.30, 11.50,
   11.70, 11.89, 12.08, 12.27, 12.46, 12.65, 12.84, 13.02, 13.21, 13.39};

/* Quote spans (in_num[] / out_num[]) defined in another translation unit. */
extern QUOTE_DATA quote_data;

/*==================================================================*/
void mask_quote_scope(SENTENCE_DATA *sp, int key_pos)
/*==================================================================*/
{
    /*
      Clear restrict_matrix cells according to the quote spans listed in
      quote_data (in_num[] / out_num[], terminated by a negative in_num
      entry) and the position of the key phrase key_pos relative to each
      span.  When at least one span was processed and the debug display
      option is on, the resulting matrix is printed.
    */
    int q = 0;
    int row, col, cut;

    while (quote_data.in_num[q] >= 0) {
	int q_start = quote_data.in_num[q];
	int q_end = quote_data.out_num[q];

	q++;

	/* Key lies before the span: rows before the span may not pair
	   with columns inside it (the span's last column is left alone) */
	if (key_pos < q_start) {
	    for (row = 0; row < q_start; row++) {
		for (col = q_start; col < q_end; col++) {
		    restrict_matrix[row][col] = 0;
		}
	    }
	    continue;
	}

	/* Key lies at or after the end of the span: rows after the span's
	   first phrase may not pair with columns behind the span */
	if (q_end <= key_pos) {
	    for (row = q_start + 1; row <= q_end; row++) {
		for (col = q_end + 1; col < sp->Bnst_num; col++) {
		    restrict_matrix[row][col] = 0;
		}
	    }
	    continue;
	}

	/* Key lies inside the span: only cells whose row and column are
	   both within [q_start, q_end] stay untouched */
	for (row = 0; row <= q_end; row++) {
	    for (col = q_start; col < sp->Bnst_num; col++) {
		if (!(q_start <= row && col <= q_end)) {
		    restrict_matrix[row][col] = 0;
		}
	    }
	}

	/* A phrase inside the span carrying the feature below acts as a
	   cut point: cells that straddle it are cleared as well */
	for (cut = q_start; cut < q_end; cut++) {
	    if (!check_feature(sp->bnst_data[cut].f, ":ʸ")) {
		continue;
	    }
	    for (row = q_start; row <= cut; row++) {
		for (col = cut + 1; col <= q_end; col++) {
		    restrict_matrix[row][col] = 0;
		}
	    }
	}
    }

    if (q != 0 && OptDisplay == OPT_DEBUG) {
	print_matrix(sp, PRINT_RSTQ, key_pos);
    }
}

/*==================================================================*/
int bnst_match(SENTENCE_DATA *sp, int pos1, int pos2)
/*==================================================================*/
{
    /*
      Decide whether the phrases at pos1 and pos2 are "the same kind".
      Returns 1 only when every feature comparison below agrees,
      otherwise 0.  Used by calc_dynamic_level_penalty() to decide
      whether the level penalty is waived.
    */
    BNST_DATA *lhs = sp->bnst_data + pos1;
    BNST_DATA *rhs = sp->bnst_data + pos2;
    char *lhs_val, *rhs_val;
    int lhs_has, rhs_has;

    /* Both phrases must carry this feature with identical values */
    lhs_val = (char *)check_feature(lhs->f, "");
    rhs_val = (char *)check_feature(rhs->f, "");
    if (lhs_val == NULL || rhs_val == NULL || strcmp(lhs_val, rhs_val) != 0) {
	return 0;
    }

    /* Presence of this feature must be the same on both sides */
    lhs_has = check_feature(lhs->f, "Ѹ") ? 1 : 0;
    rhs_has = check_feature(rhs->f, "Ѹ") ? 1 : 0;
    if (lhs_has != rhs_has) {
	return 0;
    }

    /* When the first phrase has that feature, the "ID" features must
       both exist and be equal */
    if (check_feature(lhs->f, "Ѹ")) {
	lhs_val = (char *)check_feature(lhs->f, "ID");
	rhs_val = (char *)check_feature(rhs->f, "ID");
	if (lhs_val == NULL || rhs_val == NULL || strcmp(lhs_val, rhs_val) != 0) {
	    return 0;
	}
    }

    /* Final presence check: a mismatch rejects, agreement accepts */
    lhs_has = check_feature(lhs->f, "") ? 1 : 0;
    rhs_has = check_feature(rhs->f, "") ? 1 : 0;

    return (lhs_has == rhs_has) ? 1 : 0;
}

/*==================================================================*/
int calc_static_level_penalty(SENTENCE_DATA *sp, int key_pos, int pos)
/*==================================================================*/
{
    /*
      Penalty for the phrase at pos relative to the key phrase, based on
      their sp_level values: zero when the key's level is higher,
      otherwise MINUS times (level difference + 1).
    */
    int key_level = sp->bnst_data[key_pos].sp_level;
    int pos_level = sp->bnst_data[pos].sp_level;

    if (key_level > pos_level) {
	return 0;
    }
    return MINUS * (pos_level - key_level + 1);
}
/*==================================================================*/
int calc_dynamic_level_penalty(SENTENCE_DATA *sp, int key_pos, int pos1, int pos2)
/*==================================================================*/
{
    /*
      Level penalty applied when the DP pairs the phrases pos1 and pos2.
      Normally the sum of their penalty_table entries; it is waived
      (zero) in the two cases handled below.
    */
    BNST_DATA *first = &sp->bnst_data[pos1];
    BNST_DATA *second = &sp->bnst_data[pos2];

    /* Same level, the two phrases match each other, and the first one
       does not match the key phrase */
    if (first->sp_level == second->sp_level &&
	bnst_match(sp, pos1, pos2) &&
	!bnst_match(sp, pos1, key_pos)) {
	return 0;
    }

    /* Both phrases carry the feature tested here */
    if (check_feature(first->f, "") &&
	check_feature(second->f, "")) {
	return 0;
    }

    return penalty_table[pos1] + penalty_table[pos2];
}

/*==================================================================*/
int calc_starting_bonus_score(SENTENCE_DATA *sp, int istart_pos, PARA_DATA *p_ptr)
/*==================================================================*/
{
    /*
      Bonus for letting the coordinate structure start at istart_pos.
      Only key types PARA_KEY_N and PARA_KEY_P can earn BONUS, and only
      when the starting phrase carries the feature associated with that
      type; every other case yields 0.
    */
    char *wanted;

    if (p_ptr->type == PARA_KEY_I) {
	return 0;
    }

    if (p_ptr->type == PARA_KEY_N) {
	wanted = "̾»";
    }
    else if (p_ptr->type == PARA_KEY_P) {
	wanted = "»";
    }
    else {
	return 0;
    }

    return check_feature(sp->bnst_data[istart_pos].f, wanted) ? BONUS : 0;
}

/*==================================================================*/
int calc_ending_bonus_score(SENTENCE_DATA *sp, int jend_pos, PARA_DATA *p_ptr)
/*==================================================================*/
{
    /*
      Bonus for letting the coordinate structure end at jend_pos.
      Only key types PARA_KEY_N and PARA_KEY_P can earn BONUS; every
      other type yields 0.
    */
    BNST_DATA *tail = sp->bnst_data + jend_pos;

    if (p_ptr->type == PARA_KEY_I) {
	return 0;
    }

    if (p_ptr->type == PARA_KEY_N) {
	/* Special pairing: a feature on the key phrase combined with a
	   dedicated ending feature on the last phrase */
	if (check_feature(sp->bnst_data[p_ptr->key_pos].f, ":ȳ") &&
	    check_feature(tail->f, "̾½ȡ")) {
	    return BONUS;
	}
	return check_feature(tail->f, "̾½") ? BONUS : 0;
    }

    if (p_ptr->type == PARA_KEY_P) {
	return check_feature(tail->f, "½") ? BONUS : 0;
    }

    return 0;
}

/*==================================================================*/
void dp_search_scope(SENTENCE_DATA *sp, int key_pos, int iend_pos, int jend_pos)
/*==================================================================*/
{
    /*
      Dynamic-programming pass over match_matrix.

      Columns j run from jend_pos down to key_pos + 1 and rows i from
      iend_pos down to 0.  For every cell the best accumulated score is
      stored in score_matrix[i][j] and the row it was reached from in
      prepos_matrix[i][j] (START_HERE for the initial cell).  After each
      column, maxsco_array[] / maxpos_array[] are refreshed so that the
      next column (j - 1) can look up, for any row, the best decayed
      score of the rows below it.  Nothing is computed when
      jend_pos <= key_pos.

      sp and key_pos are only used for the level penalty
      (calc_dynamic_level_penalty), which is skipped when Language is
      CHINESE.
    */
    int i, j, current_max, score_upward, score_sideway;
    
    /* Sweep the columns from the candidate end back towards the key */

    for (j = jend_pos; j > key_pos; j--)  {

	/* First column: the path must start in the bottom row */
	
	if (j == jend_pos) {
	    score_matrix[iend_pos][jend_pos] = match_matrix[iend_pos][jend_pos];
	    prepos_matrix[iend_pos][jend_pos] = START_HERE;
	    /* every other row of this column is marked unusable */
	    for (i=iend_pos-1; i>=0; i--)
	      score_matrix[i][jend_pos] = - PENA_MAX;
	}
	
	else {

	    /* Bottom row: the only move is sideways from column j+1 */

	    score_sideway = score_matrix[iend_pos][j+1] 
	      		    - PENALTY - penalty_table[j];
	    score_matrix[iend_pos][j] = score_sideway;
	    prepos_matrix[iend_pos][j] = iend_pos;

	    /* Other rows: take the better of a match at (i, j) continued
	       from the best lower row of column j+1 (maxsco_array still
	       holds that column's values here) and a sideways skip */

	    for (i=iend_pos-1; i>=0; i--) {
		score_upward = (Language == CHINESE) ? 
		    match_matrix[i][j] + maxsco_array[i+1] : 
		    match_matrix[i][j] + maxsco_array[i+1] - calc_dynamic_level_penalty(sp, key_pos, i, j);
		score_sideway = (Language == CHINESE) ? 
		    score_matrix[i][j+1] - PENALTY: 
		    score_matrix[i][j+1] - PENALTY - penalty_table[j];
		
		/* ties are resolved in favour of the match */
		if (score_upward >= score_sideway) {
		    score_matrix[i][j] = score_upward;
		    prepos_matrix[i][j] = maxpos_array[i+1];
		} 
		else {
		    score_matrix[i][j] = score_sideway;
		    prepos_matrix[i][j] = i;
		}
	    }
	}

	/* Refresh the per-row maxima of this column for use by the next
	   column: walking upwards, the running maximum loses a penalty per
	   row and is replaced whenever the cell itself is at least as good */

	current_max = score_matrix[iend_pos][j];
	maxpos_array[iend_pos] = iend_pos;
	maxsco_array[iend_pos] = score_matrix[iend_pos][j];

	for (i=iend_pos-1; i>=0; i--) {

	    current_max -= (Language == CHINESE) ? PENALTY : (PENALTY + penalty_table[i]);
	    if (current_max <= score_matrix[i][j]) {
		current_max = score_matrix[i][j];
		maxpos_array[i] = i;
		maxsco_array[i] = current_max;
	    } else {
		maxpos_array[i] = maxpos_array[i+1];
		maxsco_array[i] = current_max;
	    }
	}
    }
}

/*==================================================================*/
void _detect_para_scope(SENTENCE_DATA *sp, int para_num, PARA_DATA *ptr, int jend_pos)
/*==================================================================*/
{
    /*
      Evaluate jend_pos as the end of the coordinate structure whose key
      is described by ptr.

      Para_matrix[para_num][i][jend_pos] is reset for every start i and
      then filled with the score of each admissible start.  When this
      end position beats what ptr already holds, ptr->status,
      max_score, pure_score, max_path[] and jend_pos are updated.

      sp       : sentence being analysed
      para_num : index of the coordinate structure (first Para_matrix index)
      ptr      : its PARA_DATA (key_pos, iend_pos, f_pattern, status ...)
      jend_pos : candidate end position
    */
    int i, j, flag, nth, span;
    int key_pos = ptr->key_pos;
    int iend_pos = ptr->iend_pos;
    /* last valid index of the norm[] table */
    int norm_last = (int)(sizeof(norm) / sizeof(norm[0])) - 1;
    int starting_bonus_score;
    int ending_bonus_score;
    int max_pos = -1;
    float current_score, normalized_score, sim_threshold, new_threshold,
	max_score = ENOUGH_MINUS, pure_score = 0;
    char *cp;
    FEATURE *fp;

    /* Mark every start position as "no score" for this end position */

    for (i = iend_pos; i >= 0; i--) {
	Para_matrix[para_num][i][jend_pos] = -INT_MAX;
    }

    /* No similarity between the key end and the candidate end */

    if (match_matrix[iend_pos][jend_pos] == 0) return;

    /* restrict_matrix allows no start position for this end */

    flag = FALSE;
    for (i = 0; i <= iend_pos; i++) {
	if (restrict_matrix[i][jend_pos]) {
	    flag = TRUE; break;
	}
    }
    if (flag == FALSE) return;

    /* Reject the phrase right after the key when it carries the
       feature tested here */

    if (key_pos + 1 == jend_pos &&	
	check_feature(sp->bnst_data[jend_pos].f, "ؼ"))
	return;

    /* Similarity threshold taken from the key's rule patterns */

    /* No pattern at all: threshold 0.0 */
    if ((ptr->f_pattern).fp[0] == NULL) {
	sim_threshold = 0.0;
    } 
    /* Otherwise use the smallest threshold among the matching patterns;
       a matching pattern without a parsable "&ST:<value>" counts as 0.0 */
    else {
	sim_threshold = 100.0;
	nth = 0;
	while ((fp = (ptr->f_pattern).fp[nth])) {
	    if (feature_AND_match(fp, sp->bnst_data[jend_pos].f,
				  sp->bnst_data + key_pos,
				  sp->bnst_data + jend_pos) == TRUE) {
		new_threshold = 0.0;
		if ((cp = (char *)check_feature(fp, "&ST")) &&
		    sscanf(cp, "&ST:%f", &new_threshold) != 1) {
		    /* malformed value: never use an unset threshold */
		    new_threshold = 0.0;
		}
		if (new_threshold < sim_threshold )
		    sim_threshold = new_threshold;
	    }
	    nth++;
	}
	/* no pattern matched */
	if (sim_threshold == 100.0) return;
    }


    /*		    */
    /* DP MATCHING  */
    /*		    */

    dp_search_scope(sp, key_pos, iend_pos, jend_pos);


    /* Pick the best start position from the DP result */

    ending_bonus_score = calc_ending_bonus_score(sp, jend_pos, ptr);
    for (i = iend_pos; i >= 0; i--) {
	starting_bonus_score = calc_starting_bonus_score(sp, i, ptr);

	/* norm[] is a fixed-size table: keep the span length inside it so
	   that a long span cannot index past the end */
	span = jend_pos - i + 1;
	if (span > norm_last) span = norm_last;
	if (span < 0) span = 0;

	normalized_score = (float)score_matrix[i][key_pos+1] / norm[span];
	current_score = normalized_score
	    + starting_bonus_score + ending_bonus_score;

	if (restrict_matrix[i][jend_pos] && 
	    max_score < current_score) {
	    max_score = current_score;
	    /* pure_score is the normalised score without the bonuses */
	    pure_score = normalized_score;
	    max_pos = i;
	}

	/* Record the score of this start; note that the test uses the
	   pure_score of the best start found so far */
	if (restrict_matrix[i][jend_pos] && 
	    pure_score >= sim_threshold) {
	    Para_matrix[para_num][i][jend_pos] = current_score;
	}
    }


    /* Nothing usable (no admissible start, or only negative scores) */
    if (max_score < 0.0) return;


    /* A former special case for key_pos + 1 == jend_pos was disabled by
       the original authors and is not reproduced here. */

    /*
      Threshold reached and status still 'x'  -> becomes 'n', result kept
      Threshold reached and status already 'n' -> kept if the score is better
      Threshold not reached and status 'x'     -> best score is remembered
                                                  anyway, status stays 'x'
    */
    flag = FALSE;
    if (sim_threshold <= pure_score &&
	ptr->status == 'x') {
	ptr->status = 'n';
	flag = TRUE;
    }
    else if (sim_threshold <= pure_score &&
	     ptr->status == 'n' &&
	     ptr->max_score < max_score) {
	flag = TRUE;
    }
    else if (ptr->status == 'x' &&
	     ptr->max_score < max_score) {
	flag = TRUE;
    }	

    if (flag == TRUE) {
	ptr->max_score = max_score;
	ptr->pure_score = pure_score;
	ptr->max_path[0] = max_pos;
	/* Follow the back pointers column by column until the cell where
	   the path started; that column is the end position */
	for (j = 0;; j++) {
	    ptr->max_path[j+1] = prepos_matrix[ptr->max_path[j]][j+key_pos+1];
	    if (ptr->max_path[j+1] == START_HERE) {
		ptr->jend_pos = j + key_pos + 1;
		break;
	    }
	}
    }
}

/*==================================================================*/
int detect_para_scope(SENTENCE_DATA *sp, int para_num, int restrict_p)
/*==================================================================*/
{
    /*
      Determine the scope of coordinate structure number para_num.

      restrict_p
        FALSE : restrict_matrix is initialised here (every cell above the
                diagonal is set to 1) before the quote mask is applied
        other : restrict_matrix is used as left by the caller

      The result is stored in sp->para_data[para_num]; status is 'x'
      when no scope was accepted, 'n' for an accepted scope and 's' when
      its pure_score reaches STRONG_PARA_TH.  Always returns TRUE.
    */
    PARA_DATA *para = sp->para_data + para_num;
    int key = para->key_pos;
    int row, col, pos, end;

    para->status = 'x';
    para->max_score = ENOUGH_MINUS;
    para->pure_score = ENOUGH_MINUS;
    para->manager_ptr = NULL;

    if (restrict_p == FALSE) {
	for (row = 0; row < sp->Bnst_num; row++) {
	    for (col = row + 1; col < sp->Bnst_num; col++) {
		restrict_matrix[row][col] = 1;
	    }
	}
    }

    mask_quote_scope(sp, key);

    /* Level penalty of every phrase relative to the key (none for the
       key itself) */
    for (pos = 0; pos < sp->Bnst_num; pos++) {
	if (pos == key) {
	    penalty_table[pos] = 0;
	}
	else {
	    penalty_table[pos] = calc_static_level_penalty(sp, key, pos);
	}
    }

    if (OptInput & OPT_PARSED) {
	/* Parsed input: only the head of the key phrase is a candidate */
	_detect_para_scope(sp, para_num, para, sp->bnst_data[key].dpnd_head);
    }
    else {
	/* Otherwise try every position after the key as the end */
	for (end = key + 1; end < sp->Bnst_num; end++) {
	    _detect_para_scope(sp, para_num, para, end);
	}
    }

    /* Status 'x' (nothing found) is left as is and not reported */
    if (para->status == 'n' &&
	para->pure_score >= STRONG_PARA_TH) {
	para->status = 's';
    }

    return TRUE;	/* TRUE is returned even when status is 'x' */
}

/*==================================================================*/
void detect_all_para_scope(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* Run detect_para_scope() on every coordinate structure of the
       sentence, letting it initialise restrict_matrix each time */
    int idx = 0;

    while (idx < sp->Para_num) {
	detect_para_scope(sp, idx, FALSE);
	idx++;
    }
}

/*==================================================================*/
int check_para_key(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /*
      Find the phrases that act as coordination keys and register one
      PARA_DATA entry per key (para_char, key_pos, type, iend_pos and
      f_pattern); every other phrase gets para_num = -1.  When at least
      one key exists, sp_level of every phrase is also filled in.

      Returns CONTINUE when PARA_MAX keys were registered (a message is
      written to stderr), 0 when the sentence has no key, otherwise the
      number of keys.  sp->Para_num is incremented from its value at
      entry and is not reset here.
    */
    int i;
    char *cp, type[16], condition[256];

    for (i = 0; i < sp->Bnst_num; i++) {

	/* For Chinese the following phrase must additionally carry "CC"
	   or "PU"; the last phrase has no follower and is never a key */
	if ((cp = (char *)check_feature(sp->bnst_data[i].f, "¥")) != NULL &&
	    (Language != CHINESE ||
	     (i + 1 < sp->Bnst_num &&
	      (check_feature(sp->bnst_data[i + 1].f, "CC") || 
	       check_feature(sp->bnst_data[i + 1].f, "PU"))))) {
	    sp->bnst_data[i].para_num = sp->Para_num;
	    sp->para_data[sp->Para_num].para_char = 'a'+ sp->Para_num;
	    sp->para_data[sp->Para_num].key_pos = i;

	    /* The feature has the form <name>:<type>:<condition>.  The
	       field widths keep the conversions inside type[] and
	       condition[]; the first three bytes of type are cleared
	       because type[2] is inspected below even for a short type */
	    type[0] = type[1] = type[2] = '\0';
	    condition[0] = '\0';
	    sscanf(cp, "%*[^:]:%15[^:]:%255s", type, condition);

	    /* para_key_type keeps its previous value when no prefix
	       matches */
	    if (!strncmp(type, "̾", strlen("̾"))) {
		sp->bnst_data[i].para_key_type = PARA_KEY_N	;
	    } else if (!strncmp(type, "", strlen(""))) {
		sp->bnst_data[i].para_key_type = PARA_KEY_P;
	    } else if (!strncmp(type, "", strlen(""))) {
		sp->bnst_data[i].para_key_type = PARA_KEY_A;
	    }
	    sp->para_data[sp->Para_num].type = sp->bnst_data[i].para_key_type;
	    
	    /* Anything after the first two bytes of the type is an offset
	       (e.g. "-1") locating the end of the pre-conjunct relative to
	       the key.
	       NOTE(review): this assumes the type tag occupies exactly two
	       bytes in the file's original encoding - confirm */
	    if (*(type+2)) {
		sp->para_data[sp->Para_num].iend_pos = i + atoi(type+2);
	    } else {
		sp->para_data[sp->Para_num].iend_pos = i;
	    }

	    string2feature_pattern(&(sp->para_data[sp->Para_num].f_pattern),condition);
	    
	    sp->Para_num ++;
	    if (sp->Para_num >= PARA_MAX) {
		/* Table full: the remaining phrases are marked as non-keys */
		for (i++; i < sp->Bnst_num; i++) {
		    sp->bnst_data[i].para_num = -1;
		}
		fprintf(stderr, ";; Too many para (%s)!\n", sp->Comment ? sp->Comment : "");
		return CONTINUE;
	    }
	}
	else {
	    sp->bnst_data[i].para_num = -1;
	}
    }

    if (sp->Para_num == 0) return 0;

    /* sp_level: one of the two numbers of a "<name>:<a>-<b>" feature
       (the second one when the extra feature is present, else the
       first); 0 when the feature is missing or cannot be parsed */
    for (i = 0; i < sp->Bnst_num; i++) {

	if ((cp = (char *)check_feature(sp->bnst_data[i].f, "")) != NULL) {
	    int parsed;

	    if (check_feature(sp->bnst_data[i].f, "")) {
		parsed = sscanf(cp, "%*[^:]:%*d-%d", &(sp->bnst_data[i].sp_level));
	    } else {
		parsed = sscanf(cp, "%*[^:]:%d-%*d", &(sp->bnst_data[i].sp_level));
	    }
	    if (parsed != 1) {
		sp->bnst_data[i].sp_level = 0;
	    }
	} else {
	    sp->bnst_data[i].sp_level = 0;
	}
    }

    return sp->Para_num;
}


/*==================================================================*/
int farthest_child(SENTENCE_DATA *sp, BNST_DATA *b_ptr)
/*==================================================================*/
{
    /* Starting from b_ptr, repeatedly move to the last entry of the
       NULL-terminated child[] list until a phrase without children is
       reached, and return that phrase's index in sp->bnst_data.
       NOTE(review): the original (garbled) comment appears to say that
       this function is currently unused - confirm */

    BNST_DATA *node = b_ptr;
    int last;

    while (node->child[0]) {
	last = 0;
	while (node->child[last + 1]) {
	    last++;
	}
	node = node->child[last];
    }

    return (node - sp->bnst_data);
}

/*==================================================================*/
int para_recovery(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /*
      Rebuild the coordinate-structure table (sp->para_data) from
      dependency information already stored in the phrases: every
      phrase whose dpnd_type is 'P' becomes the key of one entry, all
      other phrases get para_num = -1.  For parsed input the entry's
      type and max_score are recomputed as well.  Returns the result of
      detect_para_relation().
    */

    int		i, j, span;
    /* last valid index of the norm[] table */
    int		norm_last = (int)(sizeof(norm) / sizeof(norm[0])) - 1;
    int		ending_bonus_score = 0;
    int		starting_bonus_score = 0;
    BNST_DATA	*b_ptr;
    PARA_DATA	*p_ptr;
    char	*cp;

    sp->Para_num = 0;
    sp->Para_M_num = 0;

    for (i = 0, b_ptr = sp->bnst_data; i < sp->Bnst_num; i++, b_ptr++) {
	if (b_ptr->dpnd_type != 'P') {
	    b_ptr->para_num = -1;
	    continue;
	}

	if (sp->Para_num >= PARA_MAX) {
	    /* Table full: this and all remaining phrases become non-keys */
	    for (; i < sp->Bnst_num; i++, b_ptr++) {
		b_ptr->para_num = -1;
	    }
	    fprintf(stderr, ";; Too many para (%s)!\n", sp->Comment ? sp->Comment : "");
	    break;
	}

	p_ptr = &(sp->para_data[sp->Para_num]);

	b_ptr->para_num = sp->Para_num;
	p_ptr->key_pos = i;
	p_ptr->jend_pos = b_ptr->dpnd_head;
	p_ptr->iend_pos = i;

	/* Walk backwards over the phrases whose head lies before the key,
	   or is the key itself through a non-'P' dependency; the
	   structure starts right after the first phrase that fails this */
	for (j = i - 1; 
	     j >= 0 && 
		 (sp->bnst_data[j].dpnd_head < i ||
		  (sp->bnst_data[j].dpnd_head == i &&
		   sp->bnst_data[j].dpnd_type != 'P'));
	     j--);
	p_ptr->max_path[0] = j + 1;
	p_ptr->status = 'n';

	/* Parsed input: recompute key type and score */
	if (OptInput & OPT_PARSED) {
	    if ((cp = (char *)check_feature(b_ptr->f, "¥"))) {
		cp += strlen("¥:");
		if (!strncmp(cp, "̾", strlen("̾"))) {
		    p_ptr->type = PARA_KEY_N;
		}
		else if (!strncmp(cp, "", strlen(""))) {
		    p_ptr->type = PARA_KEY_P;
		}
		else {
		    p_ptr->type = PARA_KEY_A;
		} 
	    }
	    else {
		p_ptr->type = PARA_KEY_O;
	    }

	    dp_search_scope(sp, p_ptr->key_pos, p_ptr->iend_pos, p_ptr->jend_pos);
	    ending_bonus_score = calc_ending_bonus_score(sp, p_ptr->jend_pos, p_ptr);
	    starting_bonus_score = calc_starting_bonus_score(sp, p_ptr->max_path[0], p_ptr);

	    /* norm[] is a fixed-size table: keep the span length inside
	       it so that a long span cannot index past the end */
	    span = p_ptr->jend_pos - p_ptr->max_path[0] + 1;
	    if (span > norm_last) span = norm_last;
	    if (span < 0) span = 0;

	    p_ptr->max_score = 
		(float)score_matrix[p_ptr->max_path[0]][p_ptr->key_pos + 1] 
		/ norm[span] 
		+ starting_bonus_score + ending_bonus_score;
	}

	sp->Para_num++;
    }
    return detect_para_relation(sp);
}

/*====================================================================
                               END
====================================================================*/
