/*====================================================================

			 NTT dictionary lookup program

                                               S.Kurohashi 91. 6.25
                                               S.Kurohashi 93. 5.31

    $Id: lib_sm.c,v 1.60 2008/10/23 15:27:15 kawahara Exp $
====================================================================*/
#include "knp.h"

/* Handles of the NTT databases; each one is opened by init_ntt(). */
DBM_FILE	sm_db;		/* queried by _get_ntt() */
DBM_FILE	sm2code_db;	/* queried by sm2code() */
DBM_FILE	code2sm_db;	/* queried by code2sm() */
DBM_FILE	smp2smg_db;	/* queried by _smp2smg() */
/* TRUE/FALSE flags set by init_ntt(): was the matching database opened? */
int		SMExist;
int		SM2CODEExist;	/* only assigned when Thesaurus == USE_NTT */
int		CODE2SMExist;
int		SMP2SMGExist;

/* Shared result buffer returned by sm2code() and code2sm();
   its contents are overwritten by every call to either function. */
char  		cont_str[DATA_LEN];

/* Hash table of key -> sm strings with chained overflow entries;
   filled by register_noun_sm(), read by check_noun_sm(),
   emptied by ClearSMList(). */
SMLIST smlist[TBLSIZE];

/*==================================================================*/
			   void init_ntt()
/*==================================================================*/
{
    char *filename;

    /***  ǡ١ץ  ***/
    
    /* ñ <=> ̣ǥ */

    // ե̾ꤹ
    if (DICT[SM_DB]) {   
	filename = check_dict_filename(DICT[SM_DB], TRUE);  // .knprc ƤȤ    SM_DB  const.h Ƥ
	                                                    //                                  DICT[SM_DB]  configfile.c ǻꤵƤ
    }
    else {
	filename = check_dict_filename(SM_DB_NAME, FALSE);  // .knprc ƤʤȤ  path.h  default(SM_DB_NAME) Ȥ
    }

    if (OptDisplay == OPT_DEBUG) {
	fprintf(Outfp, "Opening %s ... ", filename);
    }

    if ((sm_db = DB_open(filename, O_RDONLY, 0)) == NULL) {
	if (OptDisplay == OPT_DEBUG) {
	    fputs("failed.\n", Outfp);
	}
	SMExist = FALSE;
#ifdef DEBUG
	fprintf(stderr, ";; Cannot open NTT word dictionary <%s>.\n", filename);
#endif
    }
    else {
	if (OptDisplay == OPT_DEBUG) {
	    fputs("done.\n", Outfp);
	}
	SMExist = TRUE;
    }
    free(filename);
    THESAURUS[USE_NTT].exist = SMExist;
    
    /* ̣ => ̣ǥ */
    if (Thesaurus == USE_NTT) {
	if (DICT[SM2CODE_DB]) {
	    filename = check_dict_filename(DICT[SM2CODE_DB], TRUE);
	}
	else {
	    filename = check_dict_filename(SM2CODE_DB_NAME, FALSE);
	}

	if (OptDisplay == OPT_DEBUG) {
	    fprintf(Outfp, "Opening %s ... ", filename);
	}

	if ((sm2code_db = DB_open(filename, O_RDONLY, 0)) == NULL) {
	    if (OptDisplay == OPT_DEBUG) {
		fputs("failed.\n", Outfp);
	    }
	    SM2CODEExist = FALSE;
#ifdef DEBUG
	    fprintf(stderr, ";; Cannot open NTT sm dictionary <%s>.\n", filename);
#endif
	}
	else {
	    if (OptDisplay == OPT_DEBUG) {
		fputs("done.\n", Outfp);
	    }
	    SM2CODEExist = TRUE;
	}
	free(filename);
    }

    /* ̣ǥ => ̣ */
    if (DICT[CODE2SM_DB]) {
	filename = check_dict_filename(DICT[CODE2SM_DB], TRUE);
    }
    else {
	filename = check_dict_filename(CODE2SM_DB_NAME, FALSE);
    }

    if (OptDisplay == OPT_DEBUG) {
	fprintf(Outfp, "Opening %s ... ", filename);
    }

    if ((code2sm_db = DB_open(filename, O_RDONLY, 0)) == NULL) {
	if (OptDisplay == OPT_DEBUG) {
	    fputs("failed.\n", Outfp);
	}
	CODE2SMExist = FALSE;
#ifdef DEBUG
	fprintf(stderr, ";; Cannot open NTT code2sm dictionary <%s>.\n", filename);
#endif
    }
    else {
	if (OptDisplay == OPT_DEBUG) {
	    fputs("done.\n", Outfp);
	}
	CODE2SMExist = TRUE;
    }
    free(filename);

    /* ͭ̾η <=> ̾η */
    if (DICT[SMP2SMG_DB]) {
	filename = check_dict_filename(DICT[SMP2SMG_DB], TRUE);
    }
    else {
	filename = check_dict_filename(SMP2SMG_DB_NAME, FALSE);
    }

    if (OptDisplay == OPT_DEBUG) {
	fprintf(Outfp, "Opening %s ... ", filename);
    }

    if ((smp2smg_db = DB_open(filename, O_RDONLY, 0)) == NULL) {
	if (OptDisplay == OPT_DEBUG) {
	    fputs("failed.\n", Outfp);
	}
	SMP2SMGExist = FALSE;
#ifdef DEBUG
	fprintf(stderr, ";; Cannot open NTT smp smg table <%s>.\n", filename);
#endif
    }
    else {
	if (OptDisplay == OPT_DEBUG) {
	    fputs("done.\n", Outfp);
	}
	SMP2SMGExist = TRUE;
    }
    free(filename);
}


/*==================================================================*/
			   void close_ntt()
/*==================================================================*/
{
    if (SMExist == TRUE)
	DB_close(sm_db);

    if (SM2CODEExist == TRUE)
	DB_close(sm2code_db);

    if (SMP2SMGExist == TRUE)
	DB_close(smp2smg_db);
}

/*==================================================================*/
		       void ClearSMList()
/*==================================================================*/
{
    /* Release every cached key/sm pair held in smlist[] and return the
       table to its empty state (all pointers NULL). */

    int i;
    SMLIST *smp, *next;

    for (i = 0; i < TBLSIZE; i++) {
	/* The head entry lives inside the array: free its strings only. */
	if (smlist[i].key) {
	    free(smlist[i].key);
	    free(smlist[i].sm);
	    smlist[i].key = NULL;
	    smlist[i].sm = NULL;
	}

	/* Overflow entries are heap nodes: free strings and the node. */
	smp = smlist[i].next;
	while (smp) {
	    free(smp->key);
	    free(smp->sm);
	    next = smp->next;
	    free(smp);
	    smp = next;
	}

	/* Without this the head kept pointing at freed nodes, so a second
	   ClearSMList() call would free them again. */
	smlist[i].next = NULL;
    }
}

/*==================================================================*/
		   int ne_check_all_sm(char *code)
/*==================================================================*/
{
    /* Return TRUE when every SM_CODE_SIZE-wide entry of code starts
       with '2', FALSE as soon as one does not.
       An empty string yields TRUE. */

    char *entry;

    for (entry = code; *entry != '\0'; entry += SM_CODE_SIZE) {
	if (*entry == '2') {
	    continue;
	}
	return FALSE;
    }
    return TRUE;
}

/*==================================================================*/
                 char *_get_ntt(char *cp, char *arg)
/*==================================================================*/
{
    /* Fetch the code string stored for cp in sm_db and filter it in
       place, keeping only selected SM_CODE_SIZE-wide entries.

       cp  : lookup key
       arg : optional; when non-NULL and non-empty, entries whose first
             character equals *arg are kept in addition to the fixed set

       Returns the buffer obtained from db_get() (the callers in this
       file free such buffers), or NULL when the key is not found. */

    int i, len, pos = 0, keep, all_ne;
    char *code;

    code = db_get(sm_db, cp);
    if (code == NULL) {
	return NULL;
    }

    /* Clamp over-long values to SM_ELEMENT_MAX entries. */
    len = strlen(code);
    if (len > SM_CODE_SIZE*SM_ELEMENT_MAX) {
#ifdef DEBUG
	fprintf(stderr, "Too long SM content <%s>.\n", code);
#endif
	len = SM_CODE_SIZE*SM_ELEMENT_MAX;
	code[len] = '\0';
    }

    /* Drop a partial trailing entry so that stepping by SM_CODE_SIZE
       can never jump over the terminator. */
    if (len % SM_CODE_SIZE != 0) {
	len -= len % SM_CODE_SIZE;
	code[len] = '\0';
    }

    /* When every entry starts with '2', a different filter applies. */
    all_ne = (ne_check_all_sm(code) == TRUE);

    for (i = 0; i + SM_CODE_SIZE <= len; i += SM_CODE_SIZE) {
	if (all_ne) {
	    /* keep '2' entries except those starting with "2001030" */
	    keep = (code[i] == '2' &&
		    strncmp(code+i, "2001030", 7) != 0);
	}
	else {
	    /* keep entries whose leading character is in the accepted set */
	    keep = ((arg && *arg && code[i] == *arg) ||
		    code[i] == '3' ||
		    code[i] == '4' ||
		    code[i] == '5' ||
		    code[i] == '6' ||
		    code[i] == '7' ||
		    code[i] == '9' ||
		    code[i] == 'a');
	}

	if (keep) {
	    /* pos and i are both multiples of SM_CODE_SIZE, so the two
	       regions are either identical (nothing to do) or disjoint. */
	    if (pos != i) {
		memcpy(code+pos, code+i, SM_CODE_SIZE);
	    }
	    pos += SM_CODE_SIZE;
	}
    }
    code[pos] = '\0';
    return code;
}

/*==================================================================*/
		       char *sm2code(char *cp)
/*==================================================================*/
{
    char *code;

    /* sm  code  1:1 б 
       -> cont_str ϰʤ */

    if (SM2CODEExist == FALSE) {
	cont_str[0] = '\0';
	return cont_str;
    }

    code = db_get(sm2code_db, cp);
    if (code) {
	strcpy(cont_str, code);
	free(code);
    }
    /* NEξ㳰 */
    else if (Thesaurus == USE_NTT) {
	if (!strncmp(cp, "ORGANIZATION", 12)) {
	    strcpy(cont_str, "ne1*********");
	}
	else if (!strncmp(cp, "PERSON", 6)) {
	    strcpy(cont_str, "ne2*********");
	}
	else if (!strncmp(cp, "LOCATION", 8)) {
	    strcpy(cont_str, "ne3*********");
	}
	else if (!strncmp(cp, "ARTIFACT", 8)) {
	    strcpy(cont_str, "ne4*********");
	}
	else if (!strncmp(cp, "DATE", 4)) {
	    strcpy(cont_str, "ne5*********");
	}
	else if (!strncmp(cp, "TIME", 4)) {
	    strcpy(cont_str, "ne6*********");
	}
	else if (!strncmp(cp, "MONEY", 5)) {
	    strcpy(cont_str, "ne7*********");
	}
	else if (!strncmp(cp, "PERCENT", 7)) {
	    strcpy(cont_str, "ne8*********");
	}
    }
    else if (Thesaurus == USE_BGH) {
	if (!strncmp(cp, "ORGANIZATION", 12)) {
	    strcpy(cont_str, "ne1********");
	}
	else if (!strncmp(cp, "PERSON", 6)) {
	    strcpy(cont_str, "ne2********");
	}
	else if (!strncmp(cp, "LOCATION", 8)) {
	    strcpy(cont_str, "ne3********");
	}
	else if (!strncmp(cp, "ARTIFACT", 8)) {
	    strcpy(cont_str, "ne4********");
	}
	else if (!strncmp(cp, "DATE", 4)) {
	    strcpy(cont_str, "ne5********");
	}
	else if (!strncmp(cp, "TIME", 4)) {
	    strcpy(cont_str, "ne6********");
	}
	else if (!strncmp(cp, "MONEY", 5)) {
	    strcpy(cont_str, "ne7********");
	}
	else if (!strncmp(cp, "PERCENT", 7)) {
	    strcpy(cont_str, "ne8********");
	}
    }
    else {
	cont_str[0] = '\0';
    }
    return cont_str;
}

/*==================================================================*/
		       char *code2sm(char *cp)
/*==================================================================*/
{
    char *sm;

    /* sm  code  1:1 б 
       -> cont_str ϰʤ */

    if (CODE2SMExist == FALSE) {
	cont_str[0] = '\0';
	return cont_str;
    }

    sm = db_get(code2sm_db, cp);
    if (sm) {
	strcpy(cont_str, sm);
	free(sm);
    }
    else {
	cont_str[0] = '\0';
    }
    return cont_str;
}

/*==================================================================*/
	       void codes2sm_print(FILE *fp, char *cp)
/*==================================================================*/
{
    /* Write the names of all codes packed in cp to fp, comma-separated.
       Each SM_CODE_SIZE-wide entry has its first character replaced by
       '1' before being translated through code2sm(). */

    char *entry;
    char one[SM_CODE_SIZE + 1];

    for (entry = cp; *entry; entry += SM_CODE_SIZE) {
	if (entry != cp) {
	    fputc(',', fp);
	}
	strncpy(one, entry, SM_CODE_SIZE);
	one[SM_CODE_SIZE] = '\0';
	one[0] = '1';
	fputs(code2sm(one), fp);
    }
}

/*==================================================================*/
		       char *_smp2smg(char *cp)
/*==================================================================*/
{
    /* Look up the first SM_CODE_SIZE characters of cp in smp2smg_db.

       Returns the buffer obtained from db_get() (smp2smg() frees it),
       or NULL when the table is unavailable or has no entry.

       The unavailable case used to return the static cont_str, which
       the caller would then have passed to free(); NULL keeps every
       non-NULL result a db_get() buffer. */

    char *code, key[SM_CODE_SIZE+1];

    if (SMP2SMGExist == FALSE) {
	return NULL;
    }

    strncpy(key, cp, SM_CODE_SIZE);
    key[SM_CODE_SIZE] = '\0';

    code = db_get(smp2smg_db, key);
    return code;
}

/*==================================================================*/
		  char *smp2smg(char *cpd, int flag)
/*==================================================================*/
{
    /* Expand the code cpd through the smp2smg table.

       The table value is a list of SM_CODE_SIZE-wide codes separated by
       '/', where a code may be followed by " side-effect". The value is
       compacted in place into a plain concatenation of codes.

       flag : when FALSE, codes marked " side-effect" are dropped.

       Returns a heap buffer the caller must free, or NULL when the
       table is unavailable, the key is missing, or no code remains. */

    char *cp, *start;
    int storep = 0, inc, use = 1;

    if (SMP2SMGExist == FALSE) {
	fprintf(stderr, ";;; Cannot open smp2smg table!\n");
	return NULL;
    }

    start = _smp2smg(cpd);

    if (start == NULL) {
	return NULL;
    }

    for (cp = start; *cp; cp+=SM_CODE_SIZE) {
	/* A truncated trailing entry would make the delimiter tests below
	   read past the terminator: stop instead. */
	if (memchr(cp, '\0', SM_CODE_SIZE)) {
	    break;
	}

	use = 1;
	if (*(cp+SM_CODE_SIZE) == '/') {
	    inc = 1;
	}
	else if (!strncmp(cp+SM_CODE_SIZE, " side-effect", 12)) {
	    if (*(cp+SM_CODE_SIZE+12) == '/') {
		inc = 13;		
	    }
	    /* marker is the last thing in the value */
	    else {
		inc = 0;
	    }
	    /* side-effect codes are skipped when flag == FALSE */
	    if (flag == FALSE) {
		use = 0;
	    }
	}
	else if (*(cp+SM_CODE_SIZE) != '\0') {
	    fprintf(stderr, ";;; Invalid delimiter! <%c> (%s)\n", 
		    *(cp+SM_CODE_SIZE), "smp2smg");
	    inc = 1;
	}
	/* the code is followed directly by the terminating '\0' */
	else {
	    inc = 0;
	}

	if (use) {
	    /* The first stored code sits exactly where it is read from;
	       copying a region onto itself is skipped. Later copies always
	       move to a strictly lower, non-overlapping address. */
	    if (start+storep != cp) {
		memcpy(start+storep, cp, SM_CODE_SIZE);
	    }
	    storep+=SM_CODE_SIZE;
	}
	if (inc) {
	    cp += inc;
	}
	else {
	    break;
	}
    }

    if (storep) {
	*(start+storep) = '\0';
	return start;
    }
    free(start);
    return NULL;
}

/*==================================================================*/
		   void merge_smp2smg(BNST_DATA *bp)
/*==================================================================*/
{
    /* Append to bp->SM_code the smp2smg() expansion of each entry that
       starts with '2', then recompute bp->SM_num from the string.

       Expansion stops at the first result that would push the list
       beyond SM_ELEMENT_MAX entries. */

    int i;
    char *p;

    if (bp->SM_code[0] == '\0') {
	return;
    }

    for (i = 0; i < bp->SM_num; i++) {
	if (bp->SM_code[i*SM_CODE_SIZE] != '2') {
	    continue;
	}

	p = smp2smg(&(bp->SM_code[i*SM_CODE_SIZE]), FALSE);
	if (p == NULL) {
	    continue;
	}

	/* Would overflow: release the expansion (it used to leak) and
	   fall through so SM_num still reflects what was appended. */
	if ((strlen(bp->SM_code)+strlen(p))/SM_CODE_SIZE > SM_ELEMENT_MAX) {
	    free(p);
	    break;
	}

	strcat(bp->SM_code, p);
	free(p);
    }
    bp->SM_num = strlen(bp->SM_code)/SM_CODE_SIZE;
}

/*==================================================================*/
	      float _ntt_code_match(char *c1, char *c2)
/*==================================================================*/
{
    /* Similarity of two single codes.

       Returns 0 when exactly one of them starts with '2' or when either
       depth (as given by code_depth()) is zero; otherwise twice the
       number of leading positions (counted from index 1) on which the
       codes agree, divided by the sum of both depths. */

    int idx, depth1, depth2, shallow, agreed;
    int first_is_2 = (*c1 == '2');
    int second_is_2 = (*c2 == '2');

    if ((first_is_2 && !second_is_2) || (!first_is_2 && second_is_2)) {
	return 0;
    }

    depth1 = code_depth(c1, SM_CODE_SIZE);
    depth2 = code_depth(c2, SM_CODE_SIZE);
    if (depth1 + depth2 == 0) {
	return 0;
    }

    shallow = Min(depth1, depth2);
    if (shallow == 0) {
	return 0;
    }

    /* find the first differing position within the shallower depth */
    for (idx = 1; idx <= shallow; idx++) {
	if (c1[idx] != c2[idx]) {
	    break;
	}
    }
    agreed = (idx <= shallow) ? idx - 1 : shallow;

    return (float)2*agreed/(depth1+depth2);
}

/*==================================================================*/
	  float ntt_code_match(char *c1, char *c2, int flag)
/*==================================================================*/
{
    /* Similarity of two codes, optionally expanding codes that start
       with '2' through smp2smg() first.

       flag == SM_EXPAND_NE      : expand both sides, return the best
                                   pairwise _ntt_code_match() score
       flag == SM_EXPAND_NE_DATA : if c1 starts with '2' compare
                                   directly; otherwise expand only c2
       any other value           : compare directly

       Returns 0 when a required expansion is unavailable. */

    float best = 0, cur;
    char *lhs, *rhs;
    int m, n;
    int owns1 = 0, owns2 = 0, count1 = 1, count2 = 1;

    if (flag != SM_EXPAND_NE && flag != SM_EXPAND_NE_DATA) {
	return _ntt_code_match(c1, c2);
    }

    if (*c1 == '2') {
	/* only SM_EXPAND_NE expands the first argument */
	if (flag != SM_EXPAND_NE) {
	    return _ntt_code_match(c1, c2);
	}
	c1 = smp2smg(c1, FALSE);
	if (!c1) {
	    return 0;
	}
	owns1 = 1;
	count1 = strlen(c1)/SM_CODE_SIZE;
    }

    if (*c2 == '2') {
	c2 = smp2smg(c2, FALSE);
	if (!c2) {
	    if (owns1 == 1) {
		free(c1);
	    }
	    return 0;
	}
	owns2 = 1;
	count2 = strlen(c2)/SM_CODE_SIZE;
    }

    /* best score over every pair of (possibly expanded) entries */
    for (lhs = c1, m = 0; m < count1; lhs += SM_CODE_SIZE, m++) {
	for (rhs = c2, n = 0; n < count2; rhs += SM_CODE_SIZE, n++) {
	    cur = _ntt_code_match(lhs, rhs);
	    if (cur > best) {
		best = cur;
	    }
	}
    }

    /* release only the buffers produced by smp2smg() above */
    if (owns1 == 1) {
	free(c1);
    }
    if (owns2 == 1) {
	free(c2);
    }
    return best;
}

/*==================================================================*/
	int sm_match_check(char *pat, char *codes, int expand)
/*==================================================================*/
{
    /* Return TRUE when _sm_match_score() is positive for pat against at
       least one SM_CODE_SIZE-wide entry of codes; FALSE otherwise,
       including when codes is NULL. */

    char *entry;

    if (!codes) {
	return FALSE;
    }

    entry = codes;
    while (*entry) {
	if (_sm_match_score(pat, entry, expand) > 0) {
	    return TRUE;
	}
	entry += SM_CODE_SIZE;
    }
    return FALSE;
}

/*==================================================================*/
		int assign_sm(BNST_DATA *bp, char *cp)
/*==================================================================*/
{
    /* Append the code for the name cp to the code list of bp that
       belongs to the active thesaurus (BGH_code or SM_code) and
       increment the matching counter.

       Returns TRUE when a code was appended; FALSE when the thesaurus
       is neither BGH nor NTT, the name has no code, the list already
       matches it, or (NTT) the list is full. */

    char *target_code, *code;
    int *num_p;

    target_code = sm2code(cp);

    if (Thesaurus == USE_BGH) {
	code = bp->BGH_code;
	num_p = &(bp->BGH_num);
    }
    else if (Thesaurus == USE_NTT) {
	code = bp->SM_code;
	num_p = &(bp->SM_num);
    }
    else {
	return FALSE;
    }

    /* No code for this name: appending "" would still have bumped the
       counter, leaving it out of step with the string. */
    if (target_code == NULL || *target_code == '\0') {
	return FALSE;
    }

    /* already present */
    if (sms_match(target_code, code, SM_NO_EXPAND_NE) == TRUE) {
	return FALSE;
    }

    /* Capacity check, same rule as merge_smp2smg().
       NOTE(review): the BGH list has no equivalent check here because
       its limit is not visible in this file -- confirm and add. */
    if (Thesaurus == USE_NTT &&
	(strlen(code)+strlen(target_code))/SM_CODE_SIZE > SM_ELEMENT_MAX) {
	return FALSE;
    }

    strcat(code, target_code);
    (*num_p)++;

    return TRUE;
}

/*==================================================================*/
 int sm_check_match_max(char *exd, char *exp, int expand, char *target)
/*==================================================================*/
{
    /* Scan every pair of entries from exp and exd with ntt_code_match().
       Each time a new best score is found, record whether both lists
       match target according to sm_match_check().

       Returns the value recorded for the best-scoring pair; FALSE when
       either list is NULL or empty, or when no pair scores above zero. */

    int i, j, step = SM_CODE_SIZE;
    int flag = FALSE;	/* was uninitialised when no pair scored > 0 */
    float score = 0, tempscore;

    /* one of the code lists is missing */
    if (!(exd && exp && *exd && *exp)) {
	return FALSE;
    }

    /* any expanding mode is treated as SM_EXPAND_NE_DATA */
    if (expand != SM_NO_EXPAND_NE) {
	expand = SM_EXPAND_NE_DATA;
    }

    /* look for the best-scoring pair */
    for (j = 0; exp[j]; j+=step) {
	for (i = 0; exd[i]; i+=step) {
	    tempscore = ntt_code_match(exp+j, exd+i, expand);
	    if (tempscore > score) {
		score = tempscore;
		/* do both lists contain an entry matching target? */
		if (sm_match_check(target, exd, expand) && sm_match_check(target, exp, expand)) {
		    flag = TRUE;
		}
		else {
		    flag = FALSE;
		}
	    }
	}
    }
    return flag;
}

/*==================================================================*/
	       int sm_fix(BNST_DATA *bp, char *targets)
/*==================================================================*/
{
    /* Restrict bp->SM_code, in place, to the entries for which
       _sm_match_score() is positive against at least one entry of
       targets, and update bp->SM_num.

       When nothing matches, the list is left unchanged.
       Returns FALSE when the list is empty, TRUE otherwise. */

    int i, j, pos = 0;
    char *codes;

    if (bp->SM_code[0] == '\0') {
	return FALSE;
    }

    codes = bp->SM_code;

    for (i = 0; *(codes+i); i += SM_CODE_SIZE) {
	for (j = 0; *(targets+j); j += SM_CODE_SIZE) {
	    if (_sm_match_score(targets+j, codes+i, SM_NO_EXPAND_NE) > 0) {
		/* pos == i means the entry is already in place; strncpy()
		   onto the same address is undefined, so skip it. When
		   pos != i the regions are a whole entry apart. */
		if (pos != i) {
		    strncpy(codes+pos, codes+i, SM_CODE_SIZE);
		}
		pos += SM_CODE_SIZE;
		break;
	    }
	}
    }

    /* only truncate when at least one entry survived */
    if (pos != 0) {
	*(codes+pos) = '\0';
	bp->SM_num = strlen(codes)/SM_CODE_SIZE;
    }
    return TRUE;
}

/*==================================================================*/
	       int sm_all_match(char *c, char *target)
/*==================================================================*/
{
    /* Return TRUE when c contains at least one entry that does not
       start with '2' and comp_sm(target, entry, 1) succeeds for every
       such entry. Entries starting with '2' are ignored. */

    char *entry;
    int seen_any = 0;

    for (entry = c; *entry; entry += SM_CODE_SIZE) {
	/* entries starting with '2' do not take part in the test */
	if (*entry == '2') {
	    continue;
	}

	/* a single failing entry decides the result */
	if (!comp_sm(target, entry, 1)) {
	    return FALSE;
	}
	seen_any = 1;
    }

    return seen_any ? TRUE : FALSE;
}

/*==================================================================*/
	       void assign_time_feature(BNST_DATA *bp)
/*==================================================================*/
{
    /* When bp does not yet carry the feature tested below and every
       entry of bp->SM_code that does not start with '2' matches the
       code returned by sm2code() (see sm_all_match()), attach two
       features to bp.
       NOTE(review): the string literals in this function look
       encoding-damaged in this view; presumably they name the "time"
       category -- verify against the original file. */

    if (!check_feature(bp->f, "") && 
	sm_all_match(bp->SM_code, sm2code(""))) {
	assign_cfeature(&(bp->f), "Ƚ", FALSE);
	assign_cfeature(&(bp->f), "", FALSE);
    }
}

/*==================================================================*/
	      void assign_sm_aux_feature(BNST_DATA *bp)
/*==================================================================*/
{
    /* Attach auxiliary features derived from bp->SM_code.
       Does nothing unless the NTT thesaurus is active.
       NOTE(review): the string literals below look encoding-damaged in
       this view -- verify against the original file. */

    if (Thesaurus != USE_NTT) {
	return;
    }

    /* first feature: delegated to assign_time_feature() */
    assign_time_feature(bp);

    /* second feature: attached when every entry of SM_code that does not
       start with '2' matches the code from sm2code() */
    if (sm_all_match(bp->SM_code, sm2code(""))) {
	assign_cfeature(&(bp->f), "", FALSE);
    }
}

/*==================================================================*/
	      int delete_matched_sm(char *sm, char *del)
/*==================================================================*/
{
    /* Remove from sm, in place, every entry that does not start with
       '2' and for which _sm_match_score() against some entry of del is
       positive. Entries starting with '2' are always kept.
       Always returns 1. */

    int i, j, flag, pos = 0;

    for (i = 0; sm[i]; i += SM_CODE_SIZE) {
	flag = 1;
	/* only entries not starting with '2' are candidates for removal */
	if (sm[i] != '2') {
	    for (j = 0; del[j]; j += SM_CODE_SIZE) {
		if (_sm_match_score(sm+i, del+j, SM_NO_EXPAND_NE) > 0) {
		    flag = 0;
		    break;
		}
	    }
	}
	if (flag) {
	    /* Until something has been removed pos == i; strncpy() onto
	       the same address is undefined, so skip it. Afterwards the
	       two regions are at least one whole entry apart. */
	    if (pos != i) {
		strncpy(sm+pos, sm+i, SM_CODE_SIZE);
	    }
	    pos += SM_CODE_SIZE;
	}
    }
    *(sm+pos) = '\0';
    return 1;
}

/*==================================================================*/
	     int delete_specified_sm(char *sm, char *del)
/*==================================================================*/
{
    /* Remove from sm, in place, every entry that does not start with
       '2' and is identical to some entry of del when the first
       character of both is ignored. Entries starting with '2' are
       always kept. Always returns 1. */

    int i, j, flag, pos = 0;

    for (i = 0; sm[i]; i += SM_CODE_SIZE) {
	flag = 1;
	/* only entries not starting with '2' are candidates for removal */
	if (sm[i] != '2') {
	    for (j = 0; del[j]; j += SM_CODE_SIZE) {
		/* compare everything except the leading character */
		if (!strncmp(sm+i+1, del+j+1, SM_CODE_SIZE-1)) {
		    flag = 0;
		    break;
		}
	    }
	}
	if (flag) {
	    /* Until something has been removed pos == i; strncpy() onto
	       the same address is undefined, so skip it. Afterwards the
	       two regions are at least one whole entry apart. */
	    if (pos != i) {
		strncpy(sm+pos, sm+i, SM_CODE_SIZE);
	    }
	    pos += SM_CODE_SIZE;
	}
    }
    *(sm+pos) = '\0';
    return 1;
}

/*==================================================================*/
		void fix_sm_person(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* For every bunsetsu of sp that carries the feature tested below,
       prune its SM_code list and attach a marker feature.
       Does nothing unless the NTT thesaurus is active.
       NOTE(review): the feature-name string literals look
       encoding-damaged in this view; presumably the tested feature
       marks person names -- verify against the original file. */

    int i;

    if (Thesaurus != USE_NTT) return;

    /* For matching bunsetsu:
       o remove the entries not starting with '2' that match the
         pattern "100*********" (see delete_matched_sm())
       o attach a feature (presumably one that blocks later expansion
         of the '2' entries -- TODO confirm) */

    for (i = 0; i < sp->Bnst_num; i++) {
	if (check_feature((sp->bnst_data+i)->f, "̾")) {
	    /* entries starting with '2' are kept by delete_matched_sm() */
	    delete_matched_sm((sp->bnst_data+i)->SM_code, "100*********"); /* pattern of the entries to drop */
	    assign_cfeature(&((sp->bnst_data+i)->f), "ԸͭŸػ", FALSE);
	}
    }
}

/*==================================================================*/
      void fix_sm_place(SENTENCE_DATA *sp, CF_PRED_MGR *cpm_ptr)
/*==================================================================*/
{
    /* For the first case element of cpm_ptr that satisfies the tests
       below, expand and then narrow its SM_code list and attach two
       features. Does nothing unless the NTT thesaurus is active.
       NOTE(review): the string literals passed to MatchPP(),
       cf_match_element() and assign_cfeature() look encoding-damaged
       in this view -- verify against the original file. */

    int i, num;

    if (Thesaurus != USE_NTT) return;

    for (i = 0; i < cpm_ptr->cf.element_num; i++) {
	/* slot of the first case-frame match assigned to element i */
	num = cpm_ptr->cmm[0].result_lists_d[0].flag[i];
	/* the element must have an assignment (num >= 0) and
	   elem_b_num[i] must be greater than -2 */
	if (cpm_ptr->elem_b_num[i] > -2 && 
	    cpm_ptr->elem_b_ptr[i] && /* pointer must not be NULL */
	    num >= 0 && 
	    MatchPP(cpm_ptr->cmm[0].cf_ptr->pp[num][0], "") && 
	    cf_match_element(cpm_ptr->cmm[0].cf_ptr->sm[num], "", TRUE)) {
	    /* append the smp2smg() expansions of the '2' entries first */
	    merge_smp2smg((BNST_DATA *)cpm_ptr->elem_b_ptr[i]);
	    /* then keep only the entries matching one of the two patterns */
	    sm_fix((BNST_DATA *)cpm_ptr->elem_b_ptr[i], "101*********20**********");
	    assign_cfeature(&(cpm_ptr->elem_b_ptr[i]->f), "ԸͭŸػ", FALSE);
	    assign_cfeature(&(cpm_ptr->elem_b_ptr[i]->f), "", FALSE);
	    /* only the first qualifying element is processed */
	    break;
	}
    }
}

/*==================================================================*/
	      void register_noun_sm(char *key, char *sm)
/*==================================================================*/
{
    /* Store a copy of sm under key in the smlist[] cache, replacing
       the value of an existing entry with the same key.
       Does nothing when key or sm is NULL. */

    SMLIST *slp, *node;

    if (key == NULL || sm == NULL) {
	return;
    }

    slp = &(smlist[hash(key, strlen(key))]);

    /* Empty slot: the head entry inside the array is used directly. */
    if (slp->key == NULL) {
	slp->key = strdup(key);
	slp->sm = strdup(sm);
	/* an empty slot has no chain; make that explicit so a stale
	   pointer can never be followed later */
	slp->next = NULL;
	return;
    }

    /* Walk the chain: update in place on a key match, otherwise stop
       at the last node. */
    for (node = slp; ; node = node->next) {
	if (!strcmp(node->key, key)) {
	    free(node->sm);
	    node->sm = strdup(sm);
	    return;
	}
	if (node->next == NULL) {
	    break;
	}
    }

    /* Key not present: append a new node at the end of the chain. */
    node->next = (SMLIST *)malloc_data(sizeof(SMLIST), "register_noun_sm");
    node->next->key = strdup(key);
    node->next->sm = strdup(sm);
    node->next->next = NULL;
}

/*==================================================================*/
		    char *check_noun_sm(char *key)
/*==================================================================*/
{
    /* Look key up in the smlist[] cache.
       Returns a strdup() copy of the stored sm string on a hit, NULL on
       a miss. At VERBOSE2 and above a hit is reported on stderr. */

    SMLIST *entry;
    char *copy;

    entry = &(smlist[hash(key, strlen(key))]);
    if (entry->key == NULL) {
	return NULL;
    }

    for (; entry != NULL; entry = entry->next) {
	if (strcmp(entry->key, key) != 0) {
	    continue;
	}

	copy = strdup(entry->sm);

	if (VerboseLevel >= VERBOSE2) {
	    fprintf(stderr, ";; Cache hit!: %s [", key);
	    codes2sm_print(stderr, copy);
	    fprintf(stderr, "]\n");
	}

	return copy;
    }
    return NULL;
}

/*==================================================================*/
   void specify_sm_from_cf(SENTENCE_DATA *sp, CF_PRED_MGR *cpm_ptr)
/*==================================================================*/
{
    /* For each case element of cpm_ptr that was matched to a case-frame
       slot with a score above CF_DECIDE_THRESHOLD, replace the SM_code
       of the corresponding tag in sp by the result of
       get_most_similar_code() and cache that result under the head
       word via register_noun_sm().
       Does nothing unless the NTT thesaurus is active. */

    int i, num;
    char *new_code, *sm_codes;

    if (Thesaurus != USE_NTT) {
	return;
    }

    for (i = 0; i < cpm_ptr->cf.element_num; i++) {
	/* elements without a bunsetsu or without codes are skipped */
	if (!cpm_ptr->elem_b_ptr[i] || !cpm_ptr->elem_b_ptr[i]->SM_code[0]) {
	    continue;
	}

	num = cpm_ptr->cmm[0].result_lists_d[0].flag[i];

	/* the element needs an assigned slot ... */
	if (!(cpm_ptr->elem_b_num[i] > -2 && num >= 0)) {
	    continue;
	}
	/* ... that has example codes ... */
	if (!cpm_ptr->cmm[0].cf_ptr->ex[num]) {
	    continue;
	}
	/* ... was not matched as MATCH_SUBJECT ... */
	if (cpm_ptr->cmm[0].result_lists_p[0].pos[num] == MATCH_SUBJECT) {
	    continue;
	}
	/* ... and scored above the decision threshold */
	if (!(cpm_ptr->cmm[0].result_lists_d[0].score[i] > CF_DECIDE_THRESHOLD)) {
	    continue;
	}

	/* candidate codes: sm_specify if present, otherwise the examples
	   minus whatever sm_delete lists */
	if (cpm_ptr->cmm[0].cf_ptr->sm_specify[num]) {
	    sm_codes = strdup(cpm_ptr->cmm[0].cf_ptr->sm_specify[num]);
	}
	else {
	    sm_codes = strdup(cpm_ptr->cmm[0].cf_ptr->ex[num]);
	    if (cpm_ptr->cmm[0].cf_ptr->sm_delete[num]) {
		delete_specified_sm(sm_codes, cpm_ptr->cmm[0].cf_ptr->sm_delete[num]);
	    }
	}

	new_code = get_most_similar_code(cpm_ptr->elem_b_ptr[i]->SM_code, sm_codes);
	if (new_code != NULL) {
	    /* act only when the code list actually changes */
	    if (strcmp(cpm_ptr->elem_b_ptr[i]->SM_code, new_code) != 0) {
		if (VerboseLevel >= VERBOSE2) {
		    fprintf(stderr, ";;; %s %d %s [",
			    sp->KNPSID ? sp->KNPSID : "?",
			    cpm_ptr->elem_b_ptr[i]->num,
			    cpm_ptr->elem_b_ptr[i]->head_ptr->Goi);
		    codes2sm_print(stderr, cpm_ptr->elem_b_ptr[i]->SM_code);
		    fprintf(stderr, "] -> [");
		    codes2sm_print(stderr, new_code);
		    fprintf(stderr, "]\n");
		}

		strcpy((sp->tag_data + cpm_ptr->elem_b_ptr[i]->num)->SM_code, new_code);
		(sp->tag_data + cpm_ptr->elem_b_ptr[i]->num)->SM_num = strlen(new_code) / SM_CODE_SIZE;

		/* remember the decision for this head word */
		register_noun_sm(cpm_ptr->elem_b_ptr[i]->head_ptr->Goi, new_code);
	    }
	    free(new_code);
	}
	free(sm_codes);
    }
}

/*==================================================================*/
   void assign_ga_subject(SENTENCE_DATA *sp, CF_PRED_MGR *cpm_ptr)
/*==================================================================*/
{
    /* For the first case element of cpm_ptr whose assigned case-frame
       slot passes the MatchPP() test below, possibly add a code (via
       assign_sm()) and a feature to the corresponding tag in sp.
       Does nothing unless the NTT thesaurus is active.
       NOTE(review): the string literals in this function look
       encoding-damaged in this view -- verify against the original
       file before relying on the names. */

    int i, num;

    if (Thesaurus != USE_NTT) return;

    for (i = 0; i < cpm_ptr->cf.element_num; i++) {
	/* slot of the first case-frame match assigned to element i */
	num = cpm_ptr->cmm[0].result_lists_d[0].flag[i];
	/* the element must have an assignment and elem_b_num[i] > -2 */
	if (cpm_ptr->elem_b_num[i] > -2 && 
	    cpm_ptr->elem_b_ptr[i] && /* pointer must not be NULL */
	    cpm_ptr->cmm[0].result_lists_d[0].flag[i] >= 0 && 
	    MatchPP(cpm_ptr->cmm[0].cf_ptr->pp[num][0], "")) {
	    /* All of the following must hold:
	       o the element lacks the two features checked first
	       o the predicate carries the feature checked third
	       o the slot's sm matches the given name (cf_match_element())
	       o the element has no codes at all (SM_num == 0), or its
	         SM_code matches one of the four sm2code() results
	    */
	    if (!check_feature(cpm_ptr->elem_b_ptr[i]->f, "Ϳ") && 
		!check_feature(cpm_ptr->elem_b_ptr[i]->f, "") && 
		check_feature(cpm_ptr->pred_b_ptr->f, "Ѹ:ư") && 
		cf_match_element(cpm_ptr->cmm[0].cf_ptr->sm[num], "", TRUE) && 
		(cpm_ptr->elem_b_ptr[i]->SM_num == 0 || 
		 /* (!(cpm_ptr->cmm[0].cf_ptr->etcflag & CF_GA_SEMI_SUBJECT) && ( */
		 sm_match_check(sm2code(""), cpm_ptr->elem_b_ptr[i]->SM_code, SM_NO_EXPAND_NE) || 
		 sm_match_check(sm2code("̾"), cpm_ptr->elem_b_ptr[i]->SM_code, SM_NO_EXPAND_NE) || /* NOTE(review): original remark mis-encoded; presumably notes that other name categories are already covered */
		 sm_match_check(sm2code("ʪ"), cpm_ptr->elem_b_ptr[i]->SM_code, SM_NO_EXPAND_NE) || 
		 sm_match_check(sm2code(""), cpm_ptr->elem_b_ptr[i]->SM_code, SM_NO_EXPAND_NE))) {
		assign_sm((BNST_DATA *)(sp->tag_data + cpm_ptr->elem_b_ptr[i]->num), "");
		assign_cfeature(&((sp->tag_data + cpm_ptr->elem_b_ptr[i]->num)->f), "Ϳ", FALSE);
	    }
	    /* only the first element passing the outer test is considered */
	    break;
	}
    }
}

/*==================================================================*/
		  void sm2feature(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* For every tag of sp that carries an "NTT" feature, attach a new
       feature of the form "<value of that feature>:<SM_code>". */

    int i;
    char *cp, feature_buffer[BNST_LENGTH_MAX + SM_CODE_SIZE * SM_ELEMENT_MAX + 4];

    for (i = 0; i < sp->Tag_num; i++) {
	cp = check_feature((sp->tag_data + i)->f, "NTT");
	if (cp == NULL) {
	    continue;
	}

	/* The length of the feature text is not bounded here, so the
	   write is limited to the buffer (truncating rather than
	   overrunning the stack). */
	snprintf(feature_buffer, sizeof(feature_buffer), "%s:%s",
		 cp, (sp->tag_data + i)->SM_code);
	assign_cfeature(&((sp->tag_data + i)->f), feature_buffer, FALSE);
    }
}

/*====================================================================
                               END
====================================================================*/
