/*
  Copyright(C) 2007-2012 National Institute of Information and Communications Technology
*/

/*
  Model Module
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include "exception.h"
#include "model.h"


#define BUF_SIZE (32 * 1024)


/*
  Create an empty model.

  Allocates an MDL with smalloc(), attaches a fresh string table obtained
  from sdb_new() as mdl->ft, and sets mdl->lmd to NULL.

  Returns the new model, or NULL when sdb_new() fails.  In the failure case
  the partially built MDL is released so that it is not leaked.
*/
MDL *mdl_new(void) {
  MDL *mdl;

  mdl = smalloc(sizeof(MDL));
  mdl->ft = sdb_new();
  if (mdl->ft == NULL) {
    /* NOTE(review): assumes smalloc() memory is released with free() -- confirm */
    free(mdl);
    return NULL;
  }
  mdl->lmd = NULL;

  return mdl;
}


/*
  Read a model from a stream.

  Each input line has the form "<value>\t<feature string>\n".  The feature
  string is registered in mdl->ft with sdb_put() and the value is stored in
  mdl->lmd at the index sdb_put() returned.

  fp: stream to read from; read until fgets() reports end of input.

  Returns the newly built model.  Malformed input is reported through
  exception(): a line without a trailing newline (longer than BUF_SIZE - 1
  characters, or an unterminated last line), an empty read, or a line
  without a TAB separator.
*/
MDL *mdl_read(FILE *fp) {
  int i, j;
  MDL *mdl;
  int len;
  char buf[BUF_SIZE], *ptr;
  int ft_size;
  int old_size;

  /* Initialization */
  mdl = smalloc(sizeof(MDL));
  mdl->ft = sdb_new();
  exception(mdl->ft == NULL, "sdb_new() failed");
  ft_size = 0;
  mdl->lmd = NULL;

  while (fgets(buf, BUF_SIZE, fp) != NULL) {
    len = strlen(buf);
    /* fgets() can hand back an empty string when a line starts with a NUL
       byte; reject it here instead of inspecting buf[-1] below. */
    exception(len == 0, "wrong model file format");
    exception(buf[len - 1] != '\n', "buffer overflow");
    buf[--len] = '\0';

    /* Split "<value>\t<feature>" in place.  strchr() is the standard
       equivalent of the legacy index(), which lives in <strings.h>. */
    ptr = strchr(buf, '\t');
    exception(ptr == NULL, "wrong model file format");
    *(ptr++) = '\0';

    i = sdb_put(mdl->ft, ptr);
    exception(i == -1, "sdb_put() failed");

    /* Grow the value array until it covers every index in the table. */
    while (ft_size < sdb_size(mdl->ft)) {
      old_size = ft_size;
      ft_size = 2 * ft_size + 1;
      mdl->lmd = srealloc(mdl->lmd, sizeof(double) * ft_size);
      /* Zero the new slots so no entry is ever read uninitialized. */
      for (j = old_size; j < ft_size; j++) {
        mdl->lmd[j] = 0.0;
      }
    }
    mdl->lmd[i] = atof(buf);
  }

  /* Trim the value array to exactly sdb_size(mdl->ft) entries. */
  ft_size = sdb_size(mdl->ft);
  if (ft_size > 0) {
    mdl->lmd = srealloc(mdl->lmd, sizeof(double) * ft_size);
  }

  return mdl;
}


/*
  Write a model to a stream.

  Emits one line per feature index i in [0, sdb_size(mdl->ft)) in the form
  "<value>\t<feature string>\n", where the value is mdl->lmd[i] printed with
  "%.32g" and the string comes from sdb_str().  Entries whose value is
  exactly 0.0 are skipped.

  mdl: model to write.
  fp:  destination stream.

  Returns 0 on success, or -1 as soon as fprintf() reports a write error.
*/
int mdl_write(MDL *mdl, FILE *fp) {
  int i;
  int ft_size;

  ft_size = sdb_size(mdl->ft);
  for (i = 0; i < ft_size; i++) {
    if (mdl->lmd[i] == 0.0) continue;
    /* fprintf() returns a negative value on output error; do not report
       success for a model that was only partially written. */
    if (fprintf(fp, "%.32g\t%s\n", mdl->lmd[i], sdb_str(mdl->ft, i)) < 0) {
      return -1;
    }
  }

  return 0;
}


/*
  Compute a score.

  Adds up mdl->lmd[fv[k]] for k = 0 .. fv_num - 1, in that order, and
  returns the sum.  The result is 0.0 when fv_num is not positive.

  mdl:    model whose lmd array is indexed.
  fv_num: number of entries in fv.
  fv:     feature indices; each one is used directly as an index into
          mdl->lmd without any range check.
*/
double mdl_score(MDL *mdl, int fv_num, int *fv) {
  double total = 0.0;
  const int *feat = fv;
  int left;

  for (left = fv_num; left > 0; left--) {
    total += mdl->lmd[*feat++];
  }

  return total;
}


/*
  Helper for mdl_fvsub(): walk src[0 .. src_num-1] and, for every feature
  whose lookup-table entry is still non-zero, append (feature, entry) to
  (c_idx, c_val) at position *c_num, clear the entry, and advance *c_num.
*/
static void fvsub_collect(int src_num, int *src, int *c_num, int *c_idx, short *c_val, short *lut) {
  int k;

  for (k = 0; k < src_num; k++) {
    if (lut[src[k]] == 0) continue;

    c_idx[*c_num] = src[k];
    c_val[*c_num] = lut[src[k]];
    lut[src[k]] = 0;   /* emit each feature once and leave the table clean */
    (*c_num)++;
  }
}


/*
  Compute the difference of two feature vectors (c = a - b).

  The computation uses a lookup table: every element of lut must be 0 on
  entry, and lut must have at least as many elements as there are features.
  Every entry touched here is back to 0 on return.

  a_num, a: first vector, a list of feature indices (repeats are counted).
  b_num, b: second vector, same representation.
  c_num:    receives the number of entries written to c_idx / c_val.
  c_idx:    receives the feature indices whose counts differ.
  c_val:    receives, for each c_idx entry, (count in a) - (count in b).
  lut:      scratch lookup table indexed by feature index.

  c_idx and c_val need room for up to (a_num + b_num) entries.  Features
  that occur in a are emitted first, in order of first occurrence, followed
  by the remaining features from b.
*/
void mdl_fvsub(int a_num, int *a, int b_num, int *b, int *c_num, int *c_idx, short *c_val, short *lut) {
  int k;

  /* Accumulate the per-feature difference in the lookup table. */
  for (k = 0; k < a_num; k++) {
    lut[a[k]]++;
  }
  for (k = 0; k < b_num; k++) {
    lut[b[k]]--;
  }

  /* Build the result vector. */
  *c_num = 0;
  fvsub_collect(a_num, a, c_num, c_idx, c_val, lut);
  fvsub_collect(b_num, b, c_num, c_idx, c_val, lut);
}


/*
  Add a feature to a feature vector.

  The variadic arguments are C strings terminated by a NULL pointer.  They
  are concatenated into a single feature string, which is looked up with
  sdb_id(ft, string, add).  If the lookup yields an index other than -1,
  that index is appended to fv and *fv_num is incremented; otherwise the
  vector is left untouched.

  ft:     string table passed to sdb_id().
  fv_num: in/out count of entries in fv.
  fv:     feature vector to append to; the caller must provide room.
  add:    forwarded unchanged as the third argument of sdb_id().
  ...:    char * pieces of the feature string, ending with NULL.

  Always returns 0.  exception() is raised with "too long feature string"
  when the concatenation plus its terminator would exceed BUF_SIZE bytes.
*/
int mdl_addftr(SDB *ft, int *fv_num, int *fv, int add, ...) {
  char joined[BUF_SIZE];
  int used = 0;
  va_list args;
  char *piece;
  int id;

  /* Concatenate the strings given as arguments. */
  va_start(args, add);
  while ((piece = va_arg(args, char *)) != NULL) {
    int piece_len = strlen(piece);

    exception(used + piece_len + 1 > BUF_SIZE, "too long feature string");

    /* Copy the contents; the terminator is written once after the loop. */
    memcpy(joined + used, piece, piece_len);
    used += piece_len;
  }
  va_end(args);
  joined[used] = '\0';

  id = sdb_id(ft, joined, add);
  if (id != -1) {
    fv[(*fv_num)++] = id;
  }

  return 0;
}
