#!/bin/sh

# usage
# Write the command-line synopsis to standard output, then terminate the
# whole script with exit status 1.  This function never returns.
usage () {
    synopsis="Usage: $0 [-d cdrom_dir]"
    echo "$synopsis"
    exit 1
}

# make_dir DIR
# Ensure that directory DIR exists, creating it together with any missing
# parent directories when it does not.
#   $1 - path of the directory to create
# Returns 0 when DIR is already a directory; otherwise returns the exit
# status of mkdir.
make_dir () {
    if [ ! -d "$1" ]; then
        # The argument is quoted so that a path containing whitespace or
        # glob characters is created as one directory instead of being
        # split into several words; `--` protects a name starting with `-`.
        mkdir -p -- "$1"
    fi
}

# Directory searched for mai95.txt; the -d option replaces this default.
CDROM_DIR=/mnt/cdrom
# Command used to run the Perl conversion scripts.
PERL=perl
# Directory part of the path this script was invoked with.  "$0" is quoted
# so that an invocation path containing whitespace is passed to dirname as
# a single argument.
BASE_DIR=$(dirname "$0")
# Data directory and its sub-directories, all located under BASE_DIR.
DAT_DIR=$BASE_DIR/dat
DEST_DIR=$DAT_DIR/num    # intermediate files (*.all, *.org, *.KNP)
SYN_DIR=$DAT_DIR/syn     # destination of the "syn" .KNP files
REL_DIR=$DAT_DIR/rel     # destination of the "rel" .KNP files

# Command-line option handling:
#   -d DIR   use DIR as the CD-ROM directory
#   -h       print the usage message (usage exits the script)
# Every other option reported by getopts also ends in the usage message.
while getopts d:h OPT; do
    case $OPT in
        d)
            CDROM_DIR=$OPTARG
            ;;
        h | *)
            usage
            ;;
    esac
done
# Discard the arguments getopts has consumed.
shift $((OPTIND - 1))

# Refuse to continue unless mai95.txt is a regular file in the CD-ROM
# directory: report the missing path and show the usage message, which
# exits the script.
# The path is quoted; unquoted, a directory name containing whitespace
# would expand to several words, `[` would fail with a syntax error and
# this guard would be skipped instead of triggering.
if [ ! -f "$CDROM_DIR/mai95.txt" ]; then
    echo "Not found: $CDROM_DIR/mai95.txt"
    usage
fi

# Create the two output directories.  The arguments are quoted so a path
# containing whitespace stays a single argument.
# Stop immediately when a directory cannot be created: the `mv` commands
# further down use these paths as targets, and with the directory missing
# each generated file would be renamed onto the same path, overwriting the
# previous one.
make_dir "$SYN_DIR" || exit 1
make_dir "$REL_DIR" || exit 1

echo "converting mai95.txt to SGML ..."
# Feed mai95.txt through trans.pl and pipe the result into mainichi.pl,
# which receives the intermediate directory as its only argument.
# Diagnostics written by trans.pl to stderr are discarded.
# All path expansions are quoted so directories containing whitespace are
# passed as single arguments.  $PERL is intentionally left unquoted so the
# variable may hold an interpreter name followed by options.
$PERL "$BASE_DIR/src/trans.pl" < "$CDROM_DIR/mai95.txt" 2> /dev/null \
    | $PERL "$BASE_DIR/src/mainichi.pl" "$DEST_DIR"

echo "making KyotoCorpus (syn) ..."
# For each listed two-digit suffix (the list runs 01-17 without 02):
#   1. format.pl -exed 9501NN reads 9501.all; its output is filtered by
#      dupli.pl and stored as 9501NN.org,
#   2. num2KNP.pl is run on the 9501NN base name,
#   3. the resulting 9501NN.KNP is moved into the syn directory.
# Path expansions are quoted so directories containing whitespace work;
# $PERL is intentionally left unquoted so it may carry interpreter options.
for i in 01 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17
do
    echo "converting 9501$i ..."
    $PERL "$BASE_DIR/src/format.pl" -exed "9501$i" < "$DEST_DIR/9501.all" \
        | $PERL "$BASE_DIR/src/dupli.pl" > "$DEST_DIR/9501$i.org"
    $PERL "$BASE_DIR/src/num2KNP.pl" "$DEST_DIR/9501$i"
    # The trailing slash makes mv fail when the target directory is
    # missing instead of renaming the file to that path.
    mv -f "$DEST_DIR/9501$i.KNP" "$SYN_DIR/"
done

# For each two-digit value i from 01 to 12:
#   1. for every j from 01 to 31, format.pl -ed 95ij reads 95i.all and its
#      output is filtered by dupli.pl; the outputs of all 31 runs are
#      concatenated, in order, into 95iED.org,
#   2. num2KNP.pl is run on the 95iED base name,
#   3. the resulting 95iED.KNP is moved into the syn directory.
# Path expansions are quoted so directories containing whitespace work;
# $PERL is intentionally left unquoted so it may carry interpreter options.
for i in 01 02 03 04 05 06 07 08 09 10 11 12
do
    echo "converting 95${i}ED ..."
    # A single redirection on the inner loop truncates the .org file once
    # and collects the output of every iteration.
    for j in 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    do
        $PERL "$BASE_DIR/src/format.pl" -ed "95$i$j" < "$DEST_DIR/95$i.all" \
            | $PERL "$BASE_DIR/src/dupli.pl"
    done > "$DEST_DIR/95${i}ED.org"
    $PERL "$BASE_DIR/src/num2KNP.pl" "$DEST_DIR/95${i}ED"
    # The trailing slash makes mv fail when the target directory is
    # missing instead of renaming the file to that path.
    mv -f "$DEST_DIR/95${i}ED.KNP" "$SYN_DIR/"
done

echo "making KyotoCorpus (rel) ..."
# For each listed two-digit suffix, run num2KNP.pl with --rel and the
# src/rel.dat file on the 9501NN base name, then move the resulting
# 9501NN.KNP into the rel directory.
# Path expansions are quoted so directories containing whitespace work;
# $PERL is intentionally left unquoted so it may carry interpreter options.
for i in 01 03 04 05 06 07
do
    echo "converting 9501$i (with relation tags) ..."
    $PERL "$BASE_DIR/src/num2KNP.pl" --rel "$BASE_DIR/src/rel.dat" "$DEST_DIR/9501$i"
    # The trailing slash makes mv fail when the target directory is
    # missing instead of renaming the file to that path.
    mv -f "$DEST_DIR/9501$i.KNP" "$REL_DIR/"
done
