/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.brat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import opennlp.tools.formats.brat.BratAnnotation;
import opennlp.tools.formats.brat.BratDocument;
import opennlp.tools.formats.brat.SegmenterObjectStream;
import opennlp.tools.formats.brat.SpanAnnotation;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;

/**
 * Converts {@link BratDocument}s into one {@link NameSample} per detected sentence.
 *
 * <p>The text of each document is split into sentences with the configured
 * {@link SentenceDetector}, every sentence is tokenized with the configured
 * {@link Tokenizer}, and each {@link SpanAnnotation} that is contained in a sentence is
 * translated from character offsets into a sentence-local token-index span.
 *
 * <p>Annotations that are not contained in any single sentence, or whose (whitespace
 * trimmed) boundaries do not coincide with token boundaries, are dropped and reported on
 * {@code System.err}.
 */
public class BratNameSampleStream
        extends SegmenterObjectStream<BratDocument, NameSample> {

    private final SentenceDetector sentDetector;
    private final Tokenizer tokenizer;

    /**
     * Creates a stream that uses the given, already constructed components.
     *
     * @param sentDetector detector used to split the text of each document into sentences
     * @param tokenizer tokenizer applied to the text of each sentence
     * @param samples the documents to convert
     */
    protected BratNameSampleStream(SentenceDetector sentDetector, Tokenizer tokenizer, ObjectStream<BratDocument> samples) {
        super(samples);
        this.sentDetector = sentDetector;
        this.tokenizer = tokenizer;
    }

    /**
     * Creates a stream that builds a {@link SentenceDetectorME} and a {@link TokenizerME}
     * from the given models.
     *
     * @param sentModel model handed to the {@link SentenceDetectorME}
     * @param tokenModel model handed to the {@link TokenizerME}
     * @param samples the documents to convert
     */
    protected BratNameSampleStream(SentenceModel sentModel, TokenizerModel tokenModel, ObjectStream<BratDocument> samples) {
        super(samples);
        this.sentDetector = new SentenceDetectorME(sentModel);
        this.tokenizer = new TokenizerME(tokenModel);
    }

    /**
     * Splits one document into sentences and produces a {@link NameSample} for each of them.
     *
     * @param sample the document to convert
     * @return one sample per detected sentence, in sentence order; only the first sample has
     *     its final boolean constructor argument set to {@code true}
     * @throws IOException declared by the overridden method; not thrown directly by the code
     *     in this method
     */
    @Override
    protected List<NameSample> read(BratDocument sample) throws IOException {
        final String text = sample.getText();

        // Collect the span annotations once. The id set tracks entities that have not yet
        // been matched to a sentence so they can be reported after all sentences are done.
        List<SpanAnnotation> entities = new ArrayList<SpanAnnotation>();
        Set<String> unmatchedEntityIds = new HashSet<String>();
        for (BratAnnotation ann : sample.getAnnotations()) {
            if (ann instanceof SpanAnnotation) {
                entities.add((SpanAnnotation) ann);
                unmatchedEntityIds.add(ann.getId());
            }
        }

        Span[] sentences = this.sentDetector.sentPosDetect(text);
        List<NameSample> samples = new ArrayList<NameSample>(sentences.length);

        for (Span sentence : sentences) {
            String sentenceText = sentence.getCoveredText(text).toString();
            Span[] tokens = this.tokenizer.tokenizePos(sentenceText);

            // Token offsets are relative to the sentence; shift them to document offsets so
            // they can be compared with annotation offsets. Begin and end offsets are kept
            // in separate maps: encoding begins as negated keys in a shared map makes the
            // two key spaces collide at document offset 0 (since -0 == 0).
            Map<Integer, Integer> beginOffsetToTokenIndex = new HashMap<Integer, Integer>();
            Map<Integer, Integer> endOffsetToTokenIndex = new HashMap<Integer, Integer>();
            int sentenceStart = sentence.getStart();
            for (int i = 0; i < tokens.length; i++) {
                beginOffsetToTokenIndex.put(sentenceStart + tokens[i].getStart(), i);
                // Exclusive end: a name ending with token i ends at token index i + 1.
                endOffsetToTokenIndex.put(sentenceStart + tokens[i].getEnd(), i + 1);
            }

            List<Span> names = new ArrayList<Span>();
            for (SpanAnnotation entity : entities) {
                Span entitySpan = entity.getSpan();
                if (!sentence.contains(entitySpan)) {
                    continue;
                }
                unmatchedEntityIds.remove(entity.getId());

                // Containment is tested on the raw span; alignment uses the trimmed span.
                entitySpan = entitySpan.trim(text);
                Integer nameBeginIndex = beginOffsetToTokenIndex.get(entitySpan.getStart());
                Integer nameEndIndex = endOffsetToTokenIndex.get(entitySpan.getEnd());

                if (nameBeginIndex != null && nameEndIndex != null) {
                    names.add(new Span(nameBeginIndex.intValue(), nameEndIndex.intValue(), entity.getType()));
                } else {
                    System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(text) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!");
                }
            }

            // The boolean is true only for the first sentence of the document
            // (presumably NameSample's clear-adaptive-data flag -- confirm against NameSample).
            boolean firstSentenceOfDocument = samples.isEmpty();
            samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, firstSentenceOfDocument));
        }

        // Whatever is left was not contained in any single detected sentence.
        for (String id : unmatchedEntityIds) {
            System.err.println("Dropped entity " + id + " in document " + sample.getId() + ", is not matching sentence segmentation!");
        }
        return samples;
    }
}

