package opennlp.uima.namefind;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import opennlp.maxent.GIS;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.NameSampleDataStream;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.uima.util.CasConsumerUtil;
import opennlp.uima.util.ContainingConstraint;
import opennlp.uima.util.OpennlpUtil;
import opennlp.uima.util.UimaUtil;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.ProcessTrace;

/* loaded from: input_file:opennlp/uima/namefind/NameFinderTrainer.class */
/**
 * UIMA CAS consumer that collects name finder training samples from the
 * processed CASes and trains an OpenNLP name finder model from them once the
 * collection has been processed completely.
 *
 * <p>For every sentence annotation one {@link NameSample} is created from the
 * token and name annotations the sentence contains. The samples are kept in
 * memory until {@link #collectionProcessComplete(ProcessTrace)} trains the
 * model and writes it to the configured model path below the resource
 * manager's data path.
 *
 * <p>Parameters read from the UIMA context: model path and language
 * (required), cutoff (optional, default 5), iterations (optional, default
 * 100), an optional additional training data file (its encoding is required
 * when the file is given), and the sentence, token and name type names
 * (required).
 */
public final class NameFinderTrainer extends CasConsumer_ImplBase {
    private Logger logger;
    private String modelPath;
    private String additionalTrainingDataFile;
    private String additionalTrainingDataEncoding;
    private Type sentenceType;
    private Type tokenType;
    private Type nameType;
    private String language;
    private int cutoff;
    private int iterations;

    /** Samples collected so far; set to {@code null} after training and on destroy. */
    private List<NameSample> nameFinderSamples = new ArrayList<NameSample>();

    /**
     * Reads the training configuration from the UIMA context.
     *
     * @throws ResourceInitializationException propagated from the super class
     *         or from the parameter lookups
     */
    public void initialize() throws ResourceInitializationException {
        super.initialize();
        this.logger = getUimaContext().getLogger();
        if (this.logger.isLoggable(Level.INFO)) {
            this.logger.log(Level.INFO, "Initializing the OpenNLP Name Trainer.");
        }
        this.modelPath = CasConsumerUtil.getRequiredStringParameter(getUimaContext(), UimaUtil.MODEL_PARAMETER);
        this.language = CasConsumerUtil.getRequiredStringParameter(getUimaContext(), UimaUtil.LANGUAGE_PARAMETER);
        this.cutoff = CasConsumerUtil.getOptionalIntegerParameter(getUimaContext(), UimaUtil.CUTOFF_PARAMETER, 5).intValue();
        this.iterations = CasConsumerUtil.getOptionalIntegerParameter(getUimaContext(), UimaUtil.ITERATIONS_PARAMETER, 100).intValue();
        this.additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter(getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_FILE);
        // The encoding is only looked up (and only required) when an
        // additional training data file was configured.
        if (this.additionalTrainingDataFile != null) {
            this.additionalTrainingDataEncoding = CasConsumerUtil.getRequiredStringParameter(getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_ENCODING);
        }
    }

    /**
     * Resolves the configured sentence, token and name type names against the
     * given type system.
     *
     * @param typeSystem the type system the types are looked up in
     * @throws ResourceInitializationException propagated from the parameter
     *         or type lookups
     */
    public void typeSystemInit(TypeSystem typeSystem) throws ResourceInitializationException {
        this.sentenceType = CasConsumerUtil.getType(typeSystem, CasConsumerUtil.getRequiredStringParameter(getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER));
        this.tokenType = CasConsumerUtil.getType(typeSystem, CasConsumerUtil.getRequiredStringParameter(getUimaContext(), UimaUtil.TOKEN_TYPE_PARAMETER));
        this.nameType = CasConsumerUtil.getType(typeSystem, CasConsumerUtil.getRequiredStringParameter(getUimaContext(), NameFinder.NAME_TYPE_PARAMETER));
    }

    /**
     * Drains the iterator into a new list, preserving iteration order.
     *
     * <p>The result is array backed because callers read it by index.
     *
     * @param it the iterator to consume
     * @return a new list holding all remaining elements of {@code it}
     */
    private static <T> List<T> iteratorToList(Iterator<T> it) {
        List<T> elements = new ArrayList<T>();
        while (it.hasNext()) {
            elements.add(it.next());
        }
        return elements;
    }

    /**
     * Checks whether the first annotation covers the second one.
     *
     * @param outer the candidate covering annotation
     * @param inner the annotation which should be covered
     * @return {@code true} if {@code outer} begins at or before the begin of
     *         {@code inner} and ends at or after its end
     */
    private static boolean isContaining(AnnotationFS outer, AnnotationFS inner) {
        return outer.getBegin() <= inner.getBegin() && outer.getEnd() >= inner.getEnd();
    }

    /**
     * Converts entity annotations into spans over token indexes.
     *
     * <p>The tokens are walked in list order while tracking at most one open
     * entity. A span is opened at the first token covered by an entity while
     * no entity is open, and closed (end index exclusive) at the first token
     * the open entity no longer covers, or after the last token.
     *
     * @param tokens the tokens of one sentence
     * @param entities the entity annotations of the same sentence
     * @return the entity spans expressed in token indexes
     */
    private static Span[] createNames(List<AnnotationFS> tokens, List<AnnotationFS> entities) {
        List<Span> names = new ArrayList<Span>();
        AnnotationFS openEntity = null;
        int startIndex = -1;
        int tokenIndex = 0;
        for (AnnotationFS token : tokens) {
            for (AnnotationFS entity : entities) {
                boolean covered = isContaining(entity, token);
                // Identity comparison is intended: the open entity is one of
                // the objects in the entity list.
                if (!covered && openEntity == entity) {
                    names.add(new Span(startIndex, tokenIndex));
                    startIndex = -1;
                    openEntity = null;
                }
                if (covered && openEntity == null) {
                    startIndex = tokenIndex;
                    openEntity = entity;
                }
            }
            tokenIndex++;
        }
        // An entity which reaches the last token is still open here.
        if (openEntity != null) {
            names.add(new Span(startIndex, tokenIndex));
        }
        return names.toArray(new Span[names.size()]);
    }

    /**
     * Creates one training sample per sentence annotation of the CAS and adds
     * it to the collected samples. Sentences without tokens are not added;
     * they are only logged on INFO level.
     *
     * @param cas the CAS to extract the samples from
     */
    public void processCas(CAS cas) {
        FSIterator sentenceIterator = cas.getAnnotationIndex(this.sentenceType).iterator();
        while (sentenceIterator.hasNext()) {
            AnnotationFS sentence = (AnnotationFS) sentenceIterator.next();
            ContainingConstraint insideSentence = new ContainingConstraint(sentence);
            FSIterator tokenIterator = cas.createFilteredIterator(cas.getAnnotationIndex(this.tokenType).iterator(), insideSentence);
            FSIterator nameIterator = cas.createFilteredIterator(cas.getAnnotationIndex(this.nameType).iterator(), insideSentence);

            // The iterators are used as raw types here, so the element type
            // cannot be checked by the compiler.
            @SuppressWarnings("unchecked")
            List<AnnotationFS> tokens = iteratorToList(tokenIterator);
            @SuppressWarnings("unchecked")
            List<AnnotationFS> names = iteratorToList(nameIterator);

            Span[] nameSpans = createNames(tokens, names);
            String[] tokenTexts = new String[tokens.size()];
            for (int i = 0; i < tokenTexts.length; i++) {
                tokenTexts[i] = tokens.get(i).getCoveredText();
            }

            NameSample sample = new NameSample(tokenTexts, nameSpans, (String[][]) null, false);
            if (sample.getSentence().length != 0) {
                this.nameFinderSamples.add(sample);
            } else if (this.logger.isLoggable(Level.INFO)) {
                this.logger.log(Level.INFO, "Sentence without tokens: " + sample(sentence));
            }
        }
    }

    /** Returns the covered text of the given sentence annotation for logging. */
    private static String sample(AnnotationFS sentence) {
        return sentence.getCoveredText();
    }

    /**
     * Trains the name finder model on the collected samples, plus the samples
     * of the additional training data file if one is configured, and writes
     * the model to the model path below the resource manager's data path.
     *
     * @param processTrace not used by this implementation
     * @throws ResourceProcessException declared by the overridden method
     * @throws IOException if reading the additional training data, closing
     *         its stream, training or writing the model fails with an I/O
     *         error
     */
    @SuppressWarnings("unchecked") // raw ObjectStream array, generic arrays cannot be created
    public void collectionProcessComplete(ProcessTrace processTrace) throws ResourceProcessException, IOException {
        if (this.logger.isLoggable(Level.INFO)) {
            this.logger.log(Level.INFO, "Collected " + this.nameFinderSamples.size() + " name samples.");
        }
        // Global switch of the maxent trainer, turns off its progress output.
        GIS.PRINT_MESSAGES = false;

        ObjectStream samples = ObjectStreamUtils.createObjectStream(this.nameFinderSamples);
        FileInputStream additionalTrainingDataIn = null;
        TokenNameFinderModel model;
        try {
            if (this.additionalTrainingDataFile != null) {
                if (this.logger.isLoggable(Level.INFO)) {
                    this.logger.log(Level.INFO, "Using addional training data file: " + this.additionalTrainingDataFile);
                }
                additionalTrainingDataIn = new FileInputStream(this.additionalTrainingDataFile);
                ObjectStream additionalSamples = new NameSampleDataStream(
                        new PlainTextByLineStream(new InputStreamReader(additionalTrainingDataIn, this.additionalTrainingDataEncoding)));
                samples = ObjectStreamUtils.createObjectStream(new ObjectStream[]{samples, additionalSamples});
            }
            model = NameFinderME.train(this.language, (String) null, samples, Collections.EMPTY_MAP, this.iterations, this.cutoff);
        } finally {
            // The file is only needed while training, close it on success
            // and on failure.
            if (additionalTrainingDataIn != null) {
                additionalTrainingDataIn.close();
            }
        }

        // Dereference the samples, they are not needed after training.
        this.nameFinderSamples = null;

        File modelFile = new File(getUimaContextAdmin().getResourceManager().getDataPath() + File.separatorChar + this.modelPath);
        OpennlpUtil.serialize(model, modelFile);
        if (this.logger.isLoggable(Level.INFO)) {
            this.logger.log(Level.INFO, "Model was written to: " + modelFile.getAbsolutePath());
        }
    }

    /**
     * @return always {@code false}, the consumer accumulates samples across
     *         CASes
     */
    public boolean isStateless() {
        return false;
    }

    /** Drops the reference to the collected samples. */
    public void destroy() {
        this.nameFinderSamples = null;
    }
}
