uimaFIT and Groovy allow users to quickly prototype analysis workflows. This works best when the UIMA components used in the workflow carry the uimaFIT Java annotations such as @ConfigurationParameter, but it also works for arbitrary UIMA components.

Example using uimaFIT, Groovy, and OpenNLP

The following example illustrates how to use uimaFIT and Groovy to build and run an analysis pipeline using the Apache OpenNLP UIMA components:

#!/usr/bin/env groovy
@Grab(group='org.apache.uima', module='uimafit-core', version='2.1.0')
@Grab(group='org.apache.opennlp', module='opennlp-uima', version='1.5.3')

import org.apache.uima.fit.pipeline.*;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.*;
import static org.apache.uima.fit.factory.CollectionReaderFactory.*;
import static org.apache.uima.fit.factory.ExternalResourceFactory.*;
import static org.apache.uima.fit.factory.JCasFactory.*
import static org.apache.uima.fit.util.CasUtil.*;

import opennlp.uima.postag.*;
import opennlp.uima.tokenize.*;
import opennlp.uima.sentdetect.*;
import opennlp.uima.util.*;

// Create the JCas to be analyzed from the OpenNLP UIMA type system descriptor,
// then set its text and language
def typeSystemLocation = "http://svn.apache.org/repos/asf/opennlp/tags/opennlp-1.5.3-rc3/opennlp-uima/descriptors/TypeSystem.xml"
def document = createJCasFromPath(typeSystemLocation)
document.setDocumentText("""
  The quick brown fox jumps over the lazy dog.
  Later, he jumped over the moon.""")
document.setDocumentLanguage("en")

// Look up the sentence/token types and the part-of-speech feature by name;
// the engine configuration and the result display below refer to them
def typeSystem = document.getTypeSystem()
def sentenceType = typeSystem.getType("opennlp.uima.Sentence")
def tokenType = typeSystem.getType("opennlp.uima.Token")
def posFeature = tokenType.getFeatureByBaseName("pos")

// Sentence detector: configured with the name of the sentence type; its model
// is bound as an external resource fetched from the URL below
def sentenceDetector = createEngineDescription(
  SentenceDetector,
  UimaUtil.SENTENCE_TYPE_PARAMETER, sentenceType.getName())
createDependencyAndBind(
  sentenceDetector,
  UimaUtil.MODEL_PARAMETER,
  SentenceModelResourceImpl,
  "http://opennlp.sourceforge.net/models-1.5/en-sent.bin")

// Tokenizer: configured with the names of the token and sentence types; its
// model is bound as an external resource fetched from the URL below
def tokenizer = createEngineDescription(
  Tokenizer,
  UimaUtil.TOKEN_TYPE_PARAMETER, tokenType.getName(),
  UimaUtil.SENTENCE_TYPE_PARAMETER, sentenceType.getName())
createDependencyAndBind(
  tokenizer,
  UimaUtil.MODEL_PARAMETER,
  TokenizerModelResourceImpl,
  "http://opennlp.sourceforge.net/models-1.5/en-token.bin")

// Part-of-speech tagger: configured with the token and sentence type names and
// the short name of the feature that holds the tag; its model is bound as an
// external resource fetched from the URL below
def posTagger = createEngineDescription(
  POSTagger,
  UimaUtil.TOKEN_TYPE_PARAMETER, tokenType.getName(),
  UimaUtil.SENTENCE_TYPE_PARAMETER, sentenceType.getName(),
  UimaUtil.POS_FEATURE_PARAMETER, posFeature.getShortName())
createDependencyAndBind(
  posTagger,
  UimaUtil.MODEL_PARAMETER,
  POSModelResourceImpl,
  "http://opennlp.sourceforge.net/models-1.5/en-pos-perceptron.bin")

// Run the sentence detector, tokenizer and tagger over the document, in that order
SimplePipeline.runPipeline(document, sentenceDetector, tokenizer, posTagger)

// Print every sentence as a block listing each covered token's text and
// the string value of its part-of-speech feature
for (sentenceAnnotation in select(document.getCas(), sentenceType)) {
  println("<sentence>")
  for (token in selectCovered(tokenType, sentenceAnnotation)) {
    println("${token.coveredText}   ${token.getFeatureValueAsString(posFeature)}")
  }
  println("</sentence>")
}
  • No labels