uimaFIT and Groovy allow users to quickly prototype analysis workflows. This works best if the UIMA components that are used in the workflow use the uimaFIT Java annotations like @ConfigurationParameter. But it also works for arbitrary UIMA components.
Example using uimaFIT, Groovy, and OpenNLP
The following example illustrates how to use uimaFIT and Groovy to build and run an analysis pipeline using the Apache OpenNLP UIMA components:
#!/usr/bin/env groovy
// Example script: builds an OpenNLP sentence detector -> tokenizer -> part-of-speech
// tagger pipeline with uimaFIT, runs it over a short English text, and prints every
// token together with its part-of-speech value, grouped by sentence.
//
// The @Grab annotations let Groovy (Grape) resolve uimaFIT and the OpenNLP UIMA
// components at script start-up. The type system descriptor and the OpenNLP models
// are referenced by URL below.
@Grab(group='org.apache.uima', module='uimafit-core', version='2.1.0')
@Grab(group='org.apache.opennlp', module='opennlp-uima', version='1.5.3')
import org.apache.uima.fit.pipeline.*;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.*;
import static org.apache.uima.fit.factory.CollectionReaderFactory.*;
import static org.apache.uima.fit.factory.ExternalResourceFactory.*;
import static org.apache.uima.fit.factory.JCasFactory.*
import static org.apache.uima.fit.util.CasUtil.*;
import opennlp.uima.postag.*;
import opennlp.uima.tokenize.*;
import opennlp.uima.sentdetect.*;
import opennlp.uima.util.*;

// Create document to be analyzed
def document = createJCasFromPath(
    "http://svn.apache.org/repos/asf/opennlp/tags/opennlp-1.5.3-rc3/opennlp-uima/descriptors/TypeSystem.xml");
document.documentText = """ The quick brown fox jumps over the lazy dog. Later, he jumped over the moon.""";
document.documentLanguage = "en";

// Look up the OpenNLP annotation types and the POS feature by name. They are used
// both to configure the components and to read the results back afterwards.
def tokenType = document.typeSystem.getType("opennlp.uima.Token")
def sentenceType = document.typeSystem.getType("opennlp.uima.Sentence")
def posFeature = tokenType.getFeatureByBaseName("pos")

// Configure sentence detector
def sentenceDetector = createEngineDescription(
    SentenceDetector.class,
    UimaUtil.SENTENCE_TYPE_PARAMETER, sentenceType.name);
createDependencyAndBind(sentenceDetector,
    UimaUtil.MODEL_PARAMETER,
    SentenceModelResourceImpl.class,
    "http://opennlp.sourceforge.net/models-1.5/en-sent.bin");

// Configure tokenizer
def tokenizer = createEngineDescription(
    Tokenizer.class,
    UimaUtil.TOKEN_TYPE_PARAMETER, tokenType.name,
    UimaUtil.SENTENCE_TYPE_PARAMETER, sentenceType.name);
createDependencyAndBind(tokenizer,
    UimaUtil.MODEL_PARAMETER,
    TokenizerModelResourceImpl.class,
    "http://opennlp.sourceforge.net/models-1.5/en-token.bin");

// Configure part-of-speech tagger
def posTagger = createEngineDescription(
    POSTagger.class,
    UimaUtil.TOKEN_TYPE_PARAMETER, tokenType.name,
    UimaUtil.SENTENCE_TYPE_PARAMETER, sentenceType.name,
    UimaUtil.POS_FEATURE_PARAMETER, posFeature.shortName);
createDependencyAndBind(posTagger,
    UimaUtil.MODEL_PARAMETER,
    POSModelResourceImpl.class,
    "http://opennlp.sourceforge.net/models-1.5/en-pos-perceptron.bin");

// Run pipeline (engines are passed in the order: sentences, tokens, POS tags)
SimplePipeline.runPipeline(document, sentenceDetector, tokenizer, posTagger)

// Display results: one <sentence> block per sentence annotation, with one
// "<covered text> <pos value>" line for each token covered by that sentence.
select(document.cas, sentenceType).each { sentence ->
    println "<sentence>"
    selectCovered(tokenType, sentence).each { token ->
        println "${token.coveredText} ${token.getFeatureValueAsString(posFeature)}"
    }
    println "</sentence>"
}