Integrating Tika With ExtractingRequestHandler

0. The online Apache repositories do not currently provide Tika 1.8-SNAPSHOT, so we have to build it ourselves and resolve it from the local Maven repository.

1. Check out tika-trunk:

$ svn co http://svn.apache.org/repos/asf/tika/trunk tika-trunk

2. Build Tika

$ cd tika-trunk

$ mvn install

3. Download tika-parsers dependencies:

$ cd tika-parsers

$ mvn dependency:copy-dependencies

4. Check out lucene_solr_4_10:

$ svn checkout http://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_4_10 lucene_solr_4_10

5. Modify lucene_solr_4_10/lucene/ivy-settings.xml by uncommenting lines 45-52 and line 56 (this enables the resolver that reads from the local Maven repository):

Line 45-52:

    <filesystem name="local-maven-2" m2compatible="true" local="true">
      <artifact
          pattern="${local-maven2-dir}/[organisation]/[module]/[revision]/[module]-[revision].[ext]" />
      <ivy
          pattern="${local-maven2-dir}/[organisation]/[module]/[revision]/[module]-[revision].pom" />
    </filesystem>


Line 56:

   <resolver ref="local-maven-2" />

6. Replace the contents of lucene_solr_4_10/solr/contrib/extraction/ivy.xml with the following ivy.xml:


<!--
  Ivy module descriptor for the Solr "extraction" contrib (ExtractingRequestHandler),
  pinned to the locally built Tika 1.8-SNAPSHOT artifacts.

  Versions written as ${/org/name} are property references keyed by organisation
  and module name; they are not defined in this file.
-->
<ivy-module version="2.0">
  <info organisation="org.apache.solr" module="extraction"/>

  <!-- Both configurations are non-transitive: every required jar is listed explicitly below. -->
  <configurations defaultconfmapping="compile->master;test->master">
    <conf name="compile" transitive="false"/>
    <conf name="test" transitive="false"/>
  </configurations>

  <dependencies>
    <!-- Tika JARs -->
    <dependency org="org.apache.tika" name="tika-core" rev="1.8-SNAPSHOT" conf="compile"/>
    <dependency org="org.apache.tika" name="tika-parsers" rev="1.8-SNAPSHOT" conf="compile"/>
    <dependency org="org.apache.tika" name="tika-xmp" rev="1.8-SNAPSHOT" conf="compile"/>

    <!-- Tika dependencies - see the tika-parsers pom.xml and the jars copied in step 3 -->
    <!-- When upgrading Tika, upgrade dependencies versions and add any new ones
         (except slf4j-api, commons-codec, commons-logging, commons-httpclient, geronimo-stax-api_1.0_spec, jcip-annotations, xml-apis, asm)
         WARNING: Don't add netcdf / unidataCommon (partially LGPL code) -->
    <dependency org="org.gagravarr" name="vorbis-java-tika" rev="0.6" conf="compile"/>
    <dependency org="org.gagravarr" name="vorbis-java-core" rev="0.6" conf="compile"/>
    <dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7.2" conf="compile"/>
    <dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7.2" conf="compile"/>
    <dependency org="org.apache.commons" name="commons-compress" rev="1.9" conf="compile"/>
    <dependency org="org.apache.pdfbox" name="pdfbox" rev="1.8.8" conf="compile"/>
    <dependency org="org.apache.pdfbox" name="fontbox" rev="1.8.8" conf="compile"/>
    <dependency org="org.apache.pdfbox" name="jempbox" rev="1.8.8" conf="compile"/>
    <dependency org="org.bouncycastle" name="bcmail-jdk15" rev="1.45" conf="compile"/>
    <dependency org="org.bouncycastle" name="bcprov-jdk15" rev="1.45" conf="compile"/>
    <dependency org="org.apache.poi" name="poi" rev="3.11" conf="compile"/>
    <dependency org="org.apache.poi" name="poi-scratchpad" rev="3.11" conf="compile"/>
    <dependency org="org.apache.poi" name="poi-ooxml" rev="3.11" conf="compile"/>
    <dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.11" conf="compile"/>
    <dependency org="org.apache.xmlbeans" name="xmlbeans" rev="2.6.0" conf="compile"/>
    <dependency org="dom4j" name="dom4j" rev="${/dom4j/dom4j}" conf="compile"/>
    <dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="1.2.1" conf="compile"/>
    <dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0.2" conf="compile"/>
    <dependency org="org.aspectj" name="aspectjrt" rev="1.8.0" conf="compile"/>
    <dependency org="com.drewnoakes" name="metadata-extractor" rev="2.6.2" conf="compile"/>
    <dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="1.1.0" conf="compile"/>
    <dependency org="rome" name="rome" rev="1.0" conf="compile"/>
    <dependency org="jdom" name="jdom" rev="1.0" conf="compile"/>
    <dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3" conf="compile"/>
    <dependency org="org.tukaani" name="xz" rev="1.5" conf="compile"/>
    <dependency org="com.adobe.xmp" name="xmpcore" rev="5.1.2" conf="compile"/>
    <dependency org="com.uwyn" name="jhighlight" rev="1.0" conf="compile"/>

    <!-- Other ExtractingRequestHandler dependencies -->
    <!-- NOTE(review): the organisation and version property were blank in this document;
         com.ibm.icu is the ICU4J Maven groupId - confirm against the stock ivy.xml. -->
    <dependency org="com.ibm.icu" name="icu4j" rev="${/com.ibm.icu/icu4j}" conf="compile"/>
    <dependency org="xerces" name="xercesImpl" rev="${/xerces/xercesImpl}" conf="compile"/>
    <dependency org="org.slf4j" name="jcl-over-slf4j" rev="${/org.slf4j/jcl-over-slf4j}" conf="test"/>

    <!-- Artifact types to exclude are taken from the ivy.exclude.types property. -->
    <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
  </dependencies>
</ivy-module>



7. Compile Solr

Change the working directory to lucene_solr_4_10/solr/

$ ant compile

8. Generate new sha1 files for the jars

$ ant jar-checksums

9. Done. Enjoy.

