Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java Wed May 13 13:49:36 2015 @@ -1,30 +1,30 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import org.apache.tika.cli.TikaCLI; - -/** - * - * Print the supported Tika Metadata models and their fields. - * - */ -public class DescribeMetadata { - - public static void main(String[] args) throws Exception { - TikaCLI.main(new String[] { "--list-met-models" }); - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.example; + +import org.apache.tika.cli.TikaCLI; + +/** + * + * Print the supported Tika Metadata models and their fields. + * + */ +public class DescribeMetadata { + + public static void main(String[] args) throws Exception { + TikaCLI.main(new String[] { "--list-met-models" }); + } + +}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java Wed May 13 13:49:36 2015 @@ -1,139 +1,139 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.commons.io.FileUtils; -import org.apache.tika.exception.TikaException; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.BodyContentHandler; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; - -/** - * Parses the output of /bin/ls and counts the number of files and the number of - * executables using Tika. 
- */ -public class DirListParser implements Parser { - - private static final long serialVersionUID = 2717930544410610735L; - - private static Set<MediaType> SUPPORTED_TYPES = new HashSet<MediaType>( - Arrays.asList(MediaType.TEXT_PLAIN)); - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#getSupportedTypes( - * org.apache.tika.parser.ParseContext) - */ - public Set<MediaType> getSupportedTypes(ParseContext context) { - return SUPPORTED_TYPES; - } - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, - * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata) - */ - public void parse(InputStream is, ContentHandler handler, Metadata metadata) - throws IOException, SAXException, TikaException { - this.parse(is, handler, metadata, new ParseContext()); - } - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, - * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, - * org.apache.tika.parser.ParseContext) - */ - public void parse(InputStream is, ContentHandler handler, - Metadata metadata, ParseContext context) throws IOException, - SAXException, TikaException { - - List<String> lines = FileUtils.readLines(TikaInputStream.get(is) - .getFile()); - for (String line : lines) { - String[] fileToks = line.split("\\s+"); - if (fileToks.length < 8) - continue; - String filePermissions = fileToks[0]; - String numHardLinks = fileToks[1]; - String fileOwner = fileToks[2]; - String fileOwnerGroup = fileToks[3]; - String fileSize = fileToks[4]; - StringBuffer lastModDate = new StringBuffer(); - lastModDate.append(fileToks[5]); - lastModDate.append(" "); - lastModDate.append(fileToks[6]); - lastModDate.append(" "); - lastModDate.append(fileToks[7]); - StringBuffer fileName = new StringBuffer(); - for (int i = 8; i < fileToks.length; i++) { - fileName.append(fileToks[i]); - fileName.append(" "); - } - fileName.deleteCharAt(fileName.length() - 1); - 
this.addMetadata(metadata, filePermissions, numHardLinks, - fileOwner, fileOwnerGroup, fileSize, - lastModDate.toString(), fileName.toString()); - } - } - - public static void main(String[] args) throws IOException, SAXException, - TikaException { - DirListParser parser = new DirListParser(); - Metadata met = new Metadata(); - parser.parse(System.in, new BodyContentHandler(), met); - - System.out.println("Num files: " + met.getValues("Filename").length); - System.out.println("Num executables: " + met.get("NumExecutables")); - } - - private void addMetadata(Metadata metadata, String filePerms, - String numHardLinks, String fileOwner, String fileOwnerGroup, - String fileSize, String lastModDate, String fileName) { - metadata.add("FilePermissions", filePerms); - metadata.add("NumHardLinks", numHardLinks); - metadata.add("FileOwner", fileOwner); - metadata.add("FileOwnerGroup", fileOwnerGroup); - metadata.add("FileSize", fileSize); - metadata.add("LastModifiedDate", lastModDate); - metadata.add("Filename", fileName); - - if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) { - if (metadata.get("NumExecutables") != null) { - int numExecs = Integer.valueOf(metadata.get("NumExecutables")); - numExecs++; - metadata.set("NumExecutables", String.valueOf(numExecs)); - } else { - metadata.set("NumExecutables", "1"); - } - } - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.example; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.io.FileUtils; +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +/** + * Parses the output of /bin/ls and counts the number of files and the number of + * executables using Tika. + */ +public class DirListParser implements Parser { + + private static final long serialVersionUID = 2717930544410610735L; + + private static Set<MediaType> SUPPORTED_TYPES = new HashSet<MediaType>( + Arrays.asList(MediaType.TEXT_PLAIN)); + + /* + * (non-Javadoc) + * + * @see org.apache.tika.parser.Parser#getSupportedTypes( + * org.apache.tika.parser.ParseContext) + */ + public Set<MediaType> getSupportedTypes(ParseContext context) { + return SUPPORTED_TYPES; + } + + /* + * (non-Javadoc) + * + * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, + * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata) + */ + public void parse(InputStream is, ContentHandler handler, Metadata metadata) + throws IOException, SAXException, TikaException { + this.parse(is, handler, metadata, new ParseContext()); + } + + /* + * (non-Javadoc) + * + * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, + * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, + * org.apache.tika.parser.ParseContext) + */ + public void parse(InputStream is, ContentHandler handler, + Metadata metadata, ParseContext context) throws IOException, + SAXException, TikaException { + + List<String> lines = FileUtils.readLines(TikaInputStream.get(is) 
+ .getFile()); + for (String line : lines) { + String[] fileToks = line.split("\\s+"); + if (fileToks.length < 8) + continue; + String filePermissions = fileToks[0]; + String numHardLinks = fileToks[1]; + String fileOwner = fileToks[2]; + String fileOwnerGroup = fileToks[3]; + String fileSize = fileToks[4]; + StringBuffer lastModDate = new StringBuffer(); + lastModDate.append(fileToks[5]); + lastModDate.append(" "); + lastModDate.append(fileToks[6]); + lastModDate.append(" "); + lastModDate.append(fileToks[7]); + StringBuffer fileName = new StringBuffer(); + for (int i = 8; i < fileToks.length; i++) { + fileName.append(fileToks[i]); + fileName.append(" "); + } + fileName.deleteCharAt(fileName.length() - 1); + this.addMetadata(metadata, filePermissions, numHardLinks, + fileOwner, fileOwnerGroup, fileSize, + lastModDate.toString(), fileName.toString()); + } + } + + public static void main(String[] args) throws IOException, SAXException, + TikaException { + DirListParser parser = new DirListParser(); + Metadata met = new Metadata(); + parser.parse(System.in, new BodyContentHandler(), met); + + System.out.println("Num files: " + met.getValues("Filename").length); + System.out.println("Num executables: " + met.get("NumExecutables")); + } + + private void addMetadata(Metadata metadata, String filePerms, + String numHardLinks, String fileOwner, String fileOwnerGroup, + String fileSize, String lastModDate, String fileName) { + metadata.add("FilePermissions", filePerms); + metadata.add("NumHardLinks", numHardLinks); + metadata.add("FileOwner", fileOwner); + metadata.add("FileOwnerGroup", fileOwnerGroup); + metadata.add("FileSize", fileSize); + metadata.add("LastModifiedDate", lastModDate); + metadata.add("Filename", fileName); + + if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) { + if (metadata.get("NumExecutables") != null) { + int numExecs = Integer.valueOf(metadata.get("NumExecutables")); + numExecs++; + metadata.set("NumExecutables", 
String.valueOf(numExecs)); + } else { + metadata.set("NumExecutables", "1"); + } + } + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java Wed May 13 13:49:36 2015 @@ -1,46 +1,46 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tika.example; - -import java.io.IOException; -import java.net.URL; - -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.pdf.PDFParser; -import org.apache.tika.sax.BodyContentHandler; -import org.xml.sax.SAXException; - -/** - * Grabs a PDF file from a URL and prints its {@link Metadata} - */ -public class DisplayMetInstance { - - public static Metadata getMet(URL url) throws IOException, SAXException, - TikaException { - Metadata met = new Metadata(); - PDFParser parser = new PDFParser(); - parser.parse(url.openStream(), new BodyContentHandler(), met, - new ParseContext()); - return met; - } - - public static void main(String[] args) throws Exception { - Metadata met = DisplayMetInstance.getMet(new URL(args[0])); - System.out.println(met); - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.example; + +import java.io.IOException; +import java.net.URL; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.pdf.PDFParser; +import org.apache.tika.sax.BodyContentHandler; +import org.xml.sax.SAXException; + +/** + * Grabs a PDF file from a URL and prints its {@link Metadata} + */ +public class DisplayMetInstance { + + public static Metadata getMet(URL url) throws IOException, SAXException, + TikaException { + Metadata met = new Metadata(); + PDFParser parser = new PDFParser(); + parser.parse(url.openStream(), new BodyContentHandler(), met, + new ParseContext()); + return met; + } + + public static void main(String[] args) throws Exception { + Metadata met = DisplayMetInstance.getMet(new URL(args[0])); + System.out.println(met); + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java Wed May 13 13:49:36 2015 @@ -1,62 +1,62 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.IOException; -import java.io.InputStream; -import java.security.GeneralSecurityException; -import java.security.Key; - -import javax.crypto.Cipher; -import javax.crypto.CipherInputStream; -import javax.xml.namespace.QName; - -import org.apache.tika.detect.Detector; -import org.apache.tika.detect.XmlRootExtractor; -import org.apache.tika.io.LookaheadInputStream; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; - -public class EncryptedPrescriptionDetector implements Detector { - - private static final long serialVersionUID = -1709652690773421147L; - - public MediaType detect(InputStream stream, Metadata metadata) - throws IOException { - Key key = Pharmacy.getKey(); - MediaType type = MediaType.OCTET_STREAM; - - InputStream lookahead = new LookaheadInputStream(stream, 1024); - try { - Cipher cipher = Cipher.getInstance("RSA"); - - cipher.init(Cipher.DECRYPT_MODE, key); - InputStream decrypted = new CipherInputStream(lookahead, cipher); - - QName name = new XmlRootExtractor().extractRootElement(decrypted); - if (name != null - && "http://example.com/xpd".equals(name.getNamespaceURI()) - && "prescription".equals(name.getLocalPart())) { - type = MediaType.application("x-prescription"); - } - } catch (GeneralSecurityException e) { - // unable to decrypt, fall through - } finally { - lookahead.close(); - } - return type; - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.example; + +import java.io.IOException; +import java.io.InputStream; +import java.security.GeneralSecurityException; +import java.security.Key; + +import javax.crypto.Cipher; +import javax.crypto.CipherInputStream; +import javax.xml.namespace.QName; + +import org.apache.tika.detect.Detector; +import org.apache.tika.detect.XmlRootExtractor; +import org.apache.tika.io.LookaheadInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; + +public class EncryptedPrescriptionDetector implements Detector { + + private static final long serialVersionUID = -1709652690773421147L; + + public MediaType detect(InputStream stream, Metadata metadata) + throws IOException { + Key key = Pharmacy.getKey(); + MediaType type = MediaType.OCTET_STREAM; + + InputStream lookahead = new LookaheadInputStream(stream, 1024); + try { + Cipher cipher = Cipher.getInstance("RSA"); + + cipher.init(Cipher.DECRYPT_MODE, key); + InputStream decrypted = new CipherInputStream(lookahead, cipher); + + QName name = new XmlRootExtractor().extractRootElement(decrypted); + if (name != null + && "http://example.com/xpd".equals(name.getNamespaceURI()) + && "prescription".equals(name.getLocalPart())) { + type = MediaType.application("x-prescription"); + } + } catch (GeneralSecurityException e) { + // unable to decrypt, fall through + } finally { + lookahead.close(); + } + return type; + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java Wed May 13 13:49:36 2015 @@ -1,60 +1,60 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tika.example; - -import java.io.IOException; -import java.io.InputStream; -import java.security.GeneralSecurityException; -import java.security.Key; -import java.util.Collections; -import java.util.Set; - -import javax.crypto.Cipher; -import javax.crypto.CipherInputStream; - -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.AbstractParser; -import org.apache.tika.parser.ParseContext; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; - -public class EncryptedPrescriptionParser extends AbstractParser { - - private static final long serialVersionUID = -7816987249611278541L; - - public void parse(InputStream stream, ContentHandler handler, - Metadata metadata, ParseContext context) throws IOException, - SAXException, TikaException { - try { - Key key = Pharmacy.getKey(); - Cipher cipher = Cipher.getInstance("RSA"); - cipher.init(Cipher.DECRYPT_MODE, key); - InputStream decrypted = new CipherInputStream(stream, cipher); - - new PrescriptionParser().parse(decrypted, handler, metadata, - context); - } catch (GeneralSecurityException e) { - throw new TikaException("Unable to decrypt a digital prescription", - e); - } - } - - public Set<MediaType> getSupportedTypes(ParseContext context) { - return Collections.singleton(MediaType.application("x-prescription")); - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.example; + +import java.io.IOException; +import java.io.InputStream; +import java.security.GeneralSecurityException; +import java.security.Key; +import java.util.Collections; +import java.util.Set; + +import javax.crypto.Cipher; +import javax.crypto.CipherInputStream; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +public class EncryptedPrescriptionParser extends AbstractParser { + + private static final long serialVersionUID = -7816987249611278541L; + + public void parse(InputStream stream, ContentHandler handler, + Metadata metadata, ParseContext context) throws IOException, + SAXException, TikaException { + try { + Key key = Pharmacy.getKey(); + Cipher cipher = Cipher.getInstance("RSA"); + cipher.init(Cipher.DECRYPT_MODE, key); + InputStream decrypted = new CipherInputStream(stream, cipher); + + new PrescriptionParser().parse(decrypted, handler, metadata, + context); + } catch (GeneralSecurityException e) { + throw new TikaException("Unable to decrypt a digital prescription", + e); + } + } + + public Set<MediaType> getSupportedTypes(ParseContext context) { + return Collections.singleton(MediaType.application("x-prescription")); + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java (original) +++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java Wed May 13 13:49:36 2015 @@ -1,59 +1,59 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.IOException; - -import org.apache.tika.language.LanguageIdentifier; -import org.apache.tika.language.LanguageProfile; -import org.apache.tika.language.ProfilingHandler; -import org.apache.tika.language.ProfilingWriter; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.ParseContext; - -public class Language { - - public static void languageDetection() throws IOException { - LanguageProfile profile = new LanguageProfile( - "Alla människor är födda fria och" - + " lika i värde och rättigheter."); - - LanguageIdentifier identifier = new LanguageIdentifier(profile); - System.out.println(identifier.getLanguage()); - } - - public static void languageDetectionWithWriter() throws IOException { - ProfilingWriter writer = new ProfilingWriter(); - writer.append("Minden emberi lény"); - writer.append(" szabadon születik és"); - writer.append(" egyenlő méltósága és"); - writer.append(" joga van."); - - LanguageIdentifier identifier = writer.getLanguage(); - System.out.println(identifier.getLanguage()); - writer.close(); - - } - - public static void languageDetectionWithHandler() throws Exception { - ProfilingHandler handler = new ProfilingHandler(); - new
AutoDetectParser().parse(System.in, handler, new Metadata(), - new ParseContext()); - - LanguageIdentifier identifier = handler.getLanguage(); - System.out.println(identifier.getLanguage()); - } -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.example; + +import java.io.IOException; + +import org.apache.tika.language.LanguageIdentifier; +import org.apache.tika.language.LanguageProfile; +import org.apache.tika.language.ProfilingHandler; +import org.apache.tika.language.ProfilingWriter; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; + +public class Language { + + public static void languageDetection() throws IOException { + LanguageProfile profile = new LanguageProfile( + "Alla människor är födda fria och" + + " lika i värde och rättigheter."); + + LanguageIdentifier identifier = new LanguageIdentifier(profile); + System.out.println(identifier.getLanguage()); + } + + public static void languageDetectionWithWriter() throws IOException { + ProfilingWriter writer = new ProfilingWriter(); + writer.append("Minden emberi lény"); + writer.append(" szabadon születik és"); + writer.append(" egyenlő méltósága és"); + writer.append(" joga van."); + + LanguageIdentifier identifier = writer.getLanguage(); + System.out.println(identifier.getLanguage()); + writer.close(); + + } + + public static void languageDetectionWithHandler() throws Exception { +
ProfilingHandler handler = new ProfilingHandler(); + new AutoDetectParser().parse(System.in, handler, new Metadata(), + new ParseContext()); + + LanguageIdentifier identifier = handler.getLanguage(); + System.out.println(identifier.getLanguage()); + } +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java Wed May 13 13:49:36 2015 @@ -1,49 +1,49 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tika.example; - -import java.io.IOException; -import java.io.InputStream; - -import org.apache.tika.exception.TikaException; -import org.apache.tika.language.LanguageIdentifier; -import org.apache.tika.language.ProfilingHandler; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.DelegatingParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.sax.TeeContentHandler; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; - -@SuppressWarnings("deprecation") -public class LanguageDetectingParser extends DelegatingParser { - - private static final long serialVersionUID = 4291320409396502774L; - - public void parse(InputStream stream, ContentHandler handler, - final Metadata metadata, ParseContext context) throws SAXException, - IOException, TikaException { - ProfilingHandler profiler = new ProfilingHandler(); - ContentHandler tee = new TeeContentHandler(handler, profiler); - - super.parse(stream, tee, metadata, context); - - LanguageIdentifier identifier = profiler.getLanguage(); - if (identifier.isReasonablyCertain()) { - metadata.set(Metadata.LANGUAGE, identifier.getLanguage()); - } - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.example; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.language.LanguageIdentifier; +import org.apache.tika.language.ProfilingHandler; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.DelegatingParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.sax.TeeContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +@SuppressWarnings("deprecation") +public class LanguageDetectingParser extends DelegatingParser { + + private static final long serialVersionUID = 4291320409396502774L; + + public void parse(InputStream stream, ContentHandler handler, + final Metadata metadata, ParseContext context) throws SAXException, + IOException, TikaException { + ProfilingHandler profiler = new ProfilingHandler(); + ContentHandler tee = new TeeContentHandler(handler, profiler); + + super.parse(stream, tee, metadata, context); + + LanguageIdentifier identifier = profiler.getLanguage(); + if (identifier.isReasonablyCertain()) { + metadata.set(Metadata.LANGUAGE, identifier.getLanguage()); + } + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java Wed May 13 13:49:36 2015 @@ -1,46 +1,46 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -/* */ -import java.io.File; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.index.IndexWriter; -import org.apache.tika.Tika; - -public class LuceneIndexer { - - private final Tika tika; - - private final IndexWriter writer; - - public LuceneIndexer(Tika tika, IndexWriter writer) { - this.tika = tika; - this.writer = writer; - } - - public void indexDocument(File file) throws Exception { - Document document = new Document(); - document.add(new Field("filename", file.getName(), Store.YES, - Index.ANALYZED)); - document.add(new Field("fulltext", tika.parseToString(file), Store.NO, - Index.ANALYZED)); - writer.addDocument(document); - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.example; + +/* */ +import java.io.File; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexWriter; +import org.apache.tika.Tika; + +public class LuceneIndexer { + + private final Tika tika; + + private final IndexWriter writer; + + public LuceneIndexer(Tika tika, IndexWriter writer) { + this.tika = tika; + this.writer = writer; + } + + public void indexDocument(File file) throws Exception { + Document document = new Document(); + document.add(new Field("filename", file.getName(), Store.YES, + Index.ANALYZED)); + document.add(new Field("fulltext", tika.parseToString(file), Store.NO, + Index.ANALYZED)); + writer.addDocument(document); + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java Wed May 13 13:49:36 2015 @@ -1,70 +1,70 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.File; -import java.io.Reader; - -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriter.MaxFieldLength; -import org.apache.lucene.store.SimpleFSDirectory; -import org.apache.lucene.util.Version; -import org.apache.tika.Tika; - -@SuppressWarnings("deprecation") -public class LuceneIndexerExtended { - - private final IndexWriter writer; - - private final Tika tika; - - public LuceneIndexerExtended(IndexWriter writer, Tika tika) { - this.writer = writer; - this.tika = tika; - } - - public static void main(String[] args) throws Exception { - IndexWriter writer = new IndexWriter(new SimpleFSDirectory(new File( - args[0])), new StandardAnalyzer(Version.LUCENE_30), - MaxFieldLength.UNLIMITED); - try { - LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer); - for (int i = 1; i < args.length; i++) { - indexer.indexDocument(new File(args[i])); - } - } finally { - writer.close(); - } - } - - public void indexDocument(File file) throws Exception { - Reader fulltext = tika.parse(file); - try { - Document document = new Document(); - document.add(new Field("filename", file.getName(), Store.YES, - Index.ANALYZED)); - document.add(new Field("fulltext", fulltext)); - writer.addDocument(document); - } finally { - fulltext.close(); - } - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.example; + +import java.io.File; +import java.io.Reader; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.SimpleFSDirectory; +import org.apache.lucene.util.Version; +import org.apache.tika.Tika; + +@SuppressWarnings("deprecation") +public class LuceneIndexerExtended { + + private final IndexWriter writer; + + private final Tika tika; + + public LuceneIndexerExtended(IndexWriter writer, Tika tika) { + this.writer = writer; + this.tika = tika; + } + + public static void main(String[] args) throws Exception { + IndexWriter writer = new IndexWriter(new SimpleFSDirectory(new File( + args[0])), new StandardAnalyzer(Version.LUCENE_30), + MaxFieldLength.UNLIMITED); + try { + LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer); + for (int i = 1; i < args.length; i++) { + indexer.indexDocument(new File(args[i])); + } + } finally { + writer.close(); + } + } + + public void indexDocument(File file) throws Exception { + Reader fulltext = tika.parse(file); + try { + Document document = new Document(); + document.add(new Field("filename", file.getName(), Store.YES, + Index.ANALYZED)); + document.add(new Field("fulltext", fulltext)); + writer.addDocument(document); + } finally { + 
fulltext.close(); + } + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java Wed May 13 13:49:36 2015 @@ -1,58 +1,58 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tika.example; - -import java.util.Map; -import java.util.Set; - -import org.apache.tika.mime.MediaType; -import org.apache.tika.mime.MediaTypeRegistry; - -public class MediaTypeExample { - - public static void describeMediaType() { - - MediaType type = MediaType.parse("text/plain; charset=UTF-8"); - - System.out.println("type: " + type.getType()); - System.out.println("subtype: " + type.getSubtype()); - - Map<String, String> parameters = type.getParameters(); - System.out.println("parameters:"); - for (String name : parameters.keySet()) { - System.out.println(" " + name + "=" + parameters.get(name)); - } - } - - public static void listAllTypes() { - MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry(); - - for (MediaType type : registry.getTypes()) { - Set<MediaType> aliases = registry.getAliases(type); - System.out.println(type + ", also known as " + aliases); - } - } - - public static void main(String[] args) throws Exception { - MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry(); - - MediaType type = MediaType.parse("image/svg+xml"); - while (type != null) { - System.out.println(type); - type = registry.getSupertype(type); - } - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.example; + +import java.util.Map; +import java.util.Set; + +import org.apache.tika.mime.MediaType; +import org.apache.tika.mime.MediaTypeRegistry; + +public class MediaTypeExample { + + public static void describeMediaType() { + + MediaType type = MediaType.parse("text/plain; charset=UTF-8"); + + System.out.println("type: " + type.getType()); + System.out.println("subtype: " + type.getSubtype()); + + Map<String, String> parameters = type.getParameters(); + System.out.println("parameters:"); + for (String name : parameters.keySet()) { + System.out.println(" " + name + "=" + parameters.get(name)); + } + } + + public static void listAllTypes() { + MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry(); + + for (MediaType type : registry.getTypes()) { + Set<MediaType> aliases = registry.getAliases(type); + System.out.println(type + ", also known as " + aliases); + } + } + + public static void main(String[] args) throws Exception { + MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry(); + + MediaType type = MediaType.parse("image/svg+xml"); + while (type != null) { + System.out.println(type); + type = registry.getSupertype(type); + } + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java Wed May 13 13:49:36 2015 @@ -1,93 +1,93 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; -import java.util.Date; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.index.IndexWriter; -import org.apache.tika.Tika; -import org.apache.tika.metadata.DublinCore; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.Property; - -/** - * Builds on the LuceneIndexer from Chapter 5 and adds indexing of Metadata. 
- */ -@SuppressWarnings("deprecation") -public class MetadataAwareLuceneIndexer { - - private Tika tika; - - private IndexWriter writer; - - public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) { - this.writer = writer; - this.tika = tika; - } - - public void indexContentSpecificMet(File file) throws Exception { - Metadata met = new Metadata(); - InputStream is = new FileInputStream(file); - try { - tika.parse(is, met); - Document document = new Document(); - for (String key : met.names()) { - String[] values = met.getValues(key); - for (String val : values) { - document.add(new Field(key, val, Store.YES, Index.ANALYZED)); - } - writer.addDocument(document); - } - } finally { - is.close(); - } - } - - public void indexWithDublinCore(File file) throws Exception { - Metadata met = new Metadata(); - met.add(Metadata.CREATOR, "Manning"); - met.add(Metadata.CREATOR, "Tika in Action"); - met.set(Metadata.DATE, new Date()); - met.set(Metadata.FORMAT, tika.detect(file)); - met.set(DublinCore.SOURCE, file.toURI().toURL().toString()); - met.add(Metadata.SUBJECT, "File"); - met.add(Metadata.SUBJECT, "Indexing"); - met.add(Metadata.SUBJECT, "Metadata"); - met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public", - "private"), "public"); - InputStream is = new FileInputStream(file); - try { - tika.parse(is, met); - Document document = new Document(); - for (String key : met.names()) { - String[] values = met.getValues(key); - for (String val : values) { - document.add(new Field(key, val, Store.YES, Index.ANALYZED)); - } - writer.addDocument(document); - } - } finally { - is.close(); - } - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.example; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.Date; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexWriter; +import org.apache.tika.Tika; +import org.apache.tika.metadata.DublinCore; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; + +/** + * Builds on the LuceneIndexer from Chapter 5 and adds indexing of Metadata. 
+ */ +@SuppressWarnings("deprecation") +public class MetadataAwareLuceneIndexer { + + private Tika tika; + + private IndexWriter writer; + + public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) { + this.writer = writer; + this.tika = tika; + } + + public void indexContentSpecificMet(File file) throws Exception { + Metadata met = new Metadata(); + InputStream is = new FileInputStream(file); + try { + tika.parse(is, met); + Document document = new Document(); + for (String key : met.names()) { + String[] values = met.getValues(key); + for (String val : values) { + document.add(new Field(key, val, Store.YES, Index.ANALYZED)); + } + writer.addDocument(document); + } + } finally { + is.close(); + } + } + + public void indexWithDublinCore(File file) throws Exception { + Metadata met = new Metadata(); + met.add(Metadata.CREATOR, "Manning"); + met.add(Metadata.CREATOR, "Tika in Action"); + met.set(Metadata.DATE, new Date()); + met.set(Metadata.FORMAT, tika.detect(file)); + met.set(DublinCore.SOURCE, file.toURI().toURL().toString()); + met.add(Metadata.SUBJECT, "File"); + met.add(Metadata.SUBJECT, "Indexing"); + met.add(Metadata.SUBJECT, "Metadata"); + met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public", + "private"), "public"); + InputStream is = new FileInputStream(file); + try { + tika.parse(is, met); + Document document = new Document(); + for (String key : met.names()) { + String[] values = met.getValues(key); + for (String val : values) { + document.add(new Field(key, val, Store.YES, Index.ANALYZED)); + } + writer.addDocument(document); + } + } finally { + is.close(); + } + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java Wed May 13 13:49:36 2015 @@ -1,79 +1,79 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.File; - -import org.apache.commons.io.FileUtils; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.detect.Detector; -import org.apache.tika.language.LanguageIdentifier; -import org.apache.tika.language.LanguageProfile; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.mime.MimeTypes; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.BodyContentHandler; -import org.xml.sax.ContentHandler; - -/** - * Demonstrates how to call the different components within Tika: its - * {@link Detector} framework (aka MIME identification and repository), its - * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies. 
- */ - -@SuppressWarnings("deprecation") -public class MyFirstTika { - - public static void main(String[] args) throws Exception { - String filename = args[0]; - MimeTypes mimeRegistry = TikaConfig.getDefaultConfig() - .getMimeRepository(); - - System.out.println("Examining: [" + filename + "]"); - - System.out.println("The MIME type (based on filename) is: [" - + mimeRegistry.getMimeType(filename) + "]"); - - System.out.println("The MIME type (based on MAGIC) is: [" - + mimeRegistry.getMimeType(new File(filename)) + "]"); - - Detector mimeDetector = (Detector) mimeRegistry; - System.out - .println("The MIME type (based on the Detector interface) is: [" - + mimeDetector.detect(new File(filename).toURI().toURL() - .openStream(), new Metadata()) + "]"); - - LanguageIdentifier lang = new LanguageIdentifier(new LanguageProfile( - FileUtils.readFileToString(new File(filename)))); - - System.out.println("The language of this content is: [" - + lang.getLanguage() + "]"); - - Parser parser = TikaConfig.getDefaultConfig().getParser( - MediaType.parse(mimeRegistry.getMimeType(filename).getName())); - Metadata parsedMet = new Metadata(); - ContentHandler handler = new BodyContentHandler(); - parser.parse(new File(filename).toURI().toURL().openStream(), handler, - parsedMet, new ParseContext()); - - System.out.println("Parsed Metadata: "); - System.out.println(parsedMet); - System.out.println("Parsed Text: "); - System.out.println(handler.toString()); - - } -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.example; + +import java.io.File; + +import org.apache.commons.io.FileUtils; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.detect.Detector; +import org.apache.tika.language.LanguageIdentifier; +import org.apache.tika.language.LanguageProfile; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.mime.MimeTypes; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; +import org.xml.sax.ContentHandler; + +/** + * Demonstrates how to call the different components within Tika: its + * {@link Detector} framework (aka MIME identification and repository), its + * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies. + */ + +@SuppressWarnings("deprecation") +public class MyFirstTika { + + public static void main(String[] args) throws Exception { + String filename = args[0]; + MimeTypes mimeRegistry = TikaConfig.getDefaultConfig() + .getMimeRepository(); + + System.out.println("Examining: [" + filename + "]"); + + System.out.println("The MIME type (based on filename) is: [" + + mimeRegistry.getMimeType(filename) + "]"); + + System.out.println("The MIME type (based on MAGIC) is: [" + + mimeRegistry.getMimeType(new File(filename)) + "]"); + + Detector mimeDetector = (Detector) mimeRegistry; + System.out + .println("The MIME type (based on the Detector interface) is: [" + + mimeDetector.detect(new File(filename).toURI().toURL() + .openStream(), new Metadata()) + "]"); + + LanguageIdentifier lang = new LanguageIdentifier(new LanguageProfile( + FileUtils.readFileToString(new File(filename)))); + + System.out.println("The language of this content is: [" + + lang.getLanguage() + "]"); + + Parser parser = TikaConfig.getDefaultConfig().getParser( + 
MediaType.parse(mimeRegistry.getMimeType(filename).getName())); + Metadata parsedMet = new Metadata(); + ContentHandler handler = new BodyContentHandler(); + parser.parse(new File(filename).toURI().toURL().openStream(), handler, + parsedMet, new ParseContext()); + + System.out.println("Parsed Metadata: "); + System.out.println(parsedMet); + System.out.println("Parsed Text: "); + System.out.println(handler.toString()); + + } +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java Wed May 13 13:49:36 2015 @@ -1,52 +1,52 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tika.example; - -import java.util.Collections; -import java.util.Set; - -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.xml.ElementMetadataHandler; -import org.apache.tika.parser.xml.XMLParser; -import org.apache.tika.sax.TeeContentHandler; -import org.xml.sax.ContentHandler; - -public class PrescriptionParser extends XMLParser { - - private static final long serialVersionUID = 7690682277511967388L; - - @Override - protected ContentHandler getContentHandler(ContentHandler handler, - Metadata metadata, ParseContext context) { - String xpd = "http://example.com/2011/xpd"; - - ContentHandler doctor = new ElementMetadataHandler(xpd, "doctor", - metadata, "xpd:doctor"); - ContentHandler patient = new ElementMetadataHandler(xpd, "patient", - metadata, "xpd:patient"); - - return new TeeContentHandler(super.getContentHandler(handler, metadata, - context), doctor, patient); - } - - @Override - public Set<MediaType> getSupportedTypes(ParseContext context) { - return Collections.singleton(MediaType - .application("x-prescription+xml")); - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.example; + +import java.util.Collections; +import java.util.Set; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.xml.ElementMetadataHandler; +import org.apache.tika.parser.xml.XMLParser; +import org.apache.tika.sax.TeeContentHandler; +import org.xml.sax.ContentHandler; + +public class PrescriptionParser extends XMLParser { + + private static final long serialVersionUID = 7690682277511967388L; + + @Override + protected ContentHandler getContentHandler(ContentHandler handler, + Metadata metadata, ParseContext context) { + String xpd = "http://example.com/2011/xpd"; + + ContentHandler doctor = new ElementMetadataHandler(xpd, "doctor", + metadata, "xpd:doctor"); + ContentHandler patient = new ElementMetadataHandler(xpd, "patient", + metadata, "xpd:patient"); + + return new TeeContentHandler(super.getContentHandler(handler, metadata, + context), doctor, patient); + } + + @Override + public Set<MediaType> getSupportedTypes(ParseContext context) { + return Collections.singleton(MediaType + .application("x-prescription+xml")); + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java Wed May 13 13:49:36 2015 @@ -1,148 +1,148 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.File; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.GregorianCalendar; -import java.util.Locale; -import java.util.TimeZone; - -import org.apache.jackrabbit.util.ISO8601; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.search.TopScoreDocCollector; -import org.apache.lucene.store.SimpleFSDirectory; -import org.apache.tika.metadata.DublinCore; -import org.apache.tika.metadata.Metadata; - -/** - * - * Builds on top of the LuceneIndexer and the Metadata discussions in Chapter 6 - * to output an RSS (or RDF) feed of files crawled by the LuceneIndexer within - * the last N minutes. 
- */ -@SuppressWarnings("deprecation") -public class RecentFiles { - - private IndexReader reader; - - private SimpleDateFormat rssDateFormat = new SimpleDateFormat( - "E, dd MMM yyyy HH:mm:ss z", Locale.getDefault()); - - public String generateRSS(File indexFile) throws CorruptIndexException, - IOException { - StringBuffer output = new StringBuffer(); - output.append(getRSSHeaders()); - IndexSearcher searcher = null; - try { - reader = IndexReader.open(new SimpleFSDirectory(indexFile)); - searcher = new IndexSearcher(reader); - GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); - gc.setTime(new Date()); - String nowDateTime = ISO8601.format(gc); - gc.add(java.util.GregorianCalendar.MINUTE, -5); - String fiveMinsAgo = ISO8601.format(gc); - TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), - fiveMinsAgo, nowDateTime, true, true); - TopScoreDocCollector collector = TopScoreDocCollector.create(20, - true); - searcher.search(query, collector); - ScoreDoc[] hits = collector.topDocs().scoreDocs; - for (int i = 0; i < hits.length; i++) { - Document doc = searcher.doc(hits[i].doc); - output.append(getRSSItem(doc)); - } - - } finally { - if (reader != null) reader.close(); - if (searcher != null) searcher.close(); - } - - output.append(getRSSFooters()); - return output.toString(); - } - - public String getRSSItem(Document doc) { - StringBuffer output = new StringBuffer(); - output.append("<item>"); - output.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()), - "isPermalink", "true")); - output.append(emitTag("title", doc.get(Metadata.TITLE), null, null)); - output.append(emitTag("link", doc.get(DublinCore.SOURCE.getName()), - null, null)); - output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null)); - for (String topic : doc.getValues(Metadata.SUBJECT)) { - output.append(emitTag("category", topic, null, null)); - } - output.append(emitTag("pubDate", 
rssDateFormat.format(ISO8601.parse(doc - .get(Metadata.DATE.toString()))), null, null)); - output.append(emitTag("description", doc.get(Metadata.TITLE), null, - null)); - output.append("</item>"); - return output.toString(); - } - - public String getRSSHeaders() { - StringBuffer output = new StringBuffer(); - output.append("<?xml version=\"1.0\" encoding=\"utf-8\">"); - output.append("<rss version=\"2.0\">"); - output.append(" <channel>"); - output.append(" <title>Tika in Action: Recent Files Feed." - + "</title>"); - output.append(" <description>Chapter 6 Examples demonstrating " - + "use of Tika Metadata for RSS.</description>"); - output.append(" <link>tikainaction.rss</link>"); - output.append(" <lastBuildDate>" + rssDateFormat.format(new Date()) - + "</lastBuildDate>"); - output.append(" <generator>Manning Publications: Tika in Action" - + "</generator>"); - output.append(" <copyright>All Rights Reserved</copyright>"); - return output.toString(); - } - - public String getRSSFooters() { - StringBuffer output = new StringBuffer(); - output.append(" </channel>"); - return output.toString(); - } - - private String emitTag(String tagName, String value, String attributeName, - String attributeValue) { - StringBuffer output = new StringBuffer(); - output.append("<"); - output.append(tagName); - if (attributeName != null) { - output.append(" "); - output.append(attributeName); - output.append("=\""); - output.append(attributeValue); - output.append("\""); - } - output.append(">"); - output.append(value); - output.append("</"); - output.append(tagName); - output.append(">"); - return output.toString(); - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.example; + +import java.io.File; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.Locale; +import java.util.TimeZone; + +import org.apache.jackrabbit.util.ISO8601; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.store.SimpleFSDirectory; +import org.apache.tika.metadata.DublinCore; +import org.apache.tika.metadata.Metadata; + +/** + * + * Builds on top of the LuceneIndexer and the Metadata discussions in Chapter 6 + * to output an RSS (or RDF) feed of files crawled by the LuceneIndexer within + * the last N minutes. 
+ */ +@SuppressWarnings("deprecation") +public class RecentFiles { + + private IndexReader reader; + + private SimpleDateFormat rssDateFormat = new SimpleDateFormat( + "E, dd MMM yyyy HH:mm:ss z", Locale.getDefault()); + + public String generateRSS(File indexFile) throws CorruptIndexException, + IOException { + StringBuffer output = new StringBuffer(); + output.append(getRSSHeaders()); + IndexSearcher searcher = null; + try { + reader = IndexReader.open(new SimpleFSDirectory(indexFile)); + searcher = new IndexSearcher(reader); + GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); + gc.setTime(new Date()); + String nowDateTime = ISO8601.format(gc); + gc.add(java.util.GregorianCalendar.MINUTE, -5); + String fiveMinsAgo = ISO8601.format(gc); + TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), + fiveMinsAgo, nowDateTime, true, true); + TopScoreDocCollector collector = TopScoreDocCollector.create(20, + true); + searcher.search(query, collector); + ScoreDoc[] hits = collector.topDocs().scoreDocs; + for (int i = 0; i < hits.length; i++) { + Document doc = searcher.doc(hits[i].doc); + output.append(getRSSItem(doc)); + } + + } finally { + if (reader != null) reader.close(); + if (searcher != null) searcher.close(); + } + + output.append(getRSSFooters()); + return output.toString(); + } + + public String getRSSItem(Document doc) { + StringBuffer output = new StringBuffer(); + output.append("<item>"); + output.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()), + "isPermalink", "true")); + output.append(emitTag("title", doc.get(Metadata.TITLE), null, null)); + output.append(emitTag("link", doc.get(DublinCore.SOURCE.getName()), + null, null)); + output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null)); + for (String topic : doc.getValues(Metadata.SUBJECT)) { + output.append(emitTag("category", topic, null, null)); + } + output.append(emitTag("pubDate", 
rssDateFormat.format(ISO8601.parse(doc + .get(Metadata.DATE.toString()))), null, null)); + output.append(emitTag("description", doc.get(Metadata.TITLE), null, + null)); + output.append("</item>"); + return output.toString(); + } + + public String getRSSHeaders() { + StringBuffer output = new StringBuffer(); + output.append("<?xml version=\"1.0\" encoding=\"utf-8\">"); + output.append("<rss version=\"2.0\">"); + output.append(" <channel>"); + output.append(" <title>Tika in Action: Recent Files Feed." + + "</title>"); + output.append(" <description>Chapter 6 Examples demonstrating " + + "use of Tika Metadata for RSS.</description>"); + output.append(" <link>tikainaction.rss</link>"); + output.append(" <lastBuildDate>" + rssDateFormat.format(new Date()) + + "</lastBuildDate>"); + output.append(" <generator>Manning Publications: Tika in Action" + + "</generator>"); + output.append(" <copyright>All Rights Reserved</copyright>"); + return output.toString(); + } + + public String getRSSFooters() { + StringBuffer output = new StringBuffer(); + output.append(" </channel>"); + return output.toString(); + } + + private String emitTag(String tagName, String value, String attributeName, + String attributeValue) { + StringBuffer output = new StringBuffer(); + output.append("<"); + output.append(tagName); + if (attributeName != null) { + output.append(" "); + output.append(attributeName); + output.append("=\""); + output.append(attributeValue); + output.append("\""); + } + output.append(">"); + output.append(value); + output.append("</"); + output.append(tagName); + output.append(">"); + return output.toString(); + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java Wed May 13 13:49:36 2015 @@ -1,141 +1,141 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.File; -import java.io.FileFilter; -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.commons.io.IOUtils; -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.Link; -import org.apache.tika.sax.LinkContentHandler; -import org.apache.tika.sax.XHTMLContentHandler; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; - -/** - * Demonstrates Tika and its ability to sense symlinks. 
- */ -@SuppressWarnings("deprecation") -public class RollbackSoftware { - - public static void main(String[] args) throws Exception { - RollbackSoftware r = new RollbackSoftware(); - r.rollback(new File(args[0])); - } - - public void rollback(File deployArea) throws IOException, SAXException, - TikaException { - LinkContentHandler handler = new LinkContentHandler(); - Metadata met = new Metadata(); - DeploymentAreaParser parser = new DeploymentAreaParser(); - parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath()), - handler, met); - List<Link> links = handler.getLinks(); - if (links.size() < 2) - throw new IOException("Must have installed at least 2 versions!"); - Collections.sort(links, new Comparator<Link>() { - public int compare(Link o1, Link o2) { - return o1.getText().compareTo(o2.getText()); - } - }); - - this.updateVersion(links.get(links.size() - 2).getText()); - - } - - private void updateVersion(String version) { - System.out.println("Rolling back to version: [" + version + "]"); - } - - class DeploymentAreaParser implements Parser { - - private static final long serialVersionUID = -2356647405087933468L; - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#getSupportedTypes( - * org.apache.tika.parser.ParseContext) - */ - public Set<MediaType> getSupportedTypes(ParseContext context) { - return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays - .asList(MediaType.TEXT_PLAIN))); - } - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, - * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata) - */ - public void parse(InputStream is, ContentHandler handler, - Metadata metadata) throws IOException, SAXException, - TikaException { - parse(is, handler, metadata, new ParseContext()); - } - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, - * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, - * 
org.apache.tika.parser.ParseContext) - */ - - public void parse(InputStream is, ContentHandler handler, - Metadata metadata, ParseContext context) throws IOException, - SAXException, TikaException { - - File deployArea = new File(IOUtils.toString(is)); - File[] versions = deployArea.listFiles(new FileFilter() { - - public boolean accept(File pathname) { - return !pathname.getName().startsWith("current"); - } - }); - - XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, - metadata); - xhtml.startDocument(); - for (File v : versions) { - if (isSymlink(v)) - continue; - xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm()); - xhtml.characters(v.getName()); - xhtml.endElement("a"); - } - - } - - } - - private boolean isSymlink(File f) throws IOException { - return !f.getAbsolutePath().equals(f.getCanonicalPath()); - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.example; + +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.io.IOUtils; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.Link; +import org.apache.tika.sax.LinkContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +/** + * Demonstrates Tika and its ability to sense symlinks. + */ +@SuppressWarnings("deprecation") +public class RollbackSoftware { + + public static void main(String[] args) throws Exception { + RollbackSoftware r = new RollbackSoftware(); + r.rollback(new File(args[0])); + } + + public void rollback(File deployArea) throws IOException, SAXException, + TikaException { + LinkContentHandler handler = new LinkContentHandler(); + Metadata met = new Metadata(); + DeploymentAreaParser parser = new DeploymentAreaParser(); + parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath()), + handler, met); + List<Link> links = handler.getLinks(); + if (links.size() < 2) + throw new IOException("Must have installed at least 2 versions!"); + Collections.sort(links, new Comparator<Link>() { + public int compare(Link o1, Link o2) { + return o1.getText().compareTo(o2.getText()); + } + }); + + this.updateVersion(links.get(links.size() - 2).getText()); + + } + + private void updateVersion(String version) { + System.out.println("Rolling back to version: [" + version + "]"); + } + + class DeploymentAreaParser implements Parser { + + private static final long serialVersionUID = -2356647405087933468L; + + /* + * 
(non-Javadoc) + * + * @see org.apache.tika.parser.Parser#getSupportedTypes( + * org.apache.tika.parser.ParseContext) + */ + public Set<MediaType> getSupportedTypes(ParseContext context) { + return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays + .asList(MediaType.TEXT_PLAIN))); + } + + /* + * (non-Javadoc) + * + * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, + * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata) + */ + public void parse(InputStream is, ContentHandler handler, + Metadata metadata) throws IOException, SAXException, + TikaException { + parse(is, handler, metadata, new ParseContext()); + } + + /* + * (non-Javadoc) + * + * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, + * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, + * org.apache.tika.parser.ParseContext) + */ + + public void parse(InputStream is, ContentHandler handler, + Metadata metadata, ParseContext context) throws IOException, + SAXException, TikaException { + + File deployArea = new File(IOUtils.toString(is)); + File[] versions = deployArea.listFiles(new FileFilter() { + + public boolean accept(File pathname) { + return !pathname.getName().startsWith("current"); + } + }); + + XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, + metadata); + xhtml.startDocument(); + for (File v : versions) { + if (isSymlink(v)) + continue; + xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm()); + xhtml.characters(v.getName()); + xhtml.endElement("a"); + } + + } + + } + + private boolean isSymlink(File f) throws IOException { + return !f.getAbsolutePath().equals(f.getCanonicalPath()); + } + +} Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java?rev=1679211&r1=1679210&r2=1679211&view=diff 
============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java Wed May 13 13:49:36 2015 @@ -1,34 +1,34 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.example; - -import java.io.File; -import org.apache.tika.Tika; - -public class SimpleTextExtractor { - - public static void main(String[] args) throws Exception { - // Create a Tika instance with the default configuration - Tika tika = new Tika(); - - // Parse all given files and print out the extracted - // text content - for (String file : args) { - String text = tika.parseToString(new File(file)); - System.out.print(text); - } - } - -} +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.example; + +import java.io.File; +import org.apache.tika.Tika; + +public class SimpleTextExtractor { + + public static void main(String[] args) throws Exception { + // Create a Tika instance with the default configuration + Tika tika = new Tika(); + + // Parse all given files and print out the extracted + // text content + for (String file : args) { + String text = tika.parseToString(new File(file)); + System.out.print(text); + } + } + +}