TextHandler extracting content when running code as Java App but not as Web App

Khare, Kushal (MIND) Fri, 01 Nov 2019 00:12:21 -0700

Hello Mates, hope you are all doing good !

I am trying to integrate my Solr & Tika search utility code with my java web 
application.
The issue that I am facing is that I am unable to get the results (text handler 
is unable to extract any data) when I hit through my we app search page, that 
is , when I run my application on server. Whereas, when I independently run my 
utility method using main() function, I get the results.
Please help me with this, as I am unable to figure out this. I have checked all 
the jars, libs, etc. Everything is fine.
Following is the code that I am using :


package com.mind.qdms.utility;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;


public class QdmsSolrUtilityMethods {



         public static void main(String[] args) throws IOException,
         SolrServerException { QdmsSolrUtilityMethods.getDocsList("kushal"); }


       public static List<String> getDocsList(String keyword) throws 
IOException, SolrServerException {
              System.out.println("in util");
              HttpSolrClient client = new 
HttpSolrClient.Builder("http://localhost:8983/solr/tika";).build();
              AutoDetectParser autoParser = new AutoDetectParser();
              indexTikaDocuments(new File("D:\\docs"), client, autoParser);
              List<String> resultDocList = queryDocuments(client, keyword);
              return resultDocList;
       }

       public static void indexTikaDocuments(File root,HttpSolrClient
         client,AutoDetectParser autoParser) throws IOException, 
SolrServerException {
         int totalTika = 0;

         @SuppressWarnings("rawtypes") Collection docList = new ArrayList();

         for (File file : root.listFiles()){
                if (file.isDirectory()) {
                       indexTikaDocuments(file,client,autoParser);
                       continue;
                }
                ContentHandler textHandler = new BodyContentHandler(-1);
                Metadata metadata = new Metadata();
                ParseContext context = new ParseContext();
                InputStream input = new
                FileInputStream(file);
                try {
                autoParser.parse(input, textHandler, metadata, context);
                }catch (Exception e) {
                System.out.println(String.format("File %s failed", 
file.getCanonicalPath()));
                e.printStackTrace();
                continue;
                }
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("id", file.getCanonicalPath());
                doc.addField("_text_", textHandler.toString());
                docList.add(doc);
                System.out.println(textHandler.toString());
                System.out.println( file.getCanonicalPath()); ++totalTika;
                // Completely arbitrary, just batch up more than one document 
for throughput!
                if(docList.size() >= 1000) {
                       // Commit within 5 minutes.
                       UpdateResponse resp = client.add(docList, 300000);
                       if (resp.getStatus() != 0) {
                             System.out.println("Some horrible error has 
occurred, status is: " +
                             resp.getStatus());
                             }
                       docList.clear();
                       }
                }if(docList.size() > 0) {
                       client.add(docList, 300000);
                     } client.commit();
                System.out.println("indexed " + totalTika + " documents");
                }

       public static List<String> queryDocuments(HttpSolrClient client1, String 
queryTerm) throws SolrServerException, IOException {
              List<String> resultList = null;
              HttpSolrClient client = new 
HttpSolrClient.Builder("http://localhost:8983/solr";).build();

              final Map<String, String> queryParamMap = new HashMap<String, 
String>();
              queryParamMap.put("q", queryTerm);
              queryParamMap.put("rows", "5000");
              MapSolrParams queryParams = new MapSolrParams(queryParamMap);

              final QueryResponse response = client.query("tika", queryParams);
              final SolrDocumentList docList = response.getResults();

              System.out.println("docList ::  "+docList);
              System.out.println("docList size ::  "+docList.size());

              for (SolrDocument document : docList) {
                     final String id = (String) document.getFirstValue("id");
                     resultList.add(id);
              }
              return resultList;
       }


}


Kindly help me get through this.
Thanks !

________________________________

The information contained in this electronic message and any attachments to 
this message are intended for the exclusive use of the addressee(s) and may 
contain proprietary, confidential or privileged information. If you are not the 
intended recipient, you should not disseminate, distribute or copy this e-mail. 
Please notify the sender immediately and destroy all copies of this message and 
any attachments. WARNING: Computer viruses can be transmitted via email. The 
recipient should check this email and any attachments for the presence of 
viruses. The company accepts no liability for any damage caused by any 
virus/trojan/worms/malicious code transmitted by this email. www.motherson.com

TextHandler extracting content when running code as Java App but not as Web App

Reply via email to