import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryParser.QueryParser;

import org.xml.sax.*;
import org.xml.sax.helpers.*;
import org.xml.sax.Attributes;
import javax.xml.parsers.*;

import java.sql.*;
import java.io.*;
import java.util.*;

/**
 * Consistency checker for the document bases of one SDX application.
 *
 * <p>For every document base three stores are compared:
 * <ul>
 *   <li>the Lucene index in <code>&lt;sdxconfdir&gt;/dbs/&lt;base&gt;/sdx-search-index</code>,
 *       where each document carries its id in the field <code>sdxdocid</code>;</li>
 *   <li>the HSQL table named after the upper-cased base (column <code>ID</code>);</li>
 *   <li>the document repository, accessed through the JDBC table
 *       <code>&lt;application name&gt;_&lt;repository&gt;</code> (columns <code>id</code>,
 *       <code>data</code>) when the repository is a JDBC one.</li>
 * </ul>
 *
 * <p>Findings are printed on standard output. The public check methods never throw:
 * failures are printed and the current run is abandoned. With <code>repair</code>
 * enabled some inconsistencies are corrected by deleting records.
 */
public class SDXHealthChecker {

    public static final String PROTOCOL_PREFIX = "jdbc:hsqldb:";
    public static final String CLASS_NAME_SUFFIX = "HSQLDB";
    public static final String HSQLDB_DRIVER = "org.hsqldb.jdbcDriver";

    /** When true, inconsistencies are repaired (records deleted) instead of only reported. */
    private boolean repair = false;
    private String sdxconfdir = "";
    private String documentbase = "";
    private String cocoonconfpath = "";

    private SdxConfReader SDX = null;
    private CocoonConfReader CoCoon = null;
    private Connection hSQLconnection = null;
    private String sdx_name = null;

    /** Names (String) of the document bases to check; never null. */
    private Vector bases = new Vector();
    /**
     * Lucene documents removed from the index during a repair run.
     * NOTE(review): nothing in this class adds them back to the index - confirm
     * whether re-indexing is expected to happen elsewhere.
     */
    private Vector docstoadd = new Vector();

    // State of the repository of the base that is currently being checked.
    // All of it is reset by openRepositoryConnection().
    private Connection repositoryconnection = null;
    private String repositorypath = null;
    private String repository_tablename = null;
    private PreparedStatement repocheck = null;
    private PreparedStatement repoloop = null;
    private PreparedStatement repodelete = null;
    private PreparedStatement repoget = null;

    /** Lookup of a document's file path for non-JDBC repositories; never prepared in this class. */
    private PreparedStatement queryfspath = null;

    /**
     * Opens the HSQL database, reads the SDX and Cocoon configuration and determines
     * which document bases will be checked.
     *
     * <p>Errors are printed, not thrown; after a failed start-up the list of bases is
     * empty, so the check methods do nothing. The JVM is terminated with status -1
     * when <code>documentbase</code> is given but unknown to the application.
     *
     * @param sdxconfdir     SDX configuration directory (contains <code>application.xconf</code>)
     * @param documentbase   single base to check, or null to check all configured bases
     * @param cocoonconfpath path of <code>cocoon.xconf</code>, or null for
     *                       <code>&lt;sdxconfdir&gt;/../../WEB-INF/cocoon.xconf</code>
     * @param repair         true to repair inconsistencies instead of only reporting them
     */
    public SDXHealthChecker(String sdxconfdir, String documentbase, String cocoonconfpath, boolean repair)
    {
        this.sdxconfdir = sdxconfdir;
        this.documentbase = documentbase;
        this.repair = repair;

        ResultSet rsTables = null;
        try {
            hSQLconnection = getHSQLConnection(sdxconfdir);
            if (hSQLconnection == null) {
                // getHSQLConnection() already printed the cause
                throw new SQLException("could not open the HSQL database below " + sdxconfdir);
            }

            DatabaseMetaData dbmd = hSQLconnection.getMetaData();
            rsTables = dbmd.getTables(null, null, "%_%", null);
            System.out.println("HSQL Database contains tables... ");
            while (rsTables.next()) {
                System.out.println(" - " + rsTables.getString("TABLE_NAME"));
            }

            System.out.println("reading sdx configuration");
            SDX = new SdxConfReader(sdxconfdir + "/application.xconf");
            System.out.println("reading cocoon configuration");
            if (cocoonconfpath == null) cocoonconfpath = sdxconfdir + "/../../WEB-INF/cocoon.xconf";
            this.cocoonconfpath = cocoonconfpath;
            CoCoon = new CocoonConfReader(cocoonconfpath);

            sdx_name = SDX.getName();
            Vector configured = SDX.getBases();

            if (documentbase != null) {
                if (configured == null || configured.indexOf(documentbase) == -1) {
                    System.out.println("Document base " + documentbase + " is not part of this SDX application.");
                    System.exit(-1);
                }
                // restrict the run to the one requested base
                bases.addElement(documentbase);
            }
            else if (configured != null) {
                bases = configured;
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            closeResultSet(rsTables);
        }
    }


    /**
     * Walks through the Lucene index of every base and reports documents without id,
     * ids that occur more than once in the index, ids with fewer than four HSQL
     * records and ids that are missing from the JDBC repository.
     *
     * <p>In repair mode all index entries of a duplicated id are deleted from the index.
     */
    public void trackLuceneIndex()
    {
        try {
            for (int b = 0; b < bases.size(); b++) {
                String base = (String) bases.elementAt(b);
                System.out.println("investigation of base " + base + " starts... ");
                openRepositoryConnection(base);
                checkLuceneIndexOfBase(base);
            } // for all documentbases requested
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Checks every live document of the Lucene index of one base. */
    private void checkLuceneIndexOfBase(String docbase) throws Exception
    {
        String indexdir = indexDirectory(docbase);
        PreparedStatement pscount = null;
        IndexReader ir = null;
        try {
            pscount = hSQLconnection.prepareStatement("select count(*) from " + docbase.toUpperCase() + " where ID = ?");

            System.out.println("configuration in dir " + indexdir);
            ir = IndexReader.open(indexdir);
            System.out.println("Index " + docbase + " has " + ir.numDocs() + " documents.");

            // Document numbers run up to maxDoc(); numDocs() excludes deleted documents
            // and shrinks while a repair run deletes, so it must not bound the loop.
            int maxdoc = ir.maxDoc();
            for (int i = 0; i < maxdoc; i++) {
                if (ir.isDeleted(i)) continue;
                try {
                    checkLuceneDocument(ir, ir.document(i), pscount);
                }
                catch (Exception e) {
                    System.out.println("Document " + i + ": " + e.getMessage());
                }
            } // for all documents in index
        }
        finally {
            if (ir != null) {
                // closing the reader is what writes pending deletions to disk
                try {
                    ir.close();
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
            }
            closeStatement(pscount);
        }
    }

    /** Runs the duplicate, HSQL and repository checks for one Lucene document. */
    private void checkLuceneDocument(IndexReader ir, Document doc, PreparedStatement pscount) throws Exception
    {
        String sdxdocid = doc.get("sdxdocid");
        if (sdxdocid == null) {
            System.out.println("NIET NORMAAL ! sdxdocid zou null zijn");
            dumpLuceneDoc(doc);
            return; // without an id there is nothing to look up in HSQL or the repository
        }

        Term t = new Term("sdxdocid", sdxdocid);
        int frequency = ir.docFreq(t);
        if (frequency > 1) {
            System.out.println("Multiple lucene docs found for document " + sdxdocid + ", " + frequency + " duplicates... ");
            dumpLuceneDoc(doc);
            if (repair) {
                docstoadd.addElement(doc);
                // ir.deleteDocuments(t) is the newer interface; this code still runs
                // against SDX 2.2 and the Lucene release of 2004.
                ir.delete(t);
            }
        }

        // checking HSQL health
        pscount.setString(1, sdxdocid);
        ResultSet rs = pscount.executeQuery();
        try {
            int cnt = rs.next() ? rs.getInt(1) : 0;
            if (cnt < 4) {      // not normal
                System.out.println("THIS IS NOT NORMAL, DOCUMENT " + sdxdocid + " has only " + cnt + " descriptor records in HSQL. ");
            }
        }
        finally {
            closeResultSet(rs);
        }

        // the repository can only be queried when it is a JDBC repository
        if (hasJdbcRepository() && !checkDocumentinRepository(sdxdocid)) {
            System.out.println("Document " + sdxdocid + " not present in repository ");
        }
    }

    /**
     * Looks up every distinct id of the HSQL table of each base in the Lucene index
     * and reports ids that do not have exactly one index entry, together with their
     * presence in the repository.
     */
    public void trackHSQL()
    {
        try {
            for (int b = 0; b < bases.size(); b++) {
                String base = (String) bases.elementAt(b);
                openRepositoryConnection(base);
                checkHsqlIdsOfBase(base);
            } // for all documentbases requested
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Compares the ids stored in the HSQL table of one base with its Lucene index. */
    private void checkHsqlIdsOfBase(String docbase) throws Exception
    {
        IndexSearcher is = null;
        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            is = new IndexSearcher(indexDirectory(docbase));
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser("sdxdocument", analyzer);

            ps = hSQLconnection.prepareStatement("select distinct(ID) from " + docbase.toUpperCase());
            rs = ps.executeQuery();
            while (rs.next()) {
                String sdxdocid = rs.getString(1);
                try {
                    int hitcount = countIndexHits(is, parser, sdxdocid);
                    if (hitcount != 1) {
                        System.out.println("for " + sdxdocid + " there are " + hitcount + " documents in the Lucene index.");
                        if (checkDocumentinRepository(sdxdocid)) System.out.println("document present in repository");
                        else System.out.println("document not present in repository");
                    }
                }
                catch (Exception e) {
                    // one unparsable or unverifiable id must not end the whole run
                    System.out.println("Document " + sdxdocid + ": " + e.getMessage());
                }
            }
        }
        finally {
            closeResultSet(rs);
            closeStatement(ps);
            if (is != null) {
                try {
                    is.close();
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Looks up every id of the JDBC repository of each base in the Lucene index.
     *
     * <p>Documents without index entry are checked out to the directory
     * <code>&lt;base&gt;_orphans</code>; in repair mode they are deleted from the
     * repository and from HSQL, otherwise a delete statement is appended to
     * <code>delete.sql</code>. Documents with more than one index entry are checked
     * out as well and their id is appended to <code>toDelete.txt</code>.
     * Both files are written in the working directory.
     */
    public void traceOrphanisedRepositoryDocs()
    {
        Writer txtdeletescript = null;
        Writer deletesql = null;
        try {
            txtdeletescript = new OutputStreamWriter(new FileOutputStream("toDelete.txt"), "UTF8");
            if (!repair) {
                // only a dry run produces the SQL script; a repair run deletes directly
                deletesql = new OutputStreamWriter(new FileOutputStream("delete.sql"), "UTF8");
            }

            for (int b = 0; b < bases.size(); b++) {
                String base = (String) bases.elementAt(b);
                traceOrphansOfBase(base, txtdeletescript, deletesql);
            } // for all documentbases requested
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // the scripts span all bases, so they are closed once, after the last base
            closeWriter(txtdeletescript);
            closeWriter(deletesql);
        }
    }

    /**
     * Handles the repository documents of one base.
     *
     * @param deletesql receives the delete statements of a dry run; null in repair mode
     */
    private void traceOrphansOfBase(String docbase, Writer txtdeletescript, Writer deletesql) throws Exception
    {
        String hsqltable = docbase.toUpperCase();
        PreparedStatement pscount = null;
        PreparedStatement hsqldel = null;
        IndexSearcher is = null;
        ResultSet rsRepo = null;
        try {
            pscount = hSQLconnection.prepareStatement("select count(*) from " + hsqltable + " where ID = ?");
            hsqldel = hSQLconnection.prepareStatement("delete from " + hsqltable + " where ID = ?");

            openRepositoryConnection(docbase);
            if (!hasJdbcRepository()) {
                System.out.println("no JDBC repository connection available for base " + docbase + ", orphan check skipped");
                return;
            }

            is = new IndexSearcher(indexDirectory(docbase));
            Analyzer analyzer = new StandardAnalyzer();
            QueryParser parser = new QueryParser("sdxdocument", analyzer);

            rsRepo = repoloop.executeQuery();
            while (rsRepo.next()) {
                String sdxdocid = rsRepo.getString(1);
                try {
                    int hitcount = countIndexHits(is, parser, sdxdocid);
                    if (hitcount == 1) continue;

                    System.out.println("for " + sdxdocid + " there are " + hitcount + " documents in the Lucene index.");
                    checkout(docbase + "_orphans", sdxdocid);

                    if (hitcount == 0) {
                        int hsqlcnt = countHsqlRecords(pscount, sdxdocid);
                        if (repair) {
                            repodelete.setString(1, sdxdocid);
                            repodelete.executeUpdate();
                            if (hsqlcnt > 0) {
                                hsqldel.setString(1, sdxdocid);
                                hsqldel.executeUpdate();
                            }
                        }
                        else {
                            // double single quotes so the generated statement stays valid SQL
                            deletesql.write("delete from " + repository_tablename + " where id = '"
                                    + sdxdocid.replaceAll("'", "''") + "';\n");
                        }
                        if (hsqlcnt == 0) {
                            System.out.println("document not present in HSQL (orphan)");
                        }
                        else {
                            System.out.println("document not indexed but has " + hsqlcnt + " records in HSQL (orphan)");
                        }
                    }
                    else {    // objects that were indexed more than once
                        txtdeletescript.write(sdxdocid + "\n");
                    }
                }
                catch (Exception e) {
                    // skipping is the safe choice: nothing is deleted for an id that could not be checked
                    System.out.println("Document " + sdxdocid + ": " + e.getMessage());
                }
            }
        }
        finally {
            closeResultSet(rsRepo);
            closeStatement(pscount);
            closeStatement(hsqldel);
            if (is != null) {
                try {
                    is.close();
                }
                catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Returns the number of HSQL records with the given id, using the prepared count statement. */
    private static int countHsqlRecords(PreparedStatement pscount, String sdxdocid) throws SQLException
    {
        pscount.setString(1, sdxdocid);
        ResultSet rsc = pscount.executeQuery();
        try {
            return rsc.next() ? rsc.getInt(1) : 0;
        }
        finally {
            closeResultSet(rsc);
        }
    }

    /**
     * Returns the number of index entries found for a document id.
     *
     * NOTE(review): the id is passed through QueryParser and StandardAnalyzer, whereas
     * trackLuceneIndex() matches the exact term; ids containing query syntax or
     * characters the analyzer rewrites may therefore be miscounted - confirm that all
     * ids are plain tokens.
     */
    private static int countIndexHits(IndexSearcher searcher, QueryParser parser, String sdxdocid) throws Exception
    {
        Query query = parser.parse("+sdxdocid:" + sdxdocid);
        Hits hits = searcher.search(query);
        return hits.length();
    }

    /** Returns the directory of the Lucene index of a document base. */
    private String indexDirectory(String docbase)
    {
        return sdxconfdir + "/dbs/" + docbase + "/sdx-search-index";
    }


    /**
     * Makes the repository of the given base the current one.
     *
     * <p>For a JDBC repository the connection is obtained from the Cocoon
     * configuration and the statements on the repository table are prepared; for any
     * other repository only its path is remembered. Statements of a previously
     * selected base are released first, so that a base without JDBC repository can
     * never be checked against the table of its predecessor. Errors are printed and
     * leave the checker without JDBC repository. The JVM is terminated with status -1
     * when the configured data source yields no connection.
     */
    private void openRepositoryConnection(String docbase)
    {
        releaseRepositoryStatements();
        repositoryconnection = null;
        repositorypath = null;
        repository_tablename = null;
        try {
            String repo = SDX.getRepository(docbase);
            System.out.println("repository for " + docbase + " is " + repo);
            if (SDX.isJdbc(repo)) {
                String dsi = SDX.getDsi(repo);
                System.out.println("DSI is " + dsi);
                repositoryconnection = CoCoon.getConnection(dsi);
                if (repositoryconnection == null) {
                    System.out.println("repository connection is null");
                    System.exit(-1); // a failure must not be reported as success
                }
                repository_tablename = sdx_name + "_" + repo;
                repocheck = repositoryconnection.prepareStatement("select count(*) from " + repository_tablename + " where id = ?");
                repoloop = repositoryconnection.prepareStatement("select id from " + repository_tablename);
                repodelete = repositoryconnection.prepareStatement("delete from " + repository_tablename + " where id = ?");
                repoget = repositoryconnection.prepareStatement("select * from " + repository_tablename + " where id = ?");
            }
            else {
                repositorypath = SDX.getDsi(repo);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            // never leave a half-prepared set of statements behind
            releaseRepositoryStatements();
            repositoryconnection = null;
        }
    }

    /**
     * Closes and forgets the statements of the current repository. The connection
     * itself is left open because it is handed out by the Cocoon configuration reader.
     */
    private void releaseRepositoryStatements()
    {
        closeStatement(repocheck);
        closeStatement(repoloop);
        closeStatement(repodelete);
        closeStatement(repoget);
        repocheck = null;
        repoloop = null;
        repodelete = null;
        repoget = null;
    }

    /** Tells whether the current repository is a JDBC repository with prepared statements. */
    private boolean hasJdbcRepository()
    {
        return repositoryconnection != null && repocheck != null;
    }

    /**
     * Prints name and string value of every field of a Lucene document.
     *
     * @param doc the document to print
     */
    static void dumpLuceneDoc(Document doc)
    {
        try {
            System.out.println("============== DOCUMENT DUMP ==============");
            for (Enumeration fields = doc.fields(); fields.hasMoreElements(); ) {
                Field ff = (Field) fields.nextElement();
                System.out.println("Field " + ff.name() + " = " + ff.stringValue());
            }
            System.out.println("==============          ==============");
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }


    /**
     * Opens the HSQL database stored below <code>&lt;dbDirPath&gt;/databases/_hsql/</code>
     * as user <code>sa</code> with an empty password.
     *
     * @param dbDirPath SDX configuration directory
     * @return the connection, or null when the driver or the database is unavailable
     *         (the cause is printed)
     */
    static Connection getHSQLConnection(String dbDirPath)
    {
        try {
            Class.forName(HSQLDB_DRIVER);
            String url = PROTOCOL_PREFIX + dbDirPath + "/databases/_hsql/";   // filenames
            return DriverManager.getConnection(url, "sa", "");
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Tells whether a document is stored in the current repository.
     *
     * @param docid SDX document id
     * @return true when the JDBC repository holds a record with that id or, for other
     *         repositories, when the file registered for the id exists
     * @throws SQLException when a query fails, or when the repository is not a JDBC
     *         one and no path lookup statement has been prepared
     */
    boolean checkDocumentinRepository(String docid) throws SQLException
    {
        if (hasJdbcRepository()) {
            repocheck.setString(1, docid);
            ResultSet rs = repocheck.executeQuery();
            try {
                return rs.next() && rs.getInt(1) > 0;
            }
            finally {
                closeResultSet(rs);
            }
        }

        if (queryfspath == null) {
            throw new SQLException("no statement available to look up the file of document " + docid);
        }
        // inquire docid from hsql
        queryfspath.setString(1, docid);
        ResultSet rs = queryfspath.executeQuery();
        try {
            if (!rs.next()) return false;
            String localpath = rs.getString(1);
            return localpath != null && new File(localpath).exists();
        }
        finally {
            closeResultSet(rs);
        }
    }

    /**
     * Writes the <code>data</code> column of a repository document to
     * <code>&lt;dir&gt;/&lt;docid&gt;.xml</code>, creating the directory when necessary.
     * The content is decoded and written as UTF-8. Nothing is written when the
     * repository holds no such document; errors are printed.
     */
    private void checkout(String dir, String docid)
    {
        if (repoget == null) {
            System.out.println("cannot check out " + docid + ": no JDBC repository connection");
            return;
        }
        ResultSet rs = null;
        Writer out = null;
        try {
            repoget.setString(1, docid);
            rs = repoget.executeQuery();
            if (rs.next()) {
                byte[] data = rs.getBytes("data");
                if (data == null) {
                    System.out.println("document " + docid + " has no data in the repository");
                    return;
                }
                File targetdir = new File(dir);
                if (!targetdir.isDirectory() && !targetdir.mkdirs()) {
                    throw new IOException("cannot create directory " + dir);
                }
                out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(dir + "/" + docid + ".xml"), "UTF8"));
                out.write(new String(data, "UTF-8"));
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            closeWriter(out);
            closeResultSet(rs);
        }
    }

    /** Closes a writer if there is one; a failure is printed because buffered output may be lost. */
    private static void closeWriter(Writer writer)
    {
        if (writer == null) return;
        try {
            writer.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes a JDBC statement if there is one; a failure is printed. */
    private static void closeStatement(Statement statement)
    {
        if (statement == null) return;
        try {
            statement.close();
        }
        catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /** Closes a JDBC result set if there is one; a failure is printed. */
    private static void closeResultSet(ResultSet resultset)
    {
        if (resultset == null) return;
        try {
            resultset.close();
        }
        catch (SQLException e) {
            e.printStackTrace();
        }
    }

}
