Sorry, the source code follows: public class InitialDBCreator { private static final SimpleDateFormat DATE_PARSER = new SimpleDateFormat("dd/MM/yyyy"); private static final SimpleDateFormat DATE_FORMATTER = new SimpleDateFormat("yyyyMMdd");
private static final int GRP_DEST_DOC = 1; private static final int GRP_DEST_NAME = 2; private static final int GRP_SRC_DOC = 3; private static final int GRP_SRC_NAME = 5; private static final int GRP_QUAL = 6; private static final int GRP_ENTRY_DATE = 7; private static final int GRP_PART_INT = 8; private static final int GRP_PART_DEC = 9; private static final Pattern PTRN_LINE = Pattern.compile("(\\d{11,14})\\t([^\\t]+)\\t(\\d{11,14})\\t"+ "([^\\t]+)\\t([^\\t]+)\\t([^\\t]+)\\t(\\d{2}/\\d{2}/"+ "\\d{4})\\t(\\d{1,3}),(\\d{2})%\\t(\\d{2}/\\d{2}/\\d{4})"); private final BatchInserter inserter; private final GraphDatabaseService dbService; private final BatchInserterIndexProvider indexProvider; private final BatchInserterIndex index; public InitialDBCreator(final String storeDir, final Map<String, String> config, final String indexName) { System.out.println("Iniciando inserter..."); inserter = new BatchInserterImpl(storeDir, config); dbService = inserter.getGraphDbService(); System.out.println("Iniciando indexProvider..."); indexProvider = new LuceneBatchInserterIndexProvider(inserter); System.out.println("Iniciando index..."); index = indexProvider.nodeIndex(indexName, MapUtil.stringMap("type", "exact")); System.out.println("DB iniciado!"); Runtime.getRuntime().addShutdownHook( new Thread() { @Override public void run() { indexProvider.shutdown(); inserter.shutdown(); } }); } public void shutdown() { index.flush(); indexProvider.shutdown(); inserter.shutdown(); } private File prepareNodesFile(final File initialFile) { File nodesFile = null; int count; int countErr; try { System.out.println("Extracting nodes..."); File tmpFile = File.createTempFile("qsa-tempnodes", ".txt"); BufferedWriter writer = new BufferedWriter(new FileWriter(tmpFile)); InputStream in = FUtils.getInputStream(initialFile); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String line = null; count = 0; countErr = 0; while ((line = reader.readLine()) != null) { Matcher matcher = PTRN_LINE.matcher(line); if (matcher.matches()) { String docOne = matcher.group(GRP_SRC_DOC); String nameOne = matcher.group(GRP_SRC_NAME); if (!docOne.equals("") && !nameOne.equals("")) { writer.write(docOne+"|"+nameOne+"\n"); } String docTwo = matcher.group(GRP_DEST_DOC); String nameTwo = matcher.group(GRP_DEST_NAME); if (!docTwo.equals("") && !nameTwo.equals("")) { writer.write(docTwo+"|"+nameTwo+"\n"); } count++; } else { System.err.println("ERRO: the line '"+line+"' doesn't match the pattern."); System.err.println("---"); countErr++; } if (((count > 0) && (count % 5000 == 0)) || ((countErr > 0) && (countErr % 500 == 0))) { System.out.print("\r"+count+" rows processed, "+countErr+" erroneous lines."); } } System.out.println("\r"+count+" rows processed, "+countErr+" erroneous lines."); in.close(); reader.close(); writer.close(); File sortedFile = FUtils.sortFile(tmpFile); System.out.println("Unifying nodes..."); nodesFile = File.createTempFile("qsa-nodes", ".txt"); writer = new BufferedWriter(new FileWriter(nodesFile)); in = FUtils.getInputStream(sortedFile); reader = new BufferedReader(new InputStreamReader(in)); line = null; count = 0; String lastDoc = "-1"; String lastLine = ""; while ((line = reader.readLine()) != null) { String doc = line.substring(0, line.indexOf("|")); if (!doc.equals(lastDoc) && !lastDoc.equals("-1")) { writer.write(lastLine+"\n"); } lastDoc = doc; lastLine = line; count++; if ((count > 0) && (count % 5000 == 0)) { System.out.print("\r"+count+" rows processed."); } } writer.write(lastLine+"\n"); System.out.println("\r"+count+" rows processed."); in.close(); reader.close(); writer.close(); } catch (IOException e) { e.printStackTrace(); } return nodesFile; } private void addPerson(final String doc, final String name) { PersonType tipo = (doc.length() <= 11) ? PersonType.INDIVIDUAL : PersonType.LEGAL; Map<String, Object> pessoaProperties = new HashMap<String, Object>(); pessoaProperties.put(Person.KEY_DOC , doc); pessoaProperties.put(Person.KEY_NAME, name); pessoaProperties.put(Person.KEY_TYPE, tipo.toString()); Map<String, Object> indexInfo = new HashMap<String, Object>(); indexInfo.put(Person.KEY_DOC, doc); index.add(inserter.createNode(pessoaProperties), indexInfo); tipo = null; pessoaProperties = null; indexInfo = null; } private void addSociety(final String srcDoc, final String destDoc, final long entryDate, final String qualification, final double participation) { Person source = null; Person destination = null; try { IndexHits<Long> hits = index.get(Person.KEY_DOC, srcDoc); source = new Person(dbService.getNodeById(hits.getSingle())); hits = index.get(Person.KEY_DOC, destDoc); destination = new Person(dbService.getNodeById(hits.getSingle())); CorporateRelationship sociedade = source.getSociety(destination); if (sociedade == null) { sociedade = source.addSociety(destination, qualification, participation, entryDate); } else { sociedade.setQualification(qualification); sociedade.setParticipation(participation); sociedade.setEntryDate(entryDate); } } catch (Exception e) { System.err.println("Error creating society between '"+srcDoc+"' and '"+destDoc+"'."); System.err.println("Source : "+source); System.err.println("Destination: "+destination); System.err.println(e.getMessage()); System.err.println("---"); } } public void createNodes(final File nodesFile) { System.out.println("Creating nodes..."); int count = 0; InputStream in = FUtils.getInputStream(nodesFile); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String line = null; try { while ((line = reader.readLine()) != null) { int i = line.indexOf("|"); if (i != -1) { String doc = line.substring(0, i); String name = line.substring(i+1); addPerson(doc, name); doc = null; name = null; count++; } else { System.err.println("ERROR: invalid line '"+line+"'"); } if (count % 5000 == 0) { System.out.print("\r"+count+" added nodes."); } } System.out.println("\r"+count+" added nodes."); } catch (IOException e) { e.printStackTrace(); } } public void createRelationships(final File relationshipsFile) { System.out.println("Creating edges..."); int count = 0; int countErr = 0; InputStream in = FUtils.getInputStream(relationshipsFile); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String line = null; try { while ((line = reader.readLine()) != null) {Matcher matcher = PTRN_LINE.matcher(line); if (matcher.matches()) { String srcDoc = matcher.group(GRP_SRC_DOC); String destDoc = matcher.group(GRP_DEST_DOC); long entryDate = Long.parseLong(DATE_FORMATTER.format(DATE_PARSER.parse(matcher.group(GRP_ENTRY_DATE)))); String qualification = matcher.group(GRP_QUAL); double participation = Double.parseDouble(matcher.group(GRP_PART_INT)) / 100 + Double.parseDouble(matcher.group(GRP_PART_DEC)) / 10000; if (!srcDoc.equals(destDoc)) { addSociety(srcDoc, destDoc, entryDate, qualification, participation); count++; } else { System.err.println("ERROR: invalid society."); System.err.println("linha: '"+line+"'"); System.err.println("---"); countErr++; } } else { System.err.println("ERROR: the line '"+line+"' doesn't match the pattern."); System.err.println("---"); countErr++; } if (((count > 0) && (count % 5000 == 0)) || ((countErr > 0) && (countErr % 500 == 0))) { System.out.print("\r"+count+" edges added, "+countErr+" invalid societies."); } } System.out.println("\r"+count+" edges added, "+countErr+" invalid societies."); } catch (IOException e) { e.printStackTrace(); } catch (NumberFormatException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } } public void updateDB(final File file) { InputStream in = FUtils.getInputStream(file); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String line = null; try { while ((line = reader.readLine()) != null) { Matcher matcher = PTRN_LINE.matcher(line); if (matcher.matches()) { String srcDoc = matcher.group(GRP_SRC_DOC); String srcName = matcher.group(GRP_SRC_NAME); IndexHits<Long> srcNode = index.get(Person.KEY_DOC, srcDoc); String destDoc = matcher.group(GRP_DEST_DOC); String destName = matcher.group(GRP_DEST_NAME); long entryDate = Long.parseLong(DATE_FORMATTER.format(DATE_PARSER.parse(matcher.group(GRP_ENTRY_DATE)))); String qualification = matcher.group(GRP_QUAL); double participation = Double.parseDouble(matcher.group(GRP_PART_INT)) / 100 + Double.parseDouble(matcher.group(GRP_PART_DEC)) / 10000; } else { System.err.println("ERRO: the line '"+line+"' doesn't match the pattern."); System.err.println("---"); } } } catch (IOException e) { e.printStackTrace(); } catch (NumberFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public void createDB(final File initialFile) { File nodesFile = prepareNodesFile(initialFile); createNodes(nodesFile); index.flush(); createRelationships(initialFile); } } _______________________________________________ Neo4j mailing list User@lists.neo4j.org https://lists.neo4j.org/mailman/listinfo/user