Sorry, the source code follows:

public class InitialDBCreator {
private static final SimpleDateFormat DATE_PARSER = new
SimpleDateFormat("dd/MM/yyyy");
    private static final SimpleDateFormat DATE_FORMATTER = new
SimpleDateFormat("yyyyMMdd");

    private static final int GRP_DEST_DOC   = 1;
    private static final int GRP_DEST_NAME  = 2;
    private static final int GRP_SRC_DOC    = 3;
    private static final int GRP_SRC_NAME   = 5;
    private static final int GRP_QUAL       = 6;
    private static final int GRP_ENTRY_DATE = 7;
    private static final int GRP_PART_INT   = 8;
    private static final int GRP_PART_DEC   = 9;
    private static final Pattern PTRN_LINE =
Pattern.compile("(\\d{11,14})\\t([^\\t]+)\\t(\\d{11,14})\\t"+

"([^\\t]+)\\t([^\\t]+)\\t([^\\t]+)\\t(\\d{2}/\\d{2}/"+

"\\d{4})\\t(\\d{1,3}),(\\d{2})%\\t(\\d{2}/\\d{2}/\\d{4})");

    private final BatchInserter inserter;
    private final GraphDatabaseService dbService;
    private final BatchInserterIndexProvider indexProvider;
    private final BatchInserterIndex index;

    public InitialDBCreator(final String storeDir, final Map<String, String>
config, final String indexName) {
    System.out.println("Iniciando inserter...");
    inserter = new BatchInserterImpl(storeDir, config);
    dbService = inserter.getGraphDbService();
    System.out.println("Iniciando indexProvider...");
    indexProvider = new LuceneBatchInserterIndexProvider(inserter);
    System.out.println("Iniciando index...");
    index = indexProvider.nodeIndex(indexName, MapUtil.stringMap("type",
"exact"));
    System.out.println("DB iniciado!");
    Runtime.getRuntime().addShutdownHook(
    new Thread() {
    @Override
    public void run() {
    indexProvider.shutdown();
    inserter.shutdown();
    }
    });
    }

    public void shutdown() {
    index.flush();
    indexProvider.shutdown();
    inserter.shutdown();
    }

    private File prepareNodesFile(final File initialFile) {
    File nodesFile = null;
    int count;
    int countErr;

    try {
    System.out.println("Extracting nodes...");
 File tmpFile = File.createTempFile("qsa-tempnodes", ".txt");
BufferedWriter writer = new BufferedWriter(new FileWriter(tmpFile));
 InputStream in = FUtils.getInputStream(initialFile);
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
 String line = null;
count = 0;
countErr = 0;
 while ((line = reader.readLine()) != null) {
Matcher matcher = PTRN_LINE.matcher(line);
 if (matcher.matches()) {
String docOne = matcher.group(GRP_SRC_DOC);
 String nameOne = matcher.group(GRP_SRC_NAME);
if (!docOne.equals("") && !nameOne.equals("")) {
 writer.write(docOne+"|"+nameOne+"\n");
}

 String docTwo = matcher.group(GRP_DEST_DOC);
String nameTwo = matcher.group(GRP_DEST_NAME);
 if (!docTwo.equals("") && !nameTwo.equals("")) {
writer.write(docTwo+"|"+nameTwo+"\n");
 }
count++;
} else {
 System.err.println("ERRO: the line '"+line+"' doesn't match the pattern.");
System.err.println("---");
 countErr++;
}

if (((count > 0) && (count % 5000 == 0)) || ((countErr > 0) && (countErr %
500 == 0))) {
 System.out.print("\r"+count+" rows processed, "+countErr+" erroneous
lines.");
}
 }
System.out.println("\r"+count+" rows processed, "+countErr+" erroneous
lines.");
 in.close();
reader.close();
writer.close();

File sortedFile = FUtils.sortFile(tmpFile);

System.out.println("Unifying nodes...");
 nodesFile = File.createTempFile("qsa-nodes", ".txt");
writer = new BufferedWriter(new FileWriter(nodesFile));
 in = FUtils.getInputStream(sortedFile);
reader = new BufferedReader(new InputStreamReader(in));
 line = null;
count = 0;
String lastDoc  = "-1";
 String lastLine = "";
while ((line = reader.readLine()) != null) {
 String doc = line.substring(0, line.indexOf("|"));
if (!doc.equals(lastDoc) && !lastDoc.equals("-1")) {
 writer.write(lastLine+"\n");
}
lastDoc = doc;
 lastLine = line;
count++;
if ((count > 0) && (count % 5000 == 0)) {
 System.out.print("\r"+count+" rows processed.");
}
 }
writer.write(lastLine+"\n");
System.out.println("\r"+count+" rows processed.");
 in.close();
reader.close();
writer.close();
 } catch (IOException e) {
e.printStackTrace();
}

return nodesFile;
    }

    private void addPerson(final String doc, final String name) {
PersonType tipo = (doc.length() <= 11) ? PersonType.INDIVIDUAL :
PersonType.LEGAL;

Map<String, Object> pessoaProperties = new HashMap<String, Object>();
pessoaProperties.put(Person.KEY_DOC , doc);
 pessoaProperties.put(Person.KEY_NAME, name);
pessoaProperties.put(Person.KEY_TYPE, tipo.toString());

Map<String, Object> indexInfo = new HashMap<String, Object>();
indexInfo.put(Person.KEY_DOC, doc);

index.add(inserter.createNode(pessoaProperties), indexInfo);
tipo = null;
 pessoaProperties = null;
indexInfo = null;
    }

    private void addSociety(final String srcDoc, final String destDoc, final
long entryDate,
      final String qualification, final double participation) {
    Person source = null;
    Person destination = null;
    try {
        IndexHits<Long> hits = index.get(Person.KEY_DOC, srcDoc);
        source = new Person(dbService.getNodeById(hits.getSingle()));
        hits = index.get(Person.KEY_DOC, destDoc);
        destination = new Person(dbService.getNodeById(hits.getSingle()));

        CorporateRelationship sociedade = source.getSociety(destination);
        if (sociedade == null) {
            sociedade = source.addSociety(destination, qualification,
participation, entryDate);
        } else {
            sociedade.setQualification(qualification);
            sociedade.setParticipation(participation);
            sociedade.setEntryDate(entryDate);
        }
    } catch (Exception e) {
    System.err.println("Error creating society between '"+srcDoc+"' and
'"+destDoc+"'.");
    System.err.println("Source     : "+source);
    System.err.println("Destination: "+destination);
    System.err.println(e.getMessage());
    System.err.println("---");
}
    }

    public void createNodes(final File nodesFile) {
    System.out.println("Creating nodes...");
    int count = 0;
    InputStream in = FUtils.getInputStream(nodesFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String line = null;
    try {
while ((line = reader.readLine()) != null) {
 int i = line.indexOf("|");
if (i != -1) {
String doc  = line.substring(0, i);
 String name = line.substring(i+1);
addPerson(doc, name);
doc = null;
 name = null;
count++;
} else {
 System.err.println("ERROR: invalid line '"+line+"'");
}

if (count % 5000 == 0) {
System.out.print("\r"+count+" added nodes.");
 }
}
System.out.println("\r"+count+" added nodes.");
 } catch (IOException e) {
e.printStackTrace();
}
    }

    public void createRelationships(final File relationshipsFile) {
    System.out.println("Creating edges...");
    int count = 0;
    int countErr = 0;
    InputStream in = FUtils.getInputStream(relationshipsFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String line = null;
    try {
while ((line = reader.readLine()) != null) {Matcher matcher =
PTRN_LINE.matcher(line);
 if (matcher.matches()) {
String srcDoc = matcher.group(GRP_SRC_DOC);
 String destDoc = matcher.group(GRP_DEST_DOC);
                long entryDate =
Long.parseLong(DATE_FORMATTER.format(DATE_PARSER.parse(matcher.group(GRP_ENTRY_DATE))));
                String qualification = matcher.group(GRP_QUAL);
                double participation =
Double.parseDouble(matcher.group(GRP_PART_INT)) / 100 +

Double.parseDouble(matcher.group(GRP_PART_DEC)) / 10000;
                if (!srcDoc.equals(destDoc)) {
                 addSociety(srcDoc, destDoc, entryDate, qualification,
participation);
                count++;
                } else {
                 System.err.println("ERROR: invalid society.");
                 System.err.println("linha: '"+line+"'");
 System.err.println("---");
countErr++;
                }
 } else {
System.err.println("ERROR: the line '"+line+"' doesn't match the pattern.");
 System.err.println("---");
countErr++;
}

if (((count > 0) && (count % 5000 == 0)) || ((countErr > 0) && (countErr %
500 == 0))) {
System.out.print("\r"+count+" edges added, "+countErr+" invalid
societies.");
 }
}
System.out.println("\r"+count+" edges added, "+countErr+" invalid
societies.");
 } catch (IOException e) {
e.printStackTrace();
} catch (NumberFormatException e) {
 e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
 }
    }

    public void updateDB(final File file) {
    InputStream in = FUtils.getInputStream(file);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String line = null;
    try {
while ((line = reader.readLine()) != null) {
Matcher matcher = PTRN_LINE.matcher(line);
 if (matcher.matches()) {
String srcDoc  = matcher.group(GRP_SRC_DOC);
 String srcName = matcher.group(GRP_SRC_NAME);
IndexHits<Long> srcNode = index.get(Person.KEY_DOC, srcDoc);

String destDoc  = matcher.group(GRP_DEST_DOC);
String destName = matcher.group(GRP_DEST_NAME);

long entryDate =
Long.parseLong(DATE_FORMATTER.format(DATE_PARSER.parse(matcher.group(GRP_ENTRY_DATE))));
                String qualification = matcher.group(GRP_QUAL);
                double participation =
Double.parseDouble(matcher.group(GRP_PART_INT)) / 100 +

Double.parseDouble(matcher.group(GRP_PART_DEC)) / 10000;
 } else {
System.err.println("ERRO: the line '"+line+"' doesn't match the pattern.");
 System.err.println("---");
}
}
 } catch (IOException e) {
e.printStackTrace();
} catch (NumberFormatException e) {
 // TODO Auto-generated catch block
e.printStackTrace();
} catch (ParseException e) {
 // TODO Auto-generated catch block
e.printStackTrace();
}
    }

    public void createDB(final File initialFile) {
    File nodesFile = prepareNodesFile(initialFile);
    createNodes(nodesFile);
    index.flush();
    createRelationships(initialFile);
    }
}
_______________________________________________
Neo4j mailing list
User@lists.neo4j.org
https://lists.neo4j.org/mailman/listinfo/user

Reply via email to