Revision: 16561
          http://sourceforge.net/p/gate/code/16561
Author:   valyt
Date:     2013-03-05 16:24:20 +0000 (Tue, 05 Mar 2013)
Log Message:
-----------
Partially rolled back previous change: include the document ID in the document content. The document ID can be useful, for example, when searching for a particular document in a large collection.
Modified Paths: -------------- gate/trunk/plugins/Format_PubMed/src/gate/corpora/CochraneTextDocumentFormat.java gate/trunk/plugins/Format_PubMed/src/gate/corpora/PubmedTextDocumentFormat.java Modified: gate/trunk/plugins/Format_PubMed/src/gate/corpora/CochraneTextDocumentFormat.java =================================================================== --- gate/trunk/plugins/Format_PubMed/src/gate/corpora/CochraneTextDocumentFormat.java 2013-03-05 15:39:04 UTC (rev 16560) +++ gate/trunk/plugins/Format_PubMed/src/gate/corpora/CochraneTextDocumentFormat.java 2013-03-05 16:24:20 UTC (rev 16561) @@ -58,6 +58,8 @@ private static final String COCHRANE_AUTHORS = "AU"; + private static final String COCHRANE_ID = "ID"; + protected static final Logger logger = Logger.getLogger( CochraneTextDocumentFormat.class); @@ -141,6 +143,19 @@ logger.warn("Could not find document title in document " + (docName != null ? docName : "")); } + // add ID + int idStart = docText.length(); + int idEnd = idStart; + aField = fields.get(COCHRANE_ID); + if(aField != null) { + docText.append(PubmedUtils.getFieldValueString(aField)); + idEnd = docText.length(); + docText.append(Strings.getNl()).append(Strings.getNl()); + } else { + String docName = doc.getName(); + logger.warn("Could not find document ID in document " + + (docName != null ? 
docName : "")); + } // add authors int authorStart = docText.length(); int authorEnd = authorStart; @@ -173,6 +188,10 @@ origMkups.add((long)titleStart, (long)titleEnd, "title", Factory.newFeatureMap()); } + if(idEnd > idStart){ + origMkups.add((long)idStart, (long)idEnd, "id", + Factory.newFeatureMap()); + } if(authorEnd > authorStart) { origMkups.add((long)authorStart, (long)authorEnd, "authors", Factory.newFeatureMap()); Modified: gate/trunk/plugins/Format_PubMed/src/gate/corpora/PubmedTextDocumentFormat.java =================================================================== --- gate/trunk/plugins/Format_PubMed/src/gate/corpora/PubmedTextDocumentFormat.java 2013-03-05 15:39:04 UTC (rev 16560) +++ gate/trunk/plugins/Format_PubMed/src/gate/corpora/PubmedTextDocumentFormat.java 2013-03-05 16:24:20 UTC (rev 16561) @@ -60,6 +60,8 @@ public static final String PUBMED_AUTHORS = "AU"; + public static final String PUBMED_ID = "PMID"; + protected static final Logger logger = Logger.getLogger( PubmedTextDocumentFormat.class); @@ -144,6 +146,19 @@ logger.warn("Could not find document title in document " + (docName != null ? docName : "")); } + // add ID + int idStart = docText.length(); + int idEnd = idStart; + aField = fields.get(PUBMED_ID); + if(aField != null) { + docText.append(PubmedUtils.getFieldValueString(aField)); + idEnd = docText.length(); + docText.append(Strings.getNl()).append(Strings.getNl()); + } else { + String docName = doc.getName(); + logger.warn("Could not find document ID in document " + + (docName != null ? 
docName : "")); + } // add authors int authorStart = docText.length(); int authorEnd = authorStart; @@ -176,6 +191,10 @@ origMkups.add((long)titleStart, (long)titleEnd, "title", Factory.newFeatureMap()); } + if(idEnd > idStart){ + origMkups.add((long)idStart, (long)idEnd, "id", + Factory.newFeatureMap()); + } if(authorEnd > authorStart) { origMkups.add((long)authorStart, (long)authorEnd, "authors", Factory.newFeatureMap()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Everyone hates slow websites. So do we. Make your web apps faster with AppDynamics Download AppDynamics Lite for free today: http://p.sf.net/sfu/appdyn_d2d_feb _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs