Revision: 17389
http://sourceforge.net/p/gate/code/17389
Author: markagreenwood
Date: 2014-02-21 19:44:41 +0000 (Fri, 21 Feb 2014)
Log Message:
-----------
removed the old single file populate code as it has been deprecated and didn't
work properly anyway
Modified Paths:
--------------
gate/trunk/src/main/gate/SimpleCorpus.java
gate/trunk/src/main/gate/corpora/CorpusImpl.java
gate/trunk/src/main/gate/corpora/SerialCorpusImpl.java
Removed Paths:
-------------
gate/trunk/src/main/gate/corpora/DocType.java
Modified: gate/trunk/src/main/gate/SimpleCorpus.java
===================================================================
--- gate/trunk/src/main/gate/SimpleCorpus.java 2014-02-21 19:32:16 UTC (rev
17388)
+++ gate/trunk/src/main/gate/SimpleCorpus.java 2014-02-21 19:44:41 UTC (rev
17389)
@@ -124,17 +124,12 @@
* files.
* @param documentNamePrefix the prefix to use for document names when
* creating from
- * @param documentType type of the document (i.e. xml, html etc.)
+ * @param mineType the mime type which determines how the document is
handled
* @return total length of populated documents in the corpus in number
* of bytes
*/
- @Deprecated
+
public long populate(URL singleConcatenatedFile, String documentRootElement,
- String encoding, int numberOfDocumentsToExtract,
- String documentNamePrefix, gate.corpora.DocType documentType) throws
IOException,
- ResourceInstantiationException;
-
- public long populate(URL singleConcatenatedFile, String documentRootElement,
String encoding, int numberOfDocumentsToExtract,
String documentNamePrefix, String mimeType, boolean includeRootElement)
throws IOException,
ResourceInstantiationException;
Modified: gate/trunk/src/main/gate/corpora/CorpusImpl.java
===================================================================
--- gate/trunk/src/main/gate/corpora/CorpusImpl.java 2014-02-21 19:32:16 UTC
(rev 17388)
+++ gate/trunk/src/main/gate/corpora/CorpusImpl.java 2014-02-21 19:44:41 UTC
(rev 17389)
@@ -508,30 +508,14 @@
* @param encoding the encoding of the trec file.
* @param numberOfDocumentsToExtract extracts the specified number of
* documents from the trecweb file; -1 to indicate all files.
- * @param documentType type of the document it is (i.e. xml, html etc)
+ * @param mimeType the mime type which determines how the document is handled
* @return total length of populated documents in the corpus in number
* of bytes
* @throws java.io.IOException
- */
- @Deprecated
- @SuppressWarnings("deprecation")
+ */
public static long populate(Corpus corpus, URL singleConcatenatedFile,
String documentRootElement, String encoding,
int numberOfDocumentsToExtract, String documentNamePrefix,
- DocType documentType) throws IOException {
- String mimeType = null;
-
- if(DocType.XML.equals(documentType))
- mimeType = "text/xml";
- else if(DocType.HTML.equals(documentType)) mimeType = "text/html";
-
- return populate(corpus, singleConcatenatedFile, documentRootElement,
- encoding, numberOfDocumentsToExtract, documentNamePrefix, mimeType,
true);
- }
-
- public static long populate(Corpus corpus, URL singleConcatenatedFile,
- String documentRootElement, String encoding,
- int numberOfDocumentsToExtract, String documentNamePrefix,
String mimeType, boolean includeRootElement) throws IOException {
StatusListener sListener =
(StatusListener)gate.Gate.getListeners().get("gate.event.StatusListener");
@@ -689,21 +673,11 @@
* extract from the trecweb file.
* @param documentNamePrefix the prefix to use for document names when
* creating from
- * @param documentType type of the document it is (i.e. html, xml)
+ * @param mimeType the mime type which determines how the document is handled
* @return total length of populated documents in the corpus in number
* of bytes
- */
- @Deprecated
- @SuppressWarnings("deprecation")
+ */
public long populate(URL singleConcatenatedFile, String documentRootElement,
- String encoding, int numberOfFilesToExtract,
- String documentNamePrefix, DocType documentType) throws IOException,
- ResourceInstantiationException {
- return populate(this, singleConcatenatedFile, documentRootElement,
- encoding, numberOfFilesToExtract, documentNamePrefix,
documentType);
- }
-
- public long populate(URL singleConcatenatedFile, String documentRootElement,
String encoding, int numberOfFilesToExtract,
String documentNamePrefix, String mimeType, boolean includeRootElement)
throws IOException,
ResourceInstantiationException {
Deleted: gate/trunk/src/main/gate/corpora/DocType.java
===================================================================
--- gate/trunk/src/main/gate/corpora/DocType.java 2014-02-21 19:32:16 UTC
(rev 17388)
+++ gate/trunk/src/main/gate/corpora/DocType.java 2014-02-21 19:44:41 UTC
(rev 17389)
@@ -1,23 +0,0 @@
-/*
- * DocType.java
- *
- * Copyright (c) 1995-2012, The University of Sheffield. See the file
- * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
- *
- * This file is part of GATE (see http://gate.ac.uk/), and is free
- * software, licenced under the GNU Library General Public License,
- * Version 2, June 1991 (in the distribution as file licence.html,
- * and also available at http://gate.ac.uk/gate/licence.html).
- *
- * Niraj Aswani, 09/March/2011
- */
-package gate.corpora;
-
-/**
- * Enum for different types of documents.
- * @author niraj
- */
-@Deprecated
-public enum DocType {
- HTML, XML, OTHER;
-}
Modified: gate/trunk/src/main/gate/corpora/SerialCorpusImpl.java
===================================================================
--- gate/trunk/src/main/gate/corpora/SerialCorpusImpl.java 2014-02-21
19:32:16 UTC (rev 17388)
+++ gate/trunk/src/main/gate/corpora/SerialCorpusImpl.java 2014-02-21
19:44:41 UTC (rev 17389)
@@ -399,24 +399,12 @@
* extract from the trecweb file.
* @param documentNamePrefix the prefix to use for document names when
* creating from
- * @param documentType type of the document it is (i.e. xml or html
- * etc.)
+ * @param mimeType the mime type which determines how the document is handled
* @return total length of populated documents in the corpus in number
* of bytes
- */
- @Deprecated
- @SuppressWarnings("deprecation")
+ */
public long populate(URL singleConcatenatedFile, String documentRootElement,
String encoding, int numberOfFilesToExtract,
- String documentNamePrefix, DocType documentType) throws IOException,
- ResourceInstantiationException {
- return CorpusImpl.populate(this, singleConcatenatedFile,
- documentRootElement, encoding, numberOfFilesToExtract,
- documentNamePrefix, documentType);
- }
-
- public long populate(URL singleConcatenatedFile, String documentRootElement,
- String encoding, int numberOfFilesToExtract,
String documentNamePrefix, String mimeType, boolean
includeRootElement) throws IOException,
ResourceInstantiationException {
return CorpusImpl.populate(this, singleConcatenatedFile,
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs