Author: jukka
Date: Fri Nov 11 12:18:56 2011
New Revision: 1200844
URL: http://svn.apache.org/viewvc?rev=1200844&view=rev
Log:
TIKA-780: Optimize loading of the media type registry
Avoid collecting data when it's not needed. Fix handling of complex magic
matches.
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=1200844&r1=1200843&r2=1200844&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
(original)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
Fri Nov 11 12:18:56 2011
@@ -101,14 +101,7 @@ class MimeTypesReader extends DefaultHan
private int priority;
- private Clause clause = null;
-
- private List<Clause> clauses = new LinkedList<Clause>();
-
- private final LinkedList<List<Clause>> clauseStack =
- new LinkedList<List<Clause>>();
-
- private final StringBuilder characters = new StringBuilder();
+ private StringBuilder characters = null;
MimeTypesReader(MimeTypes types) {
this.types = types;
@@ -161,6 +154,8 @@ class MimeTypesReader extends DefaultHan
} else if (SUB_CLASS_OF_TAG.equals(qName)) {
String parent = attributes.getValue(SUB_CLASS_TYPE_ATTR);
types.setSuperType(type, MediaType.parse(parent));
+ } else if (COMMENT_TAG.equals(qName)) {
+ characters = new StringBuilder();
} else if (GLOB_TAG.equals(qName)) {
String pattern = attributes.getValue(PATTERN_ATTR);
String isRegex = attributes.getValue(ISREGEX_ATTR);
@@ -183,9 +178,8 @@ class MimeTypesReader extends DefaultHan
if (kind == null) {
kind = "string";
}
- clause = new MagicMatch(type.getType(), kind, offset, value, mask);
- clauseStack.addLast(clauses);
- clauses = null;
+ current = new ClauseRecord(
+ new MagicMatch(type.getType(), kind, offset, value, mask));
} else if (MAGIC_TAG.equals(qName)) {
String value = attributes.getValue(MAGIC_PRIORITY_ATTR);
if (value != null && value.length() > 0) {
@@ -193,6 +187,7 @@ class MimeTypesReader extends DefaultHan
} else {
priority = 50;
}
+ current = new ClauseRecord(null);
}
}
@@ -203,40 +198,66 @@ class MimeTypesReader extends DefaultHan
type = null;
} else if (COMMENT_TAG.equals(qName)) {
type.setDescription(characters.toString().trim());
+ characters = null;
} else if (MATCH_TAG.equals(qName)) {
- if (clauses != null) {
- Clause subclause;
- if (clauses.size() == 1) {
- subclause = clauses.get(0);
- } else {
- subclause = new OrClause(clauses);
- }
- clause = new AndClause(clause, subclause);
- }
- clauses = clauseStack.removeLast();
- if (clauses == null) {
- clauses = Collections.singletonList(clause);
- } else {
- if (clauses.size() == 1) {
- clauses = new ArrayList<Clause>(clauses);
- }
- clauses.add(clause);
- }
+ current.stop();
} else if (MAGIC_TAG.equals(qName)) {
- if (clauses != null) {
- for (Clause clause : clauses) {
- type.addMagic(new Magic(type, priority, clause));
- }
- clauses = null;
+ for (Clause clause : current.getClauses()) {
+ type.addMagic(new Magic(type, priority, clause));
}
+ current = null;
}
}
- characters.setLength(0);
}
@Override
public void characters(char[] ch, int start, int length) {
- characters.append(ch, start, length);
+ if (characters != null) {
+ characters.append(ch, start, length);
+ }
+ }
+
+ private ClauseRecord current = new ClauseRecord(null);
+
+ private class ClauseRecord {
+
+ private ClauseRecord parent;
+
+ private Clause clause;
+
+ private List<Clause> subclauses = null;
+
+ public ClauseRecord(Clause clause) {
+ this.parent = current;
+ this.clause = clause;
+ }
+
+ public void stop() {
+ if (subclauses != null) {
+ Clause subclause;
+ if (subclauses.size() == 1) {
+ subclause = subclauses.get(0);
+ } else {
+ subclause = new OrClause(subclauses);
+ }
+ clause = new AndClause(clause, subclause);
+ }
+ if (parent.subclauses == null) {
+ parent.subclauses = Collections.singletonList(clause);
+ } else {
+ if (parent.subclauses.size() == 1) {
+ parent.subclauses = new
ArrayList<Clause>(parent.subclauses);
+ }
+ parent.subclauses.add(clause);
+ }
+
+ current = current.parent;
+ }
+
+ public List<Clause> getClauses() {
+ return subclauses;
+ }
+
}
}