fix: remove test and handle null quantities
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/a484e5ec Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/a484e5ec Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/a484e5ec Branch: refs/heads/master Commit: a484e5ec5a275219ac947b95c8b3df1fcc9a89fd Parents: e558f5d Author: Yash Tanna <[email protected]> Authored: Fri Apr 22 14:52:29 2016 -0700 Committer: Yash Tanna <[email protected]> Committed: Fri Apr 22 14:52:29 2016 -0700 ---------------------------------------------------------------------- .../parser/ner/grobid/GrobidNERecogniser.java | 87 +++++++++----------- .../ner/grobid/GrobidNERecogniserTest.java | 66 --------------- 2 files changed, 40 insertions(+), 113 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/a484e5ec/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java index 7f4be9d..d2e0bc4 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java @@ -1,8 +1,6 @@ package org.apache.tika.parser.ner.grobid; -import java.util.Map; -import java.util.Set; - +import org.apache.cxf.jaxrs.client.WebClient; import org.apache.tika.parser.ner.NERecogniser; import org.json.simple.JSONArray; import org.json.simple.JSONObject; @@ -10,16 +8,10 @@ import org.json.simple.parser.JSONParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.URLEncoder; -import java.util.HashSet; -import java.util.HashMap; -import java.util.Properties; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; - -import org.apache.cxf.jaxrs.client.WebClient; +import java.io.IOException; +import java.util.*; public class GrobidNERecogniser implements NERecogniser{ @@ -164,42 +156,43 @@ public class GrobidNERecogniser implements NERecogniser{ StringBuffer normalizedMeasurementString = new StringBuffer(); JSONObject quantity = (JSONObject) convertToJSONObject(measurements.get(i).toString()).get("quantity"); - - if(quantity.containsKey("rawValue")){ - String measurementNumber = (String) convertToJSONObject(quantity.toString()).get("rawValue"); - measurementString.append(measurementNumber); - measurementString.append(" "); - measurementNumberSet.add(measurementNumber); - } - - if(quantity.containsKey("normalizedQuantity")){ - String normalizedMeasurementNumber = convertToJSONObject(quantity.toString()).get("normalizedQuantity").toString(); - normalizedMeasurementString.append(normalizedMeasurementNumber); - normalizedMeasurementString.append(" "); - } - - JSONObject jsonObj = (JSONObject) convertToJSONObject(quantity.toString()); - - if(jsonObj.containsKey("rawUnit")){ - JSONObject rawUnit = (JSONObject) jsonObj.get("rawUnit"); - String unitName = (String) convertToJSONObject(rawUnit.toString()).get("name"); - unitSet.add(unitName); - measurementString.append(unitName); - } - - if(jsonObj.containsKey("normalizedUnit")){ - JSONObject normalizedUnit = (JSONObject) jsonObj.get("normalizedUnit"); - String normalizedUnitName = (String) convertToJSONObject(normalizedUnit.toString()).get("name"); - normalizedMeasurementString.append(normalizedUnitName); - } - - if(!measurementString.toString().equals("")){ - measurementSet.add(measurementString.toString()); - } - - if(!normalizedMeasurementString.toString().equals("")){ - normalizedMeasurementSet.add(normalizedMeasurementString.toString()); - } + if(quantity!=null) { + if (quantity.containsKey("rawValue")) { + String measurementNumber = (String) convertToJSONObject(quantity.toString()).get("rawValue"); + measurementString.append(measurementNumber); + measurementString.append(" "); + measurementNumberSet.add(measurementNumber); + } + + if (quantity.containsKey("normalizedQuantity")) { + String normalizedMeasurementNumber = convertToJSONObject(quantity.toString()).get("normalizedQuantity").toString(); + normalizedMeasurementString.append(normalizedMeasurementNumber); + normalizedMeasurementString.append(" "); + } + + JSONObject jsonObj = (JSONObject) convertToJSONObject(quantity.toString()); + + if (jsonObj.containsKey("rawUnit")) { + JSONObject rawUnit = (JSONObject) jsonObj.get("rawUnit"); + String unitName = (String) convertToJSONObject(rawUnit.toString()).get("name"); + unitSet.add(unitName); + measurementString.append(unitName); + } + + if (jsonObj.containsKey("normalizedUnit")) { + JSONObject normalizedUnit = (JSONObject) jsonObj.get("normalizedUnit"); + String normalizedUnitName = (String) convertToJSONObject(normalizedUnit.toString()).get("name"); + normalizedMeasurementString.append(normalizedUnitName); + } + + if (!measurementString.toString().equals("")) { + measurementSet.add(measurementString.toString()); + } + + if (!normalizedMeasurementString.toString().equals("")) { + normalizedMeasurementSet.add(normalizedMeasurementString.toString()); + } + } } entities.put("MEASUREMENT_NUMBERS",measurementNumberSet); http://git-wip-us.apache.org/repos/asf/tika/blob/a484e5ec/tika-parsers/src/test/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniserTest.java deleted file mode 100644 index 60279e9..0000000 --- a/tika-parsers/src/test/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniserTest.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright owlocationNameEntitieship. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.parser.ner.grobid; - -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayInputStream; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.HashSet; - -import org.apache.tika.Tika; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ner.NamedEntityParser; -import org.junit.Test; - -/** -*Test case for {@link Grobid NER} -*/ -public class GrobidNERecogniserTest { - - @Test - public void testGetEntityTypes() throws Exception { - String text = "I've lost one minute."; - System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, GrobidNERecogniser.class.getName()); - Tika tika = new Tika(new TikaConfig(NamedEntityParser.class.getResourceAsStream("tika-config.xml"))); - Metadata md = new Metadata(); - tika.parse(new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)), md); - - HashSet<String> set = new HashSet<String>(); - - set.clear(); - set.addAll(Arrays.asList(md.getValues("NER_MEASUREMENT_NUMBERS"))); - assertTrue(set.contains("one")); - - set.clear(); - set.addAll(Arrays.asList(md.getValues("NER_MEASUREMENT_UNITS"))); - assertTrue(set.contains("minute")); - - set.clear(); - set.addAll(Arrays.asList(md.getValues("NER_MEASUREMENTS"))); - assertTrue(set.contains("one minute")); - - set.clear(); - set.addAll(Arrays.asList(md.getValues("NER_NORMALIZED_MEASUREMENTS"))); - assertTrue(set.contains("60 s")); - } -} - -
