[ https://issues.apache.org/jira/browse/ANY23-304?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15891207#comment-15891207 ]
ASF GitHub Bot commented on ANY23-304: -------------------------------------- Github user ansell commented on a diff in the pull request: https://github.com/apache/any23/pull/34#discussion_r103806632 --- Diff: openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java --- @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.any23.openie; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.any23.extractor.ExtractionContext; +import org.apache.any23.extractor.ExtractionException; +import org.apache.any23.extractor.ExtractionParameters; +import org.apache.any23.extractor.ExtractionResult; +import org.apache.any23.extractor.ExtractionResultImpl; +import org.apache.any23.extractor.openie.OpenIEExtractor; +import org.apache.any23.rdf.RDFUtils; +import org.apache.any23.util.StreamUtils; +import org.apache.any23.writer.RDFXMLWriter; +import org.apache.any23.writer.TripleHandler; +import org.apache.any23.writer.TripleHandlerException; +import org.eclipse.rdf4j.model.IRI; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @author lewismc + * + */ +public class OpenIEExtractorTest { + + private static final Logger logger = LoggerFactory.getLogger(OpenIEExtractorTest.class); + + private OpenIEExtractor extractor; + + @Before + public void setUp() throws Exception { + extractor = new OpenIEExtractor(); + } + + @After + public void tearDown() throws Exception { + extractor = null; + } + + //@Ignore("This typically results in a JVM crash... disabled for the time being.") + @Test + public void testExtractFromHTMLDocument() + throws IOException, ExtractionException, TripleHandlerException { + final IRI uri = RDFUtils.iri("http://podaac.jpl.nasa.gov/aquarius"); + extract(uri, "/org/apache/any23/extractor/openie/example-openie.html"); + } + + public void extract(IRI uri, String filePath) + throws IOException, ExtractionException, TripleHandlerException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); --- End diff -- Writing to a file instead of ByteArrayOutputStream may alleviate some of the memory pressures. 
> Add extractor for OpenIE > ------------------------ > > Key: ANY23-304 > URL: https://issues.apache.org/jira/browse/ANY23-304 > Project: Apache Any23 > Issue Type: Bug > Components: core, extractors > Reporter: Lewis John McGibbney > Assignee: Lewis John McGibbney > Fix For: 2.1 > > > I'm going to start work on an extractor which uses the OpenIE library > https://github.com/allenai/openie-standalone > This will provide us with the ability to execute structured extractions from > unstructured content, essentially taking Any23 in a new direction. -- This message was sent by Atlassian JIRA (v6.3.15#6346)