monkmachine commented on code in PR #558: URL: https://github.com/apache/tika/pull/558#discussion_r871707028
########## tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-cad-module/src/main/java/org/apache/tika/parser/dwg/DWGReadParser.java: ########## @@ -0,0 +1,208 @@ +package org.apache.tika.parser.dwg; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.function.Consumer; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.commons.io.FileUtils; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.sax.XHTMLContentHandler; + +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.core.json.JsonReadFeature; + + +public class DWGReadParser extends AbstractDWGParser { + private static final Logger LOG = LoggerFactory.getLogger(DWGParser.class); + /** + * + */ + private static final long serialVersionUID = 7983127145030096837L; + private static MediaType TYPE = MediaType.image("vnd.dwg"); + + public Set < MediaType > getSupportedTypes(ParseContext context) { + return Collections.singleton(TYPE); + } + + @Override + public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + + configure(context); + DWGParserConfig dwgc = context.get(DWGParserConfig.class); + final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); + + xhtml.startDocument(); + UUID uuid 
= UUID.randomUUID(); + File tmpFileOut = File.createTempFile(uuid + "dwgreadout", ".json"); + File tmpFileOutCleaned = File.createTempFile(uuid + "dwgreadoutclean", ".json"); + File tmpFileIn = File.createTempFile(uuid + "dwgreadin", ".dwg"); + try { + + FileUtils.copyInputStreamToFile(stream, tmpFileIn); + + List < String > command = Arrays.asList(dwgc.getDwgReadExecutable(), "-O", "JSON", "-o", Review Comment: @tballison I added ProcessUtils.execute instead of using ProcessBuilder manually, and also added a timeout (default 5 minutes, as I have found some do take some time) to the DWGParserConfig. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@tika.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org