tballison commented on code in PR #2452:
URL: https://github.com/apache/tika/pull/2452#discussion_r2623284127
##########
tika-grpc/src/main/java/org/apache/tika/pipes/grpc/TikaGrpcServerImpl.java:
##########
@@ -77,99 +75,41 @@ class TikaGrpcServerImpl extends TikaGrpc.TikaImplBase {
}
public static final JsonSchemaGenerator JSON_SCHEMA_GENERATOR = new
JsonSchemaGenerator(OBJECT_MAPPER);
- /**
- * FetcherID is key, The pair is the Fetcher object and the Metadata
- */
PipesConfig pipesConfig;
PipesClient pipesClient;
- ExpiringFetcherStore expiringFetcherStore;
+ FetcherManager fetcherManager;
+ Path tikaConfigPath;
+ PluginManager pluginManager;
- String tikaConfigPath;
-
- TikaGrpcServerImpl(String tikaConfigPath)
- throws TikaConfigException, IOException,
ParserConfigurationException,
- TransformerException, SAXException {
+ TikaGrpcServerImpl(String tikaConfigPath) throws TikaConfigException,
IOException {
File tikaConfigFile = new File(tikaConfigPath);
- if (!tikaConfigFile.canWrite()) {
- File tmpTikaConfigFile = Files.createTempFile("configCopy",
tikaConfigFile.getName()).toFile();
- tmpTikaConfigFile.deleteOnExit();
- LOG.info("Tika config file {} is read-only. Making a temporary
copy to {}", tikaConfigFile, tmpTikaConfigFile);
- String tikaConfigFileContents =
FileUtils.readFileToString(tikaConfigFile, StandardCharsets.UTF_8);
- FileUtils.writeStringToFile(tmpTikaConfigFile,
tikaConfigFileContents, StandardCharsets.UTF_8);
- tikaConfigFile = tmpTikaConfigFile;
- tikaConfigPath = tikaConfigFile.getAbsolutePath();
+ if (!tikaConfigFile.exists()) {
+ throw new TikaConfigException("Tika config file does not exist: "
+ tikaConfigPath);
}
- pipesConfig =
TikaLoader.load(tikaConfigFile.toPath()).configs().load("pipes",
PipesConfig.class);
- pipesClient = new PipesClient(pipesConfig, tikaConfigFile.toPath());
- expiringFetcherStore = new
ExpiringFetcherStore(pipesConfig.getStaleFetcherTimeoutSeconds(),
- pipesConfig.getStaleFetcherDelaySeconds());
- this.tikaConfigPath = tikaConfigPath;
- try {
- updateTikaConfig();
- } catch (TikaException e) {
- throw new TikaConfigException("Problem updating tikaConfig", e);
- }
- }
+ Path configPath = tikaConfigFile.toPath();
+ this.tikaConfigPath = configPath;
- private void updateTikaConfig() throws ParserConfigurationException,
IOException, SAXException, TransformerException, TikaException {
- /* TODO -- update this with json stuff if necessary any more at all?
- Document tikaConfigDoc =
-
DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(tikaConfigPath);
+ TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
- Element fetchersElement = (Element)
tikaConfigDoc.getElementsByTagName("fetchers").item(0);
- if (fetchersElement == null) {
- fetchersElement = tikaConfigDoc.createElement("fetchers");
- tikaConfigDoc.getDocumentElement().appendChild(fetchersElement);
+ // Load PipesConfig directly from root level (not from "other-configs")
+ pipesConfig = tikaJsonConfig.deserialize("pipes", PipesConfig.class);
+ if (pipesConfig == null) {
+ pipesConfig = new PipesConfig();
}
- for (int i = 0; i < fetchersElement.getChildNodes().getLength(); ++i) {
-
fetchersElement.removeChild(fetchersElement.getChildNodes().item(i));
- }
- for (var fetcherEntry : expiringFetcherStore.getFetchers().entrySet())
{
- Fetcher fetcherObject = fetcherEntry.getValue();
- Map<String, Object> fetcherConfigParams =
OBJECT_MAPPER.convertValue(
-
expiringFetcherStore.getFetcherConfigs().get(fetcherEntry.getKey()),
- new TypeReference<>() {
- });
- Element fetcher = tikaConfigDoc.createElement("fetcher");
- fetcher.setAttribute("class",
fetcherEntry.getValue().getClass().getName());
- Element fetcherPluginId = tikaConfigDoc.createElement("name");
-
fetcherPluginId.setTextContent(fetcherObject.getExtensionConfig().id());
- fetcher.appendChild(fetcherPluginId);
- populateFetcherConfigs(fetcherConfigParams, tikaConfigDoc,
fetcher);
- fetchersElement.appendChild(fetcher);
- }
- DOMSource source = new DOMSource(tikaConfigDoc);
- FileWriter writer = new FileWriter(tikaConfigPath,
StandardCharsets.UTF_8);
- StreamResult result = new StreamResult(writer);
- TransformerFactory transformerFactory =
XMLReaderUtils.getTransformerFactory();
- Transformer transformer = transformerFactory.newTransformer();
- transformer.transform(source, result);
+ pipesClient = new PipesClient(pipesConfig, configPath);
Review Comment:
This will use only a single forked process. Use PipesParser if you want to
enable multiple PipesClients. I'm not sure which you'd prefer here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]