mynameborat commented on a change in pull request #912: SEP-19 : Refactoring sideInputs from SamzaContainer to ContainerStorageManager URL: https://github.com/apache/samza/pull/912#discussion_r259967191
########## File path: samza-core/src/main/scala/org/apache/samza/storage/ContainerStorageManager.java ########## @@ -364,14 +615,137 @@ public void start() throws SamzaException { executorService.shutdown(); - // Stop consumers - this.systemConsumers.values().forEach(systemConsumer -> systemConsumer.stop()); + // Stop store consumers + this.storeConsumers.values().forEach(systemConsumer -> systemConsumer.stop()); // Now re-create persistent stores in read-write mode, leave non-persistent stores as-is recreatePersistentTaskStoresInReadWriteMode(this.containerModel, jobContext, containerContext, - storageEngineFactories, changelogSystemStreams, serdes, taskInstanceMetrics, taskInstanceCollectors); + storageEngineFactories, serdes, taskInstanceMetrics, taskInstanceCollectors); - LOG.info("Restore complete"); + LOG.info("Store Restore complete"); + } + + // Read sideInputs until all sideInputStreams are caughtup, so start() can return + private void startSideInputs() { + + LOG.info("SideInput Restore started"); + + // initialize the sideInputStorageManagers + getSideInputStorageManagers().forEach(sideInputStorageManager -> sideInputStorageManager.init()); + + // start the checkpointing thread at the commit-ms frequency + sideInputsFlushFuture = sideInputsFlushExecutor.scheduleWithFixedDelay(new Runnable() { + @Override + public void run() { + getSideInputStorageManagers().forEach(sideInputStorageManager -> sideInputStorageManager.flush()); + } + }, 0, new TaskConfig(config).getCommitMs(), TimeUnit.MILLISECONDS); + + // set the latch to the number of sideInput SSPs + this.sideInputsCaughtUp = new CountDownLatch(this.sideInputStorageManagers.keySet().size()); + + // register all side input SSPs with the consumers + for (SystemStreamPartition ssp : sideInputStorageManagers.keySet()) { + String startingOffset = sideInputStorageManagers.get(ssp).getStartingOffset(ssp); + + if (startingOffset == null) { + throw new SamzaException("No offset defined for SideInput 
SystemStreamPartition : " + ssp); + } + + // register startingOffset with the sysConsumer and register a metric for it + sideInputSystemConsumers.register(ssp, startingOffset, null); + taskInstanceMetrics.get(sideInputStorageManagers.get(ssp).getTaskName()).addOffsetGauge( + ssp, ScalaJavaUtil.toScalaFunction(() -> sideInputStorageManagers.get(ssp).getLastProcessedOffset(ssp))); + + SystemStreamMetadata systemStreamMetadata = streamMetadataCache.getSystemStreamMetadata(ssp.getSystemStream(), false); + SystemStreamMetadata.SystemStreamPartitionMetadata sspMetadata = + (systemStreamMetadata == null) ? null : systemStreamMetadata.getSystemStreamPartitionMetadata().get(ssp.getPartition()); + + // record a copy of the sspMetadata, to later check if its caught up + initialSideInputSSPMetadata.put(ssp, sspMetadata); + + // check if the ssp is caught to upcoming, even at start + checkSideInputCaughtUp(ssp, startingOffset, SystemStreamMetadata.OffsetType.UPCOMING, false); + } + + // start the systemConsumers for consuming input + this.sideInputSystemConsumers.start(); + + // create a thread for sideInput reads + Thread readSideInputs = new Thread(() -> { + while (!shutDownSideInputRead) { + IncomingMessageEnvelope envelope = sideInputSystemConsumers.choose(true); + if (envelope != null) { + + if (!envelope.isEndOfStream()) + sideInputStorageManagers.get(envelope.getSystemStreamPartition()).process(envelope); + + checkSideInputCaughtUp(envelope.getSystemStreamPartition(), envelope.getOffset(), + SystemStreamMetadata.OffsetType.NEWEST, envelope.isEndOfStream()); + + } else { + LOG.trace("No incoming message was available"); + } + } + }); + + readSideInputs.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { + @Override + public void uncaughtException(Thread t, Throwable e) { + sideInputException = Optional.of(e); + sideInputsCaughtUp.countDown(); + } + }); + + try { + readSideInputs.start(); + // Make the main thread wait until all sideInputs have been 
caughtup or thrown an exception + this.sideInputsCaughtUp.await(); + + if (sideInputException.isPresent()) { // Throw exception if there was an exception in catching-up sideInputs Review comment: Can you add the JIRA ticket, if one exists, to capture the scenario where we need to communicate to the container in case of an exception after bootstrapping? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services