[
https://issues.apache.org/jira/browse/APEXMALHAR-1897?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15203324#comment-15203324
]
ASF GitHub Bot commented on APEXMALHAR-1897:
--------------------------------------------
Github user amberarrow commented on a diff in the pull request:
https://github.com/apache/incubator-apex-malhar/pull/145#discussion_r56766102
--- Diff:
library/src/main/java/com/datatorrent/lib/state/managed/ManagedTimeUnifiedStateImpl.java
---
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datatorrent.lib.state.managed;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.RemoteIterator;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Queues;
+import com.google.common.util.concurrent.Futures;
+
+import com.datatorrent.lib.fileaccess.FileAccess;
+import com.datatorrent.lib.state.BucketedState;
+import com.datatorrent.netlet.util.Slice;
+
+/**
+ * In this implementation of {@link ManagedState} the buckets in memory are time-buckets.
+ * <p/>
+ *
+ * <b>Difference from {@link ManagedTimeStateImpl}</b>: <br/>
+ * <ol>
+ * <li>The main buckets in {@link ManagedTimeStateImpl} are unique adhoc long ids which the user provides with the
+ * key. In this implementation the main buckets are time buckets. The user provides just the time and the time bucket is
+ * derived from it.
+ * </li>
+ * <br/>
+ *
+ * <li>In regards to the bucket data on disk, in {@link ManagedTimeStateImpl} the buckets are persisted on disk
+ * with each bucket data further grouped into time-buckets: {base_path}/{bucketId}/{time-bucket id}. <br/>
+ * In this implementation operator id is used as bucketId (on disk) and there is just one time-bucket under a
+ * particular operator id:
+ * {base_path}/{operator id}/{time bucket id}.
+ * </li>
+ * <br/>
+ *
+ * <li>In {@link ManagedTimeStateImpl} a bucket belongs to just one partition. Multiple partitions cannot write to
+ * the same bucket. <br/>
+ * In this implementation multiple partitions can be working with the same time-bucket (since time-bucket is derived
+ * from time). This works because on the disk the time-bucket data is segregated under each operator id.
+ * </li>
+ * <br/>
+ *
+ * <li>While {@link ManagedTimeStateImpl} can support dynamic partitioning by pre-allocating buckets this will not
+ * be able to support dynamic partitioning efficiently.
+ * </li>
+
+ * </ol>
+ */
+public class ManagedTimeUnifiedStateImpl extends AbstractManagedStateImpl
implements BucketedState
+{
+ private final transient LinkedBlockingQueue<Long> purgedTimeBuckets =
Queues.newLinkedBlockingQueue();
+
+ public ManagedTimeUnifiedStateImpl()
+ {
+ bucketsFileSystem = new TimeUnifiedBucketsFileSystem();
+ }
+
+ @Override
+ public int getNumBuckets()
+ {
+ return timeBucketAssigner.getNumBuckets();
+ }
+
+ @Override
+ public void put(long time, Slice key, Slice value)
+ {
+ long timeBucket = timeBucketAssigner.getTimeBucketFor(time);
+ if (timeBucket == -1) {
+ //time is expired so return null.
--- End diff --
Suggested comment wording: "return null" => "ignore key/value" (put() returns void, so there is nothing to return).
> Create ManagedState
> -------------------
>
> Key: APEXMALHAR-1897
> URL: https://issues.apache.org/jira/browse/APEXMALHAR-1897
> Project: Apache Apex Malhar
> Issue Type: Sub-task
> Reporter: Chandni Singh
> Assignee: Chandni Singh
> Fix For: 3.4.0
>
>
> ManagedState is described in the document below:
> https://docs.google.com/document/d/1gRWN9ufKSZSZD0N-pthlhpC9TZ8KwJ6hJlAX6nxl5f8/edit#heading=h.z87ti1fwyt0t
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)