Github user hanm commented on a diff in the pull request:

    https://github.com/apache/zookeeper/pull/590#discussion_r219661330
  
    --- Diff: src/java/main/org/apache/zookeeper/server/watch/WatcherCleaner.java ---
    @@ -0,0 +1,176 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.zookeeper.server.watch;
    +
    +import java.util.Set;
    +import java.util.HashSet;
    +import java.util.Random;
    +import java.util.concurrent.atomic.AtomicInteger;
    +
    +import org.apache.zookeeper.common.Time;
    +import org.apache.zookeeper.server.RateLogger;
    +import org.apache.zookeeper.server.WorkerService;
    +import org.apache.zookeeper.server.WorkerService.WorkRequest;
    +
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +/**
    + * Thread used to lazily clean up closed watchers. It triggers a clean
    + * up when the number of dead watchers reaches a certain threshold, or
    + * when a certain number of seconds has elapsed since the last clean up.
    + *
    + * Cost of running it:
    + *
    + * - needs to go through all the paths even if a watcher may only be
    + *   watching a single path
    + * - briefly blocks on each path's BitHashSet while checking for the
    + *   dead watcher, which won't block other operations
    + */
    +public class WatcherCleaner extends Thread {
    +
    +    private static final Logger LOG = LoggerFactory.getLogger(WatcherCleaner.class);
    +    private final RateLogger RATE_LOGGER = new RateLogger(LOG);
    +
    +    private volatile boolean stopped = false;
    +    private final Object cleanEvent = new Object();
    +    private final Random r = new Random(System.nanoTime());
    +    private final WorkerService cleaners;
    +
    +    private final Set<Integer> deadWatchers;
    +    private final DeadWatcherListener listener;
    +    private final int watcherCleanThreshold;
    +    private final int watcherCleanIntervalInSeconds;
    +    private final int maxInProcessingDeadWatchers;
    +    private final AtomicInteger totalDeadWatchers = new AtomicInteger();
    +
    +    public WatcherCleaner(DeadWatcherListener listener) {
    +        this(listener,
    +            Integer.getInteger("zookeeper.watcherCleanThreshold", 1000),
    +            Integer.getInteger("zookeeper.watcherCleanIntervalInSeconds", 
600),
    +            Integer.getInteger("zookeeper.watcherCleanThreadsNum", 2),
    +            Integer.getInteger("zookeeper.maxInProcessingDeadWatchers", 
-1));
    +    }
    +
    +    public WatcherCleaner(DeadWatcherListener listener,
    +            int watcherCleanThreshold, int watcherCleanIntervalInSeconds,
    +            int watcherCleanThreadsNum, int maxInProcessingDeadWatchers) {
    +        this.listener = listener;
    +        this.watcherCleanThreshold = watcherCleanThreshold;
    +        this.watcherCleanIntervalInSeconds = watcherCleanIntervalInSeconds;
    +        int suggestedMaxInProcessingThreshold =
    +                watcherCleanThreshold * watcherCleanThreadsNum;
    +        if (maxInProcessingDeadWatchers > 0 &&
    +                maxInProcessingDeadWatchers < suggestedMaxInProcessingThreshold) {
    +            maxInProcessingDeadWatchers = suggestedMaxInProcessingThreshold;
    +            LOG.info("The maxInProcessingDeadWatchers config is smaller " +
    +                    "than the suggested one, change it to use {}",
    +                    maxInProcessingDeadWatchers);
    +        }
    +        this.maxInProcessingDeadWatchers = maxInProcessingDeadWatchers;
    +        this.deadWatchers = new HashSet<Integer>();
    +        this.cleaners = new WorkerService("DeadWatcherCleaner",
    +                watcherCleanThreadsNum, false);
    +
    +        LOG.info("watcherCleanThreshold={}, 
watcherCleanIntervalInSeconds={}" +
    +                ", watcherCleanThreadsNum={}, 
maxInProcessingDeadWatchers={}",
    +                watcherCleanThreshold, watcherCleanIntervalInSeconds,
    +                watcherCleanThreadsNum, maxInProcessingDeadWatchers);
    +    }
    +
    +    public void addDeadWatcher(int watcherBit) {
    +        // Wait if there are too many watchers waiting to be closed;
    +        // this will slow down the socket packet processing and the
    +        // adding of watches in the ZK pipeline.
    +        while (maxInProcessingDeadWatchers > 0 && !stopped &&
    +                totalDeadWatchers.get() >= maxInProcessingDeadWatchers) {
    +            try {
    +                RATE_LOGGER.rateLimitLog("Waiting for dead watchers cleaning");
    +                synchronized(totalDeadWatchers) {
    +                    totalDeadWatchers.wait(100);
    +                }
    +            } catch (InterruptedException e) {
    +                LOG.info("Got interrupted while waiting for dead watches " 
+
    +                        "queue size");
    +            }
    +        }
    +        synchronized (this) {
    +            if (deadWatchers.add(watcherBit)) {
    +                totalDeadWatchers.incrementAndGet();
    +                if (deadWatchers.size() >= watcherCleanThreshold) {
    +                    synchronized (cleanEvent) {
    +                        cleanEvent.notifyAll();
    +                    }
    +                }
    +            }
    +        }
    +    }
    +
    +    @Override
    +    public void run() {
    +        while (!stopped) {
    +            synchronized (cleanEvent) {
    +                try {
    +                    if (deadWatchers.size() < watcherCleanThreshold) {
    +                        int maxWaitMs = (watcherCleanIntervalInSeconds +
    +                            r.nextInt(watcherCleanIntervalInSeconds / 2 + 1)) * 1000;
    +                        cleanEvent.wait(maxWaitMs);
    +                    }
    +                } catch (InterruptedException e) {
    +                    LOG.info("Received InterruptedException while " +
    +                            "waiting for cleanEvent");
    +                    break;
    +                }
    +            }
    +
    +            if (deadWatchers.isEmpty()) {
    +                continue;
    +            }
    +
    +            synchronized (this) {
    +                // Snapshot of the current dead watchers
    +                final Set<Integer> snapshot = new HashSet<Integer>(deadWatchers);
    +                deadWatchers.clear();
    --- End diff --
    
    Is there a particular reason to copy `deadWatchers`, clear it
    immediately, and then do the work, instead of operating on `deadWatchers`
    directly and only clearing it after the work is done? I assume the
    motivation was to free `deadWatchers` earlier so the work can be
    pipelined: new dead watchers can be added while the previous batch is
    still being cleaned up. But it looks like the new dead watchers will
    block on `totalDeadWatchers`, which is only decremented after the
    previous batch has been cleaned up.
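    
    For context, a minimal sketch of the two batching strategies being
    compared here (all names are hypothetical and `processBatch` stands in
    for the `DeadWatcherListener` callback, so this is an illustration, not
    the actual WatcherCleaner code):
    
        import java.util.HashSet;
        import java.util.Set;
        import java.util.function.Consumer;
        
        public class DeadWatcherBatching {
        
            private final Set<Integer> deadWatchers = new HashSet<Integer>();
        
            public synchronized void addDeadWatcher(int watcherBit) {
                deadWatchers.add(watcherBit);
            }
        
            // Pattern used in the diff: copy under the lock, clear, then
            // process the snapshot outside the lock, so addDeadWatcher()
            // can keep filling deadWatchers while the previous batch is
            // still being processed.
            public void snapshotAndClear(Consumer<Set<Integer>> processBatch) {
                Set<Integer> snapshot;
                synchronized (this) {
                    snapshot = new HashSet<Integer>(deadWatchers);
                    deadWatchers.clear();
                }
                processBatch.accept(snapshot); // runs outside the lock
            }
        
            // Alternative suggested above: operate on deadWatchers directly
            // and clear it only after the work is done. The lock must then
            // be held for the whole batch, so addDeadWatcher() stalls until
            // processing finishes.
            public void processThenClear(Consumer<Set<Integer>> processBatch) {
                synchronized (this) {
                    processBatch.accept(deadWatchers);
                    deadWatchers.clear();
                }
            }
        }
    
    That said, the overlap the snapshot buys is bounded: when
    `maxInProcessingDeadWatchers` is set, new adds spin on
    `totalDeadWatchers`, which is only decremented after the previous batch
    has been cleaned up, so the pipelining benefit only applies below that
    limit.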

