Github user revans2 commented on a diff in the pull request: https://github.com/apache/storm/pull/2475#discussion_r159895567 --- Diff: storm-client/src/jvm/org/apache/storm/utils/ShellLogHandler.java --- @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.storm.utils; + +import org.apache.logging.log4j.ThreadContext; +import org.apache.storm.multilang.ShellMsg; +import org.apache.storm.task.TopologyContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class ShellLogHandler { + public static final Logger LOG = LoggerFactory.getLogger(ShellLogHandler.class); + + public static final String ID = "id"; + public static final String NAME = "name"; + public static final String PARENT = "parent"; + public static final String PID = "pid"; + public static final String TASK = "task"; + + private ShellLogHandler() { + } + + private static void putIfNotNull(String key, Object value) { + if (value != null) { + ThreadContext.put(key, value.toString()); + } + } + + /** + * Update the {@link ThreadContext} with information about the logged + * message, including the pid and task. + * + * @param shellMsg + * - the {@link ShellMsg} containing the ID. + * @param process + * - the current {@link ShellProcess}. + * @param context + * - the current {@link TopologyContext}. + */ + private static void updateContext(ShellMsg shellMsg, ShellProcess process, TopologyContext context) { + putIfNotNull(ID, shellMsg.getId()); + // Calling this only once allows the same parent ID to be attached to + // all log messages from a tuple tree + if (!ThreadContext.containsKey(PARENT)) { + putIfNotNull(PARENT, shellMsg.getId()); + } --- End diff -- @hmcc If parent id is intended to be used to search for all log messages about a given tuple tree then there is a bug. You set parent ID once and only once. This means that "parent" will be set to the ID of the first log message with an ID ever sent. Every single tuple is likely to have a different parent so it will end up being a totally useless value. Also I agree with @HeartSaVioR I think there is a much cleaner way to get the same results that you want, and not having all of the overhead for those that don't want it. Because the logging is thread specific. I would propose a ShellLogHandler interface that can be set for implementations of ShellBolt and ShellSpout. That way you can override it, and there is no overhead for people who don't want it. I would propose something like ``` interface ShellLogHandler { void setupContext(ShellProcess process, TopologyContext context); void log(ShellMessage msg); } ``` `setupContext` would be called from the BoltReaderRunnable thread. It could set the ThreadContext for that thread for things like the PID, etc. These are things that will not change during the lifetime of the Bolt. For the spout it is going to be more overhead because there is no way currently to know if you are on a thread by yourself or sharing a thread with another spout. So for each log message the spout would call `setupContext` followed by `log`. You also don't need to modify storm.py as you can just use sendMsgToParent yourself directly and have your own custom helper log methods with the ID.
---