This is an automated email from the ASF dual-hosted git repository. skrawcz pushed a commit to branch stefan/flip-telemetry in repository https://gitbox.apache.org/repos/asf/hamilton.git
commit af45cfb8a61ad54182808fbe9b830d05ccba0a7c Author: Stefan Krawczyk <[email protected]> AuthorDate: Sat Mar 7 22:47:15 2026 -0800 Remove usage telemetry (PostHog) from Hamilton ASF policy does not permit phone-home telemetry in released artifacts. This removes all PostHog event tracking while keeping hamilton/telemetry.py as a backwards-compatible no-op stub so downstream code that calls `telemetry.disable_telemetry()` continues to work without changes. --- contrib/hamilton/contrib/__init__.py | 10 - docs/get-started/license.rst | 23 -- docs/index.md | 1 - docs/reference/disabling-telemetry.md | 40 -- hamilton/async_driver.py | 67 +--- hamilton/cli/__main__.py | 3 - hamilton/contrib/__init__.py | 8 - hamilton/dataflows/__init__.py | 35 +- hamilton/driver.py | 138 +------ hamilton/plugins/h_experiments/__main__.py | 5 - hamilton/registry.py | 2 - hamilton/telemetry.py | 535 +------------------------- plugin_tests/h_dask/conftest.py | 5 - plugin_tests/h_narwhals/conftest.py | 21 - plugin_tests/h_pandas/conftest.py | 21 - plugin_tests/h_polars/conftest.py | 21 - plugin_tests/h_ray/conftest.py | 5 - plugin_tests/h_spark/conftest.py | 21 - plugin_tests/h_vaex/conftest.py | 21 - tests/conftest.py | 5 - tests/test_async_driver.py | 36 -- tests/test_hamilton_driver.py | 188 +-------- tests/test_telemetry.py | 237 ------------ ui/backend/server/trackingserver_base/apps.py | 35 -- ui/sdk/tests/conftest.py | 20 - 25 files changed, 22 insertions(+), 1481 deletions(-) diff --git a/contrib/hamilton/contrib/__init__.py b/contrib/hamilton/contrib/__init__.py index 951292d1..1d6df3cf 100644 --- a/contrib/hamilton/contrib/__init__.py +++ b/contrib/hamilton/contrib/__init__.py @@ -23,22 +23,12 @@ try: except ImportError: from version import VERSION as __version__ # noqa: F401 -from hamilton import telemetry - - -def track(module_name: str): - """Function to call to track module usage.""" - if hasattr(telemetry, "create_and_send_contrib_use"): # makes sure Hamilton version is fine. - telemetry.create_and_send_contrib_use(module_name, __version__) - @contextmanager def catch_import_errors(module_name: str, file_location: str, logger: logging.Logger): try: # Yield control to the inner block which will have the import statements. yield - # After all imports succeed send telemetry - track(module_name) except ImportError as e: location = file_location[: file_location.rfind("/")] logger.error("ImportError: %s", e) diff --git a/docs/get-started/license.rst b/docs/get-started/license.rst index 71576cc6..132cfaca 100644 --- a/docs/get-started/license.rst +++ b/docs/get-started/license.rst @@ -3,26 +3,3 @@ License ======= Apache Hamilton is released under the `Apache 2.0 License <https://github.com/apache/hamilton/blob/main/LICENSE>`_. - - - -Usage analytics & data privacy ------------------------------------ -By default, when using Apache Hamilton, it collects anonymous usage data to help improve Apache Hamilton and know where to apply development -efforts. - -We capture three types of events: one when the `Driver` object is instantiated, one when the `execute()` call on the \ -`Driver` object completes, and one for most `Driver` object function invocations. -No user data or potentially sensitive information is or ever will be collected. The captured data is limited to: - -* Operating System and Python version -* A persistent UUID to indentify the session, stored in ~/.hamilton.conf. -* Error stack trace limited to Apache Hamilton code, if one occurs. -* Information on what features you're using from Apache Hamilton: decorators, adapters, result builders. -* How Apache Hamilton is being used: number of final nodes in DAG, number of modules, size of objects passed to `execute()`, \ - the name of the Driver function being invoked. - - -Else see :doc:`/reference/disabling-telemetry` for how to disable telemetry. - -Otherwise we invite you to inspect telemetry.py for details. diff --git a/docs/index.md b/docs/index.md index 659e4ea4..2c2c25de 100644 --- a/docs/index.md +++ b/docs/index.md @@ -76,7 +76,6 @@ reference/lifecycle-hooks/index reference/result-builders/index reference/io/index reference/dataflows/index -reference/disabling-telemetry.md ``` ```{toctree} diff --git a/docs/reference/disabling-telemetry.md b/docs/reference/disabling-telemetry.md deleted file mode 100644 index 1ab75a9c..00000000 --- a/docs/reference/disabling-telemetry.md +++ /dev/null @@ -1,40 +0,0 @@ -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> - -# Telemetry - -If you do not wish to participate in telemetry capture, one can opt-out with one of the following methods: -1. Set it to false programmatically in your code before creating a Hamilton Driver: - ```python - from hamilton import telemetry - telemetry.disable_telemetry() - ``` -2. Set the key `telemetry_enabled` to `false` in ~/.hamilton.conf under the `DEFAULT` section: - ``` - [DEFAULT] - telemetry_enabled = False - ``` -3. Set HAMILTON_TELEMETRY_ENABLED=false as an environment variable. Either setting it for your shell session: - ```bash - export HAMILTON_TELEMETRY_ENABLED=false - ``` - or passing it as part of the run command: - ```bash - HAMILTON_TELEMETRY_ENABLED=false python NAME_OF_MY_DRIVER.py - ``` diff --git a/hamilton/async_driver.py b/hamilton/async_driver.py index a6c45b61..46f995e7 100644 --- a/hamilton/async_driver.py +++ b/hamilton/async_driver.py @@ -18,15 +18,12 @@ import asyncio import inspect import logging -import sys -import time import typing import uuid -from types import ModuleType from typing import Any import hamilton.lifecycle.base as lifecycle_base -from hamilton import base, driver, graph, lifecycle, node, telemetry +from hamilton import base, driver, graph, lifecycle, node from hamilton.execution.graph_functions import create_error_message from hamilton.io.materialization import ExtractorFactory, MaterializerFactory @@ -375,9 +372,6 @@ class AsyncDriver(driver.Driver): "display_graph=True is not supported for the async graph adapter. " "Instead you should be using visualize_execution." ) - start_time = time.time() - run_successful = True - error = None _final_vars = self._create_final_vars(final_vars) try: outputs = await self.raw_execute(_final_vars, overrides, display_graph, inputs=inputs) @@ -386,67 +380,8 @@ class AsyncDriver(driver.Driver): return self.adapter.call_lifecycle_method_sync("do_build_result", outputs=outputs) return outputs except Exception as e: - run_successful = False logger.error(driver.SLACK_ERROR_MESSAGE) - error = telemetry.sanitize_error(*sys.exc_info()) raise e - finally: - duration = time.time() - start_time - # ensure we can capture telemetry in async friendly way. - if telemetry.is_telemetry_enabled(): - - async def make_coroutine(): - self.capture_execute_telemetry( - error, final_vars, inputs, overrides, run_successful, duration - ) - - try: - # we don't have to await because we are running within the event loop. - asyncio.create_task(make_coroutine()) - except Exception as e: - if logger.isEnabledFor(logging.DEBUG): - logger.error(f"Encountered error submitting async telemetry:\n{e}") - - def capture_constructor_telemetry( - self, - error: str | None, - modules: tuple[ModuleType], - config: dict[str, Any], - adapter: base.HamiltonGraphAdapter, - ): - """Ensures we capture constructor telemetry the right way in an async context. - - This is a simpler wrapper around what's in the driver class. - - :param error: sanitized error string, if any. - :param modules: tuple of modules to build DAG from. - :param config: config to create the driver. - :param adapter: adapter class object. - """ - if telemetry.is_telemetry_enabled(): - try: - # check whether the event loop has been started yet or not - loop = asyncio.get_event_loop() - if loop.is_running(): - loop.run_in_executor( - None, - super(AsyncDriver, self).capture_constructor_telemetry, - error, - modules, - config, - adapter, - ) - else: - - async def make_coroutine(): - super(AsyncDriver, self).capture_constructor_telemetry( - error, modules, config, adapter - ) - - loop.run_until_complete(make_coroutine()) - except Exception as e: - if logger.isEnabledFor(logging.DEBUG): - logger.error(f"Encountered error submitting async telemetry:\n{e}") class Builder(driver.Builder): diff --git a/hamilton/cli/__main__.py b/hamilton/cli/__main__.py index 13036954..891cce44 100644 --- a/hamilton/cli/__main__.py +++ b/hamilton/cli/__main__.py @@ -32,7 +32,6 @@ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) from hamilton import driver -from hamilton import telemetry from hamilton.cli import commands logger = logging.getLogger(__name__) @@ -120,8 +119,6 @@ def main( ] = False, ): """Hamilton CLI""" - if telemetry.is_telemetry_enabled(): - telemetry.create_and_send_cli_event(ctx.invoked_subcommand) state.verbose = verbose state.json_out = json_out logger.debug(f"verbose set to {verbose}") diff --git a/hamilton/contrib/__init__.py b/hamilton/contrib/__init__.py index e6d37700..f6bc625f 100644 --- a/hamilton/contrib/__init__.py +++ b/hamilton/contrib/__init__.py @@ -25,20 +25,12 @@ from contextlib import contextmanager __version__ = "__unknown__" # this will be overwritten once sf-hamilton-contrib is installed. -from hamilton import telemetry - @contextmanager def catch_import_errors(module_name: str, file_location: str, logger: logging.Logger): try: # Yield control to the inner block which will have the import statements. yield - # After all imports succeed send telemetry - if "." in module_name: - telemetry.create_and_send_contrib_use(module_name, __version__) - else: - # we are importing it dynamically thus a "package" isn't present so file_location has the info. - telemetry.create_and_send_contrib_use(file_location, __version__) except ImportError as e: location = file_location[: file_location.rfind("/")] logger.error("ImportError: %s", e) diff --git a/hamilton/dataflows/__init__.py b/hamilton/dataflows/__init__.py index 1fece4ab..3c8c0bfc 100644 --- a/hamilton/dataflows/__init__.py +++ b/hamilton/dataflows/__init__.py @@ -22,7 +22,6 @@ dataflows. TODO: expect this to have a CLI interface in the future. """ -import functools import importlib import json import logging @@ -36,7 +35,7 @@ from collections.abc import Callable from types import ModuleType from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Type, Union -from hamilton import driver, telemetry +from hamilton import driver if TYPE_CHECKING: import builtins @@ -65,37 +64,17 @@ OFFICIAL_PATH = DATAFLOW_FOLDER + "/" + COMMON_PATH + "/dagworks/{dataflow}" def _track_function_call(call_fn: Callable) -> Callable: - """Decorator to wrap the __call__ to count usage. + """No-op decorator kept for backwards compatibility. - :param call_fn: the `__call__` function. - :return: the wrapped call function. + :param call_fn: the function. + :return: the same function, unwrapped. """ - - @functools.wraps(call_fn) - def track_call(*args, **kwargs): - event_json = telemetry.create_dataflow_function_invocation_event_json(call_fn.__name__) - telemetry.send_event_json(event_json) - return call_fn(*args, **kwargs) - - return track_call + return call_fn def _track_download(is_official: bool, user: str | None, dataflow_name: str, version: str): - """Inner function to track "downloads" of a dataflow. - - :param is_official: is this an official dataflow? False == user. - :param user: If not official, what is the github user name. - :param dataflow_name: the name of the dataflow - :param version: the version. Either git hash, or the package version. - """ - if is_official: - category = "DAGWORKS" - else: - category = "USER" - event_json = telemetry.create_dataflow_download_event_json( - category, user, dataflow_name, version - ) - telemetry.send_event_json(event_json) + """No-op. Telemetry has been removed.""" + pass def _get_request(url: str) -> tuple[int, str]: diff --git a/hamilton/driver.py b/hamilton/driver.py index 6186aa71..cc043993 100644 --- a/hamilton/driver.py +++ b/hamilton/driver.py @@ -16,7 +16,6 @@ # under the License. import abc -import functools import importlib import importlib.util import json @@ -24,7 +23,6 @@ import logging import operator import pathlib import sys -import time # required if we want to run this code stand alone. import typing @@ -61,41 +59,19 @@ if __name__ == "__main__": import base import graph import node - import telemetry else: - from . import base, graph, node, telemetry + from . import base, graph, node logger = logging.getLogger(__name__) def capture_function_usage(call_fn: Callable) -> Callable: - """Decorator to wrap some driver functions for telemetry capture. + """No-op decorator kept for backwards compatibility. - We want to use this for non-constructor and non-execute functions. - We don't capture information about the arguments at this stage, - just the function name. - - :param call_fn: the Driver function to capture. - :return: wrapped function. + :param call_fn: the Driver function. + :return: the same function, unwrapped. """ - - @functools.wraps(call_fn) - def wrapped_fn(*args, **kwargs): - try: - return call_fn(*args, **kwargs) - finally: - if telemetry.is_telemetry_enabled(): - try: - function_name = call_fn.__name__ - event_json = telemetry.create_driver_function_invocation_event(function_name) - telemetry.send_event_json(event_json) - except Exception as e: - if logger.isEnabledFor(logging.DEBUG): - logger.error( - f"Failed to send telemetry for function usage. Encountered: {e}\n" - ) - - return wrapped_fn + return call_fn # This is kept in here for backwards compatibility @@ -450,7 +426,6 @@ class Driver: adapter = self.normalize_adapter_input(adapter, use_legacy_adapter=_use_legacy_adapter) if adapter.does_hook("pre_do_anything", is_async=False): adapter.call_all_lifecycle_hooks_sync("pre_do_anything") - error = None self.graph_modules = modules try: self.graph = graph.FunctionGraph.from_modules( @@ -477,12 +452,8 @@ class Driver: self.graph_executor = _graph_executor self.config = config except Exception as e: - error = telemetry.sanitize_error(*sys.exc_info()) logger.error(SLACK_ERROR_MESSAGE) raise e - finally: - # TODO -- update this to use the lifecycle methods - self.capture_constructor_telemetry(error, modules, config, adapter) def _repr_mimebundle_(self, include=None, exclude=None, **kwargs): """Attribute read by notebook renderers @@ -494,47 +465,6 @@ class Driver: dot = self.display_all_functions() return dot._repr_mimebundle_(include=include, exclude=exclude, **kwargs) - def capture_constructor_telemetry( - self, - error: str | None, - modules: tuple[ModuleType], - config: dict[str, Any], - adapter: lifecycle_base.LifecycleAdapterSet, - ): - """Captures constructor telemetry. Notes: - (1) we want to do this in a way that does not break. - (2) we need to account for all possible states, e.g. someone passing in None, or assuming that - the entire constructor code ran without issue, e.g. `adapter` was assigned to `self`. - - :param error: the sanitized error string to send. - :param modules: the list of modules, could be None. - :param config: the config dict passed, could be None. - :param adapter: the adapter passed in, might not be attached to `self` yet. - """ - if telemetry.is_telemetry_enabled(): - try: - # adapter_name = telemetry.get_adapter_name(adapter) - lifecycle_adapter_names = telemetry.get_all_adapters_names(adapter) - result_builder = telemetry.get_result_builder_name(adapter) - # being defensive here with ensuring values exist - payload = telemetry.create_start_event_json( - len(self.graph.nodes) if hasattr(self, "graph") else 0, - len(modules) if modules else 0, - len(config) if config else 0, - dict(self.graph.decorator_counter) if hasattr(self, "graph") else {}, - "deprecated -- see lifecycle_adapters_used", - lifecycle_adapter_names, - result_builder, - self.driver_run_id, - error, - self.graph_executor.__class__.__name__, - ) - telemetry.send_event_json(payload) - except Exception as e: - # we don't want this to fail at all! - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Error caught in processing telemetry: {e}") - @staticmethod def validate_inputs( fn_graph: graph.FunctionGraph, @@ -617,11 +547,9 @@ class Driver: "display_graph=True is deprecated. It will be removed in the 2.0.0 release. " "Please use visualize_execution()." ) - start_time = time.time() run_id = str(uuid.uuid4()) run_successful = True error_execution = None - error_telemetry = None outputs = None _final_vars = self._create_final_vars(final_vars) if self.adapter.does_hook("pre_graph_execute", is_async=False): @@ -647,7 +575,6 @@ class Driver: run_successful = False logger.error(SLACK_ERROR_MESSAGE) error_execution = e - error_telemetry = telemetry.sanitize_error(*sys.exc_info()) raise e finally: if self.adapter.does_hook("post_graph_execute", is_async=False): @@ -659,10 +586,6 @@ class Driver: error=error_execution, results=outputs, ) - duration = time.time() - start_time - self.capture_execute_telemetry( - error_telemetry, _final_vars, inputs, overrides, run_successful, duration - ) return outputs def _create_final_vars(self, final_vars: list[str | Callable | Variable]) -> list[str]: @@ -675,45 +598,6 @@ class Driver: _final_vars = common.convert_output_values(final_vars, _module_set) return _final_vars - def capture_execute_telemetry( - self, - error: str | None, - final_vars: list[str], - inputs: dict[str, Any], - overrides: dict[str, Any], - run_successful: bool, - duration: float, - ): - """Captures telemetry after execute has run. - - Notes: - (1) we want to be quite defensive in not breaking anyone's code with things we do here. - (2) thus we want to double-check that values exist before doing something with them. - - :param error: the sanitized error string to capture, if any. - :param final_vars: the list of final variables to get. - :param inputs: the inputs to the execute function. - :param overrides: any overrides to the execute function. - :param run_successful: whether this run was successful. - :param duration: time it took to run execute. - """ - if telemetry.is_telemetry_enabled(): - try: - payload = telemetry.create_end_event_json( - run_successful, - duration, - len(final_vars) if final_vars else 0, - len(overrides) if isinstance(overrides, dict) else 0, - len(inputs) if isinstance(overrides, dict) else 0, - self.driver_run_id, - error, - ) - telemetry.send_event_json(payload) - except Exception as e: - # we don't want this to fail at all! - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Error caught in processing telemetry: \n{e}") - @deprecation.deprecated( warn_starting=(1, 0, 0), fail_starting=(2, 0, 0), @@ -1646,10 +1530,8 @@ class Driver: """ if additional_vars is None: additional_vars = [] - start_time = time.time() run_successful = True error_execution = None - error_telemetry = None run_id = str(uuid.uuid4()) outputs = (None, None) final_vars = self._create_final_vars(additional_vars) @@ -1707,7 +1589,6 @@ class Driver: except Exception as e: run_successful = False logger.error(SLACK_ERROR_MESSAGE) - error_telemetry = telemetry.sanitize_error(*sys.exc_info()) error_execution = e raise e finally: @@ -1720,15 +1601,6 @@ class Driver: error=error_execution, results=outputs[1], ) - duration = time.time() - start_time - self.capture_execute_telemetry( - error_telemetry, - final_vars + materializer_vars, - inputs, - overrides, - run_successful, - duration, - ) return outputs @capture_function_usage diff --git a/hamilton/plugins/h_experiments/__main__.py b/hamilton/plugins/h_experiments/__main__.py index fa848cff..bb54be4d 100644 --- a/hamilton/plugins/h_experiments/__main__.py +++ b/hamilton/plugins/h_experiments/__main__.py @@ -19,7 +19,6 @@ import argparse import os from pathlib import Path -from hamilton import telemetry from hamilton.plugins.h_experiments.cache import JsonCache @@ -32,8 +31,6 @@ def main(): raise ModuleNotFoundError( "Some dependencies are missing. Make sure to `pip install sf-hamilton[experiments]`" ) from e - if telemetry.is_telemetry_enabled(): - telemetry.create_and_send_expt_server_event("startup") parser = argparse.ArgumentParser(prog="hamilton-experiments") parser.description = "Hamilton Experiment Server launcher" @@ -59,8 +56,6 @@ def main(): os.environ["HAMILTON_EXPERIMENTS_PATH"] = str(Path(args.path).resolve()) uvicorn.run("hamilton.plugins.h_experiments.server:app", host=args.host, port=args.port) - if telemetry.is_telemetry_enabled(): - telemetry.create_and_send_expt_server_event("shutdown") if __name__ == "__main__": diff --git a/hamilton/registry.py b/hamilton/registry.py index 8e5b383c..cda2fa80 100644 --- a/hamilton/registry.py +++ b/hamilton/registry.py @@ -53,8 +53,6 @@ ExtensionName = Literal[ ] HAMILTON_EXTENSIONS: tuple[ExtensionName, ...] = get_args(ExtensionName) HAMILTON_AUTOLOAD_ENV = "HAMILTON_AUTOLOAD_EXTENSIONS" -# NOTE the variable DEFAULT_CONFIG_LOCAITON is redundant with `hamilton.telemetry` -# but this `registry` module must avoid circular imports DEFAULT_CONFIG_LOCATION = pathlib.Path("~/.hamilton.conf").expanduser() # This is a dictionary of extension name -> dict with dataframe and column types. diff --git a/hamilton/telemetry.py b/hamilton/telemetry.py index f6623d5e..4e2c333a 100644 --- a/hamilton/telemetry.py +++ b/hamilton/telemetry.py @@ -16,538 +16,19 @@ # under the License. """ -This module contains code that relates to sending Hamilton usage telemetry. +Telemetry has been removed from Hamilton. -To disable sending telemetry there are three ways: - -1. Set it to false programmatically in your driver: - >>> from hamilton import telemetry - >>> telemetry.disable_telemetry() -2. Set it to `false` in ~/.hamilton.conf under `DEFAULT` - [DEFAULT] - telemetry_enabled = True -3. Set HAMILTON_TELEMETRY_ENABLED=false as an environment variable: - HAMILTON_TELEMETRY_ENABLED=false python run.py - or: - export HAMILTON_TELEMETRY_ENABLED=false +This module is kept as a no-op stub for backwards compatibility, +so that any user code calling ``telemetry.disable_telemetry()`` +will not break. """ -import configparser -import json -import logging -import os -import platform -import threading -import traceback -import uuid -from urllib import request - -try: - from . import base - from .lifecycle import base as lifecycle_base - from .version import VERSION -except ImportError: - from version import VERSION - - from hamilton import base - from hamilton.lifecycle import base as lifecycle_base - -logger = logging.getLogger(__name__) - -STR_VERSION = ".".join([str(i) for i in VERSION]) -HOST = "https://app.posthog.com" -TRACK_URL = f"{HOST}/capture/" # https://posthog.com/docs/api/post-only-endpoints -API_KEY = "phc_mZg8bkn3yvMxqvZKRlMlxjekFU5DFDdcdAsijJ2EH5e" -START_EVENT = "os_hamilton_run_start" -END_EVENT = "os_hamilton_run_end" -DRIVER_FUNCTION = "os_hamilton_driver_function_call" -DATAFLOW_FUNCTION = "os_hamilton_dataflow_function_call" -DATAFLOW_DOWNLOAD = "os_hamilton_dataflow_download_call" -DATAFLOW_IMPORT = "os_hamilton_dataflow_import_call" -CLI_COMMAND = "os_hamilton_cli_command" -EXPERIMENT_SERVER = "os_hamilton_experiment_server" -TIMEOUT = 2 -MAX_COUNT_SESSION = 10 # max number of events collected per python process -DEFAULT_CONFIG_URI = os.environ.get("HAMILTON_CONFIG_URI", "~/.hamilton.conf") -DEFAULT_CONFIG_LOCATION = os.path.expanduser(DEFAULT_CONFIG_URI) - - -def _load_config(config_location: str) -> configparser.ConfigParser: - """Pulls config. Gets/sets default anonymous ID. - - Creates the anonymous ID if it does not exist, writes it back if so. - :param config_location: location of the config file. - """ - config = configparser.ConfigParser() - try: - with open(config_location) as f: - config.read_file(f) - except Exception: - config["DEFAULT"] = {} - else: - if "DEFAULT" not in config: - config["DEFAULT"] = {} - - if "anonymous_id" not in config["DEFAULT"]: - config["DEFAULT"]["anonymous_id"] = str(uuid.uuid4()) - try: - with open(config_location, "w") as f: - config.write(f) - except Exception: - pass - return config - - -def _check_config_and_environ_for_telemetry_flag( - telemetry_default: bool, config_obj: configparser.ConfigParser -): - """Checks the config and environment variables for the telemetry value. - - Note: the environment variable has greater precedence than the config value. - """ - telemetry_enabled = telemetry_default - if "telemetry_enabled" in config_obj["DEFAULT"]: - try: - telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") - except ValueError as e: - logger.debug( - f"Unable to parse value for `telemetry_enabled` from config. Encountered {e}" - ) - if os.environ.get("HAMILTON_TELEMETRY_ENABLED") is not None: - env_value = os.environ.get("HAMILTON_TELEMETRY_ENABLED") - # set the value - config_obj["DEFAULT"]["telemetry_enabled"] = env_value - try: - telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") - except ValueError as e: - logger.debug( - "Unable to parse value for `HAMILTON_TELEMETRY_ENABLED` from environment. " - f"Encountered {e}" - ) - return telemetry_enabled - - -config = _load_config(DEFAULT_CONFIG_LOCATION) -g_telemetry_enabled = _check_config_and_environ_for_telemetry_flag(True, config) -g_anonymous_id = config["DEFAULT"]["anonymous_id"] -call_counter = 0 - def disable_telemetry(): - """Disables telemetry tracking.""" - global g_telemetry_enabled - g_telemetry_enabled = False + """No-op. Telemetry has been removed.""" + pass def is_telemetry_enabled() -> bool: - """Returns whether telemetry tracking is enabled or not. - - Increments a counter to stop sending telemetry after 1000 invocations. - """ - if g_telemetry_enabled: - global call_counter - if call_counter == 0: - # Log only the first time someone calls this function; don't want to spam them. - logger.info( - "Note: Hamilton collects completely anonymous data about usage. " - "This will help us improve Hamilton over time. " - "See https://hamilton.apache.org/get-started/license/#usage-analytics-data-privacy" - " for details." - ) - call_counter += 1 - if call_counter > MAX_COUNT_SESSION: - # we have hit our limit -- disable telemetry. - return False - return True - else: - return False - - -# base properties to instantiate on module load. -BASE_PROPERTIES = { - "os_type": os.name, - "os_version": platform.platform(), - "python_version": f"{platform.python_version()}/{platform.python_implementation()}", - "distinct_id": g_anonymous_id, - "hamilton_version": list(VERSION), - "telemetry_version": "0.0.1", - "$process_person_profile": False, -} - - -def create_start_event_json( - number_of_nodes: int, - number_of_modules: int, - number_of_config_items: int, - decorators_used: dict[str, int], - graph_adapter_used: str, - lifecycle_adapters_used: list[str], - result_builder_used: str, - driver_run_id: uuid.UUID, - error: str | None, - graph_executor_class: str, -): - """Creates the start event JSON. - - The format we want to follow is the one for [post-hog](# https://posthog.com/docs/api/post-only-endpoints). - - :param number_of_nodes: the number of nodes in the graph - :param number_of_modules: the number of modules parsed - :param number_of_config_items: the number of items in configuration - :param decorators_used: a dict of decorator -> count - :param graph_adapter_used: the name of the graph adapter used - :param result_builder_used: the name of the result builder used - :param driver_run_id: the ID of the run - :param error: an error string if any - :param driver_class: the name of the driver class used to call this - :return: dictionary to send. - """ - event = { - "api_key": API_KEY, - "event": START_EVENT, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - payload = { - "number_of_nodes": number_of_nodes, # approximately how many nodes were in the DAG? - "number_of_modules": number_of_modules, # approximately how many modules were used? - "number_of_config_items": number_of_config_items, # how many configs are people passing in? - "decorators_used": decorators_used, # what decorators were used, and how many times? - "graph_adapter_used": graph_adapter_used, # what was the graph adapter used? - "result_builder_used": result_builder_used, # what was the result builder used? - "driver_run_id": str(driver_run_id), # was this a new driver object? or? - "error": error, # if there was an error, what was the trace? (limited to Hamilton code) - "graph_executor_class": graph_executor_class, # what driver class was used to call this - "lifecycle_adapters_used": lifecycle_adapters_used, # what lifecycle adapters were used? - } - event["properties"].update(payload) - return event - - -def create_end_event_json( - is_success: bool, - runtime_seconds: float, - number_of_outputs: int, - number_of_overrides: int, - number_of_inputs: int, - driver_run_id: uuid.UUID, - error: str | None, -): - """Creates the end event JSON. - - The format we want to follow is the one for [post-hog](# https://posthog.com/docs/api/post-only-endpoints). - - :param is_success: whether execute was successful - :param runtime_seconds: how long execution took - :param number_of_outputs: the number of outputs requested - :param number_of_overrides: the number of overrides provided - :param number_of_inputs: the number of inputs provided - :param driver_run_id: the run ID of this driver run - :param error: the error string if any - :return: dictionary to send. - """ - event = { - "api_key": API_KEY, - "event": END_EVENT, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - payload = { - "is_success": is_success, # was this run successful? - "runtime_seconds": runtime_seconds, # how long did it take - "number_of_outputs": number_of_outputs, # how many outputs were requested - "number_of_overrides": number_of_overrides, # how many outputs were requested - "number_of_inputs": number_of_inputs, # how many user provided things are there - "driver_run_id": str(driver_run_id), # let's tie this to a particular driver instantiation - "error": error, # if there was an error, what was the trace? (limited to Hamilton code) - } - event["properties"].update(payload) - return event - - -def create_driver_function_invocation_event(function_name: str) -> dict: - """Function to create payload for tracking function name invocation. - - :param function_name: the name of the driver function - :return: dict representing the JSON to send. - """ - event = { - "api_key": API_KEY, - "event": DRIVER_FUNCTION, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - payload = { - "function_name": function_name, # what was the name of the driver function? - } - event["properties"].update(payload) - return event - - -def create_dataflow_function_invocation_event_json( - canonical_function_name: str, -) -> dict: - """Function that creates JSON to track dataflow module function calls. - - :param canonical_function_name: the name of the function in the dataflow module. - :return: the dictionary representing the event. - """ - event = { - "api_key": API_KEY, - "event": DATAFLOW_FUNCTION, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - payload = { - "function_name": canonical_function_name, # what was the name of the driver function? - } - event["properties"].update(payload) - return event - - -def create_dataflow_download_event_json( - category: str, - user: str, - dataflow_name: str, - version: str, -) -> dict: - """Function that creates JSON to track dataflow download calls. - - :param category: the category of the dataflow. DAGWORKS or USER. - :param user: the user's github handle, if applicable. - :param dataflow_name: the name of the dataflow. - :param version: the git commit version of the dataflow, OR the sf-hamilton-contrib package version, or __unknown__. - :return: dictionary representing the event. - """ - event = { - "api_key": API_KEY, - "event": DATAFLOW_DOWNLOAD, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - _category = "DAGWORKS" if category == "DAGWORKS" else "USER" - - payload = { - "category": _category, - "dataflow_name": dataflow_name, - "commit_version": version, - } - if _category == "USER": - payload["github_user"] = user - event["properties"].update(payload) - return event - - -def create_and_send_contrib_use(module_name: str, version: str): - """Function to send contrib module use -- this is used from the contrib package. - - :param module_name: the name of the module, or file location of the code. - :param version: the package version. - """ - if module_name == "__main__" or module_name == "__init__": - return - try: - # we need to handle the case that sf-hamilton-contrib is not installed. - # if that's the case the file location will be the module name. - if ".py" in module_name: - contrib_index = module_name.rfind("/contrib/") - if contrib_index == -1: - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - "Encountered error while constructing create_and_send_contrib_use." - ) - return - parts = module_name[contrib_index:].split(os.sep)[1:-1] - dataflows_index = module_name.find("/dataflows/") - # get the commit sha out as the version - version = module_name[ - dataflows_index + len("/dataflows/") : module_name.find("/contrib/") - ] - else: - parts = module_name.split(".") - version = "sf-contrib-" + ".".join(map(str, version)) - if "dagworks" in parts: - category = "DAGWORKS" - user = None - else: - category = "USER" - user = parts[-2] - dataflow = parts[-1] - event_json = create_dataflow_download_event_json(category, user, dataflow, version) - event_json["event"] = DATAFLOW_IMPORT # overwrite the event name. - except Exception as e: - # capture any exception! - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - f"Encountered error while constructing create_and_send_contrib_use json:\n{e}" - ) - else: - send_event_json(event_json) - - -def _send_event_json(event_json: dict): - """Internal function to send the event JSON to posthog. - - :param event_json: the dictionary of data to JSON serialize and send - """ - headers = { - "Content-Type": "application/json", - "Authorization": "TODO", - "User-Agent": f"hamilton/{STR_VERSION}", - } - try: - data = json.dumps(event_json).encode() - req = request.Request(TRACK_URL, data=data, headers=headers) - with request.urlopen(req, timeout=TIMEOUT) as f: - res = f.read() - if f.code != 200: - raise RuntimeError(res) - except Exception as e: - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Failed to send telemetry data: {e}") - else: - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Succeed in sending telemetry consisting of [{data}].") - - -def send_event_json(event_json: dict): - """Sends the event json in its own thread. - - :param event_json: the data to send - """ - if not g_telemetry_enabled: - raise RuntimeError("Won't send; tracking is disabled!") - try: - th = threading.Thread(target=_send_event_json, args=(event_json,)) - th.start() - except Exception as e: - # capture any exception! - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Encountered error while sending event JSON via it's own thread:\n{e}") - - -def sanitize_error(exc_type, exc_value, exc_traceback) -> str: - """Sanitizes an incoming error and pulls out a string to tell us where it came from. - - :param exc_type: pulled from `sys.exc_info()` - :param exc_value: pulled from `sys.exc_info()` - :param exc_traceback: pulled from `sys.exc_info()` - :return: string to use for telemetry - """ - try: - te = traceback.TracebackException(exc_type, exc_value, exc_traceback, limit=-5) - sanitized_string = "" - for stack_item in te.stack: - stack_file_path = stack_item.filename.split(os.sep) - # take last 4 places only -- that's how deep hamilton is. - stack_file_path = stack_file_path[-4:] - try: - # find first occurrence - index = stack_file_path.index("hamilton") - except ValueError: - sanitized_string += "...<USER_CODE>...\n" - continue - file_name = "..." + "/".join(stack_file_path[index:]) - sanitized_string += f"{file_name}, line {stack_item.lineno}, in {stack_item.name}\n" - return sanitized_string - except Exception as e: - # we don't want this to fail - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Encountered exception sanitizing error. Got:\n{e}") - return "FAILED_TO_SANITIZE_ERROR" - - -def get_all_adapters_names(adapter: lifecycle_base.LifecycleAdapterSet) -> list[str]: - """Gives a list of all adapter names in the LifecycleAdapterSet. - Simply a loop over the adapters it contains. - - :param adapter: LifecycleAdapterSet object. - :return: list of adapter names. - """ - adapters = adapter.adapters - out = [] - for adapter in adapters: - out.append(get_adapter_name(adapter)) - return out - - -def get_adapter_name(adapter: lifecycle_base.LifecycleAdapter) -> str: - """Get the class name of the `hamilton` adapter used. - - If we detect it's not a Hamilton one, we do not track it. - - :param adapter: base.HamiltonGraphAdapter object. - :return: string module + class name of the adapter. - """ - # Check whether it's a hamilton based adapter - if adapter.__module__.startswith("hamilton."): - adapter_name = f"{adapter.__module__}.{adapter.__class__.__name__}" - else: - adapter_name = "custom_adapter" - return adapter_name - - -def get_result_builder_name(adapter: lifecycle_base.LifecycleAdapterSet) -> str: - """Get the class name of the `hamilton` result builder used. - - If we detect it's not a base one, we do not track it. - - :param adapter: base.HamiltonGraphAdapter object. - :return: string module + class name of the result builder. - """ - # if there is an attribute, get that out to use as the class to inspect - result_builders = [item for item in adapter.adapters if hasattr(item, "build_result")] - if len(result_builders) == 0: - result_builder_name = "no_result_builder" - return result_builder_name - class_to_inspect = result_builders[0] - # all_adapters = adapter.adapters - if hasattr(class_to_inspect, "result_builder"): - class_to_inspect = class_to_inspect.result_builder - # Go by class itself - if isinstance(class_to_inspect, base.StrictIndexTypePandasDataFrameResult): - result_builder_name = "hamilton.base.StrictIndexTypePandasDataFrameResult" - elif isinstance(class_to_inspect, base.PandasDataFrameResult): - result_builder_name = "hamilton.base.PandasDataFrameResult" - elif isinstance(class_to_inspect, base.DictResult): - result_builder_name = "hamilton.base.DictResult" - elif isinstance(class_to_inspect, base.NumpyMatrixResult): - result_builder_name = "hamilton.base.NumpyMatrixResult" - else: - result_builder_name = "custom_builder" - return result_builder_name - - -def create_and_send_cli_event(command: str): - """Function that creates JSON and sends to track CLI usage. - - :param command: the CLI command run. - """ - event = { - "api_key": API_KEY, - "event": CLI_COMMAND, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - - payload = { - "command": command, - } - event["properties"].update(payload) - send_event_json(event) - - -def create_and_send_expt_server_event(step: str): - """Function that creates JSON and sends to track experiment server usage.""" - event = { - "api_key": API_KEY, - "event": EXPERIMENT_SERVER, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - if step in ["startup", "shutdown"]: - payload = { - "step": step, - } - event["properties"].update(payload) - send_event_json(event) + """Always returns False. Telemetry has been removed.""" + return False diff --git a/plugin_tests/h_dask/conftest.py b/plugin_tests/h_dask/conftest.py index 616fb894..182965bd 100644 --- a/plugin_tests/h_dask/conftest.py +++ b/plugin_tests/h_dask/conftest.py @@ -15,11 +15,6 @@ # specific language governing permissions and limitations # under the License. -from hamilton import telemetry - -# disable telemetry for all tests! -telemetry.disable_telemetry() - # dask_expr got made default, except for python 3.9 and below import sys diff --git a/plugin_tests/h_narwhals/conftest.py b/plugin_tests/h_narwhals/conftest.py deleted file mode 100644 index b131ad9a..00000000 --- a/plugin_tests/h_narwhals/conftest.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from hamilton import telemetry - -# disable telemetry for all tests! -telemetry.disable_telemetry() diff --git a/plugin_tests/h_pandas/conftest.py b/plugin_tests/h_pandas/conftest.py deleted file mode 100644 index b131ad9a..00000000 --- a/plugin_tests/h_pandas/conftest.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from hamilton import telemetry - -# disable telemetry for all tests! -telemetry.disable_telemetry() diff --git a/plugin_tests/h_polars/conftest.py b/plugin_tests/h_polars/conftest.py deleted file mode 100644 index b131ad9a..00000000 --- a/plugin_tests/h_polars/conftest.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from hamilton import telemetry - -# disable telemetry for all tests! -telemetry.disable_telemetry() diff --git a/plugin_tests/h_ray/conftest.py b/plugin_tests/h_ray/conftest.py index dd0219c0..2a0690fd 100644 --- a/plugin_tests/h_ray/conftest.py +++ b/plugin_tests/h_ray/conftest.py @@ -17,11 +17,6 @@ import sys -from hamilton import telemetry - -# disable telemetry for all tests! -telemetry.disable_telemetry() - # Skip tests that require packages not yet available on Python 3.14 collect_ignore = [] if sys.version_info >= (3, 14): diff --git a/plugin_tests/h_spark/conftest.py b/plugin_tests/h_spark/conftest.py deleted file mode 100644 index b131ad9a..00000000 --- a/plugin_tests/h_spark/conftest.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from hamilton import telemetry - -# disable telemetry for all tests! -telemetry.disable_telemetry() diff --git a/plugin_tests/h_vaex/conftest.py b/plugin_tests/h_vaex/conftest.py deleted file mode 100644 index b131ad9a..00000000 --- a/plugin_tests/h_vaex/conftest.py +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from hamilton import telemetry - -# disable telemetry for all tests! -telemetry.disable_telemetry() diff --git a/tests/conftest.py b/tests/conftest.py index 0adbe615..7ed250b3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,11 +17,6 @@ import sys -from hamilton import telemetry - -# disable telemetry for all tests! -telemetry.disable_telemetry() - # Skip tests that require packages not yet available on Python 3.14 collect_ignore = [] if sys.version_info >= (3, 14): diff --git a/tests/test_async_driver.py b/tests/test_async_driver.py index a0c32879..788ed525 100644 --- a/tests/test_async_driver.py +++ b/tests/test_async_driver.py @@ -16,7 +16,6 @@ # under the License. import asyncio -from unittest import mock import pandas as pd import pytest @@ -97,41 +96,6 @@ async def test_driver_end_to_end(): } [email protected] [email protected]("hamilton.telemetry.send_event_json") [email protected]("hamilton.telemetry.g_telemetry_enabled", True) -async def test_driver_end_to_end_telemetry(send_event_json): - dr = async_driver.AsyncDriver({}, simple_async_module, result_builder=base.DictResult()) - with mock.patch("hamilton.telemetry.g_telemetry_enabled", False): - # don't count this telemetry tracking invocation - all_vars = [var.name for var in dr.list_available_variables() if var.name != "return_df"] - result = await dr.execute(final_vars=all_vars, inputs={"external_input": 1}) - result["a"] = result["a"].to_dict() - result["b"] = result["b"].to_dict() - assert result == { - "a": pd.Series([1, 2, 3]).to_dict(), - "another_async_func": 8, - "async_func_with_param": 4, - "b": pd.Series([4, 5, 6]).to_dict(), - "external_input": 1, - "non_async_func_with_decorator": {"result_1": 9, "result_2": 5}, - "result_1": 9, - "result_2": 5, - "result_3": 1, - "result_4": 2, - "return_dict": {"result_3": 1, "result_4": 2}, - "simple_async_func": 2, - "simple_non_async_func": 7, - } - # to ensure the last telemetry invocation finishes executing - # get all tasks -- and the current task, and await all others. - tasks = asyncio.all_tasks() - current_task = asyncio.current_task() - await asyncio.gather(*[t for t in tasks if t != current_task]) - assert send_event_json.called - assert len(send_event_json.call_args_list) == 2 - - @pytest.mark.asyncio async def test_async_driver_end_to_end_async_lifecycle_methods(): tracked_calls = [] diff --git a/tests/test_hamilton_driver.py b/tests/test_hamilton_driver.py index 95b645f1..ffea258f 100644 --- a/tests/test_hamilton_driver.py +++ b/tests/test_hamilton_driver.py @@ -15,12 +15,10 @@ # specific language governing permissions and limitations # under the License. -from unittest import mock -import pandas as pd import pytest -from hamilton import base, node, telemetry +from hamilton import base, node from hamilton.caching.adapter import HamiltonCacheAdapter from hamilton.driver import ( Builder, @@ -42,8 +40,6 @@ import tests.resources.test_driver_serde_worker import tests.resources.test_for_materialization import tests.resources.very_simple_dag -telemetry.MAX_COUNT_SESSION = 100 - """This file tests driver capabilities. Anything involving execution is tested for multiple executors/driver configuration. Anything not involving execution is tested for just the single driver configuration. @@ -220,188 +216,6 @@ def test_driver_variables_exposes_original_function(): assert originating_functions["a"] == (tests.resources.very_simple_dag.b,) # a is an input [email protected]("hamilton.telemetry.send_event_json") -def test_capture_constructor_telemetry_disabled(send_event_json): - """Tests that we don't do anything if telemetry is disabled.""" - send_event_json.return_value = "" - Driver({}, tests.resources.tagging) # this will exercise things underneath. - assert send_event_json.called is False - - [email protected]("hamilton.telemetry.get_adapter_name") [email protected]("hamilton.telemetry.send_event_json") [email protected]("hamilton.telemetry.g_telemetry_enabled", True) -def test_capture_constructor_telemetry_error(send_event_json, get_adapter_name): - """Tests that we don't error if an exception occurs""" - get_adapter_name.side_effect = ValueError("TELEMETRY ERROR") - Driver({}, tests.resources.tagging) # this will exercise things underneath. - assert send_event_json.called is False - - [email protected]("hamilton.telemetry.send_event_json") [email protected]("hamilton.telemetry.g_telemetry_enabled", True) -def test_capture_constructor_telemetry_none_values(send_event_json): - """Tests that we don't error if there are none values""" - Driver({}, None, None) # this will exercise things underneath. - assert send_event_json.called is True - - [email protected]("hamilton.telemetry.send_event_json") [email protected]("hamilton.telemetry.g_telemetry_enabled", True) -def test_capture_constructor_telemetry(send_event_json): - """Tests that we send an event if we could. Validates deterministic parts.""" - Driver({}, tests.resources.very_simple_dag) - # assert send_event_json.called is True - assert len(send_event_json.call_args_list) == 1 # only called once - # check contents of what it was called with: - send_event_json_call = send_event_json.call_args_list[0] - actual_event_dict = send_event_json_call[0][0] - assert actual_event_dict["api_key"] == "phc_mZg8bkn3yvMxqvZKRlMlxjekFU5DFDdcdAsijJ2EH5e" - assert actual_event_dict["event"] == "os_hamilton_run_start" - # validate schema - expected_properties = { - "$process_person_profile", - "os_type", - "os_version", - "python_version", - "distinct_id", - "hamilton_version", - "telemetry_version", - "number_of_nodes", - "number_of_modules", - "number_of_config_items", - "decorators_used", - "graph_adapter_used", - "result_builder_used", - "driver_run_id", - "error", - "graph_executor_class", - "lifecycle_adapters_used", - } - actual_properties = actual_event_dict["properties"] - assert set(actual_properties.keys()) == expected_properties - # validate static parts - assert actual_properties["error"] is None - assert actual_properties["number_of_nodes"] == 2 # b, and input a - assert actual_properties["number_of_modules"] == 1 - assert actual_properties["number_of_config_items"] == 0 - assert actual_properties["number_of_config_items"] == 0 - assert actual_properties["graph_adapter_used"] == "deprecated -- see lifecycle_adapters_used" - assert actual_properties["result_builder_used"] == "hamilton.base.PandasDataFrameResult" - assert actual_properties["lifecycle_adapters_used"] == ["hamilton.base.PandasDataFrameResult"] - - [email protected]("hamilton.telemetry.send_event_json") [email protected]( - "driver_factory", - [ - (lambda: Driver({}, tests.resources.very_simple_dag)), - ( - lambda: ( - Builder() - .enable_dynamic_execution(allow_experimental_mode=True) - .with_modules(tests.resources.very_simple_dag) - .with_adapter(base.SimplePythonGraphAdapter(base.PandasDataFrameResult())) - .with_remote_executor(executors.SynchronousLocalTaskExecutor()) - .build() - ) - ), - ], -) -def test_capture_execute_telemetry_disabled(send_event_json, driver_factory): - """Tests that we don't do anything if telemetry is disabled.""" - dr = driver_factory() - results = dr.execute(["b"], inputs={"a": 1}) - expected = pd.DataFrame([{"b": 1}]) - pd.testing.assert_frame_equal(results, expected) - assert send_event_json.called is False - - [email protected]("hamilton.telemetry.send_event_json") [email protected]("hamilton.telemetry.g_telemetry_enabled", True) [email protected]( - "driver_factory", - [ - (lambda: Driver({}, tests.resources.very_simple_dag)), - ( - lambda: ( - Builder() - .enable_dynamic_execution(allow_experimental_mode=True) - .with_modules(tests.resources.very_simple_dag) - .with_adapter(base.SimplePythonGraphAdapter(base.PandasDataFrameResult())) - .with_remote_executor(executors.SynchronousLocalTaskExecutor()) - .build() - ) - ), - ], -) -def test_capture_execute_telemetry_error(send_event_json, driver_factory): - """Tests that we don't error if an exception occurs""" - send_event_json.side_effect = [None, ValueError("FAKE ERROR"), None] - dr = driver_factory() - results = dr.execute(["b"], inputs={"a": 1}) - expected = pd.DataFrame([{"b": 1}]) - pd.testing.assert_frame_equal(results, expected) - assert send_event_json.called is True - assert len(send_event_json.call_args_list) == 2 - - [email protected]("hamilton.telemetry.send_event_json") [email protected]("hamilton.telemetry.g_telemetry_enabled", True) [email protected]( - "driver_factory", - [ - (lambda: Driver({}, tests.resources.very_simple_dag)), - ( - lambda: ( - Builder() - .enable_dynamic_execution(allow_experimental_mode=True) - .with_modules(tests.resources.very_simple_dag) - .with_adapter(base.SimplePythonGraphAdapter(base.PandasDataFrameResult())) - .with_remote_executor(executors.SynchronousLocalTaskExecutor()) - .build() - ) - ), - ], -) -def test_capture_execute_telemetry(send_event_json, driver_factory): - """Happy path with values passed.""" - dr = driver_factory() - results = dr.execute(["b"], inputs={"a": 1}, overrides={"b": 2}) - expected = pd.DataFrame([{"b": 2}]) - pd.testing.assert_frame_equal(results, expected) - assert send_event_json.called is True - assert len(send_event_json.call_args_list) == 2 - - [email protected]("hamilton.telemetry.send_event_json") [email protected]("hamilton.telemetry.g_telemetry_enabled", True) [email protected]( - "driver_factory", - [ - (lambda: Driver({"a": 1}, tests.resources.very_simple_dag)), - ( - lambda: ( - Builder() - .enable_dynamic_execution(allow_experimental_mode=True) - .with_modules(tests.resources.very_simple_dag) - .with_adapter(base.SimplePythonGraphAdapter(base.PandasDataFrameResult())) - .with_remote_executor(executors.SynchronousLocalTaskExecutor()) - .with_config({"a": 1}) - .build() - ) - ), - ], -) -def test_capture_execute_telemetry_none_values(send_event_json, driver_factory): - """Happy path with none values.""" - dr = driver_factory() - results = dr.execute(["b"]) - expected = pd.DataFrame([{"b": 1}]) - pd.testing.assert_frame_equal(results, expected) - assert len(send_event_json.call_args_list) == 2 - - @pytest.mark.parametrize( "driver_factory", [ diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py deleted file mode 100644 index 52ba2878..00000000 --- a/tests/test_telemetry.py +++ /dev/null @@ -1,237 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import configparser -import os -import sys -import uuid -from typing import Any -from unittest import mock - -import pytest - -from hamilton import async_driver, base, node, telemetry -from hamilton.lifecycle import base as lifecycle_base - -telemetry.MAX_COUNT_SESSION = 100 - - [email protected] -def blank_conf_file(tmp_path_factory): - """Fixture to load config file without an ID""" - file_location = tmp_path_factory.mktemp("home") / "hamilton.conf" - with open(file_location, "w") as conf_file: - conf = configparser.ConfigParser() - conf.write(conf_file) - return file_location - - [email protected] -def existing_conf_file(tmp_path_factory): - """Fixture to load config file with an ID""" - file_location = tmp_path_factory.mktemp("home") / "hamilton.conf" - with open(file_location, "w") as conf_file: - conf = configparser.ConfigParser() - conf["DEFAULT"]["anonymous_id"] = "testing123-id" - conf.write(conf_file) - return file_location - - -def test__load_config_exists_with_id(existing_conf_file): - """Tests loading a config that has an ID.""" - config = telemetry._load_config(existing_conf_file) - a_id = config["DEFAULT"]["anonymous_id"] - assert a_id == "testing123-id" - - -def test__load_config_exists_without_id(blank_conf_file): - """Tests load from existing file without an ID.""" - config = telemetry._load_config(blank_conf_file) - a_id = config["DEFAULT"]["anonymous_id"] - assert str(uuid.UUID(a_id, version=4)) == a_id - # check it was written back - with open(blank_conf_file, "r") as conf_file: - actual_config = configparser.ConfigParser() - actual_config.read_file(conf_file) - assert a_id == actual_config["DEFAULT"]["anonymous_id"] - - -def test__load_config_new(tmp_path_factory): - """Tests no config file existing and one being created.""" - file_location = tmp_path_factory.mktemp("home") / "hamilton123.conf" - config = telemetry._load_config(file_location) - a_id = config["DEFAULT"]["anonymous_id"] - assert str(uuid.UUID(a_id, version=4)) == a_id - # check it was written back - with open(file_location, "r") as conf_file: - actual_config = configparser.ConfigParser() - actual_config.read_file(conf_file) - assert a_id == actual_config["DEFAULT"]["anonymous_id"] - - [email protected](os.environ, {"HAMILTON_TELEMETRY_ENABLED": ""}) -def test__check_config_and_environ_for_telemetry_flag_not_present(): - """Tests not present in both.""" - conf = configparser.ConfigParser() - actual = telemetry._check_config_and_environ_for_telemetry_flag(False, conf) - assert actual is False - - [email protected](os.environ, {"HAMILTON_TELEMETRY_ENABLED": ""}) -def test__check_config_and_environ_for_telemetry_flag_in_config(): - """tests getting from config.""" - conf = configparser.ConfigParser() - conf["DEFAULT"]["telemetry_enabled"] = "tRuE" - actual = telemetry._check_config_and_environ_for_telemetry_flag(False, conf) - assert actual is True - - [email protected](os.environ, {"HAMILTON_TELEMETRY_ENABLED": "TrUe"}) -def test__check_config_and_environ_for_telemetry_flag_in_env(): - """tests getting from env.""" - conf = configparser.ConfigParser() - actual = telemetry._check_config_and_environ_for_telemetry_flag(False, conf) - assert actual is True - - [email protected](os.environ, {"HAMILTON_TELEMETRY_ENABLED": "TrUe"}) -def test__check_config_and_environ_for_telemetry_flag_env_overrides(): - """tests that env overrides the config.""" - conf = configparser.ConfigParser() - conf["DEFAULT"]["telemetry_enabled"] = "FALSE" - actual = telemetry._check_config_and_environ_for_telemetry_flag(False, conf) - assert actual is True - - [email protected]( - os.environ.get("CI") != "true", - reason="This test is currently flaky when run locally -- " - "it has to be run exactly as it is in CI. " - "As it is not a high-touch portion of the codebase, " - "we default it not to run locally.", -) -def test_sanitize_error_general(): - """Tests sanitizing code in the general case. - - Run this test how circleci runs it. - - It's too hard to test code that isn't in the repo, or at least it hasn't occurred to - me how to mock it easily. - """ - try: - # make a call in a hamilton module to mimic something from hamilton - # but the stack trace should block the stack call from this function. - telemetry.get_adapter_name(None) - except AttributeError: - actual = telemetry.sanitize_error(*sys.exc_info()) - # this strips the full path -- note: line changes in telemetry.py will change this... - # so replace with line XXX - import re - - actual = re.sub(r"line \d\d\d", "line XXX", actual) - expected = """...hamilton/hamilton/tests/test_telemetry.py, line XXX, in test_sanitize_error_general\n...hamilton/hamilton/hamilton/telemetry.py, line XXX, in get_adapter_name\n""" - - # if this fails -- run it how github actions run it - assert actual == expected - - -# classes for the tests below -class CustomAdapter(base.HamiltonGraphAdapter): - @staticmethod - def check_input_type(node_type: type, input_value: Any) -> bool: - pass - - @staticmethod - def check_node_type_equivalence(node_type: type, input_type: type) -> bool: - pass - - def execute_node(self, node: node.Node, kwargs: dict[str, Any]) -> Any: - pass - - def __init__(self, result_builder: base.ResultMixin): - self.result_builder = result_builder - - -class CustomResultBuilder(base.ResultMixin): - pass - - [email protected]( - ("adapter", "expected"), - [ - ( - base.SimplePythonDataFrameGraphAdapter(), - "hamilton.base.SimplePythonDataFrameGraphAdapter", - ), - ( - base.DefaultAdapter(), - "hamilton.base.DefaultAdapter", - ), - ( - async_driver.AsyncGraphAdapter(base.DictResult()), - "hamilton.async_driver.AsyncGraphAdapter", - ), - (CustomAdapter(base.DictResult()), "custom_adapter"), - ], -) -def test_get_adapter_name(adapter, expected): - """Tests get_adapter_name""" - actual = telemetry.get_adapter_name(adapter) - assert actual == expected - - [email protected]( - ("adapter", "expected"), - [ - (base.SimplePythonDataFrameGraphAdapter(), "hamilton.base.PandasDataFrameResult"), - (base.DefaultAdapter(), "hamilton.base.DictResult"), - ( - base.SimplePythonGraphAdapter(base.NumpyMatrixResult()), - "hamilton.base.NumpyMatrixResult", - ), - ( - base.SimplePythonGraphAdapter(base.StrictIndexTypePandasDataFrameResult()), - "hamilton.base.StrictIndexTypePandasDataFrameResult", - ), - (base.SimplePythonGraphAdapter(CustomResultBuilder()), "custom_builder"), - (async_driver.AsyncGraphAdapter(base.DictResult()), "hamilton.base.DictResult"), - (CustomAdapter(base.DictResult()), "hamilton.base.DictResult"), - (CustomAdapter(CustomResultBuilder()), "custom_builder"), - ], -) -def test_get_result_builder_name(adapter, expected): - """Tests getting the result builder name. This is largely backwards compatibility - but still provides nice information as to the provided tooling the user leverages.""" - actual = telemetry.get_result_builder_name(lifecycle_base.LifecycleAdapterSet(adapter)) - assert actual == expected - - -def test_is_telemetry_enabled_false(): - """Tests that we don't increment the counter when we're disabled.""" - before = telemetry.call_counter - telemetry_enabled = telemetry.is_telemetry_enabled() - assert telemetry.call_counter == before - assert telemetry_enabled is False - - [email protected]("hamilton.telemetry.g_telemetry_enabled", True) -def test_is_telemetry_disabled_true(): - """Tests that we do increment the counter when we're enabled.""" - before = telemetry.call_counter - telemetry_enabled = telemetry.is_telemetry_enabled() - assert telemetry.call_counter == before + 1 - assert telemetry_enabled is True diff --git a/ui/backend/server/trackingserver_base/apps.py b/ui/backend/server/trackingserver_base/apps.py index d6c2ccd1..3a04552b 100644 --- a/ui/backend/server/trackingserver_base/apps.py +++ b/ui/backend/server/trackingserver_base/apps.py @@ -15,32 +15,10 @@ # specific language governing permissions and limitations # under the License. -import os.path -import uuid - from django.apps import AppConfig from django.conf import settings from django.db import models -from hamilton.telemetry import API_KEY, BASE_PROPERTIES, is_telemetry_enabled, send_event_json - - -def create_server_event_json(telemetry_key: str) -> dict: - """Function to create payload for tracking server event. - - :param event_name: the name of the server event - :return: dict representing the JSON to send. - """ - old_anonymous_id = BASE_PROPERTIES["distinct_id"] - event = { - "event": "os_hamilton_ui_server_start", - "api_key": API_KEY, - "properties": {"telemetry_key": telemetry_key, "old_anonymous_id": old_anonymous_id}, - } - event["properties"].update(BASE_PROPERTIES) - event["properties"]["distinct_id"] = telemetry_key - return event - def set_max_length_for_charfield(model_class, field_name, max_length=1024): field = model_class._meta.get_field(field_name) @@ -51,18 +29,6 @@ class TrackingServerConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" name = "trackingserver_base" - def enable_telemetry(self): - if is_telemetry_enabled() and settings.HAMILTON_ENV == "local": - telemetry_file = "/tmp/hamilton-telemetry.txt" - if not os.path.exists(telemetry_file): - telemetry_key = str(uuid.uuid4()) - with open(telemetry_file, "w") as f: - f.write(telemetry_key) - else: - with open(telemetry_file, "r") as f: - telemetry_key = f.read().strip() - send_event_json(create_server_event_json(telemetry_key)) - def sqllite_compatibility(self): if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.sqlite3": from django.apps import apps @@ -73,5 +39,4 @@ class TrackingServerConfig(AppConfig): set_max_length_for_charfield(model, field.name) def ready(self): - self.enable_telemetry() self.sqllite_compatibility() diff --git a/ui/sdk/tests/conftest.py b/ui/sdk/tests/conftest.py deleted file mode 100644 index ce510679..00000000 --- a/ui/sdk/tests/conftest.py +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from hamilton import telemetry - -telemetry.disable_telemetry()
