This is an automated email from the ASF dual-hosted git repository.

bneradt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 64a8f60f11 SnowflakeID for organizationally unique identifiers (#12265)
64a8f60f11 is described below

commit 64a8f60f1199ec000d73d96f2a8db18e82080084
Author: Brian Neradt <[email protected]>
AuthorDate: Tue Jul 1 11:16:03 2025 -0500

    SnowflakeID for organizationally unique identifiers (#12265)
    
    UUID values are 128 bit values designed to be universally unique
    identifiers across all practical uses and environments. Snowflake ID
    values are 64 bit values that are designed with a more limited goal: to
    be unique across all values in a single organization. By limiting their
    scope, they limit their size which is helpful for performance and
    storage needs.
    
    This updates the connection id to use SnowflakeID values so that they
    are unique across reboots and typically unique across ATS instances in a
    CDN.
    
    This also adds psfid, a logging field similar to puuid, but that logs
    the process's snowflake ID instead of its UUID.
    
    Depends upon: #12264
---
 doc/admin-guide/logging/formatting.en.rst         |   4 +
 include/iocore/utils/Machine.h                    |   9 +-
 include/proxy/ProxySession.h                      |   3 +-
 include/proxy/logging/LogAccess.h                 |   1 +
 include/tscore/SnowflakeID.h                      | 270 ++++++++++++++++++++++
 src/api/InkAPI.cc                                 |   4 +-
 src/iocore/utils/Machine.cc                       |  41 +++-
 src/proxy/http/HttpTransact.cc                    |   2 +-
 src/proxy/http/HttpTransactHeaders.cc             |   6 +-
 src/proxy/logging/Log.cc                          |   5 +
 src/proxy/logging/LogAccess.cc                    |  23 +-
 src/traffic_server/traffic_server.cc              |   2 +-
 src/tscore/CMakeLists.txt                         |   2 +
 src/tscore/SnowflakeID.cc                         | 147 ++++++++++++
 src/tscore/unit_tests/test_SnowflakeID.cc         | 229 ++++++++++++++++++
 tests/gold_tests/logging/new_log_flds.test.py     |   5 +-
 tests/gold_tests/logging/new_log_flds_observer.py |  25 +-
 17 files changed, 753 insertions(+), 25 deletions(-)

diff --git a/doc/admin-guide/logging/formatting.en.rst 
b/doc/admin-guide/logging/formatting.en.rst
index e5613cabfa..ee1e8bb341 100644
--- a/doc/admin-guide/logging/formatting.en.rst
+++ b/doc/admin-guide/logging/formatting.en.rst
@@ -399,6 +399,7 @@ Identifiers
 .. _crid:
 .. _cruuid:
 .. _puuid:
+.. _psfid:
 
 Logging fields used to obtain various unique identifiers for transactions or
 objects.
@@ -412,6 +413,9 @@ cruuid Client Request UUID of the current client request; 
generated by
                       concatenating the puuid_ and crid_ field values.
 puuid  Proxy Server   UUID for the currently running :program:`traffic_server`
                       process. Regenerated on every |TS| startup.
+psfid  Proxy Server   The Base64 encoded snowflake ID for the currently running
+                      :program:`traffic_server` process. Regenerated on every
+                      |TS| startup.
 ====== ============== =========================================================
 
 .. _admin-logging-fields-lengths:
diff --git a/include/iocore/utils/Machine.h b/include/iocore/utils/Machine.h
index 924e2086dd..c54383b5e3 100644
--- a/include/iocore/utils/Machine.h
+++ b/include/iocore/utils/Machine.h
@@ -31,6 +31,7 @@
 #pragma once
 
 #include "tscore/ink_inet.h"
+#include "tscore/SnowflakeID.h"
 #include "tscore/ink_uuid.h"
 
 #include <unordered_set>
@@ -90,9 +91,11 @@ struct Machine {
   IpEndpoint ip4; ///< IPv4 address if present.
   IpEndpoint ip6; ///< IPv6 address if present.
 
-  std::string host_name;
-  std::string ip_hex_string; ///< IP address as hex string
-  ATSUuid     uuid;
+  std::string                            host_name;
+  uint64_t                               host_fnv1a;    ///<  A 64 bit FNV-1a 
hash of the host name.
+  std::string                            ip_hex_string; ///< IP address as hex 
string
+  ATSUuid                                process_uuid;
+  std::unique_ptr<SnowflakeIdNoSequence> process_snowflake_id;
 
   ~Machine();
 
diff --git a/include/proxy/ProxySession.h b/include/proxy/ProxySession.h
index f043d92a51..d3267045de 100644
--- a/include/proxy/ProxySession.h
+++ b/include/proxy/ProxySession.h
@@ -31,6 +31,7 @@
 #include "api/InkAPIInternal.h"
 #include "proxy/http/HttpSessionAccept.h"
 #include "proxy/IPAllow.h"
+#include "tscore/SnowflakeID.h"
 
 // Emit a debug message conditional on whether this particular client session
 // has debugging enabled. This should only be called from within a client 
session
@@ -225,7 +226,7 @@ private:
 inline int64_t
 ProxySession::next_connection_id()
 {
-  return ink_atomic_increment(&next_cs_id, 1);
+  return SnowflakeID::get_next_value();
 }
 
 inline void
diff --git a/include/proxy/logging/LogAccess.h 
b/include/proxy/logging/LogAccess.h
index e19e7de285..27e1574d15 100644
--- a/include/proxy/logging/LogAccess.h
+++ b/include/proxy/logging/LogAccess.h
@@ -247,6 +247,7 @@ public:
   int marshal_plugin_identity_id(char *);                          // INT
   int marshal_plugin_identity_tag(char *);                         // STR
   int marshal_process_uuid(char *);                                // STR
+  int marshal_process_sfid(char *);                                // STR
   int marshal_client_http_connection_id(char *);                   // INT
   int marshal_client_http_transaction_id(char *);                  // INT
   int marshal_client_http_transaction_priority_weight(char *);     // INT
diff --git a/include/tscore/SnowflakeID.h b/include/tscore/SnowflakeID.h
new file mode 100644
index 0000000000..4f38bccd5a
--- /dev/null
+++ b/include/tscore/SnowflakeID.h
@@ -0,0 +1,270 @@
+/** @file
+ *
+ *  Basic implementation of Snowflake Id.
+ *
+ *  @section license License
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <mutex>
+#include <string>
+#include <string_view>
+#include <sys/types.h>
+
+/** A utility class for the various SnowflakeID classes.
+ *
+ * Because each SnowflakeID has its own uint64_t structure, composition is
+ * preferable over inheritance. This class provides the base functionality
+ * used across the various SnowflakeID classes: the process-wide machine ID
+ * and the cached string form of a snowflake value.
+ */
+class SnowflakeIDUtils
+{
+public:
+  /** Wrap an already generated snowflake value.
+   * @param[in] id The snowflake id value.
+   */
+  SnowflakeIDUtils(uint64_t id);
+  ~SnowflakeIDUtils() = default;
+
+  // Copy and move are default.
+  SnowflakeIDUtils(SnowflakeIDUtils const &) = default;
+  SnowflakeIDUtils(SnowflakeIDUtils &&)      = default;
+
+  /** Set the machine id for this ATS host.
+   * @note This must be called before any SnowflakeId instances are created.
+   * @param machine_id The machine ID to set.
+   */
+  static void set_machine_id(uint64_t machine_id);
+
+  /** Retrieve the machine ID.
+   * @return The machine ID that was set via @a set_machine_id.
+   */
+  static uint64_t
+  get_machine_id()
+  {
+    return global_machine_id;
+  }
+
+  /** Convert the snowflake value to a string.
+   * @note The returned view refers to a string cached inside this object, so
+   * it must not be used after this object is destroyed.
+   * @return An encoded string representation of the snowflake ID.
+   */
+  std::string_view get_string() const;
+
+public:
+  /** The epoch for our snowflake IDs, in milliseconds. Midnight January 1, 2025 */
+  static constexpr uint64_t EPOCH = 1735689600000ULL; // 2025-01-01T00:00:00Z
+
+private:
+  /** The host identifier.
+   *
+   * This is the value that makes snowflake IDs unique across the different
+   * machines within an organization.
+   */
+  static std::atomic<uint64_t> global_machine_id;
+
+  /** The snowflake value.
+   *
+   * This is the 64-bit integer that represents the snowflake ID.
+   */
+  uint64_t const m_snowflake_value = 0;
+
+  /** Cached string representation of the ID.
+   * This is lazily computed when get_string() is called and cached for future
+   * calls. It is mutable so that the const get_string() can fill it in.
+   */
+  mutable std::string m_id_string;
+};
+
+/** An implementation of Snowflake ID.
+ *
+ * UUID (Universally Unique Identifier) is a 128 bit integer designed to be
+ * unique across space and time anywhere. Snowflake ID is a 64 bit value that
+ * is designed to be unique within a certain environment. It accomplishes this
+ * via a millisecond time component, a machine identifier component, and a
+ * sequence counter for snowflakes created on the machine in the same
+ * millisecond. Its scope is smaller than UUID, but it is more efficient in
+ * terms of storage and performance.
+ *
+ * Limitations:
+ * The underlying 64 bit integer has advantages in size, performance, and
+ * representation, but comes with its own limitations for uniqueness. The
+ * default Snowflake ID below has 41 bits for the timestamp, 12 bits for the
+ * machine ID, and 10 bits for the sequence number. This means that:
+ *
+ * - The timestamp can represent up to 2^41 milliseconds, which is about 69
+ *   years. After that point, snowflake IDs will start to repeat.
+ *
+ * - The machine ID, used to keep snowflake IDs unique across different
+ *   machines, can represent up to 2^12 (4096) different machines. If you have
+ *   more than 4096 machines, unique snowflake IDs will not be guaranteed.
+ *
+ * - The sequence number can represent up to 2^10 (1024) different snowflakes
+ *   generated in the same millisecond on the same machine. If you generate
+ *   more than 1024 snowflakes in the same millisecond on the same machine,
+ *   unique snowflake IDs will not be possible.
+ *
+ * API Expectations:
+ * @a set_machine_id must be called before any SnowflakeID instances are
+ * created.
+ *
+ */
+class SnowflakeID
+{
+public:
+  /** Create a unique snowflake ID using the current time, machine id, and a
+   * sequence counter.
+   */
+  SnowflakeID();
+  ~SnowflakeID() = default;
+
+  /** A convenience function to create a snowflake and immediately return its
+   * ID.
+   * @note This function is thread-safe and will return a unique ID each time
+   * it is called.
+   * @return A 64-bit unsigned integer representing the next Snowflake ID.
+   */
+  static uint64_t
+  get_next_value()
+  {
+    return SnowflakeID().get_value();
+  }
+
+  /** Return the snowflake value.
+   * @return The snowflake ID as a 64-bit unsigned integer.
+   */
+  uint64_t
+  get_value() const
+  {
+    return m_snowflake.value;
+  }
+
+  /** Return a readable string representing the snowflake id.
+   * @note This string is lazily computed when this function is called and
+   * cached for future calls.
+   * @return An encoded string representation of the snowflake ID.
+   */
+  std::string_view get_string() const;
+
+private:
+  /** A generator singleton encapsulating the state shared across the generation of snowflake ids. */
+  class SnowflakeIDGenerator
+  {
+  public:
+    /** Get the singleton instance of the SnowflakeIDGenerator.
+     * @return A reference to the singleton instance.
+     */
+    static SnowflakeIDGenerator &instance();
+
+    /** Generate a new snowflake ID.
+     * @return The next Snowflake ID.
+     */
+    uint64_t get_next_id();
+
+  private:
+    /** The timestamp of the last created snowflake ID. */
+    uint64_t m_last_timestamp = 0;
+
+    /** The sequence value of the last created snowflake ID. */
+    uint64_t m_last_sequence = 0;
+
+    /** A mutex used to make snowflake ID creation thread safe. */
+    std::mutex m_mutex;
+  };
+
+  /** Generate the next snowflake value. */
+  static uint64_t generate_next_snowflake_value();
+
+private:
+  union snowflake_t {
+    uint64_t value = 0;
+    struct {
+// Lay out the bit fields according to endianness such that subsequent
+// snowflake IDs are always increasing in value.
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+      uint64_t sequence    : 10; /// Increments from 0 when the timestamp is 
the same.
+      uint64_t machine_id  : 12; /// Masked with @a global_machine_id.
+      uint64_t timestamp   : 41; /// Milliseconds since epoch.
+      uint64_t always_zero : 1;  /// Reserved always 0. For signedness 
compatibility.
+#else                            // big-endian
+      uint64_t always_zero : 1;  /// Reserved always 0. For signedness 
compatibility.
+      uint64_t timestamp   : 41; /// Milliseconds since epoch.
+      uint64_t machine_id  : 12; /// Masked with @a global_machine_id.
+      uint64_t sequence    : 10; /// Increments from 0 when the timestamp is 
the same.
+#endif                           // __BYTE_ORDER__
+    } pieces;
+  } m_snowflake;
+
+  /** The common utility functions used across SnowflakeID flavors. */
+  SnowflakeIDUtils m_utils;
+};
+
+/** A modified snowflake ID without bits assigned to a sequence number.
+ *
+ * This type of snowflake is useful for organizationally unique IDs that are
+ * created once per ATS instance and therefore don't need a sequence number.
+ * The bits freed by dropping the sequence are given to the machine ID, which
+ * is 22 bits wide here.
+ */
+class SnowflakeIdNoSequence
+{
+public:
+  SnowflakeIdNoSequence();
+  ~SnowflakeIdNoSequence() = default;
+
+  /** Return the snowflake value.
+   * @return The snowflake ID as a 64-bit unsigned integer.
+   */
+  uint64_t
+  get_value() const
+  {
+    return m_snowflake.value;
+  }
+
+  /** Return a readable string representing the snowflake id.
+   * @note This string is lazily computed when this function is called and
+   * cached for future calls.
+   * @return An encoded string representation of the snowflake ID.
+   */
+  std::string_view get_string() const;
+
+private:
+  /** Generate a new SnowflakeIdNoSequence value. */
+  static uint64_t generate_next_snowflake_value();
+
+private:
+  union snowflake_t {
+    uint64_t value = 0;
+    struct {
+// Lay out the bit fields according to endianness such that subsequent
+// snowflake IDs are always increasing in value.
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+      uint64_t machine_id  : 22; /// Masked with @a global_machine_id.
+      uint64_t timestamp   : 41; /// Milliseconds since epoch
+      uint64_t always_zero : 1;  /// Reserved always 0. For signedness 
compatibility.
+#else                            // big-endian
+      uint64_t always_zero : 1;  /// Reserved always 0. For signedness 
compatibility.
+      uint64_t timestamp   : 41; /// Milliseconds since epoch
+      uint64_t machine_id  : 22; /// Masked with @a global_machine_id.
+#endif                           // __BYTE_ORDER__
+
+    } pieces;
+  } m_snowflake;
+
+  /** The common utility functions used across SnowflakeID flavors. */
+  SnowflakeIDUtils m_utils;
+};
diff --git a/src/api/InkAPI.cc b/src/api/InkAPI.cc
index 13bd26f53b..fb408a752e 100644
--- a/src/api/InkAPI.cc
+++ b/src/api/InkAPI.cc
@@ -8642,7 +8642,7 @@ TSUuid
 TSProcessUuidGet()
 {
   Machine *machine = Machine::instance();
-  return reinterpret_cast<TSUuid>(&machine->uuid);
+  return reinterpret_cast<TSUuid>(&machine->process_uuid);
 }
 
 const char *
@@ -8664,7 +8664,7 @@ TSClientRequestUuidGet(TSHttpTxn txnp, char *uuid_str)
   sdk_assert(sdk_sanity_check_null_ptr((void *)uuid_str) == TS_SUCCESS);
 
   HttpSM     *sm      = reinterpret_cast<HttpSM *>(txnp);
-  const char *machine = const_cast<char 
*>(Machine::instance()->uuid.getString());
+  const char *machine = const_cast<char 
*>(Machine::instance()->process_uuid.getString());
   int         len;
 
   len = snprintf(uuid_str, TS_CRUUID_STRING_LEN + 1, "%s-%" PRId64 "", 
machine, sm->sm_id);
diff --git a/src/iocore/utils/Machine.cc b/src/iocore/utils/Machine.cc
index 83690c0c9b..1134dcf7fb 100644
--- a/src/iocore/utils/Machine.cc
+++ b/src/iocore/utils/Machine.cc
@@ -21,10 +21,11 @@
   limitations under the License.
  */
 
-#include "tscore/ink_inet.h"
-#include "tscore/ink_assert.h"
-#include "tscore/Diags.h"
 #include "iocore/utils/Machine.h"
+#include "tscore/Diags.h"
+#include "tscore/SnowflakeID.h"
+#include "tscore/ink_assert.h"
+#include "tscore/ink_inet.h"
 
 #if HAVE_IFADDRS_H
 #include <ifaddrs.h>
@@ -33,6 +34,32 @@
 // Singleton
 Machine *Machine::_instance = nullptr;
 
+/** Compute the FNV-1a hash for the given string.
+ *
+ * Every byte of @a str is folded into the hash exactly as given; no case
+ * folding or other normalization is applied.
+ *
+ * @param[in] str The string to hash.
+ * @return The 64-bit FNV-1a hash of the string.
+ */
+static constexpr uint64_t
+compute_fnv1a(std::string_view str)
+{
+  // Parameters set under guidance from:
+  // http://isthe.com/chongo/tech/comp/fnv/#google_vignette.
+  constexpr uint64_t FNV_PRIME  = 1099511628211u;
+  constexpr uint64_t FNV_OFFSET = 14695981039346656037u;
+  uint64_t           hash       = FNV_OFFSET;
+
+  for (char c : str) {
+    // Convert through unsigned char so that bytes >= 0x80 are not sign
+    // extended (where char is signed) before being XORed into the hash.
+    // FNV-1a folds in exactly one octet per round.
+    hash ^= static_cast<unsigned char>(c);
+    hash *= FNV_PRIME;
+  }
+  return hash;
+}
+
+// Verify our FNV-1a hash implementation.
+static_assert(compute_fnv1a("") == 14695981039346656037u,
+              "64 bit FNV-1a hash for '' should be 14695981039346656037");
+static_assert(compute_fnv1a("e1.myedge.colo.acme.com") == 9637442596227468504u,
+              "64 bit FNV-1a hash for 'e1.myedge.colo.acme.com' should be 9637442596227468504");
+// A byte with the high bit set must be hashed as the single octet 0xff.
+static_assert(compute_fnv1a(std::string_view{"\xff", 1}) == 0xaf64724c8602eb6eu,
+              "64 bit FNV-1a hash for the byte 0xff should be 0xaf64724c8602eb6e");
+
 Machine *
 Machine::instance()
 {
@@ -59,8 +86,8 @@ Machine::Machine(char const *the_hostname, sockaddr const 
*addr)
   ip_text_buffer ip_strbuf;
   char           localhost[1024];
 
-  uuid.initialize(TS_UUID_V4);
-  ink_release_assert(nullptr != uuid.getString()); // The Process UUID must be 
available on startup
+  process_uuid.initialize(TS_UUID_V4);
+  ink_release_assert(nullptr != process_uuid.getString()); // The Process UUID 
must be available on startup
 
   if (!ats_is_ip(addr)) {
     if (!the_hostname) {
@@ -205,6 +232,10 @@ Machine::Machine(char const *the_hostname, sockaddr const 
*addr)
     }
   }
 
+  host_fnv1a = compute_fnv1a(host_name);
+  SnowflakeIDUtils::set_machine_id(host_fnv1a);
+  process_snowflake_id = std::make_unique<SnowflakeIdNoSequence>();
+
   char hex_buff[TS_IP6_SIZE * 2 + 1];
   ats_ip_to_hex(&ip.sa, hex_buff, sizeof(hex_buff));
   ip_hex_string.assign(hex_buff);
diff --git a/src/proxy/http/HttpTransact.cc b/src/proxy/http/HttpTransact.cc
index b53e52b2ad..6f21adc4f2 100644
--- a/src/proxy/http/HttpTransact.cc
+++ b/src/proxy/http/HttpTransact.cc
@@ -6598,7 +6598,7 @@ HttpTransact::will_this_request_self_loop(State *s)
     // Now check for a loop using the Via string.
     int              count     = 0;
     MIMEField       *via_field = 
s->hdr_info.client_request.field_find(static_cast<std::string_view>(MIME_FIELD_VIA));
-    std::string_view uuid{Machine::instance()->uuid.getString()};
+    std::string_view uuid{Machine::instance()->process_uuid.getString()};
 
     while (via_field) {
       // No need to waste cycles comma separating the via values since we want 
to do a match anywhere in the
diff --git a/src/proxy/http/HttpTransactHeaders.cc 
b/src/proxy/http/HttpTransactHeaders.cc
index 891001fcc8..4a26790675 100644
--- a/src/proxy/http/HttpTransactHeaders.cc
+++ b/src/proxy/http/HttpTransactHeaders.cc
@@ -747,7 +747,7 @@ 
HttpTransactHeaders::insert_via_header_in_request(HttpTransact::State *s, HTTPHd
   via_string += nstrcpy(via_string, s->http_config_param->proxy_hostname);
 
   *via_string++ = '[';
-  memcpy(via_string, Machine::instance()->uuid.getString(), 
TS_UUID_STRING_LEN);
+  memcpy(via_string, Machine::instance()->process_uuid.getString(), 
TS_UUID_STRING_LEN);
   via_string    += TS_UUID_STRING_LEN;
   *via_string++  = ']';
   *via_string++  = ' ';
@@ -990,12 +990,12 @@ 
HttpTransactHeaders::add_forwarded_field_to_request(HttpTransact::State *s, HTTP
 
     const Machine &m = *Machine::instance();
 
-    if (optSet[HttpForwarded::BY_UUID] and m.uuid.valid()) {
+    if (optSet[HttpForwarded::BY_UUID] and m.process_uuid.valid()) {
       if (hdr.size()) {
         hdr << ';';
       }
 
-      hdr << "by=_" << m.uuid.getString();
+      hdr << "by=_" << m.process_uuid.getString();
     }
 
     if (optSet[HttpForwarded::BY_IP] and m.ip.isValid()) {
diff --git a/src/proxy/logging/Log.cc b/src/proxy/logging/Log.cc
index 6f75b6d882..c11ccd1309 100644
--- a/src/proxy/logging/Log.cc
+++ b/src/proxy/logging/Log.cc
@@ -505,6 +505,11 @@ Log::init_fields()
   global_field_list.add(field, false);
   field_symbol_hash.emplace("puuid", field);
 
+  field =
+    new LogField("process_snowflake_id", "psfid", LogField::STRING, 
&LogAccess::marshal_process_sfid, &LogAccess::unmarshal_str);
+  global_field_list.add(field, false);
+  field_symbol_hash.emplace("psfid", field);
+
   field = new LogField("client_req_content_len", "cqcl", LogField::sINT, 
&LogAccess::marshal_client_req_content_len,
                        &LogAccess::unmarshal_int_to_str);
   global_field_list.add(field, false);
diff --git a/src/proxy/logging/LogAccess.cc b/src/proxy/logging/LogAccess.cc
index 52e761e6c5..10a77b266a 100644
--- a/src/proxy/logging/LogAccess.cc
+++ b/src/proxy/logging/LogAccess.cc
@@ -164,7 +164,26 @@ LogAccess::marshal_process_uuid(char *buf)
   int len = round_strlen(TS_UUID_STRING_LEN + 1);
 
   if (buf) {
-    const char *str = const_cast<char 
*>(Machine::instance()->uuid.getString());
+    const char *str = const_cast<char 
*>(Machine::instance()->process_uuid.getString());
+    marshal_str(buf, str, len);
+  }
+  return len;
+}
+
+int
+LogAccess::marshal_process_sfid(char *buf)
+{
+  char const *str = nullptr; // Stays null if there is no Machine instance.
+  int         len = 0;
+
+  if (Machine *machine = Machine::instance(); machine) {
+    std::string_view snowflake_id = 
machine->process_snowflake_id->get_string();
+    str                           = snowflake_id.data(); // Points into the string cached by the process snowflake ID.
+    len                           = snowflake_id.length();
+  }
+
+  len = INK_ALIGN_DEFAULT(len + 1); // Presumably +1 for the terminating NUL, then rounded up to the default alignment - confirm.
+  if (buf) { // With a null buf only the required length is reported.
     marshal_str(buf, str, len);
   }
   return len;
@@ -2166,7 +2185,7 @@ int
 LogAccess::marshal_client_req_uuid(char *buf)
 {
   char        str[TS_CRUUID_STRING_LEN + 1];
-  const char *uuid = Machine::instance()->uuid.getString();
+  const char *uuid = Machine::instance()->process_uuid.getString();
   int         len  = snprintf(str, sizeof(str), "%s-%" PRId64 "", uuid, 
m_http_sm->sm_id);
 
   ink_assert(len <= TS_CRUUID_STRING_LEN);
diff --git a/src/traffic_server/traffic_server.cc 
b/src/traffic_server/traffic_server.cc
index 663fed2e50..a3504aa4bc 100644
--- a/src/traffic_server/traffic_server.cc
+++ b/src/traffic_server/traffic_server.cc
@@ -2034,7 +2034,7 @@ main(int /* argc ATS_UNUSED */, const char **argv)
     Machine::init(hostname, &machine_addr.sa);
   }
 
-  RecRegisterStatString(RECT_PROCESS, "proxy.process.version.server.uuid", 
(char *)Machine::instance()->uuid.getString(),
+  RecRegisterStatString(RECT_PROCESS, "proxy.process.version.server.uuid", 
(char *)Machine::instance()->process_uuid.getString(),
                         RECP_NON_PERSISTENT);
 
   res_track_memory = 
RecGetRecordInt("proxy.config.res_track_memory").value_or(0);
diff --git a/src/tscore/CMakeLists.txt b/src/tscore/CMakeLists.txt
index 25a8297c90..8c9475305a 100644
--- a/src/tscore/CMakeLists.txt
+++ b/src/tscore/CMakeLists.txt
@@ -55,6 +55,7 @@ add_library(
   ParseRules.cc
   Random.cc
   Regression.cc
+  SnowflakeID.cc
   TextBuffer.cc
   Throttler.cc
   Tokenizer.cc
@@ -165,6 +166,7 @@ if(BUILD_TESTING)
     unit_tests/test_PriorityQueue.cc
     unit_tests/test_Ptr.cc
     unit_tests/test_Random.cc
+    unit_tests/test_SnowflakeID.cc
     unit_tests/test_Throttler.cc
     unit_tests/test_Tokenizer.cc
     unit_tests/test_arena.cc
diff --git a/src/tscore/SnowflakeID.cc b/src/tscore/SnowflakeID.cc
new file mode 100644
index 0000000000..c828e3609d
--- /dev/null
+++ b/src/tscore/SnowflakeID.cc
@@ -0,0 +1,147 @@
+/**
+  @file Implement Snowflake ID.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#include "tscore/SnowflakeID.h"
+#include "tscore/Diags.h"
+#include "tscore/ink_base64.h"
+#include "tscore/ink_hrtime.h"
+#include "tsutil/DbgCtl.h"
+
+#include <cstdint>
+#include <cinttypes>
+#include <memory>
+#include <mutex>
+#include <string_view>
+
+std::atomic<uint64_t> SnowflakeIDUtils::global_machine_id{0}; // Remains 0 until set_machine_id() stores a value.
+
+DbgCtl dbg_ctl_snowflake{"snowflake"}; // Debug control for the "snowflake" tag used by Dbg() in this file.
+
+SnowflakeIDUtils::SnowflakeIDUtils(uint64_t id) : m_snowflake_value{id} {} // Only stores the value; get_string() builds the string form on demand.
+
+void
+SnowflakeIDUtils::set_machine_id(uint64_t machine_id)
+{
+  Dbg(dbg_ctl_snowflake, "Setting machine ID to: %" PRIx64, machine_id); // Printed in hexadecimal.
+  global_machine_id = machine_id; // Atomic store; later read back through get_machine_id().
+}
+
+std::string_view
+SnowflakeIDUtils::get_string() const
+{
+  if (m_id_string.empty()) { // Nothing cached yet (first call, or an earlier encode failed).
+    // Base64 encode the in-memory bytes of the snowflake ID and cache the result in m_id_string.
+    constexpr size_t                   max_encoded_size = 
ats_base64_encode_dstlen(sizeof(m_snowflake_value));
+    std::array<char, max_encoded_size> encoded_buffer;
+    size_t                             encoded_length   = 0;
+    auto                              *snowflake_char_p = 
reinterpret_cast<char const *>(&m_snowflake_value);
+    if (ats_base64_encode(snowflake_char_p, sizeof(m_snowflake_value), 
encoded_buffer.data(), max_encoded_size, &encoded_length)) {
+      m_id_string = std::string(encoded_buffer.data(), encoded_length);
+    } else {
+      // Very unlikely. m_id_string stays empty, so the next call tries the encoding again.
+      Error("Failed to encode snowflake ID: %" PRIx64, m_snowflake_value);
+    }
+  }
+  return m_id_string; // NOTE(review): the lazy write above takes no lock - confirm callers cannot race on first use.
+}
+
+SnowflakeID::SnowflakeIDGenerator &
+SnowflakeID::SnowflakeIDGenerator::instance()
+{
+  static SnowflakeIDGenerator g; // Function-local static: one generator shared by every caller.
+  return g;
+}
+
+/** Generate the next snowflake value for this process.
+ *
+ * The value is assembled from the machine ID, the milliseconds elapsed since
+ * SnowflakeIDUtils::EPOCH, and a per-millisecond sequence counter. The
+ * timestamp and sequence state is updated under @a m_mutex.
+ *
+ * If the sequence counter would exceed what its bit field can hold, the
+ * generator advances its logical timestamp by one millisecond and restarts
+ * the sequence at 0 instead of letting the field wrap, which would hand out
+ * IDs that were already issued. Under such a burst the embedded timestamp may
+ * therefore run slightly ahead of the wall clock.
+ *
+ * @return The next Snowflake ID.
+ */
+uint64_t
+SnowflakeID::SnowflakeIDGenerator::get_next_id()
+{
+  uint64_t const machine_id = SnowflakeIDUtils::get_machine_id();
+  ink_release_assert(machine_id != 0);
+
+  // Derive the largest value the sequence bit field can hold from the field
+  // itself so this stays correct if the layout in the header changes.
+  snowflake_t sequence_probe;
+  sequence_probe.value        = ~uint64_t{0};
+  uint64_t const max_sequence = sequence_probe.pieces.sequence;
+
+  snowflake_t new_snowflake;
+  new_snowflake.pieces.always_zero = 0;
+  new_snowflake.pieces.machine_id  = machine_id;
+  uint64_t const wall_clock_ms     = ink_hrtime_to_msec(ink_get_hrtime());
+  uint64_t       now               = wall_clock_ms - SnowflakeIDUtils::EPOCH;
+
+  // Comparing and setting uint64_t values is one CPU cycle each. Setting a
+  // bit field takes more, maybe 6 CPU cycles or so. Therefore, we modify the
+  // bit field values outside of the lock.
+  uint64_t local_last_sequence = 0;
+  {
+    std::lock_guard<std::mutex> lock(m_mutex);
+    if (now > m_last_timestamp) {
+      // A new millisecond: record it and restart the sequence.
+      m_last_timestamp = now;
+      m_last_sequence  = 0;
+    } else {
+      // Either the same millisecond as the previous ID, or another thread (or
+      // a sequence rollover below) already moved the generator past the time
+      // we sampled before taking the lock. In both cases keep using the
+      // generator's timestamp and take the next sequence number.
+      if (++m_last_sequence > max_sequence) {
+        // The sequence field is exhausted for this millisecond. Roll over to
+        // the next logical millisecond rather than wrapping and repeating IDs.
+        ++m_last_timestamp;
+        m_last_sequence = 0;
+      }
+      now = m_last_timestamp;
+    }
+    local_last_sequence = m_last_sequence;
+  } // Release the lock.
+  new_snowflake.pieces.timestamp = now;
+  new_snowflake.pieces.sequence  = local_last_sequence;
+  return new_snowflake.value;
+}
+
+uint64_t
+SnowflakeID::generate_next_snowflake_value()
+{
+  return SnowflakeIDGenerator::instance().get_next_id(); // Delegates to the shared generator singleton.
+}
+
+SnowflakeID::SnowflakeID() : m_snowflake{.value = 
generate_next_snowflake_value()}, m_utils{m_snowflake.value} {} // m_utils copies the value just generated into m_snowflake.
+
+std::string_view
+SnowflakeID::get_string() const
+{
+  return m_utils.get_string(); // The encoding and caching live in SnowflakeIDUtils.
+}
+
+uint64_t
+SnowflakeIdNoSequence::generate_next_snowflake_value()
+{
+  snowflake_t new_snowflake;
+  new_snowflake.pieces.always_zero = 0;
+  new_snowflake.pieces.machine_id  = SnowflakeIDUtils::get_machine_id(); // Only the low bits that fit the machine_id bit field are kept.
+  uint64_t const now               = ink_hrtime_to_msec(ink_get_hrtime()); // NOTE(review): SnowflakeIDGenerator::get_next_id() subtracts SnowflakeIDUtils::EPOCH, this does not - confirm that is intended.
+  new_snowflake.pieces.timestamp   = now;
+  return new_snowflake.value;
+}
+
+SnowflakeIdNoSequence::SnowflakeIdNoSequence() : m_snowflake{.value = 
generate_next_snowflake_value()}, m_utils{m_snowflake.value} // m_utils copies the value just generated into m_snowflake.
+{
+}
+
+std::string_view
+SnowflakeIdNoSequence::get_string() const
+{
+  // The string form does not depend on the bit layout, so the shared SnowflakeIDUtils encoding is used as is.
+  return m_utils.get_string();
+}
diff --git a/src/tscore/unit_tests/test_SnowflakeID.cc 
b/src/tscore/unit_tests/test_SnowflakeID.cc
new file mode 100644
index 0000000000..04f8e1fe02
--- /dev/null
+++ b/src/tscore/unit_tests/test_SnowflakeID.cc
@@ -0,0 +1,229 @@
+/** @file
+ *
+ *  Verify SnowflakeID behavior.
+ *
+ *  @section license License
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include "tscore/SnowflakeID.h"
+
+#include <chrono>
+#include <cstdint>
+#include <string_view>
+#include <thread>
+
+#include "catch.hpp"
+
+using std::chrono::duration_cast;
+using std::chrono::milliseconds;
+using std::chrono::system_clock;
+
+namespace
+{
+
+// These unions help us extract the fields in a platform-independent way,
+// namely regardless of endianness. They obviously will need to be updated if
+// the production versions of these structs change, but the tests will fail if
+// that happens, alerting the developer to maintain these.
+union SnowflakeIDValue {
+  uint64_t value;
+  struct {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    uint64_t sequence    : 10;
+    uint64_t machine_id  : 12;
+    uint64_t timestamp   : 41;
+    uint64_t always_zero : 1;
+#else  // big-endian
+    uint64_t always_zero : 1;
+    uint64_t timestamp   : 41;
+    uint64_t machine_id  : 12;
+    uint64_t sequence    : 10;
+#endif // __BYTE_ORDER__
+  };
+};
+
+union SnowflakeIDNoSequenceValue {
+  uint64_t value;
+  struct {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    uint64_t machine_id  : 22;
+    uint64_t timestamp   : 41;
+    uint64_t always_zero : 1;
+#else  // big-endian
+    uint64_t always_zero : 1;
+    uint64_t timestamp   : 41;
+    uint64_t machine_id  : 22;
+#endif // __BYTE_ORDER__
+  };
+};
+
+} // anonymous namespace
+
+TEST_CASE("SnowflakeIDUtils", "[libts][SnowflakeID]")
+{
+  REQUIRE(SnowflakeIDUtils::get_machine_id() == 0);
+
+  uint64_t machine_id = 0xabc;
+  SnowflakeIDUtils::set_machine_id(machine_id);
+  REQUIRE(SnowflakeIDUtils::get_machine_id() == machine_id);
+
+  SnowflakeIDUtils utils{0u};
+  // base64 of 8 zero bytes == "AAAAAAAAAAA="
+  constexpr std::string_view expected_base64{"AAAAAAAAAAA="};
+  REQUIRE(utils.get_string() == expected_base64);
+  // Verify caching.
+  REQUIRE(utils.get_string() == expected_base64);
+}
+
+TEST_CASE("SnowflakeID", "[libts][SnowflakeID]")
+{
+  constexpr uint64_t machine_id          = 0x123456789abcdef;
+  constexpr uint64_t expected_machine_id = machine_id & ((1u << 12) - 1); // 
12 bits for machine ID
+  SnowflakeIDUtils::set_machine_id(machine_id);
+
+  // Generate two IDs back to back. The idea is that they should be generated 
in
+  // the same millisecond, so the sequence number should increment from 0 to 1.
+  uint64_t ms_since_unix_epoch_before = 
duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
+  uint64_t v1                         = SnowflakeID::get_next_value();
+  uint64_t v2                         = SnowflakeID::get_next_value();
+  REQUIRE(v1 != 0);
+  REQUIRE(v2 != 0);
+
+  // Use the test's union to extract fields.
+  SnowflakeIDValue u1{.value = v1};
+  SnowflakeIDValue u2{.value = v2};
+
+  // If by some remote happenstance the two snowflakes were generated in
+  // different milliseconds, regenerate them.
+  int retry_count = 0;
+  while (u1.timestamp != u2.timestamp) {
+    if (retry_count++ > 10) {
+      // Something is seriously wrong...don't infinite loop.
+      FAIL("Failed to generate two snowflake IDs in the same millisecond.");
+    }
+    ms_since_unix_epoch_before = 
duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
+    v1                         = SnowflakeID::get_next_value();
+    v2                         = SnowflakeID::get_next_value();
+    u1.value                   = v1;
+    u2.value                   = v2;
+  }
+  REQUIRE(u1.always_zero == 0);
+  REQUIRE(u2.always_zero == 0);
+
+  // This should be true per the above loop, but for the sake of clarity, test
+  // explicitly using the Catch framework.
+  REQUIRE(u1.timestamp == u2.timestamp);
+  uint64_t const ms_since_unix_epoch_after = 
duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
+
+  // Make sure our snowflake IDs are offset from our designated epoch of 
January
+  // 1, 2025.
+  auto compute_epoch_ms = []() -> uint64_t {
+    std::tm tm{};
+    tm.tm_year = 2025 - 1900; // years since 1900
+    tm.tm_mon  = 0;           // January == 0
+    tm.tm_mday = 1;
+    tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
+    tm.tm_isdst                        = 0; // no daylight‐saving
+    return static_cast<uint64_t>(timegm(&tm)) * 1000;
+  };
+  uint64_t const ats_epoch = compute_epoch_ms();
+  // Verify that we set EPOCH to the correct hard-coded value.
+  REQUIRE(ats_epoch == SnowflakeIDUtils::EPOCH);
+  // Sanity check that I'm thinking about the values correctly before I 
subtract
+  // one value from another.
+  assert(ms_since_unix_epoch_before > ats_epoch);
+  uint64_t const adjusted_ms_since_unix_epoch_before_for_ats_epoch = 
ms_since_unix_epoch_before - ats_epoch;
+
+  // delta_ms is almost certainly 0, but I don't want the test to fail rarely
+  // when we, by happenstance, grab the current time locally at a different
+  // millisecond than SnowflakeID::get_next_value().
+  uint64_t const delta_ms                = ms_since_unix_epoch_after - 
ms_since_unix_epoch_before;
+  uint64_t const expected_timestamp_low  = 
adjusted_ms_since_unix_epoch_before_for_ats_epoch;
+  uint64_t const expected_timestamp_high = expected_timestamp_low + delta_ms;
+  REQUIRE(((expected_timestamp_low <= u1.timestamp) && (u1.timestamp <= 
expected_timestamp_high)));
+
+  // The machine ID of both should be the expected value.
+  REQUIRE(u1.machine_id == expected_machine_id);
+  REQUIRE(u2.machine_id == expected_machine_id);
+
+  // Each successive ID should be greater than the previous one.
+  REQUIRE(v2 > v1);
+
+  // Verify that the sequence number increased from 0 to 1.
+  REQUIRE(u1.sequence == 0u);
+  REQUIRE(u2.sequence == 1u);
+
+  // Verify behavior when the timestamp increases.
+  std::this_thread::sleep_for(std::chrono::milliseconds(2));
+  uint64_t v3 = SnowflakeID::get_next_value();
+
+  // Verify that the machine ID is still the same.
+  SnowflakeIDValue u3{.value = v3};
+  REQUIRE(u3.always_zero == 0);
+  REQUIRE(u3.machine_id == expected_machine_id);
+
+  // More than a millisecond has passed, so the timestamp must have advanced.
+  REQUIRE(u3.timestamp > u1.timestamp);
+
+  // Each successive ID should be greater than the previous one, even across
+  // milliseconds.
+  REQUIRE(v3 > v2);
+
+  // Verify that the sequence number is reset to 0.
+  REQUIRE(u3.sequence == 0u);
+
+  // Sanity check getting a string representation.
+  SnowflakeID obj;
+  auto        s1 = obj.get_string();
+  REQUIRE(!s1.empty());
+}
+
+TEST_CASE("SnowflakeIdNoSequence", "[libts][SnowflakeID]")
+{
+  constexpr uint64_t machine_id          = 0x123456789abcdef;
+  constexpr uint64_t expected_machine_id = machine_id & ((1u << 22) - 1); // 
22 bits for machine ID
+  SnowflakeIDUtils::set_machine_id(machine_id);
+
+  SnowflakeIdNoSequence obj;
+  uint64_t              v1 = obj.get_value();
+
+  // Use the test's union to extract fields.
+  SnowflakeIDNoSequenceValue u1{.value = v1};
+  REQUIRE(u1.always_zero == 0);
+  REQUIRE(u1.machine_id == expected_machine_id);
+
+  // Sleep a bit to ensure the next ID is generated in a different millisecond.
+  std::this_thread::sleep_for(std::chrono::milliseconds(2));
+  uint64_t v2 = SnowflakeIdNoSequence().get_value();
+
+  // Successive IDs should be greater than the previous one.
+  REQUIRE(v2 > v1);
+
+  // Use the test's union to extract fields for the second value.
+  SnowflakeIDNoSequenceValue u2{.value = v2};
+  REQUIRE(u2.always_zero == 0);
+  REQUIRE(u2.machine_id == expected_machine_id);
+
+  // Verify that the timestamp is different.
+  REQUIRE(u2.timestamp > u1.timestamp);
+
+  // Sanity check getting a string representation.
+  auto s = obj.get_string();
+  REQUIRE(!s.empty());
+}
diff --git a/tests/gold_tests/logging/new_log_flds.test.py 
b/tests/gold_tests/logging/new_log_flds.test.py
index 4d105b71e6..f7c2b22774 100644
--- a/tests/gold_tests/logging/new_log_flds.test.py
+++ b/tests/gold_tests/logging/new_log_flds.test.py
@@ -39,7 +39,8 @@ ts.addDefaultSSLFiles()
 
 ts.Disk.records_config.update(
     {
-        # 'proxy.config.diags.debug.enabled': 1,
+        'proxy.config.diags.debug.enabled': 1,
+        'proxy.config.diags.debug.tags': 'snowflake|http',
         'proxy.config.ssl.server.cert.path': '{0}'.format(ts.Variables.SSLDir),
         'proxy.config.ssl.server.private_key.path': 
'{0}'.format(ts.Variables.SSLDir),
     })
@@ -59,7 +60,7 @@ ts.Disk.logging_yaml.AddLines(
 logging:
   formats:
     - name: custom
-      format: "%<ccid> %<ctid> %<cssn>"
+      format: "%<psfid> %<ccid> %<ctid> %<cssn>"
   logs:
     - filename: test_new_log_flds
       format: custom
diff --git a/tests/gold_tests/logging/new_log_flds_observer.py 
b/tests/gold_tests/logging/new_log_flds_observer.py
index 59bd4dc195..ec90b145e6 100644
--- a/tests/gold_tests/logging/new_log_flds_observer.py
+++ b/tests/gold_tests/logging/new_log_flds_observer.py
@@ -20,6 +20,7 @@ Examines log generated by new_log_flds.test.py, returns 0 if 
valid, 1 if not.
 import sys
 import csv
 
+psfid = []
 ccid = []
 ctid = []
 
@@ -28,29 +29,43 @@ ctid = []
 ln_num = 0
 for ln in csv.reader(sys.stdin, delimiter=' '):
     ln_num += 1
-    if len(ln) != 3:
+    if len(ln) != 4:
+        print(f'Error: Expected 4 fields at line {ln_num}, got {len(ln)}', 
file=sys.stderr)
         exit(code=1)
-    i = int(ln[0])
+    i = ln[0]
+    psfid.append(i)
+    i = int(ln[1])
     if i < 0:
+        print(f'Error: Invalid ccid value at line {ln_num}: {i}', 
file=sys.stderr)
         exit(code=1)
     ccid.append(i)
-    i = int(ln[1])
+    i = int(ln[2])
     if i < 0:
+        print(f'Error: Invalid ctid value at line {ln_num}: {i}', 
file=sys.stderr)
         exit(code=1)
     ctid.append(i)
     if ln_num == 7:
-        if ln[2] != "reallyreallyreallyreallylong.com":
+        if ln[3] != "reallyreallyreallyreallylong.com":
+            print(f'Error: Expected cssn value "reallyreallyreallyreallylong.com" at line {ln_num}, got {ln[3]}', file=sys.stderr)
             exit(code=1)
     else:
-        if ln[2] != "-":
+        if ln[3] != "-":
+            print(f'Error: Expected cssn value "-" at line {ln_num}, got {ln[3]}', file=sys.stderr)
             exit(code=1)
 
 # Validate contents of report.
 #
+psfid_set = set(psfid)
+if len(psfid_set) != 1:
+    print("All psfid values should be the same", file=sys.stderr)
+    exit(code=1)
+
 if (ccid[0] != ccid[1] and ccid[1] != ccid[2] and ccid[2] == ccid[3] and 
ctid[2] != ctid[3] and ccid[3] != ccid[4] and
         ccid[4] == ccid[5] and ctid[4] != ctid[5] and ccid[5] != ccid[6]):
+    print("Report is valid", file=sys.stderr)
     exit(code=0)
 
 # Failure exit if report was not valid.
 #
+print("Report is not valid", file=sys.stderr)
 exit(code=1)


Reply via email to