14 changes: 6 additions & 8 deletions doc/developer-guide/core-architecture/hostdb.en.rst
@@ -50,12 +50,10 @@ a flag, where a value of ``TS_TIME_ZERO`` indicates a live target and any other
down info.

If an info is marked down (has a non-zero last failure time) there is a "fail window" during which
no connections are permitted. After this time the info is considered to be a "zombie". If all infos
no connections are permitted. After this time the info is considered to be a "suspect". If all infos
for a record are down then a specific error message is generated (body factory tag
"connect#all_down"). Otherwise if the selected info is a zombie, a request is permitted but the
zombie is immediately marked down again, preventing any additional requests until either the fail
window has passed or the single connection succeeds. A successful connection clears the last fail
time and the info becomes alive.
"connect#all_down"). Otherwise if the selected info is a suspect, connections are permitted and the
info will transition back to up on success or down on failure.
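The Up/Down/Suspect decision described above can be sketched as a small standalone function. This is a simplified model only: `ts_time`, `ts_seconds`, and `TS_TIME_ZERO` here stand in for the project's own type aliases, and the real logic lives in `HostDBInfo::state()` in the header changes below.

```cpp
#include <chrono>

// Stand-ins for the project's clock aliases (assumption for this sketch).
using ts_time    = std::chrono::system_clock::time_point;
using ts_seconds = std::chrono::seconds;
static constexpr ts_time TS_TIME_ZERO{};

enum class State { UP, DOWN, SUSPECT };

// Derive the health state from the last failure time and the fail window.
State
state(ts_time last_failure, ts_time now, ts_seconds fail_window)
{
  if (last_failure == TS_TIME_ZERO) {
    return State::UP; // no recorded failure
  }
  if (now <= last_failure + fail_window) {
    return State::DOWN; // inside the fail window - connections blocked
  }
  return State::SUSPECT; // window elapsed - probe connections permitted
}
```

Note the state is purely a function of `last_failure` and the caller-supplied `fail_window`; no separate state variable needs to be stored or updated when the window elapses.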

Runtime Structure
=================
@@ -152,8 +150,8 @@ Future

There is still some work to be done in future PRs.

* The fail window and the zombie window should be separate values. It is quite reasonable to want
to configure a very short fail window (possibly 0) with a moderately long zombie window so that
* The fail window and the suspect window should be separate values. It is quite reasonable to want
to configure a very short fail window (possibly 0) with a moderately long suspect window so that
probing connections can immediately start going upstream at a low rate.

* Failing an upstream should be more loosely connected to transactions. Currently there is a one
@@ -189,7 +187,7 @@ This version has several major architectural changes from the previous version.

* State information has been promoted to atomics and updates are immediate rather than scheduled.
This also means the data in the state machine is a reference to a shared object, not a local copy.
The promotion was necessary to coordinate zombie connections to upstreams marked down across transactions.
The promotion was necessary to coordinate suspect connections to upstreams marked down across transactions.

* The "resolve key" is now a separate data object from the HTTP request. This is a subtle but
major change. The effect is requests can be routed to different upstreams without changing
235 changes: 78 additions & 157 deletions include/iocore/hostdb/HostDBProcessor.h
@@ -123,61 +123,76 @@ enum class HostDBType : uint8_t {
};

/** Information about a single target.
*
* Each instance tracks the health state of one upstream address. The state is derived from @c last_failure and the caller-supplied
* @a fail_window:
*
* | State | Description |
* |---------|----------------------------------------------------------------------------------|
* | Up | No known failure; eligible for normal selection. |
* | Down | Blocked; no connections permitted until @c last_failure + @a fail_window elapses |
* | Suspect | Fail window has elapsed; connections are permitted. |
* | | On success transitions to Up (@c mark_up); on failure returns to Down. |
*
* State transition diagram:
*
* @startuml
* hide empty description
*
* [*] --> Up
* Up --> Down : connect failure\n(mark_down)
* Down --> Suspect : fail_window elapses
* Suspect --> Up : connect success\n(mark_up)
* Suspect --> Down : connect failure\n(mark_down)
* @enduml
*
* State transition and `fail_window` time chart:
*
* |<-- fail_window -->|
* -+----------+--------------------+--------------------+----------+----> time
* | Up | Down | Suspect | Up |
* -+----------+--------------------+--------------------+----------+---->
* ^ ^ ^
* \ \ \
* (last_failure) (last_failure + fail_window) (connect success)
*
*/
struct HostDBInfo {
class HostDBInfo
{
public:
using self_type = HostDBInfo; ///< Self reference type.

/// Health state of this target.
enum class State {
UP,
DOWN,
SUSPECT,
};

/// Default constructor.
HostDBInfo() = default;

HostDBInfo &operator=(HostDBInfo const &that);

/// Absolute time of when this target failed.
/// A value of zero (@c TS_TIME_ZERO ) indicates no failure.
ts_time last_fail_time() const;

/// Target is alive - no known failure.
bool is_alive();

/// Target has failed and is still in the blocked time window.
bool is_down(ts_time now, ts_seconds fail_window);

/** Select this target.
*
* @param now Current time.
* @param fail_window Failure window.
* @return Status of the selection.
*
* If a zombie is selected the failure time is updated to make it appear down to other threads in a thread safe
* manner. The caller should check @c last_fail_time to see if a zombie was selected.
*/
bool select(ts_time now, ts_seconds fail_window) const;

/** Mark the entry as down.
*
* @param now Time of the failure.
* @return @c true if @a this was marked down, @c false if not.
*
* This can return @c false if the entry is already marked down, in which case the failure time is not updated.
*/
bool mark_down(ts_time now);
ts_time last_fail_time() const;
uint8_t fail_count() const;
char const *srvname() const;

std::pair<bool, uint8_t> increment_fail_count(ts_time now, uint8_t max_retries);
/// Return the current health state of this target.
State state(ts_time now, ts_seconds fail_window) const;

/** Mark the target as up / alive.
*
* @return Previous alive state of the target.
*/
bool mark_up();
// Convenience predicates for checking the state
bool is_up() const;
bool is_down(ts_time now, ts_seconds fail_window) const;
bool is_suspect(ts_time now, ts_seconds fail_window) const;

char const *srvname() const;
// State controllers
bool mark_up();
bool mark_down(ts_time now, ts_seconds fail_window);
std::pair<bool, uint8_t> increment_fail_count(ts_time now, uint8_t max_retries, ts_seconds fail_window);

/** Migrate data after a DNS update.
*
* @param that Source item.
*
* This moves only specific state information, it is not a generic copy.
*/
void migrate_from(self_type const &that);

/// A target is either an IP address or an SRV record.
@@ -187,16 +202,8 @@ struct HostDBInfo {
SRVInfo srv; ///< SRV record.
} data{IpAddr{}};

/// Data that migrates after updated DNS records are processed.
/// @see migrate_from
/// @{
/// Last time a failure was recorded.
std::atomic<ts_time> last_failure{TS_TIME_ZERO};
/// Count of connection failures
std::atomic<uint8_t> fail_count{0};
/// Expected HTTP version of the target based on earlier transactions.
HTTPVersion http_version = HTTP_INVALID;
/// @}

self_type &assign(IpAddr const &addr);

@@ -207,96 +214,12 @@ struct HostDBInfo {
HostDBType type = HostDBType::UNSPEC; ///< Invalid data.

friend HostDBContinuation;
};

inline HostDBInfo &
HostDBInfo::operator=(HostDBInfo const &that)
{
if (this != &that) {
memcpy(static_cast<void *>(this), static_cast<const void *>(&that), sizeof(*this));
}
return *this;
}

inline ts_time
HostDBInfo::last_fail_time() const
{
return last_failure;
}

inline bool
HostDBInfo::is_alive()
{
return this->last_fail_time() == TS_TIME_ZERO;
}

/**
Check if this HostDBInfo is currently marked DOWN (true) or UP (false). Returns true while within the `fail_window` period after
`last_failure`. Once `fail_window` expires, the host is treated as UP and this function returns false.

|<-- fail_window -->|
----------------+-------------------+-----------------> time
UP | DOWN | UP
(is_down=false) | (is_down=true) | (is_down=false)
| |
^ ^
\ \
last_failure last_failure + fail_window
*/
inline bool
HostDBInfo::is_down(ts_time now, ts_seconds fail_window)
{
auto last_fail = this->last_fail_time();
return (last_fail != TS_TIME_ZERO) && (now <= last_fail + fail_window);
}

inline bool
HostDBInfo::mark_up()
{
auto t = last_failure.exchange(TS_TIME_ZERO);
bool was_down = t != TS_TIME_ZERO;
if (was_down) {
fail_count.store(0);
}
return was_down;
}

inline bool
HostDBInfo::mark_down(ts_time now)
{
auto t0{TS_TIME_ZERO};
return last_failure.compare_exchange_strong(t0, now);
}

inline std::pair<bool, uint8_t>
HostDBInfo::increment_fail_count(ts_time now, uint8_t max_retries)
{
auto fcount = ++fail_count;
bool marked_down = false;
if (fcount >= max_retries) {
marked_down = mark_down(now);
}
return std::make_pair(marked_down, fcount);
}

inline bool
HostDBInfo::select(ts_time now, ts_seconds fail_window) const
{
auto t0 = this->last_fail_time();
if (t0 == TS_TIME_ZERO) {
return true; // it's alive and so is valid for selection.
}
// Return true and give it a try if enough time is elapsed since the last failure
return (t0 + fail_window < now);
}
// friend HostDBRecord;

inline void
HostDBInfo::migrate_from(HostDBInfo::self_type const &that)
{
this->last_failure = that.last_failure.load();
this->fail_count = that.fail_count.load();
this->http_version = that.http_version;
}
private:
std::atomic<ts_time> _last_failure{TS_TIME_ZERO}; ///< Last time a failure was recorded
std::atomic<uint8_t> _fail_count{0}; ///< Count of connection failures
};
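The inline bodies removed by this diff show the intended semantics of `increment_fail_count` and `mark_down`: failures accumulate, and only when the count reaches `max_retries` does the target transition to Down, via a compare-and-swap so that exactly one caller wins the transition. A minimal stand-alone model of that behavior (the real class carries more state, and the new signatures also take a `fail_window` argument, omitted here):

```cpp
#include <atomic>
#include <chrono>
#include <cstdint>
#include <utility>

// Stand-ins for the project's clock aliases (assumption for this sketch).
using ts_time = std::chrono::system_clock::time_point;
static constexpr ts_time TS_TIME_ZERO{};

struct InfoModel {
  std::atomic<ts_time>  last_failure{TS_TIME_ZERO};
  std::atomic<uint8_t>  fail_count{0};

  // Only the first caller transitions the target down: CAS from "no failure".
  bool
  mark_down(ts_time now)
  {
    ts_time t0{TS_TIME_ZERO};
    return last_failure.compare_exchange_strong(t0, now);
  }

  // Count a failure; mark the target down once max_retries is reached.
  std::pair<bool, uint8_t>
  increment_fail_count(ts_time now, uint8_t max_retries)
  {
    uint8_t fcount      = ++fail_count;
    bool    marked_down = false;
    if (fcount >= max_retries) {
      marked_down = mark_down(now);
    }
    return {marked_down, fcount};
  }
};
```

Because both members are atomics, this accounting is safe to call from concurrent transactions without a lock, matching the "promoted to atomics" design note in the documentation change above.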

// ----
/** Root item for HostDB.
@@ -371,15 +294,12 @@ class HostDBRecord : public RefCountObj

/** Pick the next round robin and update the record atomically.
*
* @note This may select a zombie server and reserve it for the caller, therefore the caller must
* attempt to connect to the selected target if possible.
* @note This may select a suspect server. The caller must attempt to connect to the selected
* target if possible.
*
* @param now Current time to use for aliveness calculations.
* @param now Current time to use for HostDBInfo state calculations.
* @param fail_window Blackout time for down servers.
* @return Status of the updated target.
*
* If the return value is @c HostDBInfo::Status::DOWN this means all targets are down and there is
* no valid upstream.
* @return The selected target, or @c nullptr if all targets are down.
*
* @note Concurrency - this is not done under lock and depends on the caller for correct use.
* For strict round robin, it is a feature that every call will get a distinct index. For
@@ -434,9 +354,9 @@ class HostDBRecord : public RefCountObj
* This accounts for the round robin setting. The default is to use "client affinity" in
* which case @a hash_addr is as a hash seed to select the target.
*
* This may select a zombie target, which can be detected by checking the target's last
* failure time. If it is not @c TS_TIME_ZERO the target is a zombie. Other transactions will
* be blocked from selecting that target until @a fail_window time has passed.
* This may select a suspect target (fail window elapsed, connections permitted again), which can
* be detected by checking the target's last failure time. If it is not @c TS_TIME_ZERO the target
* is a suspect. Multiple threads may concurrently select the same suspect target.
*
* In cases other than strict round robin, a base target is selected. If valid, that is returned,
* but if not then the targets in this record are searched until a valid one is found. The result
@@ -588,7 +508,7 @@ struct ResolveInfo {

/// Keep a reference to the base HostDB object, so it doesn't get GC'd.
Ptr<HostDBRecord> record;
HostDBInfo *active = nullptr; ///< Active host record.
HostDBInfo *active = nullptr; ///< Active HostDBInfo.

/// Working address. The meaning / source of the value depends on other elements.
/// This is the "resolved" address if @a resolved_p is @c true.
@@ -646,19 +566,20 @@ struct ResolveInfo {
*/
bool resolve_immediate();

/** Mark the active target as down.
/** Mark the active target as DOWN.
*
* @param now Time of failure.
* @param now Time of failure.
* @param fail_window The fail window duration (proxy.config.http.down_server.cache_time).
* @return @c true if the server was marked as down, @c false if not.
*
*/
bool mark_active_server_down(ts_time now);
bool mark_active_server_down(ts_time now, ts_seconds fail_window);

/** Mark the active target as alive.
/** Mark the active target as UP.
*
* @return @c true if the target changed state.
*/
bool mark_active_server_alive();
bool mark_active_server_up();

/// Select / resolve to the next RR entry for the record.
bool select_next_rr();
@@ -863,15 +784,15 @@ ResolveInfo::set_active(sockaddr const *s)
}

inline bool
ResolveInfo::mark_active_server_alive()
ResolveInfo::mark_active_server_up()
{
return active->mark_up();
}

inline bool
ResolveInfo::mark_active_server_down(ts_time now)
ResolveInfo::mark_active_server_down(ts_time now, ts_seconds fail_window)
{
return active != nullptr && active->mark_down(now);
return active != nullptr && active->mark_down(now, fail_window);
}

inline bool
1 change: 1 addition & 0 deletions src/iocore/hostdb/CMakeLists.txt
@@ -45,4 +45,5 @@ if(BUILD_TESTING)
)
add_catch2_test(NAME test_hostdb_RefCountCache COMMAND $<TARGET_FILE:test_RefCountCache>)

add_subdirectory(unit_tests)
endif()