From 9d55d8ef12811a63fcf073787fc3f42d5d8bea4c Mon Sep 17 00:00:00 2001 From: lmchilton Date: Fri, 10 Apr 2026 12:36:31 -0400 Subject: [PATCH] pmdaopentelemetry: optimization & bug fixes optimized code to ensure smooth operation under load. fixes for RHEL-130498 and RHEL-130499: updated the status code control metric to show an error code of 1 for failing scripts. Added code to allow for more lenient parsing to match opentelemetry specs. updated qa tests #1843 and #1726 updated man page --- qa/1726.out | 2 +- qa/1843.out | 16 +- qa/opentelemetry/scripts/failing_script.sh | 2 + src/pmdas/opentelemetry/pmdaopentelemetry.1 | 7 +- .../opentelemetry/pmdaopentelemetry.python | 217 +++++++++--------- 5 files changed, 129 insertions(+), 115 deletions(-) create mode 100755 qa/opentelemetry/scripts/failing_script.sh diff --git a/qa/1726.out b/qa/1726.out index f327a1959b..9c79cb6394 100644 --- a/qa/1726.out +++ b/qa/1726.out @@ -45,7 +45,7 @@ I am a Gauge value 10 opentelemetry.simplemetric.metric3 [I am a Histogram] - Data Type: 64-bit int InDom: 164.10242 0x29002802 + Data Type: 64-bit int InDom: 164.11266 0x29002c02 Semantics: counter Units: none Help: I am a Histogram diff --git a/qa/1843.out b/qa/1843.out index 8c438dccde..e9716e67e7 100644 --- a/qa/1843.out +++ b/qa/1843.out @@ -10,11 +10,12 @@ opentelemetry.control.status_code PMID: 164.0.6 [per-end-point source URL respon Data Type: 32-bit int InDom: 164.0 0x29000000 Semantics: discrete Units: none inst [0 or "control"] value 0 - inst [1 or "curl.script"] value 0 + inst [1 or "curl.script"] value 1 inst [2 or "curl_scripted"] value 0 inst [3 or "duplicate"] value 0 - inst [4 or "labelfiltering"] value 0 - inst [5 or "python_scripted"] value 0 + inst [4 or "failing_script"] value 1 + inst [5 or "labelfiltering"] value 0 + inst [6 or "python_scripted"] value 0 -- source re-addition -- opentelemetry.simplemetric.metric3_sum @@ -29,12 +30,13 @@ opentelemetry.control.status_code PMID: 164.0.6 [per-end-point source URL 
respon Data Type: 32-bit int InDom: 164.0 0x29000000 Semantics: discrete Units: none inst [0 or "control"] value 0 - inst [1 or "curl.script"] value 0 + inst [1 or "curl.script"] value 1 inst [2 or "curl_scripted"] value 0 inst [3 or "duplicate"] value 0 - inst [4 or "labelfiltering"] value 0 - inst [5 or "python_scripted"] value 0 - inst [6 or "simplemetric"] value 0 + inst [4 or "failing_script"] value 1 + inst [5 or "labelfiltering"] value 0 + inst [6 or "python_scripted"] value 0 + inst [7 or "simplemetric"] value 0 -- metric removal of recognized source/metric -- Error: opentelemetry.simplemetric: Unknown metric name diff --git a/qa/opentelemetry/scripts/failing_script.sh b/qa/opentelemetry/scripts/failing_script.sh new file mode 100755 index 0000000000..ecdbef95dd --- /dev/null +++ b/qa/opentelemetry/scripts/failing_script.sh @@ -0,0 +1,2 @@ +#!/bin/sh +exit 1 diff --git a/src/pmdas/opentelemetry/pmdaopentelemetry.1 b/src/pmdas/opentelemetry/pmdaopentelemetry.1 index 4a24df9d4a..6b502453a1 100644 --- a/src/pmdas/opentelemetry/pmdaopentelemetry.1 +++ b/src/pmdas/opentelemetry/pmdaopentelemetry.1 @@ -557,7 +557,7 @@ but the instance domain will be empty and a fetch will return no values. A string representing the status of the last fetch of the corresponding source. This will generally be .B success -for an http response code of 200. +for an http response code of 200 or an exit code of 0 for scripted sources. This metric can be used for service availability monitoring - provided, as stated above, the corresponding source URL is fetched too. @@ -567,7 +567,10 @@ This metric is similar to except that it is the integer response code of the last fetch. A value of .B 200 -usually signifies success and any other value failure. +usually signifies success and any other value failure. For scripted +sources, a +.B zero +exit code signifies success and any non-zero code failure. 
This metric can also be used for service availability monitoring, with the same caveats as .BR opentelemetry.control.status . diff --git a/src/pmdas/opentelemetry/pmdaopentelemetry.python b/src/pmdas/opentelemetry/pmdaopentelemetry.python index 35cc84978a..6e86e71ace 100644 --- a/src/pmdas/opentelemetry/pmdaopentelemetry.python +++ b/src/pmdas/opentelemetry/pmdaopentelemetry.python @@ -291,28 +291,34 @@ class Metric(object): ''' Store instance/value pair of metric with > 1 instances ''' # keep track of if the number of instances in the inst name list matches the num of inst values ? if len(instances) != len(values): - self.source.pmda.err("error num instances doesn't match num values") - if instances[0] == "inst-0": - self.indom_table.prefix_mode = True # Mark for instance# prefixed names + self.source.pmda.err(f"Metric {self.mname}: instances({len(instances)}) != values({len(values)})") + return + + if instances and instances[0] == "inst-0": + self.indom_table.prefix_mode = True + + val_dict = self.values + lbl_dict = self.inst_labels + lookup = self.indom_table.intern_lookup_value if metric_type in ("histogram", "summary"): - for idx in range (len(instances)): - included_labels, optional_labels = self.source.filter_labelset(labels) - inst = self.indom_table.intern_lookup_value(instances[idx]) - self.values[inst] = values[idx] - self.inst_labels[inst] = included_labels + included_labels, _ = self.source.filter_labelset(labels) + for iname, val in zip(instances, values): + inst = lookup(iname) + val_dict[inst] = val + lbl_dict[inst] = included_labels + if self.source.pmda.dbg: - self.source.pmda.debug('store_inst mname=%s inst=%d instname="%s" value="%s"' % (self.mname, inst, instances[idx], values[idx])) - self.source.pmda.debug('store_inst mname=%s inst_labels[%d]=%s' % (self.mname, inst, included_labels)) + self.source.pmda.debug('store_inst mname=%s inst=%d instname="%s" value="%s"' % (self.mname, inst, iname, val)) else: - for idx in range (len(instances)): - 
included_labels, optional_labels = self.source.filter_labelset(labels[idx][idx]) - inst = self.indom_table.intern_lookup_value(instances[idx]) - self.values[inst] = values[idx] - self.inst_labels[inst] = included_labels + for idx, (iname, val) in enumerate(zip(instances, values)): + inst = lookup(iname) + val_dict[inst] = val + lbl_dict[inst], _ = self.source.filter_labelset(labels[idx][idx]) + if self.source.pmda.dbg: - self.source.pmda.debug('store_inst mname=%s inst=%d instname="%s" value="%s"' % (self.mname, inst, instances[idx], values[idx])) - self.source.pmda.debug('store_inst mname=%s inst_labels[%d]=%s' % (self.mname, inst, included_labels)) + self.source.pmda.debug('store_inst mname=%s inst=%d instname="%s" value="%s"' % (self.mname, inst, iname, val)) + def save(self): @@ -458,6 +464,19 @@ class Source(object): self.metadatalist = None self.document = None + self.metric_type_map = { + "sum": ("counter", "sum"), + "gauge": ("gauge", "gauge"), + "histogram": ("counter", "histogram"), + "summary": ("counter", "summary") + } + + self.metric_data_type_map = { + "asInt": "int", + "asDouble": "double", + "asString": "string" + } + self.refresh_time = 0 # "never" if not is_scripted: # source is a URL. 
Create a session for it and initialize a few things @@ -565,15 +584,13 @@ class Source(object): def add_metric(self, metric): try: mname = metric["name"] - fullname = ("opentelemetry.%s.%s") % (self.name, mname) + fullname = f"opentelemetry.{self.name}.{mname}" if metric["type"] == "histogram" or metric["type"] == "summary": included_labels, optional_labels = self.filter_labelset(metric["labels"]) elif len(metric["labels"]) == 0: included_labels, optional_labels = self.filter_labelset(metric["labels"][0]) else: - all_labels = {} - for idx in range (len(metric["labels"])): - all_labels.update(metric["labels"][idx][idx]) + all_labels = {k: v for idx, d in enumerate(metric["labels"]) for k, v in d[idx].items()} included_labels, optional_labels = self.filter_labelset(all_labels) self.pmda.debug("included_labels '%s'" % (included_labels)) if self.pmda.dbg else None self.pmda.debug("optional_labels '%s'" % (optional_labels)) if self.pmda.dbg else None @@ -645,115 +662,105 @@ class Source(object): def parse_metric(self, resource_name, scope_name, metric_dictionary): num_metrics = 0 metric = {} - metric["prefix_name"] = "%s.%s" % (resource_name, scope_name) + metric["prefix_name"] = f"{resource_name}.{scope_name}" metric["name"] = metric_dictionary["name"] - metric["unit"] = metric_dictionary["unit"] - metric["description"] = metric_dictionary["description"] + metric["unit"] = metric_dictionary.get("unit", "") + metric["description"] = metric_dictionary.get("description", "") # determine metric type - if "sum" in metric_dictionary: # sum or counter - metric["semantic"] = "counter" - metric["type"] = "sum" - elif "gauge" in metric_dictionary: # gauge - metric["semantic"] = "gauge" - metric["type"] = "gauge" - elif "histogram" in metric_dictionary: # histogram - metric["semantic"] = "counter" - metric["type"] = "histogram" - elif "summary" in metric_dictionary: # sum - metric["semantic"] = "counter" - metric["type"] = "summary" + found_type = next((t for t in 
self.metric_type_map if t in metric_dictionary), None) + if found_type: + metric["semantic"], metric["type"] = self.metric_type_map[found_type] else: - # what to do in this case? set to gauge or throw an exception? - self.pmda.debug("error") if self.pmda.dbg else None + metric["semantic"], metric["type"] = ("gauge", "gauge") # Default to gauge + if self.pmda.dbg: self.pmda.debug("Unknown OTel metric type") - if "aggregationTemporality" in metric_dictionary[metric["type"]]: - metric["a_temp"] = metric_dictionary[metric["type"]]["aggregationTemporality"] + m_type = metric["type"] + inner_dict = metric_dictionary[m_type] - if "isMonotonic" in metric_dictionary[metric["type"]]: - metric["isMonotonic"] = metric_dictionary[metric["type"]]["isMonotonic"] - else: - metric["isMonotonic"] = False + metric["a_temp"] = inner_dict.get("aggregationTemporality", "") + + metric["isMonotonic"] = inner_dict.get("isMonotonic", False) # determine if metric is singular - if len(metric_dictionary[metric["type"]]["dataPoints"]) == 1 and metric["type"] != "histogram" and metric["type"] != "summary": - metric["is_singular"] = True - else: - metric["is_singular"] = False + data_points = inner_dict.get("dataPoints", []) + metric["is_singular"] = ( + len(data_points) == 1 and + m_type not in ("histogram", "summary") + ) metric["values"] = [] metric["instances"] = [] metric["labels"] = [] - data_points = metric_dictionary[metric["type"]]["dataPoints"] - if metric["type"] == "sum" or metric["type"] == "gauge": - inst_num = 0 - for instance in data_points: - metric["time"] = instance["timeUnixNano"] + for inst_num, instance in enumerate(data_points): + self.pmda.debug("instance: %s" % instance) if self.pmda.dbg else None + metric["time"] = instance.get("timeUnixNano", "") + if metric["type"] in ("sum", "gauge"): labels = self.collect_attributes(instance["attributes"]) metric["labels"].append({inst_num: labels}) if "instance" in metric["labels"][inst_num]: 
metric["instances"].append(metric["labels"][inst_num]["instance"]) - inst_num += 1 elif "instname" in metric["labels"]: metric["instances"].append(metric["labels"][inst_num]["instname"]) - inst_num += 1 else: metric["instances"].append("inst-%s" % inst_num) - inst_num += 1 - - if "asDouble" in instance: - metric["data_type"] = "double" - metric["values"].append(instance["asDouble"]) - elif "asInt" in instance: - metric["data_type"] = "int" - metric["values"].append(instance["asInt"]) - elif "asString" in instance: - metric["data_type"] = "string" - metric["values"].append(instance["asString"]) - else: - self.pmda.debug("error2") if self.pmda.dbg else None - if self.add_metric(metric): - num_metrics += 1 - else: # histogram or sum - for instance in data_points: - count_attr = instance["count"] - sum_attr = instance["sum"] - metric["time"] = instance["timeUnixNano"] - metric["data_type"] = "int" - metric["labels"] = self.collect_attributes(instance["attributes"]) - if metric["type"] == "histogram": - self.pmda.debug("instance: %s" % instance) if self.pmda.dbg else None - metric["values"] = instance["bucketCounts"] - for item in instance["explicitBounds"]: - metric["instances"].append("le=%s" % item) - metric["instances"].append("le=inf") - else: # summary - quantiles = instance["quantileValues"] - metric["data_type"] = "double" - for item in quantiles: - metric["instances"].append("quantile: %s" % item["quantile"]) - metric["values"].append(item["value"]) - - if self.add_metric(metric): - num_metrics += 1 - # add additional metrics for histogram type - metric_count_name = metric["name"] + "_count" - metric_sum_name = metric["name"] + "_sum" + key = next((t for t in self.metric_data_type_map if t in instance), None) + metric["data_type"] = self.metric_data_type_map.get(key) + self.pmda.debug("d_type: %s" % metric["data_type"]) if self.pmda.dbg else None + metric["values"].append(instance[key]) - metric["name"] = metric_count_name - metric["values"].clear(); 
metric["values"].append(count_attr) - metric["is_singular"] = True - metric["data_type"] = "int" if self.add_metric(metric): num_metrics += 1 + # histogram or sum metric + else: + m_time = instance.get("timeUnixNano") + m_labels = self.collect_attributes(instance.get("attributes")) + base_name = metric["name"] + + main_metric = metric.copy() + main_metric.update({ + "time": m_time, + "instances": [], + "values": [], + "labels": m_labels, + "is_singular": False + }) + + if m_type == "histogram": + main_metric["data_type"] = "int" + main_metric["values"] = instance.get("bucketCounts", []) + main_metric["instances"] = [f"le={b}" for b in instance.get("explicitBounds", [])] + main_metric["instances"].append("le=inf") + else: # summary + main_metric["data_type"] = "double" + quantiles = instance.get("quantileValues", []) + main_metric["instances"] = [f"quantile: {q['quantile']}" for q in quantiles] + main_metric["values"] = [q['value'] for q in quantiles] - metric["name"] = metric_sum_name - metric["values"].clear(); metric["values"].append(sum_attr) - metric["data_type"] = "double" - if self.add_metric(metric): + if self.add_metric(main_metric): num_metrics += 1 + + suffix_data = ( + ("_count", instance.get("count", 0), "int"), + ("_sum", instance.get("sum", 0), "double") + ) + + for suffix, val, d_type in suffix_data: + sub_metric = metric.copy() + sub_metric.update({ + "name": f"{base_name}{suffix}", + "time": m_time, + "values": [val], + "data_type": d_type, + "instances": ["total"], + "labels": m_labels, + "is_singular": True + }) + if self.add_metric(sub_metric): + num_metrics += 1 + return num_metrics def parse_lines(self, text): @@ -762,7 +769,7 @@ class Source(object): for resource in data: resource_attributes = self.collect_attributes(resource["resource"]["attributes"]) - resource_name = resource_attributes["service_name"] + resource_name = resource_attributes.get("service_name", "unknown_service") scope_metrics = resource["scopeMetrics"] for scope in 
scope_metrics: scope_name = scope["scope"]["name"] @@ -907,7 +914,7 @@ class Source(object): except Exception as e: self.pmda.stats_status[self.cluster] = 'failed to fetch URL or execute script %s: %s' % (self.path, e) - self.pmda.stats_status_code[self.cluster] = status_code + self.pmda.stats_status_code[self.cluster] = 1 self.pmda.debug('cannot fetch URL or execute script %s: %s' % (self.path, e)) def refresh2(self, timeout):