From 9e029e2460a9b1f86c4c3bc1d78085c4ebb5ef64 Mon Sep 17 00:00:00 2001 From: adrianodemarino Date: Mon, 25 Aug 2025 18:30:04 +0200 Subject: [PATCH 1/2] Add numerical stability to HMM weight calculation: prevent division by zero and round to 8 decimals for consistent results, avoiding micro-variations in high-throughput environments like AWS Fargate in prod-us --- modules/hmm_utils.py | 5 ++++- pyproject.toml | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/hmm_utils.py b/modules/hmm_utils.py index 8bda4a8..224829b 100644 --- a/modules/hmm_utils.py +++ b/modules/hmm_utils.py @@ -157,7 +157,10 @@ def run(self) -> None: ) weights[-2, :] = weights[-1, :] - weights = weights / weights.sum(axis=1, keepdims=True) + weight_sums = weights.sum(axis=1, keepdims=True) + weight_sums = np.where(weight_sums == 0, 1.0, weight_sums) + weights = weights / weight_sums + weights = np.round(weights, 8) weights[weights < 1 / (self.matches.size + 1)] = 0 # create sparse matrix of entire reference panel diff --git a/pyproject.toml b/pyproject.toml index e9cae49..de64701 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "selphi" -version = "1.5.2" +version = "1.5.3" description = "Genotype imputation using PBWT and haplotype selection" authors = ["Adriano De Marino ", "Sandra Bohn "] readme = "README.md" @@ -15,5 +15,5 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.5.2" +version = "1.5.3" version_files = ["pyproject.toml:version"] From 42a85a74af1dfc75e0bcdf7df22956402563e6f2 Mon Sep 17 00:00:00 2001 From: adrianodemarino Date: Mon, 25 Aug 2025 20:11:18 +0200 Subject: [PATCH 2/2] Remove the sum == 0 guard from HMM weight normalization --- modules/hmm_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/hmm_utils.py b/modules/hmm_utils.py index 224829b..70feae7 100644 --- a/modules/hmm_utils.py +++ b/modules/hmm_utils.py @@ -157,10 +157,7 @@ def run(self) -> None: ) 
weights[-2, :] = weights[-1, :] - weight_sums = weights.sum(axis=1, keepdims=True) - weight_sums = np.where(weight_sums == 0, 1.0, weight_sums) - weights = weights / weight_sums - weights = np.round(weights, 8) + weights = np.round(weights / weights.sum(axis=1, keepdims=True), 8) weights[weights < 1 / (self.matches.size + 1)] = 0 # create sparse matrix of entire reference panel