From 3c38db66a8a4af71dd0943f2e78ee1002606d3ec Mon Sep 17 00:00:00 2001 From: Mohit-Lakra Date: Tue, 20 Jan 2026 18:17:40 +0530 Subject: [PATCH] Add sampling-driven dFBA module and optional dependency guards --- README.md | 37 +++++- dingo/MetabolicNetwork.py | 7 +- dingo/__init__.py | 31 ++++- dingo/dfba.py | 233 ++++++++++++++++++++++++++++++++++++++ dingo/loading_models.py | 22 +++- dingo/nullspace.py | 14 ++- tests/test_dfba.py | 89 +++++++++++++++ 7 files changed, 421 insertions(+), 12 deletions(-) create mode 100644 dingo/dfba.py create mode 100644 tests/test_dfba.py diff --git a/README.md b/README.md index 9e122771..d34a7585 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,42 @@ The default option is to run the sequential [Multiphase Monte Carlo Sampling alg **Tip**: After the first run of MMCS algorithm the polytope stored in object `sampler` is usually more rounded than the initial one. Thus, the function `generate_steady_states()` becomes more efficient from run to run. +## Dynamic Flux Balance Analysis (dFBA) + +Dynamic FBA often suffers from non-unique steady-state flux distributions when +transitioning from one time step to the next (Mahadevan et al., 2002). The +DFBAlab project (Sánchez et al., 2014) addresses this via lexicographic linear +programming, while COMETS (Harcombe et al., 2014; Borenstein et al., 2021) +combines optimization with dynamic resource tracking. `dingo` now provides a +`DynamicFBA` helper that follows those ideas but resolves the non-uniqueness via +sampling: at each step the simulator draws multiple optimal flux states, +selects the one closest to the previous profile, and updates the extracellular +medium according to the sampled exchange rates. + +```python +from dingo import MetabolicNetwork, DynamicFBA + +model = MetabolicNetwork.from_json('path/to/model.json') +simulator = DynamicFBA( + model, + time_step=0.25, + total_time=24.0, + initial_biomass=0.1, + initial_concentrations={"EX_glc__D_e": 5.0, "EX_o2_e": 10.0}, + sample_size=64, +) + +result = simulator.run() +print(result.time) +print(result.biomass) +print(result.concentrations["EX_glc__D_e"]) +``` + +`DynamicFBAResult` exposes the time grid, biomass trajectory, extracellular +concentrations, and the flux vector picked at each step. Advanced users can +adjust the sampling method and its hyper-parameters to explore different dFBA +variants or couple multiple organisms (see also DOI:10.1371/journal.pcbi.1007786). + #### Rounding the polytope @@ -316,4 +352,3 @@ The default number of cells is 5x5=25. dingo uses the package `plotly` for plott ![histogram](./doc/aconta_ppc_copula.png) - diff --git a/dingo/MetabolicNetwork.py b/dingo/MetabolicNetwork.py index 7be66c0a..694ae6c9 100644 --- a/dingo/MetabolicNetwork.py +++ b/dingo/MetabolicNetwork.py @@ -10,7 +10,10 @@ import numpy as np import sys from typing import Dict -import cobra +try: + import cobra +except ImportError: # pragma: no cover - optional dependency + cobra = None from dingo.loading_models import read_json_file, read_mat_file, read_sbml_file, parse_cobra_model from dingo.pyoptinterface_based_impl import fba,fva,inner_ball,remove_redundant_facets @@ -84,6 +87,8 @@ def from_sbml(cls, arg): @classmethod def from_cobra_model(cls, arg): + if cobra is None: + raise ImportError("cobra is required to build a MetabolicNetwork from a cobra model.") if (not isinstance(arg, cobra.core.model.Model)): raise Exception( "An unknown input format given to initialize a metabolic network object." diff --git a/dingo/__init__.py b/dingo/__init__.py index 067c8b98..16d195a6 100644 --- a/dingo/__init__.py +++ b/dingo/__init__.py @@ -19,17 +19,28 @@ get_matrices_of_low_dim_polytope, get_matrices_of_full_dim_polytope, ) -from dingo.illustrations import ( - plot_copula, - plot_histogram, -) +try: + from dingo.illustrations import ( + plot_copula, + plot_histogram, + ) +except ImportError: # pragma: no cover - optional plotting deps + plot_copula = None + plot_histogram = None from dingo.parser import dingo_args from dingo.MetabolicNetwork import MetabolicNetwork -from dingo.PolytopeSampler import PolytopeSampler +try: + from dingo.PolytopeSampler import PolytopeSampler +except ImportError: # pragma: no cover - optional sampling deps + PolytopeSampler = None +from dingo.dfba import DynamicFBA, DynamicFBAResult from dingo.pyoptinterface_based_impl import fba, fva, inner_ball, remove_redundant_facets, set_default_solver -from volestipy import HPolytope +try: + from volestipy import HPolytope +except ImportError: # pragma: no cover - optional sampling deps + HPolytope = None def get_name(args_network): @@ -86,6 +97,11 @@ def dingo_main(): if args.histogram: + if plot_histogram is None: + raise ImportError( + "matplotlib and plotly are required to plot histograms." + ) + if args.steady_states is None: raise Exception( "A path to a pickle file that contains steady states of the model has to be given." @@ -164,6 +180,9 @@ def dingo_main(): else: raise Exception("An unknown format file given.") + if PolytopeSampler is None: + raise ImportError("volestipy is required to sample steady states.") + sampler = PolytopeSampler(model) if args.preprocess_only: diff --git a/dingo/dfba.py b/dingo/dfba.py new file mode 100644 index 00000000..d082304a --- /dev/null +++ b/dingo/dfba.py @@ -0,0 +1,233 @@ +"""Dynamic flux balance analysis utilities. + +This module implements a sampling-assisted dynamic FBA (dFBA) workflow inspired by +Mahadevan et al. (2002), Sánchez et al. (2014) and COMETS (Harcombe et al. 2014, +Borenstein et al. 2021). The non-uniqueness of the steady-state FBA solutions is +mitigated by sampling feasible flux distributions at every time step and picking +(at steady state) the flux profile that is closest to the one selected during the +previous step. During the first step the mean value of the sampled fluxes is +used, mirroring the suggestion in DFBAlab. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, Optional + +import math +import warnings + +import numpy as np + +from dingo.MetabolicNetwork import MetabolicNetwork + +try: + from dingo.PolytopeSampler import PolytopeSampler +except ImportError: # pragma: no cover - optional dependency (volestipy) + PolytopeSampler = None + + +@dataclass +class DynamicFBAResult: + """Container collecting the time series produced by a dFBA simulation.""" + + time: np.ndarray + biomass: np.ndarray + concentrations: Dict[str, np.ndarray] + fluxes: np.ndarray + + +class DynamicFBA: + """Dynamic FBA simulator that breaks ties via sampling. + + Parameters + ---------- + model : MetabolicNetwork + Metabolic network that will be simulated. + time_step : float + Time step in hours. + total_time : float + Total length of the simulation horizon in hours. + initial_biomass : float + Initial biomass concentration (gDW/L or in units compatible with the + biomass reaction). + initial_concentrations : dict + Mapping from exchange reaction id to extracellular concentration + (mmol/L or any unit consistent with the flux units). + sample_size : int, optional + Number of flux samples generated at every step (default 128). + sampler_method : str, optional + One of the sampling routines supported by + :func:`PolytopeSampler.generate_steady_states_no_multiphase`. + sampler_kwargs : dict, optional + Extra keyword arguments forwarded to the sampling routine. + biomass_tol : float, optional + Threshold below which the biomass is considered extinct and substrate + uptake is disabled. + """ + + def __init__( + self, + model: MetabolicNetwork, + time_step: float, + total_time: float, + initial_biomass: float, + initial_concentrations: Dict[str, float], + sample_size: int = 128, + sampler_method: str = "billiard_walk", + sampler_kwargs: Optional[Dict[str, float]] = None, + biomass_tol: float = 1e-12, + ) -> None: + if time_step <= 0: + raise ValueError("time_step has to be strictly positive.") + if total_time <= 0: + raise ValueError("total_time has to be strictly positive.") + if initial_biomass < 0: + raise ValueError("initial_biomass must be non-negative.") + if sample_size < 1: + raise ValueError("sample_size must be positive.") + if model.biomass_index is None: + raise ValueError("The metabolic network must expose a biomass reaction.") + + self._model = model + self._dt = float(time_step) + self._total_time = float(total_time) + self._initial_biomass = float(initial_biomass) + self._sample_size = max(1, int(sample_size)) + self._sampler_method = sampler_method + self._biomass_tol = float(biomass_tol) + self._n_steps = int(math.ceil(self._total_time / self._dt)) + + self._rxn_index = {rid: idx for idx, rid in enumerate(self._model.reactions)} + self._tracked_exchanges = self._validate_concentrations(initial_concentrations) + self._initial_concentrations = { + rxn_id: float(initial_concentrations[rxn_id]) + for rxn_id in self._tracked_exchanges + } + + self._sampler_kwargs = { + "burn_in": 0, + "thinning": 1, + "variance": 1.0, + "bias_vector": None, + "ess": max(100, self._sample_size), + } + if sampler_kwargs: + self._sampler_kwargs.update(sampler_kwargs) + + self._base_lb = np.copy(self._model.lb) + + def _validate_concentrations(self, concentrations: Dict[str, float]) -> Dict[str, int]: + if not concentrations: + raise ValueError("At least one extracellular concentration must be provided.") + result = {} + for rxn_id, conc in concentrations.items(): + if rxn_id not in self._rxn_index: + raise KeyError(f"Reaction {rxn_id} not present in the model.") + if conc < 0: + raise ValueError(f"Concentration for {rxn_id} must be non-negative.") + result[rxn_id] = self._rxn_index[rxn_id] + return result + + def run(self) -> DynamicFBAResult: + """Simulate the model dynamics with sampling-guided flux selection.""" + + biomass = self._initial_biomass + concentrations = {rid: val for rid, val in self._initial_concentrations.items()} + time_trace = [0.0] + biomass_trace = [biomass] + conc_trace: Dict[str, list] = { + rid: [val] for rid, val in concentrations.items() + } + flux_trace = [] + prev_flux: Optional[np.ndarray] = None + + try: + for step in range(self._n_steps): + self._apply_dynamic_bounds(concentrations, biomass) + flux = self._select_flux(prev_flux) + flux_trace.append(flux) + prev_flux = flux.copy() + + mu = flux[self._model.biomass_index] + biomass_prev = biomass + biomass = self._update_biomass(biomass, mu) + biomass_trace.append(biomass) + + for rxn_id, idx in self._tracked_exchanges.items(): + concentrations[rxn_id] = self._update_concentration( + concentrations[rxn_id], flux[idx], biomass_prev + ) + conc_trace[rxn_id].append(concentrations[rxn_id]) + + time_trace.append(min(self._total_time, (step + 1) * self._dt)) + finally: + # Restore the base bounds so that the model can be reused afterwards. + self._model.lb = np.copy(self._base_lb) + + time_arr = np.asarray(time_trace) + biomass_arr = np.asarray(biomass_trace) + conc_history = {rid: np.asarray(values) for rid, values in conc_trace.items()} + flux_arr = np.asarray(flux_trace) + + return DynamicFBAResult(time_arr, biomass_arr, conc_history, flux_arr) + + def _apply_dynamic_bounds(self, concentrations: Dict[str, float], biomass: float) -> None: + lb = np.copy(self._base_lb) + if biomass <= self._biomass_tol: + for idx in self._tracked_exchanges.values(): + lb[idx] = 0.0 + else: + for rxn_id, idx in self._tracked_exchanges.items(): + conc = concentrations[rxn_id] + limit = -conc / (self._dt * biomass) + lb[idx] = max(lb[idx], limit) + self._model.lb = lb + + def _select_flux(self, prev_flux: Optional[np.ndarray]) -> np.ndarray: + try: + samples = self._sample_fluxes() + except Exception as exc: # pragma: no cover - defensive path + warnings.warn( + f"Sampling failed ({exc}). Falling back to a single FBA solution.", + RuntimeWarning, + ) + return self._fallback_flux() + + if samples.size == 0: + return self._fallback_flux() + + if prev_flux is None: + selected = samples.mean(axis=0) + else: + distances = np.linalg.norm(samples - prev_flux, axis=1) + selected = samples[np.argmin(distances)] + + return np.asarray(selected, dtype=float) + + def _sample_fluxes(self) -> np.ndarray: + if PolytopeSampler is None: + raise ImportError("volestipy is required for sampling in DynamicFBA.") + + sampler = PolytopeSampler(self._model) + steady_states = sampler.generate_steady_states_no_multiphase( + method=self._sampler_method, + n=self._sample_size, + burn_in=int(self._sampler_kwargs.get("burn_in", 0)), + thinning=int(self._sampler_kwargs.get("thinning", 1)), + variance=float(self._sampler_kwargs.get("variance", 1.0)), + bias_vector=self._sampler_kwargs.get("bias_vector"), + ess=int(self._sampler_kwargs.get("ess", self._sample_size)), + ) + return np.asarray(steady_states.T, dtype=float) + + def _fallback_flux(self) -> np.ndarray: + flux, _ = self._model.fba() + return np.asarray(flux, dtype=float) + + def _update_biomass(self, biomass: float, growth_rate: float) -> float: + return max(0.0, biomass + self._dt * growth_rate * biomass) + + def _update_concentration(self, conc: float, exchange_flux: float, biomass: float) -> float: + new_conc = conc + self._dt * exchange_flux * biomass + return max(0.0, new_conc) diff --git a/dingo/loading_models.py b/dingo/loading_models.py index 84efceb5..0d583ced 100644 --- a/dingo/loading_models.py +++ b/dingo/loading_models.py @@ -8,7 +8,18 @@ import json import numpy as np -import cobra + +try: + import cobra +except ImportError: # pragma: no cover - optional dependency + cobra = None + + +def _require_cobra(): + if cobra is None: + raise ImportError( + "cobra is required to read JSON/MAT/SBML metabolic network files." + ) def read_json_file(input_file): """A Python function to Read a Bigg json file and returns, @@ -23,6 +34,8 @@ def read_json_file(input_file): input_file -- a json file that contains the information about a mettabolic network, for example see http://bigg.ucsd.edu/models """ + _require_cobra() + try: cobra.io.load_matlab_model( input_file ) except: @@ -45,6 +58,8 @@ def read_mat_file(input_file): Keyword arguments: input_file -- a mat file that contains a MATLAB structure with the information about a mettabolic network, for example see http://bigg.ucsd.edu/models """ + _require_cobra() + try: cobra.io.load_matlab_model( input_file ) except: @@ -71,6 +86,8 @@ def read_sbml_file(input_file): input_file -- a xml file that contains an SBML model with the information about a mettabolic network, for example see: https://github.com/VirtualMetabolicHuman/AGORA/blob/master/CurrentVersion/AGORA_1_03/AGORA_1_03_sbml/Abiotrophia_defectiva_ATCC_49176.xml """ + _require_cobra() + try: cobra.io.read_sbml_model( input_file ) except: @@ -82,6 +99,7 @@ def read_sbml_file(input_file): return (parse_cobra_model( model )) def parse_cobra_model(cobra_model): + _require_cobra() inf_bound=1e5 @@ -141,5 +159,3 @@ def parse_cobra_model(cobra_model): - - diff --git a/dingo/nullspace.py b/dingo/nullspace.py index f6ca33ed..f12be7b8 100644 --- a/dingo/nullspace.py +++ b/dingo/nullspace.py @@ -8,8 +8,13 @@ import numpy as np from scipy import linalg -import sparseqr import scipy.sparse.linalg +import warnings + +try: + import sparseqr +except ImportError: # pragma: no cover - optional dependency + sparseqr = None # Build a Python function to compute the nullspace of the stoichiometric matrix and a shifting to the origin @@ -42,6 +47,13 @@ def nullspace_sparse(Aeq, beq): beq -- a m-dimensional vector """ + if sparseqr is None: + warnings.warn( + "sparseqr is not available; using the dense nullspace routine instead.", + RuntimeWarning, + ) + return nullspace_dense(Aeq, beq) + N_shift = np.linalg.lstsq(Aeq, beq, rcond=None)[0] Aeq = Aeq.T Aeq = scipy.sparse.csc_matrix(Aeq) diff --git a/tests/test_dfba.py b/tests/test_dfba.py new file mode 100644 index 00000000..f0ebca9c --- /dev/null +++ b/tests/test_dfba.py @@ -0,0 +1,89 @@ +import unittest +from unittest import mock + +import numpy as np + +from dingo.MetabolicNetwork import MetabolicNetwork +import dingo.dfba as dfba_module + +DynamicFBA = dfba_module.DynamicFBA + + +class TestDynamicFBA(unittest.TestCase): + + def setUp(self): + lb = np.array([-10.0, 0.0], dtype=float) + ub = np.array([0.0, 1000.0], dtype=float) + S = np.array( + [ + [-1.0, 0.0], + [1.0, -1.0], + ], + dtype=float, + ) + metabolites = ["A_ext", "A_int"] + reactions = ["EX_A", "BIOMASS"] + biomass_index = 1 + objective = np.array([0.0, 1.0], dtype=float) + medium = {"EX_A": 10.0} + medium_indices = {"EX_A": 0} + exchanges = ["EX_A"] + + tuple_args = ( + lb, + ub, + S, + metabolites, + reactions, + biomass_index, + objective, + medium, + medium_indices, + exchanges, + ) + self.model = MetabolicNetwork(tuple_args) + + def test_sampling_driven_dynamics(self): + class DummySampler: + def __init__(self, model): + self.calls = 0 + + def generate_steady_states_no_multiphase( + self, + method, + n, + burn_in, + thinning, + variance, + bias_vector, + ess, + ): + self.calls += 1 + base = np.array([[-5.0], [5.0]], dtype=float) + samples = np.tile(base, (1, n)) + if self.calls > 1: + samples[0] -= 0.1 + samples[1] += 0.1 + return samples + + simulator = DynamicFBA( + self.model, + time_step=0.1, + total_time=0.3, + initial_biomass=0.1, + initial_concentrations={"EX_A": 5.0}, + sample_size=4, + ) + + with mock.patch.object(dfba_module, "PolytopeSampler", DummySampler): + result = simulator.run() + + self.assertEqual(result.fluxes.shape[0], 3) + self.assertEqual(result.fluxes.shape[1], len(self.model.reactions)) + self.assertGreater(result.biomass[-1], result.biomass[0]) + self.assertLess(result.concentrations["EX_A"][-1], result.concentrations["EX_A"][0]) + np.testing.assert_allclose(self.model.lb, np.array([-10.0, 0.0])) + + +if __name__ == "__main__": + unittest.main()