Source code for enkie.estimators.equilibrator_gibbs_estimator

"""Estimation of Gibbs free energies using eQuilibrator."""

import logging
from typing import Dict, List, Tuple, Union

import numpy as np
import numpy.linalg as la
from equilibrator_api import ComponentContribution, Reaction

from ..commons import Q
from ..compartment_parameters import CompartmentParameters
from ..constants import DEFAULT_RMSE, LOG10, F, R
from ..miriam_metabolite import MiriamMetabolite
from ..utils import qrvector, qvector
from .gibbs_estimator_interface import GibbsEstimatorInterface

_DEFAULT_INCORRECT_METABOLITES: List[str] = [
    # Glycogen has different composition in different models.
    "glycogen",
    "bigg.metabolite:glycogen",
]

# Hard mappings between identifiers. These will be removed once eQuilibrator updates the
# MetanetX version.
_HARD_MAPPINGS: Dict[str, str] = {
    "bigg.metabolite:ppa": "kegg.compound:C00163",
    "bigg.metabolite:but": "kegg.compound:C00246",
}

logger = logging.getLogger(__name__)


[docs]class EquilibratorGibbsEstimator(GibbsEstimatorInterface): """Estimation of Gibbs free energies using `equilibrator_api`. Parameters ---------- rmse_inf : Q, optional Uncertainty to use for unknown groups or compounds. """ def __init__(self, rmse_inf: Q = DEFAULT_RMSE): assert rmse_inf.check("kJ/mol") self._eq_api = ComponentContribution(rmse_inf=rmse_inf) self._rmse_inf = rmse_inf self._incorrect_metabolites = _DEFAULT_INCORRECT_METABOLITES.copy() @property
[docs] def eq_api(self): """Gets the equilibrator API object.""" return self._eq_api
@property
[docs] def rmse_inf(self): """Gets the uncertainty used for unknown compounds or groups.""" return self._rmse_inf
@property
[docs] def incorrect_metabolites(self) -> List[str]: """Gets the list of metabolites that are not correctly recognized by equilibrator.""" return self._incorrect_metabolites
@incorrect_metabolites.setter def incorrect_metabolites(self, value: List[str]): """Sets the list of metabolites that are not correctly recognized by equilibrator.""" self._incorrect_metabolites = value
[docs] def get_dfg0_prime( self, S: np.array, metabolites: List[MiriamMetabolite], parameters: CompartmentParameters, ) -> Tuple[Q, Q]: """Estimates the standard Gibbs free energies for a reaction network using `equilibrator-api`. Parameters ---------- S : np.array n-by-m stoichiometric matrix of the reaction network. metabolites : List[Metabolite] A m-elements list describing the compounds in the network. compartment_parameters : CompartmentParameters The prior for the physiological parameters of each compartment, such as pH and ionic strength. Returns ------- Tuple[Q, Q] A tuple, whose first element is the vector of the mean estimate, and the second is a square root :math:`Q` of the covariance matrix on the estimation uncertainty :math:`\Sigma`, such that :math:`QQ^\intercal = \Sigma`. """ dfg0_prime_mean, dfg0_prime_cov_sqrt, unknowns_basis = self._get_dfg0( self._make_metabolite_ids(metabolites), qrvector([parameters.pH(m.compartment) for m in metabolites]), qrvector([parameters.pMg(m.compartment) for m in metabolites]), qrvector([parameters.I(m.compartment) for m in metabolites]), parameters.T(), ) # Fit the estimates for unknown degrees of freedom. dfg0_prime_mean = self._fit_unknown_dfg0(S, dfg0_prime_mean, unknowns_basis) # Transform the formation energies according to the properties of their # compartment. dfg0_prime_correction = self._get_dfg0_correction_compartment( metabolites, parameters ) dfg0_prime_mean = dfg0_prime_mean + dfg0_prime_correction return dfg0_prime_mean, dfg0_prime_cov_sqrt
def _get_dfg0( self, met_keys: List[str], met_pH: Q, met_pMg: Q, met_I: Q, met_T: Q, ) -> Tuple[Q, Q, Q]: """Compute estimates for the formation energies of a list of metabolites. The method uses eQuilibrator to return mean as well as covariance of the estimates. Parameters ---------- met_keys : List[str] List of n keys identifying the metabolites. These can be in any format or namespace supported by eQuilibrator. met_pH : Q Vector containing the pH of the compartment of each metabolite. met_pMg : Q Vector containing the pMg of the compartment of each metabolite. met_I : Q Vector containing the ionic strength of the compartment of each metabolite. met_T : Q Vector containing the temperature of the compartment of each metabolite. Returns ------- Tuple[Q, Q, Q] Tuple with a n-dimensional vector containing the estimates of the n metabolites, a square root of the covariance matrix of the estimation uncertainty and a matrix containing the columns of the sqrt-covariance for which equilibrator has no estimate (the uncertainty is virtually infinite). """ # Use half-reactions as a workaround for eQuilibrator not exposing # formation energies explicitely. half_rxn_strings = ["= " + m for m in met_keys] rxns = [self._try_parse_reaction(r) for r in half_rxn_strings] # Only work with metabolites for which eQuilibrator can provide an estimate. # The remaining metabolites will be handled further down. missing_rxn_ids, covered_rxn_ids, covered_rxns = [], [], [] for i, rxn in enumerate(rxns): if rxn is not None: covered_rxn_ids.append(i) covered_rxns.append(rxn) else: missing_rxn_ids.append(i) met_pH = [met_pH[i] for i in covered_rxn_ids] met_I = [met_I[i] for i in covered_rxn_ids] met_pMg = [met_pMg[i] for i in covered_rxn_ids] if len(missing_rxn_ids) > 0: logger.warning( "\n ".join( ["The following metabolites could not be found in eQuilibrator:"] + [met_keys[idx] for idx in missing_rxn_ids] ) ) met_count = len(met_keys) dfg0_mean = Q(np.zeros((met_count, 1)), "kJ/mol") dfg0_cov_sqrt = Q(np.zeros((len(rxns), len(rxns))), "kJ/mol") default_dfg0_std = self.rmse_inf basis_size = 0 unreliable_basis_ids = [] if covered_rxn_ids: dfg0_mean[covered_rxn_ids, 0] = qrvector( [ self._try_get_drg0_prime_single(*rxn_args, met_T) for rxn_args in zip(covered_rxns, met_pH, met_pMg, met_I) ] ) _, cov_sqrt = self.eq_api.standard_dg_prime_multi(covered_rxns, "fullrank") basis_size = cov_sqrt.shape[1] dfg0_cov_sqrt[covered_rxn_ids, 0:basis_size] = Q(cov_sqrt, "kJ/mol") # When equilibrator recognizes an metabolite or group but has # insufficient information to make an estimate, it will return an # unreliable estimate and an uncertainty equal to rmse_inf. We flag # and fit these values later. unreliable_basis_ids = [ i for i in range(basis_size) if np.any(np.abs(dfg0_cov_sqrt[:, i]) >= self.rmse_inf) ] # Fill estimates for non-covered metabolites with conservative values. default_dfg0_mean = np.mean(dfg0_mean[covered_rxn_ids, 0]) dfg0_mean[missing_rxn_ids, 0] = default_dfg0_mean # Enforce correlation between multiple occurrences of the same metabolite in # different compartments when eQuilibrator doesn't recognize the metabolite. metabolite_basis_id: Dict[str, int] = {} eq_basis_size = basis_size for rxn_id in missing_rxn_ids: metabolite_id = met_keys[rxn_id] if metabolite_id in metabolite_basis_id: dfg0_cov_sqrt[ rxn_id, metabolite_basis_id[metabolite_id] ] = default_dfg0_std else: metabolite_basis_id[metabolite_id] = basis_size dfg0_cov_sqrt[rxn_id, basis_size] = default_dfg0_std basis_size += 1 # Identify dimensions for which eQuilibrator has no estimate. unreliable_basis_ids += range(eq_basis_size, basis_size) unknowns_basis = dfg0_cov_sqrt[:, unreliable_basis_ids].m_as("kJ/mol") return dfg0_mean, dfg0_cov_sqrt, unknowns_basis def _get_dfg0_correction_compartment( self, metabolites: List[MiriamMetabolite], parameters: CompartmentParameters ): nH = np.array([[m.nH] for m in metabolites]) z = np.array([[m.z] for m in metabolites]) pH = qvector([parameters.pH(m.compartment) for m in metabolites]) phi = qvector([parameters.phi(m.compartment) for m in metabolites]) pH_correction = -R * parameters.T() * LOG10 * pH * nH phi_correction = F * phi * z return pH_correction + phi_correction def _make_metabolite_id( self, metabolite: MiriamMetabolite, ) -> str: if metabolite.miriam_id in self.incorrect_metabolites: return "__unreliable__:" + metabolite.miriam_id elif metabolite.miriam_id in _HARD_MAPPINGS: return _HARD_MAPPINGS[metabolite.miriam_id] else: return metabolite.miriam_id def _make_metabolite_ids( self, metabolites: List[MiriamMetabolite], ) -> List[str]: return [self._make_metabolite_id(m) for m in metabolites] def _try_parse_reaction( self, rxn_string: str, ) -> Union[Reaction, None]: """Parse a string into a Reaction object. Returns none if eQuilibrator cannot provide an estimate for the free energy of the reaction. """ logging_level = logging.getLogger().level logging.getLogger().setLevel(logging.ERROR) try: reaction = self._eq_api.parse_reaction_formula(rxn_string) except Exception: # pylint: disable=broad-except logging.getLogger().setLevel(logging_level) return None for cpd, _ in reaction.items(): if len(cpd.microspecies) == 0: logging.getLogger().setLevel(logging_level) return None logging.getLogger().setLevel(logging_level) return reaction def _try_get_drg0_prime_single( self, rxn: Reaction, pH: Q, pMg: Q, I: Q, T: Q, ) -> Q: """Estimate the transformed standard free energy of a reaction.""" self._eq_api.p_h = pH self._eq_api.ionic_strength = I self._eq_api.temperature = T self._eq_api.p_mg = pMg return self._eq_api.standard_dg_prime(rxn).value def _fit_unknown_dfg0( self, S: np.ndarray, dfg0_prime_mean: Q, unknowns_basis: np.ndarray ) -> Q: # Perform Ordinary Least Squares on the unknown metabolites. X = S.T @ unknowns_basis y = -S.T @ dfg0_prime_mean.m_as("kJ/mol") beta = la.lstsq(X, y, rcond=None)[0] return dfg0_prime_mean + Q(unknowns_basis @ beta, "kJ/mol")