Source code for geckopy.protein

# Copyright 2021 Ginkgo Bioworks

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""API for proteins."""

import hashlib
import logging
import re
from math import isinf, isnan
from typing import Dict, FrozenSet, Tuple, Union
from warnings import warn

import optlang
from cobra import Configuration, Metabolite, Object, Reaction
from cobra.exceptions import OptimizationError
from cobra.util.context import resettable
from cobra.util.solver import (
    check_solver_status,
    linear_reaction_coefficients,
    set_objective,
)
from cobra.util.util import format_long_string


[docs]LOGGER = logging.getLogger(__name__)
[docs]UNIPROT_PATTERN = re.compile( r"(?:prot_)?[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
)
[docs]config = Configuration()
[docs]class Protein(Object): """Representation of an enzyme. A protein sets an upper bound to a set of reactions, given the kcat and concentration. Adapted from `cobra.Reaction`. In terms of the inner LP model, `Proteins` populates both variables (as pseudorreactions with the mentioned upper_bound) and constraints (as metabolites). Attributes ---------- id: str identifier of protein, should be an Uniprot ID or "prot_<Uniprot ID>" name: str human readable name concentration: float parsed from `initialAmount` of `Species` in the SBML specification. kcats: Kcats 1 / stoichimetry coefficients of its reactions. mw: float TODO: parsed from initalParameters/calculate from formula contribution: float value of flux of variable, only accessible after optimizing the model. lower_bound: float should be 0 upper_bound: float concentration * kcat if there is a concentration, mmw if is part of the pool constraint (unmeasured proteins) or 1000. formula: str charge: float """ def __init__( self, id: Union[str, Metabolite] = None, concentration: float = None, molecular_weight: float = 0.0, name: str = "", ): """Initialize with id.""" self.concentration = concentration self.mw = molecular_weight self._flux = None self.lower_bound = 0 self._reaction = set() self._ub = config.upper_bound self.compartment = "c" self.__metabolites = {} self._annotation = {} if isinstance(id, Metabolite): self.from_metabolite(id) else: Object.__init__(self, id, name) self.formula = "" self.charge = 0.0 self.kcats = Kcats(self) self._reaction.add(self)
[docs] def _set_id_with_model(self, value): """Overwrite parent id setter to change constraints and variables.""" if value in self.model.proteins: raise ValueError(f"This model already constains a protein '{value}'") forward_variable = self.forward_variable reverse_variable = self.reverse_variable self.model.constraints[self.id].name = value self._id = value self.model.proteins._generate_index() forward_variable.name = self.id reverse_variable.name = self.reverse_id
[docs] def from_metabolite(self, met: Metabolite): """Initialize `Protein` from `Metabolite`; i.e., when reading from SBML.""" if not UNIPROT_PATTERN.match(met.id): LOGGER.warning(f"Metabolite {met.id} does not use Uniprot ID.") Object.__init__(self, met.id, met.name) self.compartment = met.compartment self._reaction = met._reaction self.charge = met.charge self.formula = "" self._annotation = met._annotation
[docs] def update_variable_bounds(self): """Sync object bounds with inner model variable bounds.""" if self.model is None: return # We know that `lb <= ub`. if self.lower_bound > 0: self.forward_variable.set_bounds( lb=None if isinf(self.lower_bound) else self.lower_bound, ub=None if isinf(self.upper_bound) else self.upper_bound, ) self.reverse_variable.set_bounds(lb=0, ub=0) elif self.upper_bound < 0: self.forward_variable.set_bounds(lb=0, ub=0) self.reverse_variable.set_bounds( lb=None if isinf(self.upper_bound) else -self.upper_bound, ub=None if isinf(self.lower_bound) else -self.lower_bound, ) else: self.forward_variable.set_bounds( lb=0, ub=None if isinf(self.upper_bound) else self.upper_bound ) self.reverse_variable.set_bounds( lb=0, ub=None if isinf(self.lower_bound) else -self.lower_bound
)
[docs] def suscribe_to_pool(self, mmw: float): """Change internal pseudorreaction to have the common pool as reactant.""" if not hasattr(self, "_model"): warn( f"Cannot set kcat of Protein {self.id} which does not" f" belong to any model." ) return if not hasattr(self._model, "common_protein_pool"): warn( f"Cannot set kcat of Protein {self.id} whose Model does not" f" have a protein pool. Try `model.add_pool()` first." ) return self._model.constraints[ self._model.common_protein_pool.id ].set_linear_coefficients( { self.forward_variable: -mmw, self.reverse_variable: mmw, } ) self._model.common_protein_pool._reaction.add(self) self.metabolites = {self._model.common_protein_pool: -mmw}
[docs] def unsuscribe_to_pool(self): """Change internal pseudorreaction to have the common pool as reactant.""" if hasattr(self, "_model"): if hasattr(self._model, "common_protein_pool"): self._model.constraints[ self._model.common_protein_pool.id ].set_linear_coefficients( { self.forward_variable: 0, self.reverse_variable: 0, } ) self._model.common_protein_pool._reaction.remove(self) self.metabolites = {}
@property
[docs] def concentration(self) -> float: r"""Get upper bounds as [E] (conventionally in $\frac{mmol}{gDW}$). [E] multiplied by the kcat (expressed in the reaction stoichiometry as 1/kcat) yields $\frac{mmol}/{gDW h}$. Taken from [Benjamín J Sánchez et al., 2016] (https://www.embopress.org/doi/full/10.15252/msb.20167411). """ return self._concentration
@concentration.setter @resettable def concentration(self, value): self.add_concentration(value) @property
[docs] def upper_bound(self) -> float: r"""Get upper bounds as [E] (conventionally in $\frac{mmol}{gDW}$). [E] multiplied by the kcat (expressed in the reaction stoichiometry as 1/kcat) yields $\frac{mmol}/{gDW h}$. Taken from [Benjamín J Sánchez et al., 2016] (https://www.embopress.org/doi/full/10.15252/msb.20167411). """ return ( self._ub if self.concentration is None or isinf(self.concentration) or isnan(self.concentration) else self.concentration
) @upper_bound.setter @resettable def upper_bound(self, value: float): self._ub = value self.update_variable_bounds()
[docs] def add_concentration(self, value: float): """Add concentration value. It will unsuscribe the protein to the common protein pool, if suscribed. """ self._concentration = value self.unsuscribe_to_pool() self.update_variable_bounds()
@property
[docs] def reverse_id(self) -> str: """Generate the id of reverse_variable from the reaction's id.""" return "_".join( (self.id, "reverse", hashlib.md5(self.id.encode("utf-8")).hexdigest()[0:5])
) @property
[docs] def contribution(self) -> str: """Get primal value (analogous to flux) in the most recent solution.""" return self.flux
@property
[docs] def flux(self) -> float: """Get flux value in the most recent solution. Flux is the primal value of the corresponding variable in the model. Warnings -------- * Accessing reaction fluxes through a `Solution` object is the safer, preferred, and only guaranteed to be correct way. You can see how to do so easily in the examples. * Reaction flux is retrieved from the currently defined `self._model.solver`. The solver status is checked but there are no guarantees that the current solver state is the one you are looking for. * If you modify the underlying model after an optimization, you will retrieve the old optimization values. Raises ------ RuntimeError If the underlying model was never optimized beforehand or the reaction is not part of a model. OptimizationError If the solver status is anything other than 'optimal'. AssertionError If the flux value is not within the bounds. """ try: check_solver_status(self._model.solver.status) return self.forward_variable.primal - self.reverse_variable.primal except AttributeError: raise RuntimeError(f"Protein '{self.id}' is not part of a model.") # Due to below all-catch, which sucks, need to reraise these. except (RuntimeError, OptimizationError) as err: raise OptimizationError(err) # Would love to catch CplexSolverError and GurobiError here. except Exception as err: raise OptimizationError( f"Likely no solution exists. Original solver message: {err}."
) @property
[docs] def forward_variable(self) -> optlang.Variable: """Get `optlang.Variable` representing the forward flux. Returns ------- optlang.interface.Variable An optlang variable for the forward flux or None if reaction is not associated with a model. """ if self.model is not None: return self.model.variables[self.id] else: return None
@property
[docs] def reverse_variable(self) -> optlang.Variable: """Get `optlang.Variable` representing the reverse flux. Returns ------- optlang.interface.Variable An optlang variable for the reverse flux or None if reaction is not associated with a model. """ if self.model is not None: return self.model.variables[self.reverse_id] else: return None
@property
[docs] def objective_coefficient(self) -> float: """Get the coefficient for this reaction in a linear objective (float). Assuming that the objective of the associated model is summation of fluxes from a set of reactions, the coefficient for each reaction can be obtained individually using this property. A more general way is to use the `model.objective` property directly. """ return linear_reaction_coefficients(self.model, [self]).get(self, 0)
@objective_coefficient.setter def objective_coefficient(self, value: float): if self.model is None: raise AttributeError("Cannot assign objective to a missing model.") if self.flux_expression is not None: set_objective(self.model, {self: value}, additive=True) @property
[docs] def bounds(self) -> Tuple[float, float]: """Get or set the bounds directly from a tuple. Convenience method for setting upper and lower bounds in one line using a tuple of lower and upper bound. Invalid bounds will raise an AssertionError. When using a `HistoryManager` context, this attribute can be set temporarily, reversed when the exiting the context. """ return self.lower_bound, self.upper_bound
@bounds.setter @resettable def bounds(self, value: (float, float)): lower, upper = value # Validate bounds before setting them. Reaction._check_bounds(lower, upper) self.lower_bound = lower self.upper_bound = upper @property
[docs] def _metabolites(self): """Get metabolites.""" return {**self.__metabolites, self: 1}
@_metabolites.setter def _metabolites(self, metabolites: Dict): """Get metabolites.""" self.__metabolites = metabolites @property
[docs] def metabolites(self) -> Dict: """Get metabolite of the protein pseudoreaction as the protein itself.""" return self._metabolites
@metabolites.setter def metabolites(self, metabolites: Dict): self._metabolites = metabolites @property
[docs] def reactions(self) -> FrozenSet[Reaction]: """Retrieve immutable private reactions property.""" return frozenset(reac for reac in self._reaction if isinstance(reac, Reaction))
@property
[docs] def model(self): """Retrieve the model the reaction is a part of.""" return self._model if hasattr(self, "_model") else None
@property
[docs] def reactants(self): """Return a list of reactants for the reaction.""" return [k for k, v in self._metabolites.items() if v < 0]
@property
[docs] def products(self): """Return a list of products for the reaction.""" return [k for k, v in self._metabolites.items() if v >= 0]
[docs] def __setstate__(self, state): """Set attribute to point to model, mimicking `cobra.Reaction`.""" # These are necessary for old pickles which store attributes # which have since been superceded by properties. if "reaction" in state: state.pop("reaction") if "gene_reaction_rule" in state: state["_gene_reaction_rule"] = state.pop("gene_reaction_rule") if "lower_bound" in state: state["_lower_bound"] = state.pop("lower_bound") if "upper_bound" in state: state["_upper_bound"] = state.pop("upper_bound") self.__dict__.update(state) for x in state["_metabolites"]: x._model = self._model x._reaction.add(self)
[docs] def __str__(self) -> str: """Print str representation as id.""" return f"Protein {self.id}"
[docs] def _repr_html_(self) -> str: return f""" <table> <tr> <td><strong>Protein identifier</strong></td><td>{self.id}</td> </tr><tr> <td><strong>Name</strong></td><td>{format_long_string(self.name, 200)} </td> </tr><tr> <td><strong>Memory address</strong></td> <td>0x0%x{id(self)}</td> </tr><tr> </tr><tr> <td><strong>Concentration</strong></td><td>{self.concentration}</td> </tr><tr> <td><strong>Upper bound</strong></td><td>{self.upper_bound}</td> </tr><tr> </tr><tr> <td><strong>Mw</strong></td><td>{self.mw}</td> </tr><tr> <td><strong>In {len(self.reactions)} reaction(s)</strong></td><td> {format_long_string(", ".join( f'{r.id} ({kcat:.2f})' for r, kcat in self.kcats.items() ), 200)} </td> </tr> </table> """
[docs]class Kcats: """Interface to modify kcats of a protein. The user interacts with kcats in 1/s, translated to h to the model in the form stoichiometry coefficients of protein participating in a given reaction. Example ------- .. doctest:: # dictionary of Reaction to kcat in s >>> model.proteins.prot_P0A796.kcats # the user inputs the kcat in 1/s >>> model.proteins.prot_P0A796.kcats["PFKNo1"] = 1 / 30 # the corresponding stoichiometry value is in -h >>> ec_model.reactions.PFKNo1.metabolites[model.proteins.prot_P0A796] == -1/120 """ def __init__(self, prot: Protein): """Initialize with kcats from the model.""" self._protein = prot self._update()
[docs] def _update(self): """Build the map on the fly.""" self._reac_to_kcat = { reac: -1 / reac.metabolites[self._protein] / 3600 for reac in self._protein.reactions
}
[docs] def __getitem__(self, key: Union[Reaction, str]) -> float: """Return the kcat of the protein in the Reaaction `key`.""" self._update() if self._model_warn(key, "get"): return if isinstance(key, str): return self._reac_to_kcat[self._protein.model.reactions.get_by_id(key)] else: return self._reac_to_kcat[key]
[docs] def __setitem__(self, key: Union[Reaction, str], val: float): """Assing kcat as `val` in the given `key` Reaction (as 1/kcat).""" # TODO: wrong if self._model_warn(key, "set"): return if isinstance(key, str): reac = self._protein.model.reactions.get_by_id(key) else: reac = key reac._metabolites[self._protein] = -1 / (3600 * val)
[docs] def _model_warn(self, key: Union[Reaction, str], action: str): if not hasattr(self._protein, "model"): warn( f"Cannot set kcat of Protein {self._protein.id} which does not" f" belong to any model (reaction: {key})." ) return True
[docs] def __iter__(self): """Iterate inner dict.""" self._update() return self._reac_to_kcat.__iter__()
[docs] def items(self): """Iterate inner items.""" self._update() return self._reac_to_kcat.items()
[docs] def keys(self): """Return inner dict's keys.""" self._update() return self._reac_to_kcat.keys()
[docs] def values(self): """Return inner dict's values.""" self._update() return self._reac_to_kcat.values()
[docs] def __repr__(self): """Represent inner dict.""" self._update() return self._reac_to_kcat.__repr__()