Source code for geckopy.integration.pytfa

# Copyright 2021 Ginkgo Bioworks

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Integration layer with pytfa. Pytfa is not installed by default."""

import logging
import pickle
import zlib
from functools import reduce
from typing import Dict, Optional

import cobra
import pandas as pd
import pytfa
from pytfa.optim.variables import GenericVariable, LogConcentration

import geckopy


[docs]LOGGER = logging.getLogger(__name__)
[docs]PROT_DATA = { "pKa": [7], "deltaGf_err": 0, "mass_std": 0.0, "id": "protein", "nH_std": 12, "name": "protein", "formula": "H", "deltaGf_std": 0, "error": "Nil", "charge_std": 0, "struct_cues": {"ART_PROT": 0},
}
[docs]PROT_CUE_DATA = { "datfile": "ART.dgs", "error": 0, "formula": "", "charge": 0, "id": "ART_PROT", "small": True, "names": ["ART_PROT"], "energy": 0,
}
[docs]class ThermoModel(pytfa.ThermoModel): """Derived class to guard LC_vars against rewriting. This is required because we need to include `LC_vars` about proteins before calling `.convert()`, but convert reassigns it to an empty dictionary. """
[docs] _LC_vars: Dict[cobra.Metabolite, GenericVariable] = {}
@property
[docs] def LC_vars(self): """Get dict LogConcentration variables to use in dGr constraints.""" return self._LC_vars
@LC_vars.setter def LC_vars(self, value: GenericVariable): if value: self._LC_vars = value
[docs]def adapt_gecko_to_thermo( ec_model: geckopy.Model, thermodb: Dict, compartment_data: Dict, solver: Optional[str] = None, *args, **kwargs, ) -> pytfa.ThermoModel: """Prepare and convert gecko model to `pytfa.ThermoModel`. It modifies the `model` and the `thermodb` in place. The 'seed_id' annotation is hardcoded in pytfa so the metabolites and the database must have that annotation to be used. If proteins were added without dG energy, they would be treated as missing metabolites so the reactions with enzymes would be ignored. Adding 0 formation energy results in ignoring proteins for dG, since catalyzers do not affect dG. Parameters ---------- model: geckopy.Model thermodb: Dict from pytfa.io.load_thermo. The format is explained at https://pytfa.readthedocs.io/en/latest/thermoDB.html compartment_data: Dict check https://pytfa.readthedocs.io/en/latest/model.html#compartment-data *args, **kwargs: which will be passed to the pytfa.ThermoModel.__init__ """ thermodb["metabolites"]["protein"] = PROT_DATA thermodb["cues"]["ART_PROT"] = PROT_CUE_DATA # preparing + converting the model sets the constrain linear coefficients # of proteins to usual values which may not be the intended behavior right_prot_coeffs = { prot.id: { var.name: coeff for var, coeff in ec_model.solver.constraints[prot.id] .get_linear_coefficients(ec_model.solver.constraints[prot.id].variables) .items() } for prot in ec_model.proteins } tmodel = ThermoModel(thermodb, ec_model, *args, **kwargs) if solver: tmodel.solver = solver tmodel.compartments = compartment_data # pseudometabolites of arm reactions are equaled to its opposite side for prot in tmodel.proteins: # pass ownership of the proteins to the ThermoModel prot._model = tmodel prot.thermo = pytfa.thermo.MetaboliteThermo( PROT_DATA, tmodel.compartments[prot.compartment]["pH"], tmodel.compartments[prot.compartment]["ionicStr"], tmodel.TEMPERATURE, tmodel.MIN_pH, tmodel.MAX_pH, tmodel.Debye_Huckel_B, tmodel.thermo_unit, ) tmodel.prepare() # 0 dG formation for proteins so they are not used in thermo calculations # proteins are not included inside "model.metabolites" so we need to add'em for prot in tmodel.proteins: LC = tmodel.add_variable(LogConcentration, prot, lb=-1000, ub=1000) tmodel.LC_vars[prot] = LC prot.thermo.deltaGf_tr = 0 prot.thermo.deltaGf_err = 0 # restore whatever protein constraints we had previous to the model.repair for prot_id, constraint_terms in right_prot_coeffs.items(): for var_name, coefficient in constraint_terms.items(): tmodel.constraints[prot_id].set_linear_coefficients( {tmodel.variables[var_name]: coefficient} ) tmodel.convert(verbose=False) # tmodel.convert(verbose=False, overwrite_lc_vars=False) return tmodel
[docs]def _prepare_pseudometabolites_dfg(model: geckopy.Model, thermodb: Dict): r"""Equal :math:`\Delta G_f` of pseudometabolites to their opposite sides.""" prepared_reactions = 0 # reac = cobra.Reaction("EX_revolver") # reac.add_metabolites({cobra.Metabolite("revolver", compartment="c"): -1}) # model.add_reaction(reac) # reac.bounds = -1000, 1000 for reaction in model.reactions: if reaction.id.startswith("arm_"): real_mets = [ thermodb["metabolites"][met.annotation["seed_id"]] for met in reaction.metabolites if "seed_id" in met.annotation and not met.id.startswith("pmet") ] if len(real_mets) == len(reaction.metabolites) - 1: pmet, *_ = [ met for met in reaction.metabolites if met.id.startswith("pmet") ] pmet.annotation["seed_id"] = pmet.id pmet.formula = "".join([met["formula"] for met in real_mets]) thermodb["metabolites"][pmet.id] = { "pKa": [7], "deltaGf_err": max(met["deltaGf_err"] for met in real_mets), "mass_std": sum(met["mass_std"] for met in real_mets), "id": pmet.id, "nH_std": 12, "name": pmet.name, "formula": pmet.formula, "deltaGf_std": sum(met["deltaGf_std"] for met in real_mets), "error": "Nil", "charge_std": -sum(met["charge_std"] for met in real_mets), "struct_cues": { k: sum(x["struct_cues"].get(k, 0) for x in real_mets) for k in reduce( lambda a, b: a | set(b["struct_cues"]), real_mets, set(), ) }, } # reaction.add_metabolites( # {model.metabolites.get_by_id("revolver"): # -1 if "REV" in reaction.id else 1} # ) prepared_reactions += 1 LOGGER.info(f"Succesfully prepared {prepared_reactions} arm reactions")
[docs]def translate_model_mnx_to_seed( model: cobra.Model, thermodb: Dict, mnx_file: str, ): """Add a seed_id annotation to every metabolite.""" df = pd.read_csv( mnx_file, sep="\t", comment="#", names=["xref", "mnx", "annotation"], ) # perf filter df = df.loc[df.xref.str.startswith("seed"), :] df.xref = df.xref.apply(lambda x: x[-8:]) for met in model.metabolites: if "seed.id" in met.annotation: met.annotation["seed_id"] = met.annotation["seed.id"] if "seed_id" in met.annotation: continue if "metanetx.chemical" in met.annotation: mnx_id = met.annotation["metanetx.chemical"] if isinstance(mnx_id, str): annotations = df.loc[ df.mnx == met.annotation["metanetx.chemical"], "xref" ] else: annotations = df.loc[ df.mnx.isin(met.annotation["metanetx.chemical"]), "xref" ] annotation = [ann for ann in annotations if ann in thermodb["metabolites"]] if annotation: # just pick the first one met.annotation["seed_id"] = annotation[0]
[docs]def write_thermodb(thermodb: Dict, filename: str): """Deserialize the `thermoDB` to a compressed file at `filename`.""" with open(filename, "wb") as f: f.write(zlib.compress(pickle.dumps(thermodb)))
[docs]def get_thermo_coverage(model: pytfa.thermo.ThermoModel, total=True): """Return the number of reactions that were assigned a thermodynamic variable. Parameters ---------- model: pytfa.ThermoModel total: bool whether to report the total number of reactions covered or the percentage. Default: True """ thermo_set = get_thermo_reactions(model) return len(thermo_set) if total else len(thermo_set) / len(model.reactions)
[docs]def get_thermo_reactions(model: pytfa.thermo.ThermoModel): r"""Return the set of reactions that were assigned a thermodynamic variable. In pytfa, a reaction will have thermodynamic constraints if and only if all of the metabolites in the reaction have the :math:`\Delta G_f`. """ return {reac for reac in model.reactions if reac.thermo["computed"]}