Source code for geckopy.integration.pytfa

# Copyright 2021 Ginkgo Bioworks

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Integration layer with pytfa. Pytfa is not installed by default."""

import logging
import pickle
import zlib
from functools import reduce
from typing import Dict, Optional

import cobra
import pandas as pd
import pytfa
from pytfa.optim.variables import GenericVariable, LogConcentration

import geckopy


[docs]LOGGER = logging.getLogger(__name__)
[docs]PROT_DATA = {
    "pKa": [7],
    "deltaGf_err": 0,
    "mass_std": 0.0,
    "id": "protein",
    "nH_std": 12,
    "name": "protein",
    "formula": "H",
    "deltaGf_std": 0,
    "error": "Nil",
    "charge_std": 0,
    "struct_cues": {"ART_PROT": 0},
}
[docs]PROT_CUE_DATA = {
    "datfile": "ART.dgs",
    "error": 0,
    "formula": "",
    "charge": 0,
    "id": "ART_PROT",
    "small": True,
    "names": ["ART_PROT"],
    "energy": 0,
}


[docs]class ThermoModel(pytfa.ThermoModel):
    """Derived class to guard LC_vars against rewriting.

    This is required because we need to include `LC_vars` about proteins before
    calling `.convert()`, but convert reassigns it to an empty dictionary.
    """

[docs]    _LC_vars: Dict[cobra.Metabolite, GenericVariable] = {}

    @property
[docs]    def LC_vars(self):
        """Get dict LogConcentration variables to use in dGr constraints."""
        return self._LC_vars

    @LC_vars.setter
    def LC_vars(self, value: GenericVariable):
        if value:
            self._LC_vars = value


[docs]def adapt_gecko_to_thermo(
    ec_model: geckopy.Model,
    thermodb: Dict,
    compartment_data: Dict,
    solver: Optional[str] = None,
    *args,
    **kwargs,
) -> pytfa.ThermoModel:
    """Prepare and convert gecko model to `pytfa.ThermoModel`.

    It modifies the `model` and the `thermodb` in place.

    The 'seed_id' annotation is hardcoded in pytfa so the metabolites and the
    database must have that annotation to be used.

    If proteins were added without dG energy, they would be treated as missing
    metabolites so the reactions with enzymes would be ignored. Adding 0
    formation energy results in ignoring proteins for dG, since catalyzers do
    not affect dG.

    Parameters
    ----------
    model: geckopy.Model
    thermodb: Dict
        from pytfa.io.load_thermo. The format is explained at
        https://pytfa.readthedocs.io/en/latest/thermoDB.html
    compartment_data: Dict
        check https://pytfa.readthedocs.io/en/latest/model.html#compartment-data
    *args, **kwargs:
        which will be passed to the pytfa.ThermoModel.__init__
    """
    thermodb["metabolites"]["protein"] = PROT_DATA
    thermodb["cues"]["ART_PROT"] = PROT_CUE_DATA
    # preparing + converting the model sets the constrain linear coefficients
    # of proteins to usual values which may not be the intended behavior
    right_prot_coeffs = {
        prot.id: {
            var.name: coeff
            for var, coeff in ec_model.solver.constraints[prot.id]
            .get_linear_coefficients(ec_model.solver.constraints[prot.id].variables)
            .items()
        }
        for prot in ec_model.proteins
    }
    tmodel = ThermoModel(thermodb, ec_model, *args, **kwargs)
    if solver:
        tmodel.solver = solver
    tmodel.compartments = compartment_data
    # pseudometabolites of arm reactions are equaled to its opposite side
    for prot in tmodel.proteins:
        # pass ownership of the proteins to the ThermoModel
        prot._model = tmodel
        prot.thermo = pytfa.thermo.MetaboliteThermo(
            PROT_DATA,
            tmodel.compartments[prot.compartment]["pH"],
            tmodel.compartments[prot.compartment]["ionicStr"],
            tmodel.TEMPERATURE,
            tmodel.MIN_pH,
            tmodel.MAX_pH,
            tmodel.Debye_Huckel_B,
            tmodel.thermo_unit,
        )
    tmodel.prepare()
    # 0 dG formation for proteins so they are not used in thermo calculations
    # proteins are not included inside "model.metabolites" so we need to add'em
    for prot in tmodel.proteins:
        LC = tmodel.add_variable(LogConcentration, prot, lb=-1000, ub=1000)
        tmodel.LC_vars[prot] = LC
        prot.thermo.deltaGf_tr = 0
        prot.thermo.deltaGf_err = 0
    # restore whatever protein constraints we had previous to the model.repair
    for prot_id, constraint_terms in right_prot_coeffs.items():
        for var_name, coefficient in constraint_terms.items():
            tmodel.constraints[prot_id].set_linear_coefficients(
                {tmodel.variables[var_name]: coefficient}
            )
    tmodel.convert(verbose=False)
    # tmodel.convert(verbose=False, overwrite_lc_vars=False)
    return tmodel


[docs]def _prepare_pseudometabolites_dfg(model: geckopy.Model, thermodb: Dict):
    r"""Equal :math:`\Delta G_f` of pseudometabolites to their opposite sides."""
    prepared_reactions = 0
    # reac = cobra.Reaction("EX_revolver")
    # reac.add_metabolites({cobra.Metabolite("revolver", compartment="c"): -1})
    # model.add_reaction(reac)
    # reac.bounds = -1000, 1000
    for reaction in model.reactions:
        if reaction.id.startswith("arm_"):
            real_mets = [
                thermodb["metabolites"][met.annotation["seed_id"]]
                for met in reaction.metabolites
                if "seed_id" in met.annotation and not met.id.startswith("pmet")
            ]
            if len(real_mets) == len(reaction.metabolites) - 1:
                pmet, *_ = [
                    met for met in reaction.metabolites if met.id.startswith("pmet")
                ]
                pmet.annotation["seed_id"] = pmet.id
                pmet.formula = "".join([met["formula"] for met in real_mets])
                thermodb["metabolites"][pmet.id] = {
                    "pKa": [7],
                    "deltaGf_err": max(met["deltaGf_err"] for met in real_mets),
                    "mass_std": sum(met["mass_std"] for met in real_mets),
                    "id": pmet.id,
                    "nH_std": 12,
                    "name": pmet.name,
                    "formula": pmet.formula,
                    "deltaGf_std": sum(met["deltaGf_std"] for met in real_mets),
                    "error": "Nil",
                    "charge_std": -sum(met["charge_std"] for met in real_mets),
                    "struct_cues": {
                        k: sum(x["struct_cues"].get(k, 0) for x in real_mets)
                        for k in reduce(
                            lambda a, b: a | set(b["struct_cues"]),
                            real_mets,
                            set(),
                        )
                    },
                }
                # reaction.add_metabolites(
                #     {model.metabolites.get_by_id("revolver"):
                #      -1 if "REV" in reaction.id else 1}
                # )
                prepared_reactions += 1
    LOGGER.info(f"Succesfully prepared {prepared_reactions} arm reactions")


[docs]def translate_model_mnx_to_seed(
    model: cobra.Model,
    thermodb: Dict,
    mnx_file: str,
):
    """Add a seed_id annotation to every metabolite."""
    df = pd.read_csv(
        mnx_file,
        sep="\t",
        comment="#",
        names=["xref", "mnx", "annotation"],
    )
    # perf filter
    df = df.loc[df.xref.str.startswith("seed"), :]
    df.xref = df.xref.apply(lambda x: x[-8:])
    for met in model.metabolites:
        if "seed.id" in met.annotation:
            met.annotation["seed_id"] = met.annotation["seed.id"]
        if "seed_id" in met.annotation:
            continue
        if "metanetx.chemical" in met.annotation:
            mnx_id = met.annotation["metanetx.chemical"]
            if isinstance(mnx_id, str):
                annotations = df.loc[
                    df.mnx == met.annotation["metanetx.chemical"], "xref"
                ]
            else:
                annotations = df.loc[
                    df.mnx.isin(met.annotation["metanetx.chemical"]), "xref"
                ]
            annotation = [ann for ann in annotations if ann in thermodb["metabolites"]]
            if annotation:
                # just pick the first one
                met.annotation["seed_id"] = annotation[0]


[docs]def write_thermodb(thermodb: Dict, filename: str):
    """Deserialize the `thermoDB` to a compressed file at `filename`."""
    with open(filename, "wb") as f:
        f.write(zlib.compress(pickle.dumps(thermodb)))


[docs]def get_thermo_coverage(model: pytfa.thermo.ThermoModel, total=True):
    """Return the number of reactions that were assigned a thermodynamic variable.

    Parameters
    ----------
    model: pytfa.ThermoModel
    total: bool
        whether to report the total number of reactions covered or the percentage.
        Default: True
    """
    thermo_set = get_thermo_reactions(model)

    return len(thermo_set) if total else len(thermo_set) / len(model.reactions)


[docs]def get_thermo_reactions(model: pytfa.thermo.ThermoModel):
    r"""Return the set of reactions that were assigned a thermodynamic variable.

    In pytfa, a reaction will have thermodynamic constraints if and only if all
    of the metabolites in the reaction have the :math:`\Delta G_f`.
    """
    return {reac for reac in model.reactions if reac.thermo["computed"]}
Source code for geckopy.integration.pytfa

Navigation

Related Topics