-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgapfilling.py
188 lines (165 loc) · 8.24 KB
/
gapfilling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import cobra
from copy import deepcopy
import numpy as np
import pandas as pd
from egc import resolve_egc
import random
# This function adds exchange reactions for targeted fermentation products
def add_ex_reactions(model, universe, target_ex_rxns):
for rid in target_ex_rxns:
if rid not in model.reactions:
#print('add_ex_rxns: adding %s...' % rid)
reaction = cobra.Reaction(rid)
reaction.name = 'R_%s' % rid
reaction.lower_bound = -1000.0
reaction.upper_bound = 1000.0
met_id = rid.lstrip('EX_')
if met_id in model.metabolites:
met = model.metabolites.get_by_id(met_id)
else:
met = universe.metabolites.get_by_id(met_id)
reaction.add_metabolites({met: -1.0})
model.add_reactions([reaction])
return None
# constrain nutrient uptake
def constrain_media(model, df_cm, namespace, rxn_suffix):
for ex in model.exchanges:
ex_cpd = ex.id.lstrip('EX_')
ex_cpd = ex_cpd.rstrip(rxn_suffix)
if ex_cpd in list(df_cm[namespace]):
ex.lower_bound = -np.abs(df_cm.loc[df_cm[namespace] == ex_cpd, 'flux'].values[0])
ex.upper_bound = 1000.0
else:
ex.lower_bound = 0.0
ex.upper_bound = 1000.0
return None
# test if adding reactions will increase growth rate
def test_growth_inflation(model, rxns_to_add):
max_growth = model.slim_optimize()
model2 = deepcopy(model)
model2.add_reactions(rxns_to_add)
max_growth2 = model2.slim_optimize()
if np.abs(max_growth2 - max_growth) <= 1e-6:
return 0
else:
return 1
# add reactions predicted from deep learning model or randomly selected from reaction pools
def add_gapfilled_reactions(gem_file, universe, paras):
# read parameters
namespace = paras['NAMESPACE']
batch_size = int(paras['BATCH_SIZE'])
num_rxns_to_fill = int(paras['NUM_GAPFILLED_RXNS_TO_ADD'])
add_random_rxns = int(paras['ADD_RANDOM_RXNS'])
target_ex_rxns = paras['TARGET_EX_RXNS']
ex_suffix = paras['EX_SUFFIX']
# read GEM into cobrapy
model = cobra.io.read_sbml_model("%s/%s.xml" % (paras['GEM_DIRECTORY'], gem_file))
model.solver = 'cplex'
print('add_gapfilled_reaction: model %s loaded' % gem_file)
# read culture medium
df_cm = pd.read_csv(paras['CULTURE_MEDIUM'])
if namespace not in list(df_cm.columns):
raise RuntimeError('cannot find %s as a column of media file.' % namespace)
# add exchange reactions
add_ex_reactions(model, universe, target_ex_rxns)
# constrain boundary reactions by culture media
constrain_media(model, df_cm, namespace, ex_suffix)
# skip the model if it does not grow
max_growth = model.slim_optimize()
if max_growth == 0.0:
raise RuntimeError("model %s cannot grow in current medium." % (gem_file.rstrip('.xml')))
else:
print('add_gapfilled_reaction: max growth rate = %2.2f' % max_growth)
# model before reaction added
model_no_gapfill = deepcopy(model)
model_w_gapfill = deepcopy(model)
if add_random_rxns:
# randomize reactions in universal pools
candidate_reactions = list(set([r.id for r in universe.reactions if r.id not in model_w_gapfill.reactions]))
random.shuffle(candidate_reactions)
else:
# read deep learning model predicted reactions with scores
score_file = "%s/%s.csv" % (paras['GAPFILLED_RXNS_DIRECTORY'], gem_file)
df_gapfill = pd.read_csv(score_file, index_col=0)
# keep gapfilled reactions that are not contained in the model but included in the reaction pools
df_gapfill = df_gapfill.loc[
[rid for rid in df_gapfill.index if rid not in model_w_gapfill.reactions and rid in universe.reactions]
]
df_gapfill = df_gapfill[df_gapfill['predicted_scores']>=float(paras['MIN_PREDICTED_SCORES'])].sort_values('similarity_scores')
candidate_reactions = list(df_gapfill.index)
print('add_gapfilled_reaction: find %d candidate reactions' % len(candidate_reactions))
# if Resolve_EGC is True, add reaction one by one; otherwise add all together
rids_added = []
num_rxns_before = len(model_w_gapfill.reactions)
counter = 0
counter2 = 0
max_counter = np.min([num_rxns_to_fill, len(candidate_reactions)])
if int(paras['RESOLVE_EGC']):
while counter < max_counter:
# determine which reactions to add for this batch
rxns_to_add = []
for rid in candidate_reactions[counter2:]:
if len(rxns_to_add) >= batch_size or counter + len(rxns_to_add) >= max_counter:
break
assert not rid.startswith('EX_')
rxn = universe.reactions.get_by_id(rid)
# anaerobic condition; do not add reactions involving oxygen
if int(paras['ANAEROBIC']):
if namespace == "bigg":
if 'o2_c' in [met.id for met in rxn.reactants] or 'o2_c' in [met.id for met in rxn.products]:
counter2 += 1
continue
elif namespace == "modelseed":
if 'cpd00007_c0' in [met.id for met in rxn.reactants] or 'cpd00007_c0' in [met.id for met in
rxn.products]:
counter2 += 1
continue
rxns_to_add.append(rxn)
counter2 += 1
# add reactions in batch and test growth inflation
is_inflated = test_growth_inflation(model_w_gapfill, rxns_to_add)
if is_inflated:
# growth inflation found, add reaction one by one, and resolve potential egc
for rxn in rxns_to_add:
assert rxn.id not in model_w_gapfill.reactions
is_inflated = test_growth_inflation(model_w_gapfill, [rxn])
if is_inflated:
model_w_gapfill2 = deepcopy(model_w_gapfill)
model_w_gapfill2.add_reactions([rxn])
egc_resolved = resolve_egc(model_w_gapfill2, rxn.id, namespace)
if egc_resolved and model_w_gapfill2.slim_optimize() < 2.81:
model_w_gapfill = deepcopy(model_w_gapfill2)
rids_added.append(rxn.id)
counter += 1
print(
'add_gapfilled_reaction: added %s after resolving egc, current counter = %d'
% (rxn.id, counter))
else:
print(
'add_gapfilled_reaction: failed to add %s and egc not resolved, current counter = %d'
% (rxn.id, counter))
else:
model_w_gapfill.add_reactions([rxn])
rids_added.append(rxn.id)
counter += 1
print(
'add_gapfilled_reaction: added %s, no growth inflation, current counter = %d'
% (rxn.id, counter))
else:
for rxn in rxns_to_add:
assert rxn.id not in model_w_gapfill.reactions
model_w_gapfill.add_reactions(rxns_to_add)
rids_added.extend([rxn.id for rxn in rxns_to_add])
counter += len(rxns_to_add)
print('add_gapfilled_reaction: added %d reactions, no growth inflation, current counter = %d'
% (len(rxns_to_add), counter))
else:
max_counter = np.min(paras['NUM_GAPFILLED_RXNS_TO_ADD'], len(candidate_reactions))
rxns_to_add = candidate_reactions[0:max_counter]
model_w_gapfill.add_reactions([rxns_to_add])
# calculate number of reactions added
num_rxns_after = len(model_w_gapfill.reactions)
num_rxns_added = num_rxns_after - num_rxns_before
assert num_rxns_added <= max_counter
print("model %s: successfully added %d reactions." % (gem_file.replace('.xml', ''), num_rxns_added))
return model_no_gapfill, model_w_gapfill, num_rxns_added, rids_added