From 12b487cb0656e27936f916d1c80c65138e83fae1 Mon Sep 17 00:00:00 2001 From: Carlo Camilloni Date: Fri, 30 Jan 2026 11:15:26 +0100 Subject: [PATCH] new guess for the interaction model based on a deconvolution of the amino acid stickyness in the new KLL paper --- src/multiego/ensemble.py | 34 ++-- src/multiego/resources/type_definitions.py | 173 +++++++++++---------- 2 files changed, 109 insertions(+), 98 deletions(-) diff --git a/src/multiego/ensemble.py b/src/multiego/ensemble.py index 61a33a8b..27b12aa6 100644 --- a/src/multiego/ensemble.py +++ b/src/multiego/ensemble.py @@ -352,7 +352,6 @@ def init_meGO_matrices(ensemble, args, custom_dict): # TODO: probabilities of equivalent atoms should be averaged - reference_contact_matrices[name] = reference_contact_matrices[name].add_prefix("rc_") reference_contact_matrices[name]["c6_i"] = [lj_data_dict[x][0] for x in reference_contact_matrices[name]["rc_ai"]] reference_contact_matrices[name]["c6_j"] = [lj_data_dict[x][0] for x in reference_contact_matrices[name]["rc_aj"]] @@ -1038,8 +1037,10 @@ def set_sig_epsilon(meGO_LJ, parameters): # These are defined only if the training probability is greater than MD_threshold and # by comparing them with RC_probabilities so that the resulting epsilon is between eps_min and eps_0 condition = ( - meGO_LJ["probability"] > meGO_LJ["limit_rc_att"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"]) - ) & (meGO_LJ["probability"] > meGO_LJ["md_threshold"]) & (meGO_LJ["epsilon_prior"] > 0.) + (meGO_LJ["probability"] > meGO_LJ["limit_rc_att"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"])) + & (meGO_LJ["probability"] > meGO_LJ["md_threshold"]) + & (meGO_LJ["epsilon_prior"] > 0.0) + ) meGO_LJ.loc[condition, "epsilon"] = np.maximum(0.0, meGO_LJ["epsilon_prior"]) - ( (meGO_LJ["epsilon_0"] - np.maximum(0.0, meGO_LJ["epsilon_prior"])) / np.log(meGO_LJ["rc_threshold"]) @@ -1048,20 +1049,23 @@ def set_sig_epsilon(meGO_LJ, parameters): meGO_LJ.loc[condition, "sigma"] = meGO_LJ["distance"] / 2.0 ** (1.0 / 6.0) condition = ( - meGO_LJ["probability"] > meGO_LJ["limit_rc_att"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"]) - ) & (meGO_LJ["probability"] > meGO_LJ["md_threshold"]) & (meGO_LJ["epsilon_prior"] < 0.) & (meGO_LJ["rc_probability"] > meGO_LJ["md_threshold"]) + (meGO_LJ["probability"] > meGO_LJ["limit_rc_att"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"])) + & (meGO_LJ["probability"] > meGO_LJ["md_threshold"]) + & (meGO_LJ["epsilon_prior"] < 0.0) + & (meGO_LJ["rc_probability"] > meGO_LJ["md_threshold"]) + ) meGO_LJ.loc[condition, "epsilon"] = (-meGO_LJ["rep"] * (meGO_LJ["distance"] / meGO_LJ["rc_distance"]) ** 12).clip( lower=-20 * meGO_LJ["rep"], upper=-0.05 * meGO_LJ["rep"] )[condition] meGO_LJ.loc[condition, "learned"] = 1 - #condition = ( + # condition = ( # meGO_LJ["probability"] > meGO_LJ["limit_rc_att"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"]) - #) & (meGO_LJ["probability"] > meGO_LJ["md_threshold"]) & (meGO_LJ["epsilon_prior"] < 0.) & (meGO_LJ["rc_probability"] <= meGO_LJ["md_threshold"]) - #meGO_LJ.loc[condition, "epsilon"] = -meGO_LJ["rep"] * ( + # ) & (meGO_LJ["probability"] > meGO_LJ["md_threshold"]) & (meGO_LJ["epsilon_prior"] < 0.) & (meGO_LJ["rc_probability"] <= meGO_LJ["md_threshold"]) + # meGO_LJ.loc[condition, "epsilon"] = -meGO_LJ["rep"] * ( # 1.0 + (np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"]) - meGO_LJ["probability"]) - #) - #meGO_LJ.loc[condition, "learned"] = 1 + # ) + # meGO_LJ.loc[condition, "learned"] = 1 # Not-attractive interactions # this is used only when MD_th < MD_p < limit_rc_att*RC_p @@ -1076,15 +1080,15 @@ def set_sig_epsilon(meGO_LJ, parameters): )[condition] meGO_LJ.loc[condition, "learned"] = 1 - #condition = ( + # condition = ( # (meGO_LJ["probability"] <= meGO_LJ["limit_rc_att"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"])) # & (meGO_LJ["probability"] > meGO_LJ["md_threshold"]) # & (meGO_LJ["rc_probability"] <= meGO_LJ["md_threshold"]) - #) - #meGO_LJ.loc[condition, "epsilon"] = -meGO_LJ["rep"] * ( + # ) + # meGO_LJ.loc[condition, "epsilon"] = -meGO_LJ["rep"] * ( # 1.0 + (np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"]) - meGO_LJ["probability"]) - #) - #meGO_LJ.loc[condition, "learned"] = 1 + # ) + # meGO_LJ.loc[condition, "learned"] = 1 # 1-4 interactions are special and cannot become attractive because they are part of the bonded-interactions condition = (meGO_LJ["bond_distance"] < 4) & (meGO_LJ["same_chain"]) diff --git a/src/multiego/resources/type_definitions.py b/src/multiego/resources/type_definitions.py index 300cb54e..98b320ce 100644 --- a/src/multiego/resources/type_definitions.py +++ b/src/multiego/resources/type_definitions.py @@ -3,19 +3,38 @@ import sys mg_OO_c12_rep = 7.5e-7 +mg_OMOM_c12_rep = 2.5e-5 mg_HH_c12_rep = 1.2e-8 mg_ON_c12_rep = 7.5e-7 mg_NN_c12_rep = 2.5e-5 mg_HO_sigma = 0.169500 -mg_eps_ch3 = 0.10 -mg_eps_HO = 0.13 # hydrogen bond strength -mg_eps_ch_aromatic = 0.13 -mg_eps_ch2 = 0.10 -mg_eps_pol_nz = 0.11 -mg_eps_pol = 0.09 -mg_eps_ch1 = 0.07 -mg_eps_bkbn_O_CB = 0.10 +mg_eps_HO = 0.12 + +eps_O = 0.085 +eps_OM = 0.085 +eps_OA = 0.13 +eps_N = 0.085 +eps_NT = 0.085 +eps_NL = 0.085 +eps_NR = 0.07 +eps_NZ = 0.11 +eps_NE = 0.085 +eps_C = 0.085 +eps_CH = 0.11 +eps_CH1 = 0.085 +eps_CAH = 0.085 +eps_CH2 = 0.11 +eps_CAH2 = 0.11 +eps_CH3 = 0.12 +eps_CH2r = 0.11 +eps_S = 0.14 +eps_CH3p = 0.00 +eps_P = 0.00 +eps_OE = 0.00 +eps_CR1 = 0.00 +eps_H = 0.00 +eps_C0 = 0.00 # Dataframe with atom types and associated parameters gromos_atp = pd.DataFrame( @@ -75,56 +94,56 @@ ], # all to ch2 except ch e ch3 "mg_c12": [ - 4.0 * 0.27601**12 * mg_eps_pol, # "O", sig=0.27601 - 4.0 * 0.26259**12 * mg_eps_pol, # "OM", sig=0.26259 - 4.0 * 0.29548**12 * mg_eps_pol, # "OA", sig=0.29548 - 4.0 * 0.31365**12 * mg_eps_pol, # "N", sig=0.31365 - 4.0 * 0.35722**12 * mg_eps_pol, # "NT", sig=0.35722 - 4.0 * 0.31365**12 * mg_eps_pol, # "NL", sig=0.31365 - 4.0 * 0.33411**12 * mg_eps_pol, # "NR", sig=0.33411 - 4.0 * 0.31365**12 * mg_eps_pol_nz, # "NZ", sig=0.31365 - 4.0 * 0.31365**12 * mg_eps_pol, # "NE", sig=0.31365 - 4.0 * 0.35812**12 * mg_eps_pol, # "C", sig=0.35812 - 4.0 * 0.35812**12 * mg_eps_ch_aromatic, # "CH", sig=0.35812 - 4.0 * 0.44592**12 * mg_eps_ch1, # "CH1", sig=0.50192 - 4.0 * 0.44592**12 * mg_eps_ch1, # "CAH", sig=0.50192 - 4.0 * 0.40704**12 * mg_eps_ch2, # "CH2", sig=0.40704 - 4.0 * 0.40704**12 * mg_eps_ch2, # "CAH2", sig=0.40704 - 4.0 * 0.37479**12 * mg_eps_ch3, # "CH3", sig=0.37479 - 4.0 * 0.39547**12 * mg_eps_ch2, # "CH2r", sig=0.39547 - 4.0 * 0.33077**12 * mg_eps_pol, # "S", sig=0.33077 - 4.0 * 0.37479**12 * mg_eps_ch3, # "CH3p", sig=0.37479 - 4.0 * 0.33856**12 * mg_eps_pol, # "P", sig=0.33856 - 4.0 * 0.28492**12 * mg_eps_pol, # "OE", sig=0.28492 - 4.0 * 0.37412**12 * mg_eps_pol, # "CR1", sig=0.37412 - 4.0 * 0.000000000 * mg_eps_pol, # "H", - 4.0 * 0.000000000 * mg_eps_pol, # "C0", + 4.0 * 0.27601**12 * eps_O, # "O", sig=0.27601 + 4.0 * 0.26259**12 * eps_OM, # "OM", sig=0.26259 + 4.0 * 0.29548**12 * eps_OA, # "OA", sig=0.29548 + 4.0 * 0.31365**12 * eps_N, # "N", sig=0.31365 + 4.0 * 0.35722**12 * eps_NT, # "NT", sig=0.35722 + 4.0 * 0.31365**12 * eps_NL, # "NL", sig=0.31365 + 4.0 * 0.33411**12 * eps_NR, # "NR", sig=0.33411 + 4.0 * 0.31365**12 * eps_NZ, # "NZ", sig=0.31365 + 4.0 * 0.31365**12 * eps_NE, # "NE", sig=0.31365 + 4.0 * 0.35812**12 * eps_C, # "C", sig=0.35812 + 4.0 * 0.35812**12 * eps_CH, # "CH", sig=0.35812 + 4.0 * 0.44592**12 * eps_CH1, # "CH1", sig=0.50192 + 4.0 * 0.44592**12 * eps_CAH, # "CAH", sig=0.50192 + 4.0 * 0.40704**12 * eps_CH2, # "CH2", sig=0.40704 + 4.0 * 0.40704**12 * eps_CAH2, # "CAH2", sig=0.40704 + 4.0 * 0.37479**12 * eps_CH3, # "CH3", sig=0.37479 + 4.0 * 0.39547**12 * eps_CH2r, # "CH2r", sig=0.39547 + 4.0 * 0.33077**12 * eps_S, # "S", sig=0.33077 + 4.0 * 0.37479**12 * eps_CH3p, # "CH3p", sig=0.37479 + 4.0 * 0.33856**12 * eps_P, # "P", sig=0.33856 + 4.0 * 0.28492**12 * eps_OE, # "OE", sig=0.28492 + 4.0 * 0.37412**12 * eps_CR1, # "CR1", sig=0.37412 + 4.0 * 0.000000000 * eps_H, # "H", + 4.0 * 0.000000000 * eps_C0, # "C0", ], "mg_c6": [ - 4.0 * 0.27601**6 * mg_eps_pol, # "O", - 4.0 * 0.26259**6 * mg_eps_pol, # "OM", - 4.0 * 0.29548**6 * mg_eps_pol, # "OA", - 4.0 * 0.31365**6 * mg_eps_pol, # "N", - 4.0 * 0.35722**6 * mg_eps_pol, # "NT", - 4.0 * 0.31365**6 * mg_eps_pol, # "NL", - 4.0 * 0.33411**6 * mg_eps_pol, # "NR", - 4.0 * 0.31365**6 * mg_eps_pol_nz, # "NZ", - 4.0 * 0.31365**6 * mg_eps_pol, # "NE", - 4.0 * 0.35812**6 * mg_eps_pol, # "C", - 4.0 * 0.35812**6 * mg_eps_ch_aromatic, # "CH" - 4.0 * 0.44592**6 * mg_eps_ch1, # "CH1" - 4.0 * 0.44592**6 * mg_eps_ch1, # "CAH" - 4.0 * 0.40704**6 * mg_eps_ch2, # "CH2" - 4.0 * 0.40704**6 * mg_eps_ch2, # "CAH2" - 4.0 * 0.37479**6 * mg_eps_ch3, # "CH3" - 4.0 * 0.39547**6 * mg_eps_ch2, # "CH2r" - 4.0 * 0.33077**6 * mg_eps_pol, # "S", - 4.0 * 0.37479**6 * mg_eps_ch3, # "CH3p" - 4.0 * 0.33856**6 * mg_eps_pol, # "P", - 4.0 * 0.28492**6 * mg_eps_pol, # "OE", - 4.0 * 0.37412**6 * mg_eps_pol, # "CR1", - 4.0 * 0.00000000 * mg_eps_pol, # "H", - 4.0 * 0.00000000 * mg_eps_pol, # "C0", + 4.0 * 0.27601**6 * eps_O, # "O", + 4.0 * 0.26259**6 * eps_OM, # "OM", + 4.0 * 0.29548**6 * eps_OA, # "OA", + 4.0 * 0.31365**6 * eps_N, # "N", + 4.0 * 0.35722**6 * eps_NT, # "NT", + 4.0 * 0.31365**6 * eps_NL, # "NL", + 4.0 * 0.33411**6 * eps_NR, # "NR", + 4.0 * 0.31365**6 * eps_NZ, # "NZ", + 4.0 * 0.31365**6 * eps_NE, # "NE", + 4.0 * 0.35812**6 * eps_C, # "C", + 4.0 * 0.35812**6 * eps_CH, # "CH" + 4.0 * 0.44592**6 * eps_CH1, # "CH1" + 4.0 * 0.44592**6 * eps_CAH, # "CAH" + 4.0 * 0.40704**6 * eps_CH2, # "CH2" + 4.0 * 0.40704**6 * eps_CAH2, # "CAH2" + 4.0 * 0.37479**6 * eps_CH3, # "CH3" + 4.0 * 0.39547**6 * eps_CH2r, # "CH2r" + 4.0 * 0.33077**6 * eps_S, # "S", + 4.0 * 0.37479**6 * eps_CH3p, # "CH3p" + 4.0 * 0.33856**6 * eps_P, # "P", + 4.0 * 0.28492**6 * eps_OE, # "OE", + 4.0 * 0.37412**6 * eps_CR1, # "CR1", + 4.0 * 0.00000000 * eps_H, # "H", + 4.0 * 0.00000000 * eps_C0, # "C0", ], } ) @@ -203,26 +222,26 @@ def lj14_generator(df): polar_sbtype = ["O", "OA", "OM", "N", "NL", "NT", "NR", "NZ", "NE", "C", "S"] hyd_sbtype = ["CH3", "CH2", "CH2r", "CH1", "CAH", "CAH2"] special_non_local = [ - #{ - # "atomtypes": (["O", "OM"], ["O", "OM"]), # charged oxygen-oxygen repulsion - # "interaction": "rep", - # "sigma": None, # not needed for repulsion - # "epsilon": mg_OO_c12_rep, - #}, { "atomtypes": (["O"], ["O"]), # charged oxygen-oxygen repulsion "interaction": "rep", "sigma": None, # not needed for repulsion "epsilon": mg_OO_c12_rep, }, - #{ - # "atomtypes": (["NZ", "NL"], ["NZ", "NL"]), # charged nitrogen-nitrogen repulsion - # "interaction": "rep", - # "sigma": None, # not needed for repulsion - # "epsilon": mg_NN_c12_rep, - #}, { - "atomtypes": (["NZ"], ["NZ"]), # charged nitrogen-nitrogen repulsion + "atomtypes": (["O"], ["OM"]), # charged oxygen-oxygen repulsion + "interaction": "rep", + "sigma": None, # not needed for repulsion + "epsilon": mg_OO_c12_rep, + }, + { + "atomtypes": (["OM"], ["OM"]), # charged oxygen-oxygen repulsion + "interaction": "rep", + "sigma": None, # not needed for repulsion + "epsilon": mg_OMOM_c12_rep, + }, + { + "atomtypes": (["NL"], ["NL"]), # charged nitrogen-nitrogen repulsion "interaction": "rep", "sigma": None, # not needed for repulsion "epsilon": mg_NN_c12_rep, @@ -233,12 +252,6 @@ def lj14_generator(df): "sigma": None, # not needed for repulsion "epsilon": mg_HH_c12_rep, }, - #{ - # "atomtypes": (polar_sbtype, hyd_sbtype), # polar - hydrophobic repulsion - # "interaction": "att", - # "sigma": None, # not needed for repulsion - # "epsilon": 0.09, - #}, # If None use default rc c12 repulsion { "atomtypes": (["O", "OM", "OA"], ["H"]), # hydrogen bond attraction "interaction": "att", @@ -246,23 +259,17 @@ def lj14_generator(df): "epsilon": mg_eps_HO, }, { - "atomtypes": (["NL"], ["N", "NT", "NL", "NR", "NZ", "NE", "C", "CH", "CH1", "CAH", "CH2", "CAH2", "CH3", "CH2r", "S"]), # hydrogen bond attraction + "atomtypes": (["NL"], ["NZ", "N", "NT", "NR", "NE", "C", "CH", "CH1", "CAH", "CH2", "CAH2", "CH3", "CH2r", "S"]), "interaction": "rep", "sigma": None, "epsilon": None, }, { - "atomtypes": (["OM"], ["O", "OM", "C", "CH", "CH1", "CAH", "CH2", "CAH2", "CH3", "CH2r", "S"]), # hydrogen bond attraction + "atomtypes": (["OM"], ["C", "CH", "CH1", "CAH", "CH2", "CAH2", "CH3", "CH2r", "S"]), "interaction": "rep", "sigma": None, "epsilon": None, }, - #{ - # "atomtypes": (["CH1", "CAH"], ["CH1", "CAH"]), # hydrogen bond attraction - # "interaction": "rep", - # "sigma": None, - # "epsilon": None, - #}, ] # List of amino acids and nucleic acids