Source code for rif.rotamer.richardson

# from rif import rcl
# from rif.rcl import pyrosetta, rosetta
# from rif.chem.biochem import aa_name3s
import os
import numpy as np
import pandas as pd
try:
    from functools import lru_cache
except ImportError:
    from backports.functools_lru_cache import lru_cache

boundsfields = 'lb1 ub1 lb2 ub2 lb3 ub3 lb4 ub4'.split()


def print_full(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')


def localpath(pth):
    assert not pth.startswith('/')
    return os.path.join(os.path.dirname(__file__), pth)


def get_rotamer_space_raw():
    r = pd.read_csv(localpath('richardson.csv'),
                    header=0, nrows=999, index_col=(0, 1))
    r.reset_index(inplace=True)
    r.sort_values(['aa', 'lbl'], inplace=True)
    r.set_index(['aa', 'lbl'], inplace=True)
    # r = r.drop('lbl', axis=1)
    # print(r.columns)
    # print(r.iloc[0])
    assert all(x > 0 for x in r.iloc[:, 0])
    for i in range(1, 5):
        r.iloc[:, i] = [float(x) / 100.0 for x in r.iloc[:, i]]
    assert all(0 <= r.f) and all(r.f <= 1.0)
    assert all(0 <= r.fa) and all(r.fa <= 1.0)
    assert all(0 <= r.fb) and all(r.fb <= 1.0)
    assert all(0 <= r.fo) and all(r.fo <= 1.0)
    # print_full(r.x1)
    assert sum(r.x1.isnull()) == 10
    assert all(-180.0 < r.x1.dropna()) and all(r.x1.dropna() <= 180.0)
    # print_full(r.x2)
    assert sum(r.x2.isnull()) == 15
    assert all(-180.0 < r.x2.dropna()) and all(r.x2.dropna() <= 180.0)
    # print_full(r.x3)
    assert sum(r.x3.isnull()) == 62
    assert all(-180.0 < r.x3.dropna()) and all(r.x3.dropna() <= 180.0)
    # print_full(r.x4)
    assert sum(r.x4.isnull()) == 92
    assert all(-180.0 < r.x4.dropna()) and all(r.x4.dropna() <= 180.0)
    assert sum(r.x1r.isnull()) == 10
    assert sum(r.x2r.isnull()) == 15
    # print_full(r.x3r)
    assert sum(r.x3r.isnull()) == 62
    # print_full(r.x4r)
    assert sum(r.x4r.isnull()) == 92
    for i in "1234":
        r['lb' + i] = np.NaN
        r['ub' + i] = np.NaN
        idx = r['x' + i + 'r'].notnull()
        r.loc[idx, 'lb' + i] = [float(x.split()[0])
                                for x in r['x' + i + 'r'][idx]]
        r.loc[idx, 'ub' + i] = [float(x.split()[2])
                                for x in r['x' + i + 'r'][idx]]
    r = r.drop(
        'x1a x2a x3a x4a x1w x2w x3w x4w x1r x2r x3r x4r'.split(), axis=1)

    r['nchi'] = 4 - r.loc[:, boundsfields].isnull().sum(axis=1) / 2
    return r


[docs]@lru_cache() def get_rotamer_space(concat=False, disulf=False): """B is disulfide, x4 -> x2other n num observations fabo total fraction/alpha/beta/other x#a chi # "act" x# chi # "com" x#r chi # range text x#w chi # peak width lb#/ub# chi # range """ try: r = pd.read_pickle(localpath('richardson.pkl')) except IOError: # todo: python3 only use FileNotFoundError # print('warning: richardson.pkl not available, reading from richardson.csv') r = get_rotamer_space_raw() # r.to_pickle(localpath('richardson.pkl')) # print_full(r.loc[:, 'lb1 ub1 ub2 ub2 lb3 ub3 lb4 ub4'.split()]) # print_full(r.loc[:, 'x1w x2w x3w x4w'.split()]) if not disulf: r = r.drop('B') if concat: r = concat_rotamer_space(r) return r
def _roteq(a, b): return abs(a % 360.0 - b % 360.0) < 10.0 def merge_on_chi(rs, chi): lb = 'lb%i' % chi ub = 'ub%i' % chi # print('merge_on_chi') rs = rs.copy() # warnings if I don't do this for i in range(10): # print('merge_on_chi', i) updated = False for i, j in ((i, j) for i in range(rs.shape[0]) for j in range(i + 1, rs.shape[0])): if _roteq(rs[lb][i], rs[ub][j]): # print('++++++++++++++++++++++++ 1 +++++++++++++++++++++++++++') # print('lb i, ub j', i, j) # print(rs[boundsfields]) rs.loc[rs.index[i], lb] = rs.loc[rs.index[j], lb] # print('drop', j) # oldlen = rs.shape[0] rs = rs.drop(rs.index[j]) # print(oldlen, rs.shape[0]) # print('NEW:') # print(rs[boundsfields]) updated = True break elif _roteq(rs[ub][i], rs[lb][j]): # print('++++++++++++++++++++++++ 2 +++++++++++++++++++++++++++') # print('ub i, lb j', i, j) # print(rs[boundsfields]) rs.loc[rs.index[i], ub] = rs.loc[rs.index[j], ub] # print('drop', j) # oldlen = rs.shape[0] rs = rs.drop(rs.index[j]) # print(oldlen, rs.shape[0]) # print('NEW:') # print(rs[boundsfields]) updated = True break if not updated: return rs def concat_rotamer_space(rotspace): newdat = [] for nchi, fixnchi in rotspace.groupby('nchi'): keys = (['aa'] + ['lb%i' % i for i in range(1, int(nchi))] + ['ub%i' % i for i in range(1, int(nchi))]) # print(keys) for _, fixchiprefix in fixnchi.groupby(keys): # print(chiprefix) # print(fixchiprefix.loc[:, boundsfields]) newdat.append(merge_on_chi(fixchiprefix, chi=nchi)) r = pd.concat(newdat) r = r.reset_index() r = r.sort_values(['aa', 'lbl']) r = r.set_index(['aa', 'lbl']) return r
[docs]def sample_rotamer_space(rotspace, resl=[10, 10, 10, 10]): """rotamer samples"""
# for
[docs]def get_rotamer_index(rotspace): """extract AA structural info via pyrosetta""" # print('get_rotamer_coords') rcl.init_check() chem_manager = rosetta.core.chemical.ChemicalManager rts = chem_manager.get_instance().residue_type_set("fa_standard") # print(rts) rfactory = rosetta.core.conformation.ResidueFactory for rname in aa_name3s: res = rfactory.create_residue(rts.name_map(rname)) print(rname, res.nheavyatoms(), res.natoms()) raise NotImplementedError return 1