Source code for rif.rotamer.richardson

# from rif import rcl
# from rif.rcl import pyrosetta, rosetta
# from rif.chem.biochem import aa_name3s
import os
import numpy as np
import pandas as pd
try:
    from functools import lru_cache
except ImportError:
    from backports.functools_lru_cache import lru_cache

boundsfields = 'lb1 ub1 lb2 ub2 lb3 ub3 lb4 ub4'.split()


def print_full(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')


def localpath(pth):
    assert not pth.startswith('/')
    return os.path.join(os.path.dirname(__file__), pth)


def get_rotamer_space_raw():
    r = pd.read_csv(localpath('richardson.csv'),
                    header=0, nrows=999, index_col=(0, 1))
    r.reset_index(inplace=True)
    r.sort_values(['aa', 'lbl'], inplace=True)
    r.set_index(['aa', 'lbl'], inplace=True)
    # r = r.drop('lbl', axis=1)
    # print(r.columns)
    # print(r.iloc[0])
    assert all(x > 0 for x in r.iloc[:, 0])
    for i in range(1, 5):
        r.iloc[:, i] = [float(x) / 100.0 for x in r.iloc[:, i]]
    assert all(0 <= r.f) and all(r.f <= 1.0)
    assert all(0 <= r.fa) and all(r.fa <= 1.0)
    assert all(0 <= r.fb) and all(r.fb <= 1.0)
    assert all(0 <= r.fo) and all(r.fo <= 1.0)
    # print_full(r.x1)
    assert sum(r.x1.isnull()) == 10
    assert all(-180.0 < r.x1.dropna()) and all(r.x1.dropna() <= 180.0)
    # print_full(r.x2)
    assert sum(r.x2.isnull()) == 15
    assert all(-180.0 < r.x2.dropna()) and all(r.x2.dropna() <= 180.0)
    # print_full(r.x3)
    assert sum(r.x3.isnull()) == 62
    assert all(-180.0 < r.x3.dropna()) and all(r.x3.dropna() <= 180.0)
    # print_full(r.x4)
    assert sum(r.x4.isnull()) == 92
    assert all(-180.0 < r.x4.dropna()) and all(r.x4.dropna() <= 180.0)
    assert sum(r.x1r.isnull()) == 10
    assert sum(r.x2r.isnull()) == 15
    # print_full(r.x3r)
    assert sum(r.x3r.isnull()) == 62
    # print_full(r.x4r)
    assert sum(r.x4r.isnull()) == 92
    for i in "1234":
        r['lb' + i] = np.NaN
        r['ub' + i] = np.NaN
        idx = r['x' + i + 'r'].notnull()
        r.loc[idx, 'lb' + i] = [float(x.split()[0])
                                for x in r['x' + i + 'r'][idx]]
        r.loc[idx, 'ub' + i] = [float(x.split()[2])
                                for x in r['x' + i + 'r'][idx]]
    r = r.drop(
        'x1a x2a x3a x4a x1w x2w x3w x4w x1r x2r x3r x4r'.split(), axis=1)

    r['nchi'] = 4 - r.loc[:, boundsfields].isnull().sum(axis=1) / 2
    return r


[docs]@lru_cache()
def get_rotamer_space(concat=False, disulf=False):
    """B is disulfide, x4 -> x2other
    n num observations
    fabo total fraction/alpha/beta/other
    x#a chi # "act"
    x#  chi # "com"
    x#r chi # range text
    x#w chi # peak width
    lb#/ub# chi # range
    """
    try:
        r = pd.read_pickle(localpath('richardson.pkl'))
    except IOError:  # todo: python3 only use FileNotFoundError
        # print('warning: richardson.pkl not available, reading from richardson.csv')
        r = get_rotamer_space_raw()
        # r.to_pickle(localpath('richardson.pkl'))
    # print_full(r.loc[:, 'lb1 ub1 ub2 ub2 lb3 ub3 lb4 ub4'.split()])
    # print_full(r.loc[:, 'x1w x2w x3w x4w'.split()])
    if not disulf:
        r = r.drop('B')
    if concat:
        r = concat_rotamer_space(r)
    return r


def _roteq(a, b):
    return abs(a % 360.0 - b % 360.0) < 10.0


def merge_on_chi(rs, chi):
    lb = 'lb%i' % chi
    ub = 'ub%i' % chi
    # print('merge_on_chi')
    rs = rs.copy()  # warnings if I don't do this
    for i in range(10):
        # print('merge_on_chi', i)
        updated = False
        for i, j in ((i, j)
                     for i in range(rs.shape[0])
                     for j in range(i + 1, rs.shape[0])):
            if _roteq(rs[lb][i], rs[ub][j]):
                # print('++++++++++++++++++++++++ 1 +++++++++++++++++++++++++++')
                # print('lb i, ub j', i, j)
                # print(rs[boundsfields])
                rs.loc[rs.index[i], lb] = rs.loc[rs.index[j], lb]
                # print('drop', j)
                # oldlen = rs.shape[0]
                rs = rs.drop(rs.index[j])
                # print(oldlen, rs.shape[0])
                # print('NEW:')
                # print(rs[boundsfields])
                updated = True
                break
            elif _roteq(rs[ub][i], rs[lb][j]):
                # print('++++++++++++++++++++++++ 2 +++++++++++++++++++++++++++')
                # print('ub i, lb j', i, j)
                # print(rs[boundsfields])
                rs.loc[rs.index[i], ub] = rs.loc[rs.index[j], ub]
                # print('drop', j)
                # oldlen = rs.shape[0]
                rs = rs.drop(rs.index[j])
                # print(oldlen, rs.shape[0])
                # print('NEW:')
                # print(rs[boundsfields])
                updated = True
                break
        if not updated:
            return rs


def concat_rotamer_space(rotspace):
    newdat = []
    for nchi, fixnchi in rotspace.groupby('nchi'):
        keys = (['aa'] +
                ['lb%i' % i for i in range(1, int(nchi))] +
                ['ub%i' % i for i in range(1, int(nchi))])
        # print(keys)
        for _, fixchiprefix in fixnchi.groupby(keys):
            # print(chiprefix)
            # print(fixchiprefix.loc[:, boundsfields])
            newdat.append(merge_on_chi(fixchiprefix, chi=nchi))
    r = pd.concat(newdat)
    r = r.reset_index()
    r = r.sort_values(['aa', 'lbl'])
    r = r.set_index(['aa', 'lbl'])
    return r


[docs]def sample_rotamer_space(rotspace, resl=[10, 10, 10, 10]):
    """rotamer samples"""
    # for


[docs]def get_rotamer_index(rotspace):
    """extract AA structural info via pyrosetta"""
    # print('get_rotamer_coords')
    rcl.init_check()
    chem_manager = rosetta.core.chemical.ChemicalManager
    rts = chem_manager.get_instance().residue_type_set("fa_standard")
    # print(rts)
    rfactory = rosetta.core.conformation.ResidueFactory
    for rname in aa_name3s:
        res = rfactory.create_residue(rts.name_map(rname))
        print(rname, res.nheavyatoms(), res.natoms())
    raise NotImplementedError
    return 1