Source code for rif.apps.fiber_dock

"""
a prototype module for fiber docking. for illustration see:

https://docs.google.com/presentation/d/1qv6yZIy0b2dfmZAzA7bFNJD4pl9h7Nt1mOL4aqiVpOw/pub?start=false&loop=false&delayms=3000#slide=id.g1d6846b840_0_8

(see prev slide for simpler, less-general case)
"""

# from rif.io.pdbio import read_pdb
# from rif.nest import NestedXforms
# from rif.models import ImplicitEnsemble, ImplicitEnsembleScore
# from rif.search import HierBeamSearch

# import numpy as np


[docs]def rel_prime_flat_tri(Nmax):
    """return two same-size lists of relatively prime numbers

    mapped to tuples, would be: 1,2 1,3 2,3 1,4 3,4 1,5 2,5 3,5 4,5 ...
    replace with numpy one-liner, probably more clear
    """
    pass


[docs]def center_welzl(model):
    """center model based on welzl optimal bounding sphere, report radius"""
    pass


[docs]def fiber_dock(pdb_file):
    """turn monomer into helical fiber in "all" possible ways"""

    # setup
    resolutions = (16, 8, 4, 2, 1, 0.5)
    M, N = rel_prime_flat_tri(Nmax=20)
    powers = (M / N)[np.newaxis, :]  # shape (1, whatever)
    assert all(powers < 1)  # else Hsearch can't work
    prot, radius = center_welzl(read_pdb(pdb_file))
    bbframes = prot.bbframes(ss='HE', burial='exposed')
    samp_grid = NestedXforms(baseresl=resolutions[0], lever=radius)
    # ImplicitEnsemble represents a structural ensemble in which each member's
    # elements (atoms, rays, stubs, etc) are within well-defined translational
    # and rotational bounds of a reference structure. (rotation mag defined by
    # radius/lever)
    ensemble0 = ImplicitEnsemble(atoms=prot.atoms, rotslots=bbframes,
                                 radius=0.0, lever=radius, bvh_max_radius=1.0,
                                 voxel_convolv_radii=resolutions)
    hsearch = HierBeamSearch(resls=resolutions, grid=samp_grid, beam_size_m=10,
                             extra_fields=(('M', 'u1'), ('N', 'u1')), )
    score = ImplicitEnsembleScore(
        # todo
    )

    # assuming I can do what I want with numpy dtype/operators...
    def score_samples(stage, samples):
        resl = resolutions[stage]
        indices = samples['index']
        positions = samp_grid.position_of(stage, indices)
        ensembleN = positions * ensemble0.with_radius(resl)
        score0N = score(ensemble0, ensembleN)  # radius 0 + resl
        # better to extract sub-array of score0N < 0?
        if np.any(score0N < 0):
            # in addition to ensemble['position']**(M/N), operator
            # __pow__(ensemble,float) must scale ensemble['radius']
            # linearly by pow... is this well-defined algebra?
            # maybe just use an ensemble_power() func directly?
            ensembleM = np.power(ensembleN, powers)
            # radius 0 + resl * m / n (not constant over array)
            score0M = score(ensemble0, ensembleM)
            # - coupling='linear' means M ensemble motion tied exactly to
            #   N motion, so instead of radius = Arad + Brad,
            #   radius = min(Arad - Brad, Brad - Arad)
            # - this check may not be worth the cost here, skip or
            #   maybe filter on ensembleM good enough first
            scoreMN = score(ensembleM, ensembleN, coupling='linear')
            score0MN = score0N + score0M + scoreMN  # score0N bcast
            idxmin = score0MN.argmin(axis=2)  # best score forall M / N
            samples['M'] = M[idxmin]
            samples['N'] = N[idxmin]
            samples['score'] = score0MN[:, idxmin]
            # todo: eventually check all good M/N? will probably be rare
            #       to have more than one hit
            # todo: also score ensemble0 vs all 1..N) for thoroughness
        return samples

    # hsearch interface option 1
    # simple, but perhaps not as flexible as below
    hsearch.search(score_samples)
    print(hsearch.results())

    # hsearch interface option 2
    # not sure if using iteration as an interface to the search is a
    # good idea or not... note that samples['score'] gets communicated
    # back to the search, which is not a pattern I've seen used much
    # but it would be clearer for simpler cases... don't actually have to
    # define a function (ie score_samples)
    hsearch_accum = hsearch.accumulator()
    for iresl, samples in hsearch_accum:
        score_samples(iresl, samples)
    print(hsearch_accum.results())


if __name__ == '__main__':
    fiber_dock()