Source code for rif.index.stripe_index

import rif
from rif import V3, Atom, _rif
import numpy as np
import functools
import rif.dtypes
from time import clock


[docs]def stripe_index_3d(maxdis, points, payloads=None):
    """create a 3D stripe index of points[payloads] based on dtype of inputs

    stripe index is a fast index of 3D coords (possible enhanced with metadata) which can quickly get all the neighbors within a certain maxdis, which must be specified at construction

    Args:
        maxdis (float): maximum distance for points to be neighbors
        points (array): can map to array of C++ structs via dtype
        payloads (array): optional, additional data attached to each point

    Returns:
        stripe_index_3d_<points_t>_<payload_t=NULL>: Wrapper, for example, see the \*_V3 or \*_V3_V3 versions
            - :py:class:`Points only (\*_V3)
              <_rif._index._stripe_index.stripe_index_3d_V3>`
              neighbors method returns points
            - :py:class:`With payloads (\*_V3_V3)
              <_rif._index._stripe_index.stripe_index_3d_V3_V3>`
              neighbors method returns payloads (only)

    Return value notes:
        If payloads is a sequence of python objects, internally integers 1-N are
        used as the payloads, and the objects in the sequence are returned (not recommended, you should be using array style computing).

    Examples:
        - **Example: simple points with python objects**
            >>> array = np.random.uniform(size=(1000,3))
            >>> points = [(x[0], x[1], x[2]) for x in array]
            >>> type(points[13])  # converted to tuples
            <class 'tuple'>
            >>> index = stripe_index_3d(maxdis=0.1, points=points)
            >>> index
            <_rif._index._stripe_index.stripe_index_3d_V3 object at ...>
            >>> test = (0.1, 0.2, 0.3)
            >>> neighbors_fast = index.neighbor_count(test)
            >>> neighbors_slow = index.neighbor_count_brute(test)
            >>> # speedup for this example would be nothing
            >>> # because python operations dominate runtime

        - **Example: simple points with numpy**
            >>> points = np.random.uniform(size=30000).astype('f').view(V3)
            >>> points.shape
            (10000,)
            >>> index = stripe_index_3d(maxdis=0.05, points=points)
            >>> index
            <_rif._index._stripe_index.stripe_index_3d_V3 object at ...>
            >>> testpts = np.random.uniform(low=-1, high=2, size=3000).astype('f').view(V3)
            >>> tfast = clock()
            >>> neighbors_fast = index.neighbor_count(testpts)
            >>> tfast = clock() - tfast
            >>> tslow = clock()
            >>> neighbors_slow = index.neighbor_count_brute(testpts)
            >>> tslow = clock() - tslow
            >>> print('avg number of neighbors:', np.mean(neighbors_fast))  # doctest: +SKIP
            avg number of neighbors: 0.881
            >>> np.all(neighbors_fast == neighbors_slow)
            True
            >>> print('speedup is %fx vs brute force loop over points' % (tslow/tfast)) # doctest: +SKIP
            speedup is 81.904762x vs brute force loop over points
            >>> # this example is faster because it uses a numpy vectorized query
            >>> # this amortizes away the cost of python operations

        - **Example: mapping points to arbitrary python payloads**
            >>> points = [(0,0,0), (1,1,1), (2,2,2), (3,3,3)]
            >>> values = ['string payloads: ' + str(x) for x in points]
            >>> index = stripe_index_3d(maxdis=1.0, points=points, payloads=values)
            >>> index.neighbors((0,0,0))  # returns python objects
            ['string payloads: (0, 0, 0)']
            >>> index.neighbors((0.5,0.5,0.5))
            ['string payloads: (0, 0, 0)', 'string payloads: (1, 1, 1)']
            >>> # this example will also be slow... no reason to use this over brute force

        - **Example: mapping point-like structs to structs via numpy dtypes**
            >>> N = 1000
            >>> points = (np.arange(3*N).astype('f')/N).view(V3)
            >>> atoms = np.empty(N, dtype=Atom)
            >>> atoms['pos'] = points
            >>> atoms['atype'] = np.arange(N) % 23
            >>> atoms['rtype'] = np.arange(N) % 20
            >>> atoms['anum'] = np.arange(N) % 7
            >>> atoms[-3:]
            array([(([ 2.99099994,  2.9920001 ,  2.99300003],),  8, 3, 17),
                   (([ 2.99399996,  2.99499989,  2.99600005],),  9, 4, 18),
                   (([ 2.99699998,  2.99799991,  2.99900007],), 10, 5, 19)],
                  dtype=[('pos', [('raw', '<f4', (3,))]), ('atype', 'u1'), ('anum', 'u1'), ('rtype', '<u2')])
            >>> index = stripe_index_3d(maxdis=0.05, points=points, payloads=atoms)
            >>> index.neighbors((1, 1, 1))
            array([(([ 0.972     ,  0.97299999,  0.97399998],),  2, 2,  4),
                   (([ 0.97500002,  0.97600001,  0.977     ],),  3, 3,  5),
                   (([ 0.97799999,  0.97899997,  0.98000002],),  4, 4,  6),
                   (([ 0.98100001,  0.98199999,  0.98299998],),  5, 5,  7),
                   (([ 0.98400003,  0.98500001,  0.986     ],),  6, 6,  8),
                   (([ 0.98699999,  0.98799998,  0.98900002],),  7, 0,  9),
                   (([ 0.99000001,  0.991     ,  0.99199998],),  8, 1, 10),
                   (([ 0.99299997,  0.99400002,  0.995     ],),  9, 2, 11),
                   (([ 0.99599999,  0.99699998,  0.99800003],), 10, 3, 12),
                   (([ 0.99900001,  1.        ,  1.00100005],), 11, 4, 13),
                   (([ 1.00199997,  1.00300002,  1.00399995],), 12, 5, 14),
                   (([ 1.005     ,  1.00600004,  1.00699997],), 13, 6, 15),
                   (([ 1.00800002,  1.00899994,  1.00999999],), 14, 0, 16),
                   (([ 1.01100004,  1.01199996,  1.01300001],), 15, 1, 17),
                   (([ 1.01400006,  1.01499999,  1.01600003],), 16, 2, 18),
                   (([ 1.01699996,  1.01800001,  1.01900005],), 17, 3, 19),
                   (([ 1.01999998,  1.02100003,  1.02199996],), 18, 4,  0),
                   (([ 1.023     ,  1.02400005,  1.02499998],), 19, 5,  1),
                   (([ 1.02600002,  1.02699995,  1.028     ],), 20, 6,  2)],
                  dtype=[('pos', [('raw', '<f4', (3,))]), ('atype', 'u1'), ('anum', 'u1'), ('rtype', '<u2')])
    """
    dtype2 = None
    if payloads is not None:
        assert len(payloads) == len(points)
        dtype2 = np.dtype('O')
        if hasattr(payloads, 'dtype'):
            dtype2 = payloads.dtype

    dtype1 = np.dtype('O')
    if hasattr(points, 'dtype'):
        dtype1 = points.dtype
    else:
        # if payloads is sequence of objects, allow points
        # to be an iterable of 'index-ables' of len at least 3
        # for example, a list of rosetta xyzVectors
        points = _convert_to_arrayV3(points)
        dtype1 = V3.dtype
    try:
        return _typemap_3d[dtype1, dtype2](maxdis, points, payloads)
    except KeyError:
        raise ValueError(
            "stripe_index_3d only supports numpy arrays (can have " +
            "fancy dtypes) or object payloads (which is inefficient), not: " +
            str(type(points)) + ', ' + str(type(payloads)))


def _enable_init_pyobject_store(init):
    @functools.wraps(init)
    def newinit(self, maxdis, points, payloads):
        self._init_payload = list(payloads)
        init(self, maxdis, points, np.arange(len(payloads)))
    return newinit


def _enable_pyobject_return(func):
    def getpayload(load, x):
        return (x[0], load[x[1]]) if isinstance(x, tuple) else load[x]

    @functools.wraps(func)
    def newfunc(self, *args, **kwargs):
        ret = func(self, *args, **kwargs)
        return ([getpayload(self._init_payload, r) for r in ret]
                if hasattr(ret, '__iter__')
                else getpayload(self._init_payload, ret))
    return newfunc

# when should convert point array return to tuples?
# def _enable_return_tuple(func):
#     @functools.wraps(func)
#     def newfunc(self, *args, **kwargs):
#         ret = func(self, *args, **kwargs)
#         return [(x[0],x[1],x[2]) for x in ret]
#     return newfunc


def _decorate(cls, funcname, decorator):
    orig = getattr(cls, funcname)
    setattr(cls, funcname, decorator(orig))


def _get_dtype_by_name(name):
    if name == 'object':
        return np.dtype('O')
    else:
        return vars(rif)[name].dtype


def _convert_to_arrayV3(iterable):
    seq = list(iterable)
    array = np.zeros((len(seq), 3), dtype='f4')
    for i, v in enumerate(seq):
        array[i, 0] = v[0]
        array[i, 1] = v[1]
        array[i, 2] = v[2]
    return array.view(V3)


_typemap_3d = dict()
for clsname in vars(_rif._index._stripe_index):
    cls = vars(_rif._index._stripe_index)[clsname]
    if clsname.startswith('stripe_index_3d_'):
        if clsname.endswith('_object'):
            cls.__init__ = _enable_init_pyobject_store(cls.__init__)
            _decorate(cls, '_raw_payload', _enable_pyobject_return)
            _decorate(cls, 'neighbors', _enable_pyobject_return)
        dtypes = tuple([_get_dtype_by_name(x) for x in clsname.split('_')[3:]])
        if len(dtypes) is 1:
            dtypes = (dtypes[0], None)
        _typemap_3d[dtypes] = cls