Source code for rif.index.stripe_index

import rif
from rif import V3, Atom, _rif
import numpy as np
import functools
import rif.dtypes
from time import clock


[docs]def stripe_index_3d(maxdis, points, payloads=None): """create a 3D stripe index of points[payloads] based on dtype of inputs stripe index is a fast index of 3D coords (possible enhanced with metadata) which can quickly get all the neighbors within a certain maxdis, which must be specified at construction Args: maxdis (float): maximum distance for points to be neighbors points (array): can map to array of C++ structs via dtype payloads (array): optional, additional data attached to each point Returns: stripe_index_3d_<points_t>_<payload_t=NULL>: Wrapper, for example, see the \*_V3 or \*_V3_V3 versions - :py:class:`Points only (\*_V3) <_rif._index._stripe_index.stripe_index_3d_V3>` neighbors method returns points - :py:class:`With payloads (\*_V3_V3) <_rif._index._stripe_index.stripe_index_3d_V3_V3>` neighbors method returns payloads (only) Return value notes: If payloads is a sequence of python objects, internally integers 1-N are used as the payloads, and the objects in the sequence are returned (not recommended, you should be using array style computing). Examples: - **Example: simple points with python objects** >>> array = np.random.uniform(size=(1000,3)) >>> points = [(x[0], x[1], x[2]) for x in array] >>> type(points[13]) # converted to tuples <class 'tuple'> >>> index = stripe_index_3d(maxdis=0.1, points=points) >>> index <_rif._index._stripe_index.stripe_index_3d_V3 object at ...> >>> test = (0.1, 0.2, 0.3) >>> neighbors_fast = index.neighbor_count(test) >>> neighbors_slow = index.neighbor_count_brute(test) >>> # speedup for this example would be nothing >>> # because python operations dominate runtime - **Example: simple points with numpy** >>> points = np.random.uniform(size=30000).astype('f').view(V3) >>> points.shape (10000,) >>> index = stripe_index_3d(maxdis=0.05, points=points) >>> index <_rif._index._stripe_index.stripe_index_3d_V3 object at ...> >>> testpts = np.random.uniform(low=-1, high=2, size=3000).astype('f').view(V3) >>> tfast = clock() >>> neighbors_fast = index.neighbor_count(testpts) >>> tfast = clock() - tfast >>> tslow = clock() >>> neighbors_slow = index.neighbor_count_brute(testpts) >>> tslow = clock() - tslow >>> print('avg number of neighbors:', np.mean(neighbors_fast)) # doctest: +SKIP avg number of neighbors: 0.881 >>> np.all(neighbors_fast == neighbors_slow) True >>> print('speedup is %fx vs brute force loop over points' % (tslow/tfast)) # doctest: +SKIP speedup is 81.904762x vs brute force loop over points >>> # this example is faster because it uses a numpy vectorized query >>> # this amortizes away the cost of python operations - **Example: mapping points to arbitrary python payloads** >>> points = [(0,0,0), (1,1,1), (2,2,2), (3,3,3)] >>> values = ['string payloads: ' + str(x) for x in points] >>> index = stripe_index_3d(maxdis=1.0, points=points, payloads=values) >>> index.neighbors((0,0,0)) # returns python objects ['string payloads: (0, 0, 0)'] >>> index.neighbors((0.5,0.5,0.5)) ['string payloads: (0, 0, 0)', 'string payloads: (1, 1, 1)'] >>> # this example will also be slow... no reason to use this over brute force - **Example: mapping point-like structs to structs via numpy dtypes** >>> N = 1000 >>> points = (np.arange(3*N).astype('f')/N).view(V3) >>> atoms = np.empty(N, dtype=Atom) >>> atoms['pos'] = points >>> atoms['atype'] = np.arange(N) % 23 >>> atoms['rtype'] = np.arange(N) % 20 >>> atoms['anum'] = np.arange(N) % 7 >>> atoms[-3:] array([(([ 2.99099994, 2.9920001 , 2.99300003],), 8, 3, 17), (([ 2.99399996, 2.99499989, 2.99600005],), 9, 4, 18), (([ 2.99699998, 2.99799991, 2.99900007],), 10, 5, 19)], dtype=[('pos', [('raw', '<f4', (3,))]), ('atype', 'u1'), ('anum', 'u1'), ('rtype', '<u2')]) >>> index = stripe_index_3d(maxdis=0.05, points=points, payloads=atoms) >>> index.neighbors((1, 1, 1)) array([(([ 0.972 , 0.97299999, 0.97399998],), 2, 2, 4), (([ 0.97500002, 0.97600001, 0.977 ],), 3, 3, 5), (([ 0.97799999, 0.97899997, 0.98000002],), 4, 4, 6), (([ 0.98100001, 0.98199999, 0.98299998],), 5, 5, 7), (([ 0.98400003, 0.98500001, 0.986 ],), 6, 6, 8), (([ 0.98699999, 0.98799998, 0.98900002],), 7, 0, 9), (([ 0.99000001, 0.991 , 0.99199998],), 8, 1, 10), (([ 0.99299997, 0.99400002, 0.995 ],), 9, 2, 11), (([ 0.99599999, 0.99699998, 0.99800003],), 10, 3, 12), (([ 0.99900001, 1. , 1.00100005],), 11, 4, 13), (([ 1.00199997, 1.00300002, 1.00399995],), 12, 5, 14), (([ 1.005 , 1.00600004, 1.00699997],), 13, 6, 15), (([ 1.00800002, 1.00899994, 1.00999999],), 14, 0, 16), (([ 1.01100004, 1.01199996, 1.01300001],), 15, 1, 17), (([ 1.01400006, 1.01499999, 1.01600003],), 16, 2, 18), (([ 1.01699996, 1.01800001, 1.01900005],), 17, 3, 19), (([ 1.01999998, 1.02100003, 1.02199996],), 18, 4, 0), (([ 1.023 , 1.02400005, 1.02499998],), 19, 5, 1), (([ 1.02600002, 1.02699995, 1.028 ],), 20, 6, 2)], dtype=[('pos', [('raw', '<f4', (3,))]), ('atype', 'u1'), ('anum', 'u1'), ('rtype', '<u2')]) """ dtype2 = None if payloads is not None: assert len(payloads) == len(points) dtype2 = np.dtype('O') if hasattr(payloads, 'dtype'): dtype2 = payloads.dtype dtype1 = np.dtype('O') if hasattr(points, 'dtype'): dtype1 = points.dtype else: # if payloads is sequence of objects, allow points # to be an iterable of 'index-ables' of len at least 3 # for example, a list of rosetta xyzVectors points = _convert_to_arrayV3(points) dtype1 = V3.dtype try: return _typemap_3d[dtype1, dtype2](maxdis, points, payloads) except KeyError: raise ValueError( "stripe_index_3d only supports numpy arrays (can have " + "fancy dtypes) or object payloads (which is inefficient), not: " + str(type(points)) + ', ' + str(type(payloads)))
def _enable_init_pyobject_store(init): @functools.wraps(init) def newinit(self, maxdis, points, payloads): self._init_payload = list(payloads) init(self, maxdis, points, np.arange(len(payloads))) return newinit def _enable_pyobject_return(func): def getpayload(load, x): return (x[0], load[x[1]]) if isinstance(x, tuple) else load[x] @functools.wraps(func) def newfunc(self, *args, **kwargs): ret = func(self, *args, **kwargs) return ([getpayload(self._init_payload, r) for r in ret] if hasattr(ret, '__iter__') else getpayload(self._init_payload, ret)) return newfunc # when should convert point array return to tuples? # def _enable_return_tuple(func): # @functools.wraps(func) # def newfunc(self, *args, **kwargs): # ret = func(self, *args, **kwargs) # return [(x[0],x[1],x[2]) for x in ret] # return newfunc def _decorate(cls, funcname, decorator): orig = getattr(cls, funcname) setattr(cls, funcname, decorator(orig)) def _get_dtype_by_name(name): if name == 'object': return np.dtype('O') else: return vars(rif)[name].dtype def _convert_to_arrayV3(iterable): seq = list(iterable) array = np.zeros((len(seq), 3), dtype='f4') for i, v in enumerate(seq): array[i, 0] = v[0] array[i, 1] = v[1] array[i, 2] = v[2] return array.view(V3) _typemap_3d = dict() for clsname in vars(_rif._index._stripe_index): cls = vars(_rif._index._stripe_index)[clsname] if clsname.startswith('stripe_index_3d_'): if clsname.endswith('_object'): cls.__init__ = _enable_init_pyobject_store(cls.__init__) _decorate(cls, '_raw_payload', _enable_pyobject_return) _decorate(cls, 'neighbors', _enable_pyobject_return) dtypes = tuple([_get_dtype_by_name(x) for x in clsname.split('_')[3:]]) if len(dtypes) is 1: dtypes = (dtypes[0], None) _typemap_3d[dtypes] = cls