123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435 |
- # This file is part of h5py, a Python interface to the HDF5 library.
- #
- # http://www.h5py.org
- #
- # Copyright 2008-2013 Andrew Collette and contributors
- #
- # License: Standard 3-clause BSD; see "license.txt" for full license terms
- # and contributor agreement.
- """
- High-level access to HDF5 dataspace selections
- """
- import numpy as np
- from .base import product
- from .. import h5s, h5r, _selector
def select(shape, args, dataset=None):
    """ Turn arbitrary __getitem__ arguments into a selection object.

    shape
        Shape of the "source" dataspace.

    args
        Either a single argument or a tuple of arguments.  See below for
        supported classes of argument.

    dataset
        A h5py.Dataset instance representing the source dataset.  Required
        when the argument is a region reference.

    Argument classes:

    Single Selection instance
        Returns the argument.

    numpy.ndarray
        Must be a boolean mask.  Returns a PointSelection instance.

    RegionReference
        Returns a Selection instance.

    Indices, slices, ellipses, MultiBlockSlices only
        Returns a SimpleSelection instance

    Indices, slices, ellipses, lists or boolean index arrays
        Returns a FancySelection instance.

    Raises TypeError when a special indexing object does not match "shape",
    or when a region reference is given without a dataset.
    """
    index = args if isinstance(args, tuple) else (args,)

    # A lone argument may be one of the "special" indexing objects
    if len(index) == 1:
        only = index[0]

        if isinstance(only, Selection):
            if only.shape != shape:
                raise TypeError("Mismatched selection shape")
            return only

        if isinstance(only, np.ndarray) and only.dtype.kind == 'b':
            if only.shape != shape:
                raise TypeError("Boolean indexing array has incompatible shape")
            return PointSelection.from_mask(only)

        if isinstance(only, h5r.RegionReference):
            if dataset is None:
                raise TypeError("Cannot apply a region reference without a dataset")
            region = h5r.get_region(only, dataset.id)
            if shape != region.shape:
                raise TypeError("Reference shape does not match dataset shape")
            return Selection(shape, spaceid=region)

    # Anything else is handed to a selector: the dataset's own one when a
    # dataset was supplied, otherwise a fresh one built on "shape".
    if dataset is None:
        selector = _selector.Selector(h5s.create_simple(shape))
    else:
        selector = dataset._selector

    return selector.make_selection(index)
class Selection:

    """
        Base class for HDF5 dataspace selections.  Subclasses support the
        "selection protocol", which means they have at least the following
        members:

        __init__(shape)   => Create a new selection on "shape"-tuple
        __getitem__(args) => Perform a selection with the range specified.
                             What args are allowed depends on the
                             particular subclass in use.

        id (read-only) =>      h5py.h5s.SpaceID instance
        shape (read-only) =>   The shape of the dataspace.
        mshape  (read-only) => The shape of the selection region.
                               Not guaranteed to fit within "shape", although
                               the total number of points is less than
                               product(shape).
        nselect (read-only) => Number of selected points.  Always equal to
                               product(mshape).

        broadcast(target_shape) => Return an iterable which yields dataspaces
                                   for read, based on target_shape.

        The base class represents "unshaped" selections (1-D).
    """

    def __init__(self, shape, spaceid=None):
        """ Create a selection.  Shape may be None if spaceid is given.

        With a spaceid, that object is adopted and its shape is used.
        Otherwise a new dataspace is created on "shape" and fully selected.
        """
        if spaceid is None:
            dims = tuple(shape)
            self._shape = dims
            self._id = h5s.create_simple(dims, (h5s.UNLIMITED,) * len(dims))
            self._id.select_all()
        else:
            self._id = spaceid
            self._shape = spaceid.shape

    @property
    def id(self):
        """ SpaceID instance """
        return self._id

    @property
    def shape(self):
        """ Shape of whole dataspace """
        return self._shape

    @property
    def nselect(self):
        """ Number of elements currently selected """
        return self._id.get_select_npoints()

    @property
    def mshape(self):
        """ Shape of selection (always 1-D for this class) """
        npoints = self.nselect
        return (npoints,)

    @property
    def array_shape(self):
        """Shape of array to read/write (always 1-D for this class)"""
        return self.mshape

    # expand_shape and broadcast only really make sense for SimpleSelection
    def expand_shape(self, source_shape):
        """ Return source_shape unchanged if it holds exactly nselect points.

        Raises TypeError otherwise.
        """
        if product(source_shape) == self.nselect:
            return source_shape
        raise TypeError("Broadcasting is not supported for point-wise selections")

    def broadcast(self, source_shape):
        """ Get an iterable for broadcasting """
        if product(source_shape) == self.nselect:
            yield self._id
        else:
            raise TypeError("Broadcasting is not supported for point-wise selections")

    def __getitem__(self, args):
        raise NotImplementedError("This class does not support indexing")
class PointSelection(Selection):

    """
        Represents a point-wise selection.  You can supply sequences of
        points to the three methods append(), prepend() and set(), or
        instantiate it with a single boolean array using from_mask().
    """

    def __init__(self, shape, spaceid=None, points=None):
        """ Create a point selection.

        shape, spaceid
            Passed through to Selection.__init__.

        points
            Optional sequence of points; when given, it becomes the initial
            selection.
        """
        super().__init__(shape, spaceid)
        if points is not None:
            self._perform_selection(points, h5s.SELECT_SET)

    def _perform_selection(self, points, op):
        """ Internal method which actually performs the selection

        points
            Array-like of coordinates, converted to a C-ordered uint64 array.
            A 1-D input is treated as a single row.

        op
            One of the h5s.SELECT_* operators.  It is replaced by SELECT_SET
            when the dataspace does not currently hold a point selection.
        """
        points = np.asarray(points, order='C', dtype='u8')
        if points.ndim == 1:
            # np.asarray may hand back the caller's own array, so assigning
            # to .shape would reshape the caller's data.  reshape() gives a
            # view and leaves the input untouched.
            points = points.reshape((1, points.shape[0]))

        if self._id.get_select_type() != h5s.SEL_POINTS:
            op = h5s.SELECT_SET

        if len(points) == 0:
            self._id.select_none()
        else:
            self._id.select_elements(points, op)

    @classmethod
    def from_mask(cls, mask, spaceid=None):
        """Create a point-wise selection from a NumPy boolean array

        Raises TypeError if mask is not a boolean ndarray.
        """
        if not (isinstance(mask, np.ndarray) and mask.dtype.kind == 'b'):
            raise TypeError("PointSelection.from_mask only works with bool arrays")
        # One row of coordinates per True element of the mask
        points = np.transpose(mask.nonzero())
        return cls(mask.shape, spaceid, points=points)

    def append(self, points):
        """ Add the sequence of points to the end of the current selection """
        self._perform_selection(points, h5s.SELECT_APPEND)

    def prepend(self, points):
        """ Add the sequence of points to the beginning of the current selection """
        self._perform_selection(points, h5s.SELECT_PREPEND)

    def set(self, points):
        """ Replace the current selection with the given sequence of points"""
        self._perform_selection(points, h5s.SELECT_SET)
class SimpleSelection(Selection):

    """ A single "rectangular" (regular) selection composed of only slices
        and integer arguments.  Can participate in broadcasting.

        The selection is stored in self._sel as a 4-tuple
        (start, count, step, scalar) with one entry per dataspace axis.
        Axes whose "scalar" flag is true are dropped from array_shape.
    """

    @property
    def mshape(self):
        """ Shape of current selection """
        return self._sel[1]

    @property
    def array_shape(self):
        """ Shape of the array to read/write: mshape without scalar axes """
        scalar = self._sel[3]
        return tuple(x for x, s in zip(self.mshape, scalar) if not s)

    def __init__(self, shape, spaceid=None, hyperslab=None):
        """ Create a simple selection.

        shape, spaceid
            Passed through to Selection.__init__.

        hyperslab
            Optional (start, count, step, scalar) tuple.  When omitted the
            whole dataspace is selected.
        """
        super().__init__(shape, spaceid)
        if hyperslab is not None:
            self._sel = hyperslab
        else:
            # No hyperslab specified - select all
            rank = len(self.shape)
            self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)

    def expand_shape(self, source_shape):
        """Match the dimensions of an array to be broadcast to the selection

        The returned shape describes an array of the same size as the input
        shape, but its dimensions are padded with 1s so that it has one
        entry per axis of the selection.

        E.g. with a dataset shape (10, 5, 4, 2), writing like this::

            ds[..., 0] = np.ones((5, 4))

        The source shape (5, 4) will expand to (1, 5, 4, 1).
        Then the broadcast method below repeats that chunk 10
        times to write to an effective shape of (10, 5, 4, 1).

        Raises TypeError if source_shape cannot be broadcast to the
        selection.
        """
        start, count, step, scalar = self._sel

        rank = len(count)
        remaining_src_dims = list(source_shape)

        # Walk the axes from last to first, consuming source dimensions
        # from the end, as NumPy broadcasting does.
        eshape = []
        for idx in range(1, rank + 1):
            if len(remaining_src_dims) == 0 or scalar[-idx]:  # Skip scalar axes
                eshape.append(1)
            else:
                t = remaining_src_dims.pop()
                if t == 1 or count[-idx] == t:
                    eshape.append(t)
                else:
                    raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape

        if any(n > 1 for n in remaining_src_dims):
            # All dimensions from target_shape should either have been popped
            # to match the selection shape, or be 1.
            raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape

        # We have built eshape backwards, so now reverse it
        return tuple(eshape[::-1])

    def broadcast(self, source_shape):
        """ Return an iterator over target dataspaces for broadcasting.

        Follows the standard NumPy broadcasting rules against the current
        selection shape (self.mshape).

        When more than one chunk is needed, the same SpaceID copy is yielded
        each time with a different offset applied, so each yielded value
        should be used before advancing the iterator.

        Raises TypeError if source_shape cannot be broadcast.
        """
        if self.shape == ():
            if product(source_shape) != 1:
                # Wrap in a 1-tuple: a bare tuple operand would be unpacked
                # as the format arguments and fail for multi-element shapes.
                raise TypeError("Can't broadcast %s to scalar" % (source_shape,))
            self._id.select_all()
            yield self._id
            return

        start, count, step, scalar = self._sel

        rank = len(count)
        tshape = self.expand_shape(source_shape)

        # Number of times the source block repeats along each axis
        chunks = tuple(x//y for x, y in zip(count, tshape))
        nchunks = product(chunks)

        if nchunks == 1:
            yield self._id
        else:
            sid = self._id.copy()
            sid.select_hyperslab((0,)*rank, tshape, step)
            for idx in range(nchunks):
                offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
                sid.offset_simple(offset)
                yield sid
class FancySelection(Selection):

    """
        Implements advanced NumPy-style selection operations in addition to
        the standard slice-and-int behavior.

        Indexing arguments may be ints, slices, lists of indices, or
        per-axis (1D) boolean arrays.

        Broadcasting is not supported for these selections.
    """

    def __init__(self, shape, spaceid=None, mshape=None, array_shape=None):
        """ Create a fancy selection.

        shape, spaceid
            Passed through to Selection.__init__.

        mshape
            Shape of the selection; defaults to the dataspace shape.

        array_shape
            Shape of the array to read/write; defaults to mshape.
        """
        super().__init__(shape, spaceid)
        sel_shape = self.shape if mshape is None else mshape
        self._mshape = sel_shape
        self._array_shape = sel_shape if array_shape is None else array_shape

    @property
    def mshape(self):
        """ Shape of the selection """
        return self._mshape

    @property
    def array_shape(self):
        """ Shape of the array to read/write """
        return self._array_shape

    def expand_shape(self, source_shape):
        """ Return source_shape if it equals array_shape, else raise TypeError """
        if source_shape == self.array_shape:
            return source_shape
        raise TypeError("Broadcasting is not supported for complex selections")

    def broadcast(self, source_shape):
        """ Yield the dataspace once; source_shape must equal array_shape """
        if source_shape == self.array_shape:
            yield self._id
        else:
            raise TypeError("Broadcasting is not supported for complex selections")
def guess_shape(sid):
    """ Given a dataspace, try to deduce the shape of the selection.

    Returns one of:
        * A tuple with the selection shape, same length as the dataspace
        * A 1D selection shape for point-based and multiple-hyperslab selections
        * None, for unselected scalars and for NULL dataspaces

    Raises TypeError for an unrecognized dataspace class or selection type.
    """

    sel_class = sid.get_simple_extent_type()    # Dataspace class
    sel_type = sid.get_select_type()            # Flavor of selection in use

    if sel_class == h5s.NULL:
        # NULL dataspaces don't support selections
        return None

    elif sel_class == h5s.SCALAR:
        # NumPy has no way of expressing empty 0-rank selections, so we use None
        if sel_type == h5s.SEL_NONE:
            return None
        if sel_type == h5s.SEL_ALL:
            return tuple()
        # Any other selection type is handled by the code below

    elif sel_class != h5s.SIMPLE:
        raise TypeError("Unrecognized dataspace class %s" % sel_class)

    # We have a "simple" (rank >= 1) dataspace

    N = sid.get_select_npoints()
    rank = len(sid.shape)

    if sel_type == h5s.SEL_NONE:
        return (0,)*rank

    elif sel_type == h5s.SEL_ALL:
        return sid.shape

    elif sel_type == h5s.SEL_POINTS:
        # Like NumPy, point-based selections yield 1D arrays regardless of
        # the dataspace rank
        return (N,)

    elif sel_type != h5s.SEL_HYPERSLABS:
        raise TypeError("Unrecognized selection method %s" % sel_type)

    # We have a hyperslab-based selection

    if N == 0:
        return (0,)*rank

    bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds())

    # Shape of full selection box
    boxshape = topcorner - bottomcorner + np.ones((rank,))

    def get_n_axis(sid, axis):
        """ Determine the number of elements selected along a particular axis.

        To do this, we "mask off" the axis by making a hyperslab selection
        which leaves only the first point along the axis.  For a 2D dataset
        with selection box shape (X, Y), for axis 1, this would leave a
        selection of shape (X, 1).  We count the number of points N_leftover
        remaining in the selection and compute the axis selection length by
        N_axis = N/N_leftover.
        """

        if boxshape[axis] == 1:
            return 1

        start = bottomcorner.copy()
        start[axis] += 1
        count = boxshape.copy()
        count[axis] -= 1

        # Throw away all points along this axis
        masked_sid = sid.copy()
        masked_sid.select_hyperslab(tuple(start), tuple(count), op=h5s.SELECT_NOTB)

        N_leftover = masked_sid.get_select_npoints()

        return N//N_leftover

    shape = tuple(get_n_axis(sid, x) for x in range(rank))

    # np.prod rather than np.product: the latter was removed in NumPy 2.0
    if np.prod(shape) != N:
        # This means multiple hyperslab selections are in effect,
        # so we fall back to a 1D shape
        return (N,)

    return shape
|