# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License:  Standard 3-clause BSD; see "license.txt" for full license terms
#           and contributor agreement.

"""
    Dataset testing operations.

    Tests all dataset operations, including creation, with the exception of:

    1. Slicing operations for read and write, handled by module test_slicing
    2. Type conversion for read and write (currently untested)
"""

import pathlib
import sys

import numpy as np
import platform
import pytest

from .common import ut, TestCase
from .data_files import get_data_file_path
from h5py import File, Group, Dataset
from h5py._hl.base import is_empty_dataspace
from h5py import h5f, h5t
import h5py
import h5py._hl.selections as sel


class BaseDataset(TestCase):
    def setUp(self):
        self.f = File(self.mktemp(), 'w')

    def tearDown(self):
        if self.f:
            self.f.close()


class TestRepr(BaseDataset):
    """
        Feature: repr(Dataset) behaves sensibly
    """

    def test_repr_open(self):
        """ repr() works on live and dead datasets """
        ds = self.f.create_dataset('foo', (4,))
        self.assertIsInstance(repr(ds), str)
        self.f.close()
        self.assertIsInstance(repr(ds), str)


class TestCreateShape(BaseDataset):
    """
        Feature: Datasets can be created from a shape only
    """

    def test_create_scalar(self):
        """ Create a scalar dataset """
        dset = self.f.create_dataset('foo', ())
        self.assertEqual(dset.shape, ())

    def test_create_simple(self):
        """ Create a size-1 dataset """
        dset = self.f.create_dataset('foo', (1,))
        self.assertEqual(dset.shape, (1,))

    def test_create_integer(self):
        """ Create a size-1 dataset with integer shape """
        dset = self.f.create_dataset('foo', 1)
        self.assertEqual(dset.shape, (1,))

    def test_create_extended(self):
        """ Create an extended dataset """
        dset = self.f.create_dataset('foo', (63,))
        self.assertEqual(dset.shape, (63,))
        self.assertEqual(dset.size, 63)

        dset = self.f.create_dataset('bar', (6, 10))
        self.assertEqual(dset.shape, (6, 10))
        self.assertEqual(dset.size, 60)

    def test_create_integer_extended(self):
        """ Create an extended dataset with integer shape """
        dset = self.f.create_dataset('foo', 63)
        self.assertEqual(dset.shape, (63,))
        self.assertEqual(dset.size, 63)

        dset = self.f.create_dataset('bar', (6, 10))
        self.assertEqual(dset.shape, (6, 10))
        self.assertEqual(dset.size, 60)

    def test_default_dtype(self):
        """ Confirm that the default dtype is float """
        dset = self.f.create_dataset('foo', (63,))
        self.assertEqual(dset.dtype, np.dtype('=f4'))

    def test_missing_shape(self):
        """ Missing shape raises TypeError """
        with self.assertRaises(TypeError):
            self.f.create_dataset('foo')

    def test_long_double(self):
        """ Create a dataset with long double dtype """
        dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble)
        if platform.machine() in ['ppc64le']:
            pytest.xfail("Storage of long double deactivated on %s" % platform.machine())
        self.assertEqual(dset.dtype, np.longdouble)

    @ut.skipIf(not hasattr(np, "complex256"), "No support for complex256")
    def test_complex256(self):
        """ Create a dataset with complex256 dtype """
        dset = self.f.create_dataset('foo', (63,),
                                     dtype=np.dtype('complex256'))
        self.assertEqual(dset.dtype, np.dtype('complex256'))

    def test_name_bytes(self):
        dset = self.f.create_dataset(b'foo', (1,))
        self.assertEqual(dset.shape, (1,))

        dset2 = self.f.create_dataset(b'bar/baz', (2,))
        self.assertEqual(dset2.shape, (2,))


class TestCreateData(BaseDataset):
    """
        Feature: Datasets can be created from existing data
    """

    def test_create_scalar(self):
        """ Create a scalar dataset from existing array """
        data = np.ones((), 'f')
        dset = self.f.create_dataset('foo', data=data)
        self.assertEqual(dset.shape, data.shape)

    def test_create_extended(self):
        """ Create an extended dataset from existing data """
        data = np.ones((63,), 'f')
        dset = self.f.create_dataset('foo', data=data)
        self.assertEqual(dset.shape, data.shape)

    def test_dataset_intermediate_group(self):
        """ Create dataset with missing intermediate groups """
        ds = self.f.create_dataset("/foo/bar/baz", shape=(10, 10), dtype='<i4')
        assert isinstance(ds, h5py.Dataset)
        assert "/foo/bar/baz" in self.f
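

# The module-level tests below take a `writable_file` pytest fixture. A
# minimal sketch of such a fixture, assuming it only needs to yield an open,
# writable File backed by pytest's built-in tmp_path temporary directory
# (the file name 'test.h5' is illustrative, not prescribed):
@pytest.fixture
def writable_file(tmp_path):
    with File(tmp_path / 'test.h5', 'w') as f:
        yield f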
array """ data = np.ones((), 'f') dset = self.f.create_dataset('foo', data=data) self.assertEqual(dset.shape, data.shape) def test_create_extended(self): """ Create an extended dataset from existing data """ data = np.ones((63,), 'f') dset = self.f.create_dataset('foo', data=data) self.assertEqual(dset.shape, data.shape) def test_dataset_intermediate_group(self): """ Create dataset with missing intermediate groups """ ds = self.f.create_dataset("/foo/bar/baz", shape=(10, 10), dtype='= (1, 10, 5), "chunk info requires HDF5 >= 1.10.5") def test_get_chunk_details(): from io import BytesIO buf = BytesIO() with h5py.File(buf, 'w') as fout: fout.create_dataset('test', shape=(100, 100), chunks=(10, 10), dtype='i4') fout['test'][:] = 1 buf.seek(0) with h5py.File(buf, 'r') as fin: ds = fin['test'].id assert ds.get_num_chunks() == 100 for j in range(100): offset = tuple(np.array(np.unravel_index(j, (10, 10))) * 10) si = ds.get_chunk_info(j) assert si.chunk_offset == offset assert si.filter_mask == 0 assert si.byte_offset is not None assert si.size > 0 si = ds.get_chunk_info_by_coord((0, 0)) assert si.chunk_offset == (0, 0) assert si.filter_mask == 0 assert si.byte_offset is not None assert si.size > 0 def test_empty_shape(writable_file): ds = writable_file.create_dataset('empty', dtype='int32') assert ds.shape is None assert ds.maxshape is None def test_zero_storage_size(): # https://github.com/h5py/h5py/issues/1475 from io import BytesIO buf = BytesIO() with h5py.File(buf, 'w') as fout: fout.create_dataset('empty', dtype='uint8') buf.seek(0) with h5py.File(buf, 'r') as fin: assert fin['empty'].chunks is None assert fin['empty'].id.get_offset() is None assert fin['empty'].id.get_storage_size() == 0 def test_python_int_uint64(writable_file): # https://github.com/h5py/h5py/issues/1547 data = [np.iinfo(np.int64).max, np.iinfo(np.int64).max + 1] # Check creating a new dataset ds = writable_file.create_dataset('x', data=data, dtype=np.uint64) assert ds.dtype == np.dtype(np.uint64) np.testing.assert_array_equal(ds[:], np.array(data, dtype=np.uint64)) # Check writing to an existing dataset ds[:] = data np.testing.assert_array_equal(ds[:], np.array(data, dtype=np.uint64)) def test_setitem_fancy_indexing(writable_file): # https://github.com/h5py/h5py/issues/1593 arr = writable_file.create_dataset('data', (5, 1000, 2), dtype=np.uint8) block = np.random.randint(255, size=(5, 3, 2)) arr[:, [0, 2, 4], ...] = block def test_vlen_spacepad(): with File(get_data_file_path("vlen_string_dset.h5")) as f: assert f["DS1"][0] == b"Parting" def test_vlen_nullterm(): with File(get_data_file_path("vlen_string_dset_utc.h5")) as f: assert f["ds1"][0] == b"2009-12-20T10:16:18.662409Z" @pytest.mark.skipif( h5py.version.hdf5_version_tuple < (1, 10, 3), reason="Appears you cannot pass an unknown filter id for HDF5 < 1.10.3" ) def test_allow_unknown_filter(writable_file): # apparently 256-511 are reserved for testing purposes fake_filter_id = 256 ds = writable_file.create_dataset( 'data', shape=(10, 10), dtype=np.uint8, compression=fake_filter_id, allow_unknown_filter=True ) assert str(fake_filter_id) in ds._filters