"""
Record Arrays
=============
Record arrays expose the fields of structured arrays as properties.
Most commonly, ndarrays contain elements of a single type, e.g. floats,
integers, bools etc. However, it is possible for elements to be combinations
of these using structured types, such as::
>>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)])
>>> a
array([(1, 2.), (1, 2.)], dtype=[('x', '>> a['x']
array([1, 1])
>>> a['y']
array([2., 2.])
Record arrays allow us to access fields as properties::
>>> ar = np.rec.array(a)
>>> ar.x
array([1, 1])
>>> ar.y
array([2., 2.])
"""
import warnings
from collections import Counter
from contextlib import nullcontext
from .._utils import set_module
from . import numeric as sb
from . import numerictypes as nt
from numpy.compat import os_fspath
from .arrayprint import _get_legacy_print_mode
# All of the functions allow formats to be a dtype
__all__ = [
'record', 'recarray', 'format_parser',
'fromarrays', 'fromrecords', 'fromstring', 'fromfile', 'array',
]
ndarray = sb.ndarray
_byteorderconv = {'b':'>',
'l':'<',
'n':'=',
'B':'>',
'L':'<',
'N':'=',
'S':'s',
's':'s',
'>':'>',
'<':'<',
'=':'=',
'|':'|',
'I':'|',
'i':'|'}
# formats regular expression
# allows multidimensional spec with a tuple syntax in front
# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed
numfmt = nt.sctypeDict
def find_duplicate(list):
"""Find duplication in a list, return a list of duplicated elements"""
return [
item
for item, counts in Counter(list).items()
if counts > 1
]
@set_module('numpy')
class format_parser:
"""
Class to convert formats, names, titles description to a dtype.
After constructing the format_parser object, the dtype attribute is
the converted data-type:
``dtype = format_parser(formats, names, titles).dtype``
Attributes
----------
dtype : dtype
The converted data-type.
Parameters
----------
formats : str or list of str
The format description, either specified as a string with
comma-separated format descriptions in the form ``'f8, i4, a5'``, or
a list of format description strings in the form
``['f8', 'i4', 'a5']``.
names : str or list/tuple of str
The field names, either specified as a comma-separated string in the
form ``'col1, col2, col3'``, or as a list or tuple of strings in the
form ``['col1', 'col2', 'col3']``.
An empty list can be used, in that case default field names
('f0', 'f1', ...) are used.
titles : sequence
Sequence of title strings. An empty list can be used to leave titles
out.
aligned : bool, optional
If True, align the fields by padding as the C-compiler would.
Default is False.
byteorder : str, optional
If specified, all the fields will be changed to the
provided byte-order. Otherwise, the default byte-order is
used. For all available string specifiers, see `dtype.newbyteorder`.
See Also
--------
dtype, typename, sctype2char
Examples
--------
>>> np.format_parser(['>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
... []).dtype
dtype([('col1', '>> np.format_parser([' len(titles):
self._titles += [None] * (self._nfields - len(titles))
def _createdtype(self, byteorder):
dtype = sb.dtype({
'names': self._names,
'formats': self._f_formats,
'offsets': self._offsets,
'titles': self._titles,
})
if byteorder is not None:
byteorder = _byteorderconv[byteorder[0]]
dtype = dtype.newbyteorder(byteorder)
self.dtype = dtype
class record(nt.void):
"""A data-type scalar that allows field access as attribute lookup.
"""
# manually set name and module so that this class's type shows up
# as numpy.record when printed
__name__ = 'record'
__module__ = 'numpy'
def __repr__(self):
if _get_legacy_print_mode() <= 113:
return self.__str__()
return super().__repr__()
def __str__(self):
if _get_legacy_print_mode() <= 113:
return str(self.item())
return super().__str__()
def __getattribute__(self, attr):
if attr in ('setfield', 'getfield', 'dtype'):
return nt.void.__getattribute__(self, attr)
try:
return nt.void.__getattribute__(self, attr)
except AttributeError:
pass
fielddict = nt.void.__getattribute__(self, 'dtype').fields
res = fielddict.get(attr, None)
if res:
obj = self.getfield(*res[:2])
# if it has fields return a record,
# otherwise return the object
try:
dt = obj.dtype
except AttributeError:
#happens if field is Object type
return obj
if dt.names is not None:
return obj.view((self.__class__, obj.dtype))
return obj
else:
raise AttributeError("'record' object has no "
"attribute '%s'" % attr)
def __setattr__(self, attr, val):
if attr in ('setfield', 'getfield', 'dtype'):
raise AttributeError("Cannot set '%s' attribute" % attr)
fielddict = nt.void.__getattribute__(self, 'dtype').fields
res = fielddict.get(attr, None)
if res:
return self.setfield(val, *res[:2])
else:
if getattr(self, attr, None):
return nt.void.__setattr__(self, attr, val)
else:
raise AttributeError("'record' object has no "
"attribute '%s'" % attr)
def __getitem__(self, indx):
obj = nt.void.__getitem__(self, indx)
# copy behavior of record.__getattribute__,
if isinstance(obj, nt.void) and obj.dtype.names is not None:
return obj.view((self.__class__, obj.dtype))
else:
# return a single element
return obj
def pprint(self):
"""Pretty-print all fields."""
# pretty-print all fields
names = self.dtype.names
maxlen = max(len(name) for name in names)
fmt = '%% %ds: %%s' % maxlen
rows = [fmt % (name, getattr(self, name)) for name in names]
return "\n".join(rows)
# The recarray is almost identical to a standard array (which supports
# named fields already) The biggest difference is that it can use
# attribute-lookup to find the fields and it is constructed using
# a record.
# If byteorder is given it forces a particular byteorder on all
# the fields (and any subfields)
class recarray(ndarray):
"""Construct an ndarray that allows field access using attributes.
Arrays may have a data-types containing fields, analogous
to columns in a spread sheet. An example is ``[(x, int), (y, float)]``,
where each entry in the array is a pair of ``(int, float)``. Normally,
these attributes are accessed using dictionary lookups such as ``arr['x']``
and ``arr['y']``. Record arrays allow the fields to be accessed as members
of the array, using ``arr.x`` and ``arr.y``.
Parameters
----------
shape : tuple
Shape of output array.
dtype : data-type, optional
The desired data-type. By default, the data-type is determined
from `formats`, `names`, `titles`, `aligned` and `byteorder`.
formats : list of data-types, optional
A list containing the data-types for the different columns, e.g.
``['i4', 'f8', 'i4']``. `formats` does *not* support the new
convention of using types directly, i.e. ``(int, float, int)``.
Note that `formats` must be a list, not a tuple.
Given that `formats` is somewhat limited, we recommend specifying
`dtype` instead.
names : tuple of str, optional
The name of each column, e.g. ``('x', 'y', 'z')``.
buf : buffer, optional
By default, a new array is created of the given shape and data-type.
If `buf` is specified and is an object exposing the buffer interface,
the array will use the memory from the existing buffer. In this case,
the `offset` and `strides` keywords are available.
Other Parameters
----------------
titles : tuple of str, optional
Aliases for column names. For example, if `names` were
``('x', 'y', 'z')`` and `titles` is
``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
byteorder : {'<', '>', '='}, optional
Byte-order for all fields.
aligned : bool, optional
Align the fields in memory as the C-compiler would.
strides : tuple of ints, optional
Buffer (`buf`) is interpreted according to these strides (strides
define how many bytes each array element, row, column, etc.
occupy in memory).
offset : int, optional
Start reading buffer (`buf`) from this offset onwards.
order : {'C', 'F'}, optional
Row-major (C-style) or column-major (Fortran-style) order.
Returns
-------
rec : recarray
Empty array of the given shape and type.
See Also
--------
core.records.fromrecords : Construct a record array from data.
record : fundamental data-type for `recarray`.
format_parser : determine a data-type from formats, names, titles.
Notes
-----
This constructor can be compared to ``empty``: it creates a new record
array but does not fill it with data. To create a record array from data,
use one of the following methods:
1. Create a standard ndarray and convert it to a record array,
using ``arr.view(np.recarray)``
2. Use the `buf` keyword.
3. Use `np.rec.fromrecords`.
Examples
--------
Create an array with two fields, ``x`` and ``y``:
>>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '>> x
array([(1., 2), (3., 4)], dtype=[('x', '>> x['x']
array([1., 3.])
View the array as a record array:
>>> x = x.view(np.recarray)
>>> x.x
array([1., 3.])
>>> x.y
array([2, 4])
Create a new, empty record array:
>>> np.recarray((2,),
... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
(3471280, 1.2134086255804012e-316, 0)],
dtype=[('x', ' 0 or self.shape == (0,):
lst = sb.array2string(
self, separator=', ', prefix=prefix, suffix=',')
else:
# show zero-length shape unless it is (0,)
lst = "[], shape=%s" % (repr(self.shape),)
lf = '\n'+' '*len(prefix)
if _get_legacy_print_mode() <= 113:
lf = ' ' + lf # trailing space
return fmt % (lst, lf, repr_dtype)
def field(self, attr, val=None):
if isinstance(attr, int):
names = ndarray.__getattribute__(self, 'dtype').names
attr = names[attr]
fielddict = ndarray.__getattribute__(self, 'dtype').fields
res = fielddict[attr][:2]
if val is None:
obj = self.getfield(*res)
if obj.dtype.names is not None:
return obj
return obj.view(ndarray)
else:
return self.setfield(val, *res)
def _deprecate_shape_0_as_None(shape):
if shape == 0:
warnings.warn(
"Passing `shape=0` to have the shape be inferred is deprecated, "
"and in future will be equivalent to `shape=(0,)`. To infer "
"the shape and suppress this warning, pass `shape=None` instead.",
FutureWarning, stacklevel=3)
return None
else:
return shape
@set_module("numpy.rec")
def fromarrays(arrayList, dtype=None, shape=None, formats=None,
names=None, titles=None, aligned=False, byteorder=None):
"""Create a record array from a (flat) list of arrays
Parameters
----------
arrayList : list or tuple
List of array-like objects (such as lists, tuples,
and ndarrays).
dtype : data-type, optional
valid dtype for all arrays
shape : int or tuple of ints, optional
Shape of the resulting array. If not provided, inferred from
``arrayList[0]``.
formats, names, titles, aligned, byteorder :
If `dtype` is ``None``, these arguments are passed to
`numpy.format_parser` to construct a dtype. See that function for
detailed documentation.
Returns
-------
np.recarray
Record array consisting of given arrayList columns.
Examples
--------
>>> x1=np.array([1,2,3,4])
>>> x2=np.array(['a','dd','xyz','12'])
>>> x3=np.array([1.1,2,3,4])
>>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
>>> print(r[1])
(2, 'dd', 2.0) # may vary
>>> x1[1]=34
>>> r.a
array([1, 2, 3, 4])
>>> x1 = np.array([1, 2, 3, 4])
>>> x2 = np.array(['a', 'dd', 'xyz', '12'])
>>> x3 = np.array([1.1, 2, 3,4])
>>> r = np.core.records.fromarrays(
... [x1, x2, x3],
... dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
>>> r
rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
(4, b'12', 4. )],
dtype=[('a', ' 0:
shape = shape[:-nn]
_array = recarray(shape, descr)
# populate the record array (makes a copy)
for k, obj in enumerate(arrayList):
nn = descr[k].ndim
testshape = obj.shape[:obj.ndim - nn]
name = _names[k]
if testshape != shape:
raise ValueError(f'array-shape mismatch in array {k} ("{name}")')
_array[name] = obj
return _array
@set_module("numpy.rec")
def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
titles=None, aligned=False, byteorder=None):
"""Create a recarray from a list of records in text form.
Parameters
----------
recList : sequence
data in the same field may be heterogeneous - they will be promoted
to the highest data type.
dtype : data-type, optional
valid dtype for all arrays
shape : int or tuple of ints, optional
shape of each array.
formats, names, titles, aligned, byteorder :
If `dtype` is ``None``, these arguments are passed to
`numpy.format_parser` to construct a dtype. See that function for
detailed documentation.
If both `formats` and `dtype` are None, then this will auto-detect
formats. Use list of tuples rather than list of lists for faster
processing.
Returns
-------
np.recarray
record array consisting of given recList rows.
Examples
--------
>>> r=np.core.records.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
... names='col1,col2,col3')
>>> print(r[0])
(456, 'dbe', 1.2)
>>> r.col1
array([456, 2])
>>> r.col2
array(['dbe', 'de'], dtype='>> import pickle
>>> pickle.loads(pickle.dumps(r))
rec.array([(456, 'dbe', 1.2), ( 2, 'de', 1.3)],
dtype=[('col1', ' 1:
raise ValueError("Can only deal with 1-d array.")
_array = recarray(shape, descr)
for k in range(_array.size):
_array[k] = tuple(recList[k])
# list of lists instead of list of tuples ?
# 2018-02-07, 1.14.1
warnings.warn(
"fromrecords expected a list of tuples, may have received a list "
"of lists instead. In the future that will raise an error",
FutureWarning, stacklevel=2)
return _array
else:
if shape is not None and retval.shape != shape:
retval.shape = shape
res = retval.view(recarray)
return res
@set_module("numpy.rec")
def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
names=None, titles=None, aligned=False, byteorder=None):
r"""Create a record array from binary data
Note that despite the name of this function it does not accept `str`
instances.
Parameters
----------
datastring : bytes-like
Buffer of binary data
dtype : data-type, optional
Valid dtype for all arrays
shape : int or tuple of ints, optional
Shape of each array.
offset : int, optional
Position in the buffer to start reading from.
formats, names, titles, aligned, byteorder :
If `dtype` is ``None``, these arguments are passed to
`numpy.format_parser` to construct a dtype. See that function for
detailed documentation.
Returns
-------
np.recarray
Record array view into the data in datastring. This will be readonly
if `datastring` is readonly.
See Also
--------
numpy.frombuffer
Examples
--------
>>> a = b'\x01\x02\x03abc'
>>> np.core.records.fromstring(a, dtype='u1,u1,u1,S3')
rec.array([(1, 2, 3, b'abc')],
dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])
>>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
... ('GradeLevel', np.int32)]
>>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
... ('Aadi', 66.6, 6)], dtype=grades_dtype)
>>> np.core.records.fromstring(grades_array.tobytes(), dtype=grades_dtype)
rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
dtype=[('Name', '>> s = '\x01\x02\x03abc'
>>> np.core.records.fromstring(s, dtype='u1,u1,u1,S3')
Traceback (most recent call last)
...
TypeError: a bytes-like object is required, not 'str'
"""
if dtype is None and formats is None:
raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
if dtype is not None:
descr = sb.dtype(dtype)
else:
descr = format_parser(formats, names, titles, aligned, byteorder).dtype
itemsize = descr.itemsize
# NumPy 1.19.0, 2020-01-01
shape = _deprecate_shape_0_as_None(shape)
if shape in (None, -1):
shape = (len(datastring) - offset) // itemsize
_array = recarray(shape, descr, buf=datastring, offset=offset)
return _array
def get_remaining_size(fd):
pos = fd.tell()
try:
fd.seek(0, 2)
return fd.tell() - pos
finally:
fd.seek(pos, 0)
@set_module("numpy.rec")
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
names=None, titles=None, aligned=False, byteorder=None):
"""Create an array from binary file data
Parameters
----------
fd : str or file type
If file is a string or a path-like object then that file is opened,
else it is assumed to be a file object. The file object must
support random access (i.e. it must have tell and seek methods).
dtype : data-type, optional
valid dtype for all arrays
shape : int or tuple of ints, optional
shape of each array.
offset : int, optional
Position in the file to start reading from.
formats, names, titles, aligned, byteorder :
If `dtype` is ``None``, these arguments are passed to
`numpy.format_parser` to construct a dtype. See that function for
detailed documentation
Returns
-------
np.recarray
record array consisting of data enclosed in file.
Examples
--------
>>> from tempfile import TemporaryFile
>>> a = np.empty(10,dtype='f8,i4,a5')
>>> a[5] = (0.5,10,'abcde')
>>>
>>> fd=TemporaryFile()
>>> a = a.newbyteorder('<')
>>> a.tofile(fd)
>>>
>>> _ = fd.seek(0)
>>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
... byteorder='<')
>>> print(r[5])
(0.5, 10, 'abcde')
>>> r.shape
(10,)
"""
if dtype is None and formats is None:
raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
# NumPy 1.19.0, 2020-01-01
shape = _deprecate_shape_0_as_None(shape)
if shape is None:
shape = (-1,)
elif isinstance(shape, int):
shape = (shape,)
if hasattr(fd, 'readinto'):
# GH issue 2504. fd supports io.RawIOBase or io.BufferedIOBase interface.
# Example of fd: gzip, BytesIO, BufferedReader
# file already opened
ctx = nullcontext(fd)
else:
# open file
ctx = open(os_fspath(fd), 'rb')
with ctx as fd:
if offset > 0:
fd.seek(offset, 1)
size = get_remaining_size(fd)
if dtype is not None:
descr = sb.dtype(dtype)
else:
descr = format_parser(formats, names, titles, aligned, byteorder).dtype
itemsize = descr.itemsize
shapeprod = sb.array(shape).prod(dtype=nt.intp)
shapesize = shapeprod * itemsize
if shapesize < 0:
shape = list(shape)
shape[shape.index(-1)] = size // -shapesize
shape = tuple(shape)
shapeprod = sb.array(shape).prod(dtype=nt.intp)
nbytes = shapeprod * itemsize
if nbytes > size:
raise ValueError(
"Not enough bytes left in file for specified shape and type")
# create the array
_array = recarray(shape, descr)
nbytesread = fd.readinto(_array.data)
if nbytesread != nbytes:
raise OSError("Didn't read as many bytes as expected")
return _array
@set_module("numpy.rec")
def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
names=None, titles=None, aligned=False, byteorder=None, copy=True):
"""
Construct a record array from a wide-variety of objects.
A general-purpose record array constructor that dispatches to the
appropriate `recarray` creation function based on the inputs (see Notes).
Parameters
----------
obj : any
Input object. See Notes for details on how various input types are
treated.
dtype : data-type, optional
Valid dtype for array.
shape : int or tuple of ints, optional
Shape of each array.
offset : int, optional
Position in the file or buffer to start reading from.
strides : tuple of ints, optional
Buffer (`buf`) is interpreted according to these strides (strides
define how many bytes each array element, row, column, etc.
occupy in memory).
formats, names, titles, aligned, byteorder :
If `dtype` is ``None``, these arguments are passed to
`numpy.format_parser` to construct a dtype. See that function for
detailed documentation.
copy : bool, optional
Whether to copy the input object (True), or to use a reference instead.
This option only applies when the input is an ndarray or recarray.
Defaults to True.
Returns
-------
np.recarray
Record array created from the specified object.
Notes
-----
If `obj` is ``None``, then call the `~numpy.recarray` constructor. If
`obj` is a string, then call the `fromstring` constructor. If `obj` is a
list or a tuple, then if the first object is an `~numpy.ndarray`, call
`fromarrays`, otherwise call `fromrecords`. If `obj` is a
`~numpy.recarray`, then make a copy of the data in the recarray
(if ``copy=True``) and use the new formats, names, and titles. If `obj`
is a file, then call `fromfile`. Finally, if obj is an `ndarray`, then
return ``obj.view(recarray)``, making a copy of the data if ``copy=True``.
Examples
--------
>>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
>>> np.core.records.array(a)
rec.array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
dtype=int32)
>>> b = [(1, 1), (2, 4), (3, 9)]
>>> c = np.core.records.array(b, formats = ['i2', 'f2'], names = ('x', 'y'))
>>> c
rec.array([(1, 1.0), (2, 4.0), (3, 9.0)],
dtype=[('x', '>> c.x
rec.array([1, 2, 3], dtype=int16)
>>> c.y
rec.array([ 1.0, 4.0, 9.0], dtype=float16)
>>> r = np.rec.array(['abc','def'], names=['col1','col2'])
>>> print(r.col1)
abc
>>> r.col1
array('abc', dtype='>> r.col2
array('def', dtype='