# Source code for saqc.core.frame

#! /usr/bin/env python
# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
# SPDX-License-Identifier: GPL-3.0-or-later
# -*- coding: utf-8 -*-
from __future__ import annotations

from typing import Any, Hashable, Mapping

import pandas as pd
from fancy_collections import DictOfPandas



# [docs] -- link label left over from the rendered documentation page; not code
class DictOfSeries(DictOfPandas):
    """
    Dictionary-like collection of pandas objects with dataset-level attributes.

    Extends ``DictOfPandas`` with a lazily created ``attrs`` dictionary and
    an ``astype`` method that casts every stored item.
    """

    # Accepted key and value types. Presumably consumed by ``DictOfPandas``
    # for validation -- not visible from this module, confirm there.
    _key_types = (str, int, float, tuple)
    _value_types = (pd.Series, pd.DataFrame)

    def __init__(self, *args, **kwargs):
        # ``data`` has to exist before the parent initializer runs: if that
        # initializer raises, building the repr for the error report would
        # otherwise fail with an AttributeError of its own.
        self.data = {}
        # Created lazily on first access of ``attrs``.
        self._attrs = None
        super().__init__(*args, **kwargs)

    @property
    def attrs(self) -> dict[Hashable, Any]:
        """
        Dictionary of global attributes of this dataset.
        """
        if self._attrs is None:
            self._attrs = {}
        return self._attrs

    @attrs.setter
    def attrs(self, value: Mapping[Hashable, Any]) -> None:
        # Store a shallow copy, so the caller's mapping is never aliased.
        self._attrs = dict(value)

    def astype(self, dtype: str | type) -> DictOfSeries:
        """
        Cast a DictOfSeries object to the specified ``dtype``

        Parameters
        ----------
        dtype : str or type
            Data type to cast the entire object to.

        Returns
        -------
        DictOfSeries
            A new object holding the result of ``astype(dtype)`` for
            every item.
        """
        # NOTE(review): the result starts from a fresh DictOfSeries, so the
        # container-level ``attrs`` are not carried over -- confirm intended.
        out = DictOfSeries()
        for key, item in self.data.items():
            out[key] = item.astype(dtype)
        return out




# Documentation for ``empty`` is attached at import time. The attribute is
# not defined in the class body in this module, so it is presumably
# inherited from ``DictOfPandas`` -- in that case the assignment also
# changes the documentation seen through the parent class.
DictOfSeries.empty.__doc__ = """
Indicator whether DictOfSeries is empty.

True if DictOfSeries is entirely empty (no items) or all
items are empty themselves.

Returns
-------
bool

Notes
-----
To only check if DictOfSeries has no items use the ``len`` or ``bool``
builtins.

Examples
--------
>>> from saqc import DictOfSeries
>>> di1 = DictOfSeries()
>>> di1.empty
True

A DictOfSeries is also considered empty if all items within it are empty

>>> di2 = DictOfSeries(a=pd.Series(dtype=float), b=pd.Series(dtype='O'))
>>> assert di2['a'].empty and di2['b'].empty
>>> di2.empty
True

To differentiate between a DictOfSeries with no items and a
DictOfSeries with empty items use the builtin functions
``len`` or ``bool``

>>> len(di1)
0
>>> bool(di1)
False
>>> len(di2)
2
>>> bool(di2)
True
"""

# Documentation for ``to_pandas`` is attached at import time. The method is
# not defined in the class body in this module, so it is presumably
# inherited from ``DictOfPandas`` -- in that case the assignment also
# changes the documentation seen through the parent class.
DictOfSeries.to_pandas.__doc__ = """
Transform DictOfSeries to a pandas.DataFrame.

Because a pandas.DataFrame can not handle data of different
length, but DictOfSeries can, the missing data is filled with
NaNs or is dropped, depending on the keyword `how`.

Parameters
----------
how : {'outer', 'inner'}, default 'outer'
    Defines how the resulting DataFrame index is generated.

    - ``outer`` : The resulting DataFrame index is the combination
      of all indices merged together. If a column misses values at
      new index locations, NaNs are filled in.
    - ``inner`` : Only indices that are present in all columns are used
      for the resulting index. Filling logic is not needed, but values
      are dropped, if a column has indices that are not known to all
      other columns.

Returns
-------
frame : pandas.DataFrame

Examples
--------
Missing data locations are filled with NaNs

>>> from saqc import DictOfSeries
>>> a = pd.Series(11, index=range(2))
>>> b = pd.Series(22, index=range(3))
>>> c = pd.Series(33, index=range(1,9,3))
>>> di = DictOfSeries(a=a, b=b, c=c)
>>> di   # doctest: +NORMALIZE_WHITESPACE
    a |     b |     c |
===== | ===== | ===== |
0  11 | 0  22 | 1  33 |
1  11 | 1  22 | 4  33 |
      | 2  22 | 7  33 |

>>> di.to_pandas()   # doctest: +NORMALIZE_WHITESPACE
      a     b     c
0  11.0  22.0   NaN
1  11.0  22.0  33.0
2   NaN  22.0   NaN
4   NaN   NaN  33.0
7   NaN   NaN  33.0

or is dropped if ``how='inner'``

>>> di.to_pandas(how='inner')   # doctest: +NORMALIZE_WHITESPACE
    a   b   c
1  11  22  33
"""