Source code for saqc.core.translation.floatscheme

#! /usr/bin/env python

# SPDX-FileCopyrightText: 2021 Helmholtz-Zentrum für Umweltforschung GmbH - UFZ
#
# SPDX-License-Identifier: GPL-3.0-or-later

# -*- coding: utf-8 -*-

from __future__ import annotations

import numpy as np
import pandas as pd

from saqc.constants import FILTER_ALL, UNFLAGGED
from saqc.core.flags import Flags
from saqc.core.frame import DictOfSeries
from saqc.core.history import History
from saqc.core.translation.basescheme import TranslationScheme


[docs] class FloatScheme(TranslationScheme): """ Acts as the default Translator, provides a changeable subset of the internal float flags """ DFILTER_DEFAULT: float = FILTER_ALL
[docs] def __call__(self, flag: float | int) -> float: try: return float(flag) except (TypeError, ValueError, OverflowError): raise ValueError(f"invalid flag, expected a numerical value, got: {flag}")
[docs] def toInternal(self, flags: pd.DataFrame | DictOfSeries) -> Flags: try: return Flags(flags.astype(float)) except (TypeError, ValueError, OverflowError): raise ValueError( f"invalid flag(s), expected a collection of numerical values, got: {flags}" )
[docs] def toExternal(self, flags: Flags, attrs: dict | None = None) -> DictOfSeries: out = DictOfSeries(flags) out.attrs = attrs or {} return out
[docs] class AnnotatedFloatScheme(FloatScheme):
[docs] def toExternal(self, flags: Flags, attrs: dict | None = None) -> DictOfSeries: tflags = super().toExternal(flags, attrs=attrs) out = DictOfSeries() for field in tflags.columns: df = pd.DataFrame( { "flag": tflags[field], "func": "", "parameters": "", } ) history = flags.history[field] for col in history.columns: valid = (history.hist[col] != UNFLAGGED) & history.hist[col].notna() meta = history.meta[col] df.loc[valid, "func"] = meta["func"] df.loc[valid, "parameters"] = str(meta["kwargs"]) out[field] = df return out
[docs] def toInternal(self, flags: DictOfSeries) -> Flags: data = {} for key, frame in flags.items(): history = History(index=frame.index) for (flag, func, kwargs), values in frame.groupby( ["flag", "func", "parameters"] ): column = pd.Series(np.nan, index=frame.index) column.loc[values.index] = self(flag) history.append(column, meta={"func": func, "kwargs": kwargs}) data[key] = history return Flags(data)