Source code for dios.lib

import pandas as pd
import warnings


[docs]class ItypeWarning(RuntimeWarning): pass
[docs]class ItypeCastWarning(ItypeWarning): pass
[docs]class ItypeCastError(RuntimeError): pass
class __Itype: def __init__(self): raise RuntimeError("a Itype class does not allow instances of itself.")
[docs]class DtItype(__Itype): name = 'datetime' unique = True subtypes = (pd.DatetimeIndex,) min_pdindex = pd.DatetimeIndex([])
[docs]class IntItype(__Itype): name = 'integer' unique = True subtypes = (pd.RangeIndex, pd.Int64Index, pd.UInt64Index, int) min_pdindex = pd.Int64Index([])
[docs]class FloatItype(__Itype): name = 'float' subtypes = (pd.Float64Index, float) unique = True min_pdindex = pd.Float64Index([])
# class MultiItype(__Itype): # name = "multi" # subtypes = (pd.MultiIndex, ) # unique = ??
[docs]class NumItype(__Itype): name = "numeric" _subitypes = (IntItype, FloatItype) subtypes = (_subitypes + IntItype.subtypes + FloatItype.subtypes) unique = False min_pdindex = pd.Float64Index([])
[docs]class ObjItype(__Itype): name = "object" unique = False _subitypes = (DtItype, IntItype, FloatItype, NumItype, str) _otheritypes = (pd.CategoricalIndex, pd.IntervalIndex, pd.PeriodIndex, pd.TimedeltaIndex, pd.Index) subtypes = (_subitypes + _otheritypes + DtItype.subtypes + NumItype.subtypes) min_pdindex = pd.Index([])
[docs]def is_itype(obj, itype): """ Check if obj is a instance of the given itype or its str-alias was given""" # todo: iter through itype as it could be a tuple, if called like ``is_itype(o, (t1,t2))`` # user gave a Itype, like ``DtItype`` if type(obj) == type and issubclass(obj, itype): return True # user gave a string, like 'datetime' if isinstance(obj, str) and obj == itype.name: return True return False
[docs]def is_itype_subtype(obj, itype): """ Check if obj is a subclass or a instance of a subclass of the given itype""" # user gave a subtype, like ``pd.DatetimeIndex`` if type(obj) == type and issubclass(obj, itype.subtypes): return True # user gave a instance of a subtype, like ``pd.Series(..).index`` if isinstance(obj, itype.subtypes): return True return False
[docs]def is_itype_like(obj, itype): """ Check if obj is a subclass or a instance of the given itype or any of its subtypes""" return is_itype(obj, itype) or is_itype_subtype(obj, itype)
[docs]def get_itype(obj): """ Return the according Itype. and return the according Itype Parameters ---------- obj : {itype string, Itype, pandas.Index, instance of pd.index} get the itype fitting for the input Examples -------- >>> get_itype("datetime") <class 'dios.lib.DtItype'> >>> s = pd.Series(index=pd.to_datetime([])) >>> get_itype(s.index) <class 'dios.lib.DtItype'> >>> get_itype(DtItype) <class 'dios.lib.DtItype'> >>> get_itype(pd.DatetimeIndex) <class 'dios.lib.DtItype'> """ if type(obj) == type and issubclass(obj, __Itype): return obj # check if it is the actual type, not a subtype types = [DtItype, IntItype, FloatItype, NumItype, ObjItype] for t in types: if is_itype(obj, t): return t for t in types: if is_itype_subtype(obj, t): return t raise ValueError(f"{obj} is not a itype, nor any known subtype of a itype, nor a itype string alias")
def _itype_eq(a, b): return is_itype(a, b) def _itype_lt(a, b): return is_itype_subtype(a, b) def _itype_le(a, b): return is_itype_like(a, b) def _find_least_common_itype(iterable_of_series): itypes = [NumItype, FloatItype, IntItype, DtItype] tlist = [get_itype(s.index) for s in iterable_of_series] found = ObjItype if tlist: for itype in itypes: for t in tlist: if _itype_le(t, itype): continue break else: found = itype return found ################################################################################ # Casting
[docs]class CastPolicy: force = 'force' save = 'save' never = 'never'
_CAST_POLICIES = [CastPolicy.force, CastPolicy.save, CastPolicy.never]
[docs]def cast_to_itype(series, itype, policy='lossless', err='raise', inplace=False): """ Cast a series (more explicit the type of the index) to fit the itype of a dios. Return the casted series if successful, None otherwise. Note: This is very basic number-casting, so in most cases, information from the old index will be lost after the cast. """ if policy not in _CAST_POLICIES: raise ValueError(f"policy={policy}") if err not in ['raise', 'ignore']: raise ValueError(f"err={err}") if not inplace: series = series.copy() itype = get_itype(itype) if series.empty: return pd.Series(index=itype.min_pdindex, dtype=series.dtype) series.itype = get_itype(series.index) # up-cast issn't necessary because a dios with a higher # itype always can take lower itypes. # series can have dt/int/float/mixed # dt -> dt -> mixed # int -> int -> num -> mixed # float -> float -> num -> mixed # mixed -> mixed if _itype_le(series.itype, itype): # a <= b return series e = f"A series index of type '{type(series.index)}' cannot be casted to Itype '{itype.name}'" # cast any -> dt always fail. if is_itype(itype, DtItype): pass else: e += f", as forbidden by the cast-policy '{policy}'." if policy == CastPolicy.never: pass elif policy == CastPolicy.force: # cast any (dt/float/mixed) -> int if is_itype(itype, IntItype): # a == b series.index = pd.RangeIndex(len(series)) return series # cast any (dt/int/mixed) -> float # cast any (dt/float/mixed) -> nur if is_itype(itype, FloatItype) or is_itype(itype, NumItype): # a == b or a == c series.index = pd.Float64Index(range(len(series))) return series elif policy == CastPolicy.save: # cast int -> float if is_itype(itype, IntItype) and is_itype(series.itype, FloatItype): # a == b and c == d series.index = series.index.astype(float) return series # cast float -> int, maybe if unique if is_itype(itype, FloatItype) and is_itype(series.itype, IntItype): # a == b and c == d series.index = series.index.astype(int) if series.index.is_unique: return series e = f"The cast with policy {policy} from series index type '{type(series.index)}' to " \ f"itype {itype.name} resulted in a non-unique index." # cast mixed -> int/float always fail if err == 'raise': raise ItypeCastError(e) else: return None
################################################################################ # OPTIONS
[docs]class OptsFields: """storage class for the keys in `dios_options` Use like so: ``dios_options[OptsFields.X] = Opts.Y``. See Also -------- Opts: values for the options dict dios_options: options dict for module """ mixed_itype_warn_policy = "mixed_itype_policy" disp_max_rows = "disp_max_rows " disp_min_rows = "disp_min_rows " disp_max_cols = "disp_max_vars" dios_repr = 'dios_repr'
[docs]class Opts: """storage class for string values for `dios_options` Use like so: ``dios_options[OptsFields.X] = Opts.Y``. See Also -------- OptsFields: keys for the options dict dios_options: options dict for module """ itype_warn = 'warn' itype_err = 'err' itype_ignore = 'ignore' repr_aligned = 'aligned' repr_indexed = 'indexed'
class __DocDummy(dict): pass dios_options = __DocDummy() dios_options.update(**{ OptsFields.disp_max_rows: 60, OptsFields.disp_min_rows: 10, OptsFields.disp_max_cols: 10, OptsFields.mixed_itype_warn_policy: Opts.itype_warn, OptsFields.dios_repr: Opts.repr_indexed, }) opdoc = f"""Options dictionary for module `dios`. Use like so: ``dios_options[OptsFields.X] = Opts.Y``. **Items**: * {OptsFields.dios_repr}: {{'indexed', 'aligned'}} default: 'indexed' dios default representation if: * `indexed`: show every column with its index * `aligned`: transform to pandas.DataFrame with indexed merged together. * {OptsFields.disp_max_rows} : int Maximum numbers of row before truncated to `disp_min_rows` in representation of DictOfSeries * {OptsFields.disp_min_rows} : int min rows to display if `max_rows` is exceeded * {OptsFields.disp_max_cols} : int Maximum numbers of columns before truncated representation * {OptsFields.mixed_itype_warn_policy} : {{'warn', 'err', 'ignore'}} How to inform user about mixed Itype See Also -------- OptsFields: keys for the options dict Opts: values for the options dict """ dios_options.__doc__ = opdoc def _throw_MixedItype_err_or_warn(itype): msg = f"Using '{itype.name}' as itype is not recommend. " \ f"As soon as series with different index types are inserted,\n" \ f"indexing and slicing will almost always fail. " if dios_options[OptsFields.mixed_itype_warn_policy] in ['ignore', Opts.itype_ignore]: pass elif dios_options[OptsFields.mixed_itype_warn_policy] in ['error', 'err', Opts.itype_err]: msg += "Suppress this error by specifying an unitary 'itype' or giving an 'index' to DictOfSeries." raise ItypeCastError(msg) else: msg += "Silence this warning by specifying an unitary 'itype' or giving an 'index' to DictOfSeries." warnings.warn(msg, ItypeWarning) return
[docs]def example_DictOfSeries(): """ Return a example dios. Returns ------- DictOfSeries: an example Examples -------- >>> from dios import example_DictOfSeries >>> di = example_DictOfSeries() >>> di a | b | c | d | ===== | ====== | ====== | ===== | 0 0 | 2 5 | 4 7 | 6 0 | 1 7 | 3 6 | 5 17 | 7 1 | 2 14 | 4 7 | 6 27 | 8 2 | 3 21 | 5 8 | 7 37 | 9 3 | 4 28 | 6 9 | 8 47 | 10 4 | 5 35 | 7 10 | 9 57 | 11 5 | 6 42 | 8 11 | 10 67 | 12 6 | 7 49 | 9 12 | 11 77 | 13 7 | 8 56 | 10 13 | 12 87 | 14 8 | 9 63 | 11 14 | 13 97 | 15 9 | """ from dios import DictOfSeries a = pd.Series(range(0, 70, 7)) b = pd.Series(range(5, 15, 1)) c = pd.Series(range(7, 107, 10)) d = pd.Series(range(0, 10, 1)) for i, s in enumerate([a, b, c, d]): s.index += i * 2 di = DictOfSeries(dict(a=a, b=b, c=c, d=d)) return di.copy()