Source code for felicien.felits

#!/usr/bin/env python
# -*- coding: utf8 -*-

import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt


class FeliTS:
    """A time series of a Prometheus metric.

    This is a metric representation as returned by the Prometheus API. It
    includes the metric definition, and the data as a pandas Series.

    see official documentation:
    https://prometheus.io/docs/prometheus/latest/querying/api/#expression-query-result-formats

    Attributes:
        - name: A string with the name of the metric
        - labels: A dict of labels of the metric
        - data: A pandas.Series with the time series
    """

    def __init__(
        self,
        from_prom: dict = None,
        name: str = None,
        labels: dict = None,
        values: pd.Series = None,
    ) -> None:
        """Initialize the instance from Prometheus API data or from raw data.

        ``from_prom`` takes precedence: when it is given, ``name``,
        ``labels`` and ``values`` are ignored.

        Args:
            from_prom (dict, optional): Query result data from Prometheus
                API, holding a ``metric`` dict and either a ``value``
                ([timestamp, value]) or a ``values`` (list of such pairs).
            name (str, optional): Name of the metric
            labels (dict, optional): Labels of the metric. Defaults to a
                fresh empty dict for every instance.
            values (pandas Series, optional): Values of the time series,
                with their timestamps as the Index

        Raises:
            AttributeError if the metric has no __name__
            AttributeError if the metric has no value (or values)
            ValueError if the value list is empty
            ValueError if an item of the value list hasn't the right format:
                [timestamp, metric_value]
            AttributeError if neither an output from Prometheus API nor raw
                data are passed to the constructor
        """
        if from_prom is not None:
            # Construct from Prometheus API output
            metric = from_prom.get("metric", {})
            self.name = metric.get("__name__", "")
            if self.name == "":
                raise AttributeError("missing metric __name__")
            # every metric entry except the name itself is a label
            self.labels = {
                label: value
                for label, value in metric.items()
                if label != "__name__"
            }

            timestamps = []
            samples = []
            single = from_prom.get("value")
            several = from_prom.get("values")
            if single is not None:
                # instant query result: one [timestamp, value] pair
                if not isinstance(single, list) or len(single) != 2:
                    raise ValueError(
                        f"metric value is not right {from_prom.get('value')}. "
                        f"It should be an array with a timestamp and a value."
                    )
                timestamps.append(single[0])
                samples.append(float(single[1]))
            elif several is not None:
                # range query result: list of [timestamp, value] pairs
                for value in several:
                    if not isinstance(value, list) or len(value) != 2:
                        raise ValueError(
                            f"metric value is not as expected {value}"
                        )
                    timestamps.append(value[0])
                    samples.append(float(value[1]))
            else:
                raise AttributeError("missing metric value(s)")

            if not samples or not timestamps:
                raise ValueError("metric value can't be empty")
            # timestamps are interpreted as seconds since the Unix epoch
            self.data = pd.Series(
                data=samples, index=pd.to_datetime(timestamps, unit="s")
            )
        elif name is not None:
            # Construct from raw data
            self.name = name
            if self.name == "":
                raise AttributeError("missing metric __name__")
            # A new dict per instance: a literal ``{}`` default would be
            # shared (and mutated) across every instance built without labels.
            self.labels = {} if labels is None else labels
            if values is None:
                raise AttributeError("missing metric value(s)")
            if values.size == 0:
                raise ValueError("metric value can't be empty")
            self.data = values
        else:
            # Construct from nothing
            raise AttributeError("missing data to construct FeliTS")

    def __repr__(self) -> str:
        return (
            f"FeliTS({self.name}{{{self.labels_string}}}, "
            f"{self.size} datapoints)"
        )

    @property
    def labels_string(self) -> str:
        """The labels rendered as a single string.

        Returns:
            str: all the labels as ``key:"value"`` items, separated with
                commas; empty when there are no labels
        """
        if not hasattr(self, "labels") or self.labels is None:
            return ""
        return ", ".join(f'{k}:"{v}"' for k, v in self.labels.items())

    @property
    def frequency(self) -> dt.timedelta:
        """Expose the main frequency in the timeseries.

        In case there are multiple frequencies, the most frequent is
        returned; when several are equally frequent, the smallest of them
        is returned.

        Returns:
            dt.timedelta: the duration between 2 data points, or a zero
                timedelta when it cannot be computed (e.g. a series with
                at most one value)
        """
        if self.data.size <= 1:
            return dt.timedelta()

        # Round the timestamps to the second, compute the delta between
        # consecutive points, then count how often each delta occurs.
        # The diff goes through a Series because Index.diff() is only
        # available in recent pandas versions. value_counts() drops the
        # leading NaT produced by diff().
        deltas = pd.Series(self.data.index.floor("s")).diff()
        frequencies = deltas.value_counts()
        if frequencies.size == 0:
            # all other cases seem wrong
            return dt.timedelta()

        # Keep only the most occurring delta(s). The deltas are the index
        # labels, so the smallest one is the minimum of the index (idxmin()
        # would compare the, all equal, counts instead).
        most_common = frequencies[frequencies == frequencies.max()]
        return most_common.index.min()

    @property
    def size(self) -> int:
        """Expose the size of the timeseries

        Returns:
            int: Size of the timeseries
        """
        return self.data.size

    def as_prometheus(self) -> dict:
        """Object representation based on Prometheus API format

        Returns:
            dict: A dict with the keys ``metric`` (the name under
                ``__name__`` plus every label), ``values`` (the data
                points) and ``timestamps`` (integer milliseconds since
                1970-01-01, one per data point)
        """
        result = dict()
        result["metric"] = {"__name__": self.name}
        for k, v in self.labels.items():
            result["metric"][k] = v
        result["values"] = self.data.to_list()  # type: ignore
        # Exact integer arithmetic: whole milliseconds elapsed since the
        # epoch, without a float round-trip or a platform-dependent int
        # width.
        elapsed = self.data.index - dt.datetime(1970, 1, 1)
        result["timestamps"] = (
            elapsed // pd.Timedelta(milliseconds=1)
        ).to_list()
        return result

    def as_dataframe(self, name: str = "") -> pd.DataFrame:
        """self.data representation as a pandas.DataFrame

        Args:
            name (str, optional): Name of the column for the Series in the
                resulting DataFrame. Defaults to self.name.

        Returns:
            pandas.DataFrame: The self.data, as a pandas.DataFrame
        """
        colname = self.name if name == "" else name
        return self.data.to_frame(name=colname)

    def trim_by_date(
        self,
        boundary: dt.datetime = None,
        keep: str = "right",
        inplace: bool = False,
    ) -> pd.Series:
        """Trim the timeseries by date

        Args:
            boundary (dt.datetime, optional): Limit on which to trim the
                timeseries. The data point located exactly on the boundary
                is kept. When None, the whole timeseries is kept.
                Defaults to None.
            keep (str, optional): Which part of the timeseries to keep:
                "right" keeps the points at or after the boundary, "left"
                the points at or before it. Defaults to right.
            inplace (bool, optional): Control if the trim should be applied
                to the current object, or just get the trimmed timeseries.
                Defaults to False.

        Returns:
            pd.Series: The trimmed timeseries

        Raises:
            ValueError if the keep argument is not "left" or "right"
        """
        if keep not in ("left", "right"):
            raise ValueError("keep should be 'left' or 'right'")
        if boundary is None:
            # nothing to compare against: keep everything
            return self.data

        if keep == "right":
            dropped = self.data.index < boundary
        else:
            dropped = self.data.index > boundary

        trimmed = self.data[~dropped]
        if inplace:
            self.data = trimmed
        return trimmed

    def trim_by_size(
        self,
        boundary: int = 0,
        keep: str = "right",
        inplace: bool = False,
    ) -> pd.Series:
        """Trim the timeseries by size

        Args:
            boundary (int, optional): Size of the trimmed timeseries. If the
                boundary is 0 or larger than the timeseries, keep the whole
                timeseries. Defaults to 0.
            keep (str, optional): Which part of the timeseries to keep:
                "right" keeps the last points, "left" the first ones.
                Defaults to right.
            inplace (bool, optional): Control if the trim should be applied
                to the current object, or just get the trimmed timeseries.
                Defaults to False.

        Returns:
            pd.Series: The trimmed timeseries

        Raises:
            ValueError if the keep argument is not "left" or "right" (only
                checked when a trim actually has to be done)
        """
        if boundary > self.data.size or boundary == 0:
            return self.data

        # positional slicing, independent of the index labels
        if keep == "right":
            trimmed = self.data.iloc[-boundary:]
        elif keep == "left":
            trimmed = self.data.iloc[:boundary]
        else:
            raise ValueError("keep should be 'left' or 'right'")

        if inplace:
            self.data = trimmed
        return trimmed

    def plot(self) -> None:
        """Plot the timeseries with matplotlib, show it, then close it"""
        plt.plot(
            self.data.index.to_list(),
            self.data.to_list(),
            label=self.name,
            linestyle="solid",
        )
        plt.xticks(rotation=60, fontsize=10)
        plt.title(f"{self.name}{{{self.labels_string}}}")
        plt.show()
        plt.close()