# Source code for felicien.felits

#!/usr/bin/env python
# -*- coding: utf8 -*-

import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt



# [docs]
class FeliTS:
    """A time series of a Prometheus metric.

    This is a metric representation as returned by the Prometheus API. It
    includes the metric definition, and the data as a pandas Series.
    See the official documentation:
    https://prometheus.io/docs/prometheus/latest/querying/api/#expression-query-result-formats

    Attributes:
        name: A string with the name of the metric.
        labels: A dict of labels of the metric (``__name__`` excluded when
            built from a Prometheus result).
        data: A pandas.Series with the samples. When built from a
            Prometheus result, its index holds the timestamps parsed as
            seconds since the epoch.
    """

    def __init__(
        self,
        from_prom: dict = None,
        name: str = None,
        labels: dict = None,
        values: pd.Series = None,
    ) -> None:
        """Initialize the instance from a Prometheus result or from raw data.

        ``from_prom`` takes precedence: when it is given, ``name``,
        ``labels`` and ``values`` are ignored.

        Args:
            from_prom (dict, optional): Query result data from Prometheus API.

            name (str, optional): Name of the metric

            labels (dict, optional): Labels of the metric. Defaults to a new
              empty dict for each instance.

            values (pandas Series, optional): Values and their timestamp of
              the timeserie as the Index

        Raises:
            AttributeError if the metric has no __name__

            AttributeError if the metric has no value (or values)

            ValueError if the value list is empty

            ValueError if a item of the value list hasn't the right format:
            [timestamp, metric_value]

            AttributeError if neither an output from Prometheus API nor raw
            data are passed to the constructor
        """
        if from_prom is not None:
            # Construct from Prometheus API output
            metric = from_prom.get("metric", {})
            self.name = metric.get("__name__", "")
            if self.name == "":
                raise AttributeError("missing metric __name__")

            # Every entry of "metric" except the name itself is a label.
            self.labels = {
                label: value
                for label, value in metric.items()
                if label != "__name__"
            }

            samples = self._samples_from_prom(from_prom)
            if not samples:
                raise ValueError("metric value can't be empty")

            timestamps = [sample[0] for sample in samples]
            readings = [float(sample[1]) for sample in samples]
            self.data = pd.Series(
                data=readings, index=pd.to_datetime(timestamps, unit="s")
            )

        elif name is not None:
            # Construct from raw data
            if name == "":
                raise AttributeError("missing metric __name__")
            if values is None:
                raise AttributeError("missing metric value(s)")
            if values.size == 0:
                raise ValueError("metric value can't be empty")

            self.name = name
            # A fresh dict per instance: a shared default would leak labels
            # from one instance to the next.
            self.labels = {} if labels is None else labels
            self.data = values

        else:
            # Construct from nothing
            raise AttributeError("missing data to construct FeliTS")

    @staticmethod
    def _samples_from_prom(from_prom: dict) -> list:
        """Return the ``[timestamp, value]`` pairs of a Prometheus result.

        A single ``value`` entry takes precedence over a ``values`` list.

        Raises:
            ValueError if an entry is not a 2-item list
            AttributeError if neither ``value`` nor ``values`` is present
        """
        single = from_prom.get("value")
        if single is not None:
            if not isinstance(single, list) or len(single) != 2:
                raise ValueError(
                    f"metric value is not right {single}. "
                    f"It should be an array with a timestamp and a value."
                )
            return [single]

        many = from_prom.get("values")
        if many is None:
            raise AttributeError("missing metric value(s)")

        samples = []
        for sample in many:
            if not isinstance(sample, list) or len(sample) != 2:
                raise ValueError(f"metric value is not as expected {sample}")
            samples.append(sample)
        return samples

    def __repr__(self) -> str:
        return (
            f"FeliTS({self.name}{{{self.labels_string}}}, "
            f"{self.size} datapoints)"
        )

    @property
    def labels_string(self) -> str:
        """The labels rendered as a single string.

        Returns:
            str: all the labels as ``key:"value"`` pairs, separated with
                commas; an empty string when there are no labels
        """
        # getattr keeps this usable even when labels is unset or None.
        labels = getattr(self, "labels", None) or {}
        return ", ".join(f'{k}:"{v}"' for k, v in labels.items())

    @property
    def frequency(self) -> dt.timedelta:
        """Expose the main frequency in the timeseries. In case there are
            multiple frequencies, the most frequent is returned.

        Returns:
            dt.timedelta: the duration between 2 data points, or a zero
                timedelta when the serie has fewer than 2 points
        """
        if self.data.size <= 1:
            return dt.timedelta()

        # Round the timestamps to the second, compute the delta between
        # consecutive points, then count how often each delta occurs.
        # The leading NaT produced by diff() is dropped by value_counts().
        deltas = self.data.index.floor("s").to_series().diff()
        counts = deltas.value_counts()

        if counts.empty:
            # no usable delta at all
            return dt.timedelta()

        # Keep the most occurring delta(s); on a tie return the smallest
        # delta, which is the minimum of the index labels (the counts
        # themselves are all equal here).
        return counts[counts == counts.max()].index.min()

    @property
    def size(self) -> int:
        """Expose the size to the timeseries

        Returns:
            int: Size of the timeseries
        """
        return self.data.size

    def as_prometheus(self) -> dict:
        """Object representation based on Prometheus API format

        Returns:
            dict: A dict with the keys ``metric`` (name and labels),
                ``values`` (the samples) and ``timestamps`` (integer
                milliseconds since 1970-01-01)
        """
        epoch = pd.Timestamp("1970-01-01")
        # Integer floor division keeps the conversion exact; going through
        # float seconds and truncating can lose a millisecond.
        milliseconds = (self.data.index - epoch) // pd.Timedelta(
            milliseconds=1
        )

        return {
            "metric": {"__name__": self.name, **self.labels},
            "values": self.data.to_list(),  # type: ignore
            "timestamps": milliseconds.tolist(),
        }

    def as_dataframe(self, name: str = "") -> pd.DataFrame:
        """self.data representation as a pandas.DataFrame

        Args:
            name (str, optional): Name of the column for the Serie in the
                resulting DataFrame. Defaults to self.name.

        Returns:
            pandas.DataFrame: The self.data, as a pandas.DataFrame
        """
        colname = self.name if name == "" else name
        return self.data.to_frame(name=colname)

    def trim_by_date(
        self,
        boundary: dt.datetime = None,
        keep: str = "right",
        inplace: bool = False,
    ) -> pd.Series:
        """Trim the timeseries by date

        Args:
            boundary (dt.datetime, optional): Limit on which triming the
                timeserie. The point at the boundary itself is kept.
                If None, keep the whole timeserie. Defaults to None.
            keep (str, optional): Which part of the timeseries to keep.
                Defaults to right.
            inplace (bool, optional): Control if the trim should be applied
                to the current object, or just get the trimmed timeserie.
                Defaults to False.

        Returns:
            pd.Series: The trimmed timeseries

        Raises:
            ValueError if the keep argument is not "left" or "right"
        """
        if keep not in ("left", "right"):
            raise ValueError("keep should be 'left' or 'right'")

        if boundary is None:
            # Nothing to trim against: comparing the index with None would
            # raise a TypeError.
            return self.data

        if keep == "right":
            dropped = self.data.index < boundary
        else:
            dropped = self.data.index > boundary

        trimmed = self.data[~dropped]
        if inplace:
            self.data = trimmed
        return trimmed

    def trim_by_size(
        self,
        boundary: int = 0,
        keep: str = "right",
        inplace: bool = False,
    ) -> pd.Series:
        """Trim the timeseries by size

        Args:
            boundary (int, optional): Size of the trimmed timeserie. If the
                boundary is 0 or larger than the timeserie, keep the whole
                timeserie. Defaults to 0.
            keep (str, optional): Which part of the timeseries to keep.
                Defaults to right.
            inplace (bool, optional): Control if the trim should be applied
                to the current object, or just get the trimmed timeserie.
                Defaults to False.

        Returns:
            pd.Series: The trimmed timeseries

        Raises:
            ValueError if the keep argument is not "left" or "right"
        """
        if keep not in ("left", "right"):
            raise ValueError("keep should be 'left' or 'right'")

        if boundary > self.data.size or boundary == 0:
            return self.data

        # NOTE(review): a negative boundary is not validated; it slices
        # from the opposite end.
        # .iloc makes the slice positional whatever the index type is.
        if keep == "right":
            trimmed = self.data.iloc[-boundary:]
        else:
            trimmed = self.data.iloc[:boundary]

        if inplace:
            self.data = trimmed
        return trimmed

    def plot(self) -> None:
        """Plot the timeserie as a solid line, show it, then close it.

        The title is the metric name followed by its labels.
        """
        plt.plot(
            self.data.index.to_list(),
            self.data.to_list(),
            label=self.name,
            linestyle="solid",
        )
        plt.xticks(rotation=60, fontsize=10)
        plt.title(f"{self.name}{{{self.labels_string}}}")
        plt.show()
        plt.close()