# Source code for sid.statistics

from typing import Union

import numpy as np
import pandas as pd

__all__ = ["calculate_r_effective", "calculate_r_zero"]


def calculate_r_effective(df: pd.DataFrame, window_length: int = 7) -> pd.Series:
    """Calculate the effective reproduction number, :math:`R_e`.

    More explanation can be found in the `Wikipedia article <Wikipedia>`_.

    Note:
        The infection counter is only reset to zero once a person becomes infected
        again. Abstracting from very fast reinfections, its mean among those who
        ceased to be infectious within the last ``window_length`` days is
        :math:`R_e`.

    Args:
        df (pandas.DataFrame): states DataFrame for which to calculate :math:`R_e`,
            usually the states of one day. The columns ``"cd_infectious_false"``
            and ``"n_has_infected"`` are read; a ``"date"`` or ``"period"`` column,
            if present, is used to group the result over time.
        window_length (int): how many days to use to identify the previously
            infectious people. The lower, the more changes in behavior can be seen,
            but the smaller the number of people on which to calculate :math:`R_e`.

    Returns:
        r_effective (pandas.Series): mean number of people infected by someone
        whose infectious spell ended in the last *window_length* days, indexed by
        the sorted unique values of the time column. Time points without any
        matching individual are filled with 0. If the DataFrame has neither a
        ``"date"`` nor a ``"period"`` column, the plain mean over all matching rows
        is returned as a scalar instead.

    .. _Wikipedia: https://en.wikipedia.org/wiki/Basic_reproduction_number

    """
    # Rows whose infectious spell ended within the last ``window_length`` days.
    ended_recently = df["cd_infectious_false"].between(-window_length, 0)
    time_grouper = _create_time_grouper(df)
    previously_infectious = df.loc[ended_recently]

    # No time dimension available: a single mean over all selected rows.
    if time_grouper is None:
        return previously_infectious["n_has_infected"].mean()

    mean_per_time = previously_infectious.groupby(time_grouper)[
        "n_has_infected"
    ].mean()

    # The groupby-mean only covers time points present in the filtered rows.
    # Re-add every time point observed in the full DataFrame and set the
    # missing ones to zero.
    observed_times = np.sort(df[time_grouper.key].unique())
    return mean_per_time.reindex(index=observed_times).fillna(0)
def calculate_r_zero(
    df: pd.DataFrame, window_length: int = 7, threshold: float = 0.75
) -> pd.Series:
    """Calculate the basic reproduction number :math:`R_0`.

    This is done by dividing the effective reproduction number by the share of
    susceptible people in the DataFrame. Using :math:`R_e` and the share of the
    susceptible people from the very last period of the time means that
    heterogeneous matching and changes in the rate of immunity are neglected.

    More explanation can be found here: https://bit.ly/2VZOR5a.

    Args:
        df (pandas.DataFrame): states DataFrame for which to calculate :math:`R_0`,
            usually the states of one period. The column ``"immunity"`` is read in
            addition to the columns required by :func:`calculate_r_effective`.
        window_length (int): how many days to use to identify the previously
            infectious people. The lower, the more changes in behavior can be seen,
            but the smaller the number of people on which to calculate :math:`R_0`.
        threshold (float): Parameter determining at which immunity level threshold
            an individual is considered as "immune" in a binary setting. This is
            needed for the approximation of the share of susceptible individuals.
            Must be in [0, 1]; default: 0.75.

    Returns:
        r_zero (pandas.Series): The average number of people that would have been
        infected by someone whose infectious spell ended in the last
        *window_length* days if everyone had been susceptible, neglecting
        heterogeneous matching and changes in the rate of immunity. If the
        DataFrame has neither a ``"date"`` nor a ``"period"`` column, a scalar is
        returned instead of a Series.

    """
    r_effective = calculate_r_effective(df=df, window_length=window_length)

    # Binary approximation of immunity: strictly above the threshold counts as
    # not susceptible.
    not_susceptible = df["immunity"] > threshold

    grouper = _create_time_grouper(df)
    if grouper is None:
        share_susceptibles = 1 - not_susceptible.mean()
    else:
        # Attach the column the grouper is keyed on. It can be "date" or
        # "period"; hard-coding "date" failed for DataFrames that only have a
        # "period" column.
        time_column = grouper.key
        flagged = not_susceptible.to_frame().assign(
            **{time_column: df[time_column]}
        )
        share_susceptibles = 1 - flagged.groupby(grouper)["immunity"].mean()

    return r_effective / share_susceptibles
def _create_time_grouper(df: pd.DataFrame) -> Union[pd.Grouper, None]: """Create a grouper for the time dimension of the DataFrame.""" if "date" in df.columns: grouper = pd.Grouper(key="date", freq="D") elif "period" in df.columns: grouper = pd.Grouper(key="period") else: grouper = None return grouper