Source code for sid.statistics
from typing import Union
import numpy as np
import pandas as pd
__all__ = ["calculate_r_effective", "calculate_r_zero"]
def calculate_r_effective(df: pd.DataFrame, window_length: int = 7) -> pd.Series:
    """Compute the effective reproduction number, :math:`R_e`.

    Background can be found in the `Wikipedia article <Wikipedia>`_.

    Note:
        The infection counter of a person is only reset to zero once that person
        becomes infected again. Ignoring very fast reinfections, the mean of the
        counter among people who stopped being infectious within the last
        *window_length* days is therefore :math:`R_e`.

    Args:
        df (pandas.DataFrame): states DataFrame for which :math:`R_e` is computed,
            usually the states of one day. The columns ``"cd_infectious_false"``
            and ``"n_has_infected"`` are read; a ``"date"`` or ``"period"`` column
            is used for grouping if present.
        window_length (int): number of days used to identify the previously
            infectious people. Smaller values show changes in behavior more
            clearly but leave fewer people on which :math:`R_e` is computed.

    Returns:
        r_effective (pandas.Series): mean number of people infected by someone whose
            infectious spell ended in the last *window_length* days, indexed by the
            time column. Without a time column, a single scalar mean is returned.

    .. _Wikipedia:
        https://en.wikipedia.org/wiki/Basic_reproduction_number

    """
    # Rows whose ``cd_infectious_false`` lies in [-window_length, 0], bounds included.
    ended_recently = df["cd_infectious_false"].between(-window_length, 0)
    time_grouper = _create_time_grouper(df)
    recent_spreaders = df.loc[ended_recently]

    if time_grouper is None:
        return recent_spreaders["n_has_infected"].mean()

    mean_by_time = recent_spreaders.groupby(time_grouper)["n_has_infected"].mean()

    # Time points without any selected row are absent (or NaN) after the grouped
    # mean; bring every time point of ``df`` back and report zero for them.
    observed_times = np.sort(df[time_grouper.key].unique())
    return mean_by_time.reindex(index=observed_times).fillna(0)
def calculate_r_zero(
    df: pd.DataFrame, window_length: int = 7, threshold: float = 0.75
) -> pd.Series:
    """Calculate the basic reproduction number :math:`R_0`.

    This is done by dividing the effective reproduction number by the share of
    susceptible people in the DataFrame. Using :math:`R_e` and the share of
    susceptible people from the very last period means that heterogeneous matching
    and changes in the rate of immunity are neglected.

    More explanation can be found here: https://bit.ly/2VZOR5a.

    Args:
        df (pandas.DataFrame): states DataFrame for which to calculate :math:`R_0`,
            usually the states of one period. The column ``"immunity"`` is read in
            addition to the columns needed by :func:`calculate_r_effective`; a
            ``"date"`` or ``"period"`` column is used for grouping if present.
        window_length (int): how many days to use to identify the previously
            infectious people. The lower, the more changes in behavior can be seen,
            but the smaller the number of people on which to calculate :math:`R_0`.
        threshold (float): immunity level strictly above which an individual is
            counted as "immune" in a binary setting. This is needed to approximate
            the share of susceptible individuals. Must be in [0, 1]; default: 0.75.

    Returns:
        r_zero (pandas.Series): The average number of people that would have been
            infected by someone whose infectious spell ended in the last
            *window_length* days if everyone had been susceptible, neglecting
            heterogeneous matching and changes in the rate of immunity. Without a
            time column in ``df`` the result is a single scalar. A susceptible
            share of zero is not special-cased.

    """
    r_effective = calculate_r_effective(df=df, window_length=window_length)
    not_susceptible = df["immunity"] > threshold
    grouper = _create_time_grouper(df)

    if grouper is None:
        share_susceptibles = 1 - not_susceptible.mean()
    else:
        # Attach whichever time column the grouper keys on. Hard-coding ``date``
        # raised an AttributeError for frames that only carry a ``period`` column.
        time_key = grouper.key
        immune_with_time = not_susceptible.to_frame().assign(
            **{time_key: df[time_key]}
        )
        share_susceptibles = 1 - immune_with_time.groupby(grouper)["immunity"].mean()

    return r_effective / share_susceptibles
def _create_time_grouper(df: pd.DataFrame) -> Union[pd.Grouper, None]:
    """Build the grouper for the time dimension of the DataFrame.

    A ``"date"`` column takes precedence and is grouped at daily frequency. If only
    a ``"period"`` column exists, rows are grouped by its values. ``None`` is
    returned when the DataFrame has neither column.

    """
    available = df.columns
    if "date" in available:
        return pd.Grouper(key="date", freq="D")
    if "period" in available:
        return pd.Grouper(key="period")
    return None