Source code for sid.initial_conditions

"""This module contains everything related to initial conditions.

The initial conditions govern the distribution of infections and immunity in the
beginning of a simulation and can used to create patterns which match the real data.

"""
import itertools
import math
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

import numba as nb
import numpy as np
import pandas as pd
from sid.config import DTYPE_VIRUS_STRAIN
from sid.contacts import boolean_choice
from sid.testing import perform_testing
from sid.time import timestamp_to_sid_period
from sid.update_states import compute_waning_immunity
from sid.update_states import update_derived_state_variables
from sid.update_states import update_states
from sid.vaccination import vaccinate_individuals


[docs]def sample_initial_infections( infections: Union[int, float], n_people: Optional[int] = None, index: Optional[pd.Index] = None, seed: Optional[int] = None, ) -> pd.Series: """Create a :class:`pandas.Series` indicating infected individuals. Args: infections (Union[int, float]): The infections can be either a :class:`pandas.Series` where each individual has an indicator for the infection status, an integer representing the number of infected people or a float representing the share of infected individuals. n_people (Optional[int]): The number of individuals. index (Optional[pandas.Index]): The index for the infections. seed (Optional[int]): A seed. Returns: infections (pandas.Series): A series indicating infected individuals. """ seed = np.random.randint(0, 1_000_000) if seed is None else seed np.random.seed(seed) if n_people is None and index is None: raise ValueError("Either 'n_people' or 'index' has to be provided.") elif n_people is None and index is not None: n_people = len(index) elif index is not None and n_people != len(index): raise ValueError("'n_people' has to match the length of 'index'.") if isinstance(infections, int): pass elif isinstance(infections, float) and 0 <= infections < 1: infections = math.ceil(n_people * infections) else: raise ValueError("'infections' must be an int or a float between 0 and 1.") index = pd.RangeIndex(n_people) if index is None else index infected_indices = np.random.choice(n_people, size=infections, replace=False) infections = pd.Series(index=index, data=False) infections.iloc[infected_indices] = True return infections
[docs]def sample_initial_immunity( immunity: Union[int, float, pd.Series, None], infected_or_immune: pd.Series, seed: Optional[int], ) -> pd.Series: """Create indicator for initially immune people. There are some special cases to handle: 1. Infected individuals are always treated as being immune and reduce the number of additional immune individuals. 2. If immunity is given as an integer or float, additional immune individuals are sampled randomly. 3. If immunity is given as a series, immune and infected individuals form the total immune population. Args: immunity (Union[int, float, pandas.Series]): The people who are immune in the beginning can be specified as an integer for the number, a float between 0 and 1 for the share, and a :class:`pandas.Series` with the same index as states. Note that, infected individuals are immune and included. infected_or_immune (pandas.Series): A series which indicates immunity level from from individuals in state. seed (optional[int]): A seed. Returns: initial_immunity (pandas.Series): Indicates immune individuals. """ seed = np.random.randint(0, 1_000_000) if seed is None else seed np.random.seed(seed) immunity = 0 if immunity is None else immunity n_people = len(infected_or_immune) if isinstance(immunity, float): immunity = math.ceil(n_people * immunity) initial_immunity = infected_or_immune.copy() if isinstance(immunity, int): n_immune = (initial_immunity > 0).sum() n_additional_immune = immunity - n_immune if 0 < n_additional_immune <= n_people - n_immune: choices = np.arange(len(initial_immunity))[~(initial_immunity > 0)] ilocs = np.random.choice(choices, size=n_additional_immune, replace=False) initial_immunity.iloc[ilocs] = 1.0 # this is reduced to a realistic value # given params in the subsequent call to ``_integrate_immune_individuals``. elif n_additional_immune > (n_people - n_immune): raise ValueError( "Number of initially immune exceeds number of candidate individuals. " "Consider setting a lower value for 'immunity'. Note that initial " "immunity can also stem from an initial infection." ) elif isinstance(immunity, pd.Series): initial_immunity = np.maximum(initial_immunity, immunity) else: raise ValueError("'initial_immunity' must be an int, float or pd.Series.") return initial_immunity
[docs]def sample_initial_distribution_of_infections_and_immunity( states: pd.DataFrame, params: pd.DataFrame, initial_conditions: Dict[str, Any], testing_demand_models: Dict[str, Dict[str, Any]], testing_allocation_models: Dict[str, Dict[str, Any]], testing_processing_models: Dict[str, Dict[str, Any]], virus_strains: Dict[str, Any], vaccination_models: Optional[Callable], seed: itertools.count, derived_state_variables: Dict[str, str], ): """Sample the initial distribution of infections and immunity. This functions allows to - set the number of initial infections. - increase the number of infections by some factor to reduce underreporting, for example, due to asymptomatic cases. You can also keep shares between subgroups constant. - let infections evolve over some periods to have courses of diseases in every stage. - assume pre-existing immunity in the population. Args: states (pandas.DataFrame): The states. params (pandas.DataFrame): The parameters. initial_conditions (Dict[str, Any]): The initial conditions allow you to govern the distribution of infections and immunity and the heterogeneity of courses of disease at the start of the simulation. Use ``None`` to assume no heterogeneous courses of diseases and 1% infections. Otherwise, ``initial_conditions`` is a dictionary containing the following entries: - ``assort_by`` (Optional[Union[str, List[str]]]): The relative infections is preserved between the groups formed by ``assort_by`` variables. By default, no group is formed and infections spread across the whole population. - ``burn_in_periods`` (List[pd.Timestamp]): List of dates that make up the burn-in period. - ``growth_rate`` (float): The growth rate specifies the increase of infections from one burn-in period to the next. For example, two indicates doubling case numbers every period. The value must be greater than or equal to one. Default is one which is no distribution over time. - ``initial_immunity`` (Union[int, float, pandas.Series]): The n_people who are immune in the beginning can be specified as an integer for the number, a float between 0 and 1 for the share, and a :class:`pandas.Series` with the same index as states. Note that infected individuals are also immune. For a 10% pre-existing immunity with 2% currently infected people, set the key to 0.12. By default, only infected individuals indicated by the initial infections are immune. - ``initial_infections`` (Union[int, float, pandas.Series, pandas.DataFrame]): The initial infections can be given as an integer which is the number of randomly infected individuals, as a float for the share or as a :class:`pandas.Series` which indicates whether an individuals is infected. If initial infections are a :class:`pandas.DataFrame`, then, the index is the same as ``states``, columns are dates or periods which can be sorted, and values are infected individuals on that date. This step will skip upscaling and distributing infections over days and directly jump to the evolution of states. By default, 1% of individuals is infected. - ``known_cases_multiplier`` (int): The factor can be used to scale up the initial infections while keeping shares between ``assort_by`` variables constant. This is helpful if official numbers are underreporting the number of cases. - ``virus_shares`` (Union[dict, pandas.Series]): A mapping between the names of the virus strains and their share among newly infected individuals in each burn-in period. virus_strains (Dict[str, Any]): A dictionary with the keys ``"names"`` and ``"factors"`` holding the different contagiousness factors of multiple viruses. vaccination_models (Optional[Dict[str, Dict[str, Any]): A dictionary of models which allow to vaccinate individuals. The ``"model"`` key holds a function with arguments ``states``, ``params``, and a ``seed`` which returns boolean indicators for individuals who received a vaccination. seed (itertools.count): The seed counter. derived_state_variables (Dict[str, str]): A dictionary that maps names of state variables to pandas evaluation strings that generate derived state variables, i.e. state variables that can be calculated from the existing state variables. Returns: states (pandas.DataFrame): The states with sampled infections and immunity. """ initial_infections = initial_conditions["initial_infections"] if isinstance(initial_infections, (int, float, pd.Series)): if isinstance(initial_infections, (float, int)): initial_infections = sample_initial_infections( initial_infections, index=states.index, seed=next(seed) ) scaled_infections = _scale_up_initial_infections( initial_infections=initial_infections, states=states, assort_by=initial_conditions["assort_by"], known_cases_multiplier=initial_conditions["known_cases_multiplier"], seed=next(seed), ) spread_out_infections = _spread_out_initial_infections( scaled_infections=scaled_infections, burn_in_periods=initial_conditions["burn_in_periods"], growth_rate=initial_conditions["growth_rate"], seed=next(seed), ) spread_out_virus_strains = _sample_factorized_virus_strains_for_infections( spread_out_infections=spread_out_infections, virus_shares=initial_conditions["virus_shares"], seed=next(seed), ) else: spread_out_virus_strains = initial_conditions["initial_infections"] # this is necessary to make derived state variables usable in testing models states = update_derived_state_variables(states, derived_state_variables) for burn_in_date in initial_conditions["burn_in_periods"]: states["date"] = burn_in_date states["period"] = timestamp_to_sid_period(burn_in_date) states, _, to_be_processed_tests = perform_testing( date=burn_in_date, states=states, params=params, testing_demand_models=testing_demand_models, testing_allocation_models=testing_allocation_models, testing_processing_models=testing_processing_models, seed=seed, ) newly_vaccinated = vaccinate_individuals( burn_in_date, vaccination_models, states, params, seed ) states = update_states( states=states, newly_infected_contacts=spread_out_virus_strains[burn_in_date], newly_infected_events=spread_out_virus_strains[burn_in_date], params=params, to_be_processed_tests=to_be_processed_tests, virus_strains=virus_strains, newly_vaccinated=newly_vaccinated, seed=seed, derived_state_variables=derived_state_variables, ) # Remove date information because when it is available, we assume the simulation is # resumed. states = states.drop(columns=["date", "period"]) initial_immunity = sample_initial_immunity( initial_conditions["initial_immunity"], states["immunity"], next(seed) ) states = _integrate_immune_individuals( states, params, initial_immunity, len(initial_conditions["burn_in_periods"]), next(seed), ) return states
[docs]def _scale_up_initial_infections( initial_infections, states, assort_by, known_cases_multiplier, seed ): r"""Increase number of infections by a multiplier taken from params. If no ``assort_by`` variables are provided, infections are simply scaled up in the whole population without regarding any inter-group differences. If ``assort_by`` variables are passed to the function, the probability for each individual to become infectious depends on the share of infected people in their group, :math:`\mu` and the growth factor, :math:`r`. .. math:: p_i = \frac{\mu * (r - 1)}{1 - \mu} The formula ensures that relative number of cases between groups defined by the variables in ``assort_by`` is preserved. Args: initial_infections (pandas.Series): Boolean array indicating initial infections. states (pandas.DataFrame): The states DataFrame. assort_by (Optional[List[str]]): A list of ``assort_by`` variables if infections should be proportional between groups or ``None`` if no groups are used. known_cases_multiplier (float): The multiplier which can be used to scale infections from observed infections to the real number of infections. seed (int): The seed. Returns: scaled_up (pandas.Series): A boolean series with upscaled infections. """ states["known_infections"] = initial_infections if assort_by is None: share_infections = pd.Series( index=states.index, data=states["known_infections"].mean() ) else: share_infections = states.groupby(assort_by)["known_infections"].transform( "mean" ) states.drop(columns="known_infections", inplace=True) prob_numerator = share_infections * (known_cases_multiplier - 1) prob_denominator = 1 - share_infections prob = prob_numerator / prob_denominator scaled_up_arr = _scale_up_initial_infections_numba( initial_infections.to_numpy(), prob.to_numpy(), seed ) scaled_up = pd.Series(scaled_up_arr, index=states.index) return scaled_up
@nb.njit # pragma: no cover
[docs]def _scale_up_initial_infections_numba(initial_infections, probabilities, seed): """Scale up initial infections. Args: initial_infections (numpy.ndarray): Boolean array indicating initial infections. probabilities (numpy.ndarray): Probabilities for becoming infected. seed (int): Seed to control randomness. Returns: scaled_infections (numpy.ndarray): Upscaled infections. """ np.random.seed(seed) n_obs = initial_infections.shape[0] scaled_infections = initial_infections.copy() for i in range(n_obs): if not scaled_infections[i]: scaled_infections[i] = boolean_choice(probabilities[i]) return scaled_infections
[docs]def _spread_out_initial_infections( scaled_infections: pd.Series, burn_in_periods: List[pd.Timestamp], growth_rate: float, seed: int, ) -> pd.DataFrame: """Spread out initial infections over several periods, given a growth rate. Args: scaled_infections (pandas.Series): The scaled infections. burn_in_periods (List[pd.Timestamp]): List of dates that make up the burn-in period. growth_rate (float): The growth rate of infections from one burn-in period to the next. seed (int): Seed to control randomness. Return: spread_infections (pandas.DataFrame): A list of boolean arrays which indicate new infections for each day of the burn-in period. """ np.random.seed(seed) n_burn_in_periods = len(burn_in_periods) if n_burn_in_periods > 1: scaled_infections = scaled_infections.to_numpy() if growth_rate == 1: shares = np.array([1] + [0] * (n_burn_in_periods - 1)) else: shares = -np.diff(1 / growth_rate ** np.arange(n_burn_in_periods + 1))[::-1] shares = shares / shares.sum() hypothetical_infection_day = np.random.choice( n_burn_in_periods, p=shares, replace=True, size=len(scaled_infections) ) else: hypothetical_infection_day = np.zeros(len(scaled_infections)) spread_infections = pd.concat( [ pd.Series( data=(hypothetical_infection_day == period) & scaled_infections, name=burn_in_periods[period], ) for period in range(n_burn_in_periods) ], axis=1, ) return spread_infections
[docs]def _sample_factorized_virus_strains_for_infections( spread_out_infections: pd.DataFrame, virus_shares: Dict[str, Any], seed: int, ) -> pd.DataFrame: """Convert boolean infections to factorized virus strains.""" np.random.seed(seed) spread_out_virus_strains = pd.DataFrame( data=-1, index=spread_out_infections.index, columns=spread_out_infections.columns, dtype=DTYPE_VIRUS_STRAIN, ) virus_strain_factors = list(range(len(virus_shares))) probabilities = list(virus_shares.values()) for column in spread_out_infections.columns: n_infected = spread_out_infections[column].sum() if 1 <= n_infected: sampled_virus_strains = np.random.choice( virus_strain_factors, p=probabilities, size=n_infected ) spread_out_virus_strains.loc[ spread_out_infections[column], column ] = sampled_virus_strains return spread_out_virus_strains
[docs]def _integrate_immune_individuals( states: pd.DataFrame, params: pd.DataFrame, initial_immunity: pd.Series, n_burn_in_periods: int, seed: int, ) -> pd.DataFrame: """Integrate immunity level of individuals in states. Args: states (pandas.DataFrame): The states which already include sampled infections. params (pandas.DataFrame): The parameters. initial_immunity (pandas.Series): A series with sampled immunity levels of individuals. n_burn_in_periods (int): The number of periods over which infections are distributed and can progress. seed (int): Seed to control randomness. Returns: states (pandas.DataFrame): The states with initial immunity integrated. """ np.random.seed(seed) extra_immune = initial_immunity > states["immunity"] states.loc[extra_immune, "ever_infected"] = True states.loc[extra_immune, "cd_ever_infected"] = -(n_burn_in_periods + 1) # set waned immunity level given params, randomizing the infection date infection_date = pd.Series( np.random.randint(0, n_burn_in_periods, size=sum(extra_immune)) ) waned_immunity = compute_waning_immunity(params, infection_date, event="infection") initial_immunity[extra_immune] = waned_immunity states["immunity"] = np.maximum(initial_immunity, states["immunity"]) return states