Source code for sid.testing_demand

"""Contains the code for calculating the demand for tests."""
import itertools
from typing import Any
from typing import Dict
from typing import List
from typing import Tuple

import numpy as np
import pandas as pd
from sid.shared import boolean_choices
from sid.shared import random_choice


[docs]def calculate_demand_for_tests(
    states: pd.DataFrame,
    testing_demand_models: Dict[str, Dict[str, Any]],
    params: pd.DataFrame,
    date: pd.Timestamp,
    columns_to_keep: List[str],
    seed: itertools.count,
) -> Tuple[pd.Series, pd.Series]:
    """Calculate the demand for tests.

    The following is a three-staged process:

    1. Compute the probability for each demand model that an individual demands a test.
    2. Sample which individuals demand a test.
    3. For those demanding a test, sample why they want a test.

    Sampling whether an individual requests any test at all and, then, sampling the
    reason for the test is computationally beneficial in contract to sampling with the
    probability of each demand model the demand and the reason. The first approach
    always involves two steps whereas the complexity of the latter increases with the
    number of demand models.

    Args:
        states (pandas.DataFrame): The states of all individuals.
        testing_demand_models (dict): A dictionary containing the demand models for
            testing.
        params (pandas.DataFrame): The parameter DataFrame.
        date (pandas.Timestamp): Current date.
        columns_to_keep (List[str]): Columns which should be kept.
        seed (itertools.count): The seed counter.

    Returns:
        (tuple): Tuple containing.

        - demands_test (pandas.Series): A boolean series indicating which person
          demands a test.
        - channel_demands_test (pandas.Series): A series indicating the demand model
          which made the individual ask for a test.

    """
    demand_probabilities = _calculate_demand_probabilities(
        states, testing_demand_models, params, date, seed
    )

    demands_test = _sample_which_individuals_demand_a_test(demand_probabilities, seed)

    if "channel_demands_test" in columns_to_keep:
        channel_demands_test = _sample_reason_for_demanding_a_test(
            demand_probabilities, demands_test, seed
        )
    else:
        channel_demands_test = None

    return demands_test, channel_demands_test


[docs]def _calculate_demand_probabilities(
    states: pd.DataFrame,
    testing_demand_models: Dict[str, Dict[str, Any]],
    params: pd.DataFrame,
    date: pd.Timestamp,
    seed: itertools.count,
) -> pd.DataFrame:
    """Calculate the demand probabilities for each test demand model.

    Args:
        states (pandas.DataFrame): The states of all individuals.
        testing_demand_models (dict): A dictionary containing the demand models for
            testing.
        params (pandas.DataFrame): The parameter DataFrame.
        date (pandas.Timestamp): Current date.
        seed (itertools.count): The seed counter.

    Returns:
        demand_probabilities (pandas.DataFrame): Contains for each individual and every
            demand model the probability that the individual will request a test.

    """
    demand_probabilities = pd.DataFrame(index=states.index)
    for name, model in testing_demand_models.items():
        loc = model.get("loc", params.index)
        func = model["model"]

        if model["start"] <= date <= model["end"]:
            probabilities = func(states=states, params=params.loc[loc], seed=next(seed))
        else:
            probabilities = 0

        demand_probabilities[name] = probabilities

    return demand_probabilities


[docs]def _sample_which_individuals_demand_a_test(
    demand_probabilities: pd.DataFrame, seed: itertools.count
) -> pd.Series:
    """Sample which individuals demand a test.

    At first, compute the probabilities that each individual will demand no test at all
    and the corresponding probability that an individual demands at least one test.

    Then, sample individuals which demand a test.

    Args:
        demand_probabilities (pandas.DataFrame): Contains for each individual and every
            demand model the probability that the individual will request a test.
        seed (itertools.count): The seed counter.

    Returns:
        demands_test (pandas.Series): A boolean series indicating individuals who demand
            a test.

    """
    np.random.seed(next(seed))

    probability_demands_any_test = 1 - (1 - demand_probabilities).prod(axis=1)

    demands_test = pd.Series(
        index=demand_probabilities.index,
        data=boolean_choices(probability_demands_any_test.to_numpy()),
    )

    return demands_test


[docs]def _sample_reason_for_demanding_a_test(
    demand_probabilities: pd.DataFrame, demands_test: pd.Series, seed: itertools.count
) -> pd.Series:
    """Sample reason for demanding a test.

    Args:
        demand_probabilities (pandas.DataFrame): Contains for each individual and every
            demand model the probability that the individual will request a test.
        demands_test (pandas.Series): A boolean series indicating individuals who demand
            a test.
        seed (itertools.count): The seed counter.

    Returns:
        demands_test_reason (pandas.Series): Shows which demand model caused the bid.

    Examples:
        >>> import itertools
        >>> demand_probabilities = pd.DataFrame(
        ...     [[0.1, 0.2], [0.8, 0.4]], columns=["a", "b"]
        ... )
        >>> demands_test = pd.Series([True, True])
        >>> seed = itertools.count(2)
        >>> _sample_reason_for_demanding_a_test(
        ...     demand_probabilities, demands_test, seed
        ... )
        0    b
        1    a
        dtype: category
        Categories (2, object): ['a', 'b']

    """
    np.random.seed(next(seed))

    normalized_probabilities = _normalize_probabilities(
        demand_probabilities[demands_test]
    )
    sampled_reason = random_choice(
        len(demand_probabilities.columns), normalized_probabilities
    )

    demands_test_reason = pd.Series(index=demands_test.index, data=-1)
    demands_test_reason.loc[demands_test] = sampled_reason

    codes_to_reason = {-1: "no_reason", **dict(enumerate(demand_probabilities.columns))}
    demands_test_reason = demands_test_reason.astype("category").cat.rename_categories(
        codes_to_reason
    )

    return demands_test_reason


[docs]def _normalize_probabilities(probabilities: pd.DataFrame) -> pd.Series:
    """Normalize probabilities such that they sum to one.

    Args:
        probabilities (pandas.DataFrame): Contains probabilities for each individual and
            demand model to request a test.

    Returns:
        (pandas.Series): A series with normalized probabilities summing up to one.

    Examples:
        >>> df = pd.DataFrame([[0.1, 0.1], [0.1, 0.3]])
        >>> _normalize_probabilities(df)
              0     1
        0  0.50  0.50
        1  0.25  0.75

    """
    return probabilities.divide(probabilities.sum(axis=1), axis=0)