Source code for sid.virus_strains

from typing import Any
from typing import Dict
from typing import List
from typing import Tuple
from typing import Union

import numpy as np
import pandas as pd
from sid.config import DTYPE_VIRUS_STRAIN


def prepare_virus_strain_factors(
    virus_strains: Dict[str, List[str]], params: pd.DataFrame
) -> Dict[str, Union[List[str], np.ndarray]]:
    """Prepare the information on virus strains and factors for infectiousness.

    This function recreates the dictionary to not change the original value in
    partialed function and adds the factors.

    The ``contagiousness_factor`` explains how contagious a virus strain is, in
    comparison to the base strain. The ``immunity_resistance_factor`` explains how
    well the immunity level guards from (re)infection, dependent on the strain. The
    infection probability is multiplied with:
    (1 - (1 - immunity_resistance_factor) * immunity), so that higher values reduce
    the effect of immunity.

    Args:
        virus_strains: Dictionary whose ``"names"`` entry lists the strain names.
        params: DataFrame with a ``"value"`` column. For more than one strain it
            is read at the index keys ``("virus_strain", <name>, <factor>)``. It
            is not accessed when there is exactly one strain.

    Returns:
        A new dictionary with the keys ``"names"``, ``"contagiousness_factor"``
        and ``"immunity_resistance_factor"``. With several strains, the
        contagiousness factors are divided by their maximum.

    Raises:
        ValueError: If no strain name is given, if any factor is negative, or if
            all contagiousness factors are zero (normalisation is undefined).

    """
    names = virus_strains["names"]
    n_strains = len(names)
    if n_strains == 0:
        raise ValueError("'virus_strains' must contain at least one name.")

    if n_strains == 1:
        contagiousness_factor = np.ones(1)
        immunity_resistance_factor = np.zeros(1)
    else:
        # Look up every factor by its own name instead of unpacking a list-based
        # selection positionally, so the result cannot depend on the row order
        # of ``params``.
        factors = {
            factor: np.array(
                [params.loc[("virus_strain", name, factor), "value"] for name in names],
                dtype=float,
            )
            for factor in ("contagiousness_factor", "immunity_resistance_factor")
        }
        if any((values < 0).any() for values in factors.values()):
            raise ValueError("Factors of 'virus_strains' cannot be smaller than 0.")

        contagiousness_factor = factors["contagiousness_factor"]
        immunity_resistance_factor = factors["immunity_resistance_factor"]

        largest = contagiousness_factor.max()
        if largest == 0:
            # Dividing by zero would silently turn every factor into NaN.
            raise ValueError(
                "At least one 'contagiousness_factor' of 'virus_strains' must be "
                "greater than 0."
            )
        contagiousness_factor = contagiousness_factor / largest

    return {
        "names": names,
        "contagiousness_factor": contagiousness_factor,
        "immunity_resistance_factor": immunity_resistance_factor,
    }
def combine_first_factorized_infections(
    first: np.ndarray, second: np.ndarray
) -> np.ndarray:
    """Merge two arrays of factorized infections, preferring ``first``.

    Wherever ``first`` holds a non-negative code, that code is used. Everywhere
    else the entry of ``second`` is kept. ``second`` itself is not modified
    because the merge happens on a copy.

    Args:
        first: Factorized infections which take precedence.
        second: Factorized infections used where ``first`` is negative.

    Returns:
        A copy of ``second`` overwritten with the non-negative entries of
        ``first``.

    """
    takes_precedence = first >= 0
    merged = second.copy()
    merged[takes_precedence] = first[takes_precedence]
    return merged
def categorize_factorized_infections(
    factorized_infections: Union[pd.Series, np.ndarray], virus_strains: Dict[str, Any]
) -> pd.Series:
    """Turn integer-coded infections into a categorical series of strain names.

    The codes ``0, 1, ...`` are mapped to the entries of
    ``virus_strains["names"]`` in order. The code ``-1`` is first labelled
    ``"not_infected"`` and that category is then removed, so these entries end
    up as missing values.

    Args:
        factorized_infections: Integer codes of the infections.
        virus_strains: Dictionary whose ``"names"`` entry lists the strain names.

    Returns:
        A categorical series whose categories are the strain names.

    """
    strain_names = virus_strains["names"]
    # One integer category per possible code, starting at the "no infection" code.
    coded = pd.Categorical(
        factorized_infections, categories=range(-1, len(strain_names))
    )
    labelled = coded.rename_categories(["not_infected"] + strain_names)
    infected_only = labelled.remove_categories("not_infected")
    return pd.Series(infected_only)
def factorize_initial_infections(
    infections: pd.DataFrame, virus_strains: Dict[str, Any]
) -> pd.DataFrame:
    """Factorize multiple boolean or categorical infections.

    Every column of ``infections`` is factorized on its own with
    :func:`factorize_boolean_or_categorical_infections`.

    Args:
        infections: DataFrame whose columns are either all boolean (only allowed
            when there is exactly one virus strain) or all categorical.
        virus_strains: Dictionary whose ``"names"`` entry lists the strain names.

    Returns:
        A DataFrame with the same index and columns as ``infections`` holding
        the factorized values.

    Raises:
        ValueError: If the columns are neither all categorical nor all boolean
            with a single virus strain.

    """
    column_dtypes = infections.dtypes
    # The ``np.bool`` alias was removed in NumPy 1.24. It used to point to the
    # builtin ``bool``, so comparing against ``bool`` keeps the behavior.
    all_columns_boolean = (column_dtypes == bool).all()
    all_columns_categorical = (column_dtypes == "category").all()
    only_one_virus = len(virus_strains["names"]) == 1

    if not ((all_columns_boolean and only_one_virus) or all_columns_categorical):
        raise ValueError("Infections are not all boolean or categorical.")

    factorized_infections = pd.DataFrame(index=infections.index)
    for column in infections.columns:
        factorized_infections[column] = factorize_boolean_or_categorical_infections(
            infections[column], virus_strains
        )
    return factorized_infections
def factorize_boolean_or_categorical_infections(infections, virus_strains):
    """Factorize boolean or categorical infections.

    Dispatches on the dtype of ``infections``: boolean data is handled by
    :func:`_factorize_boolean_infections`, categorical data by
    :func:`factorize_categorical_infections`.

    Args:
        infections: Infections with a boolean or categorical dtype.
        virus_strains: Dictionary whose ``"names"`` entry lists the strain names.

    Returns:
        The factorized values returned by the selected helper. The categories
        it returns as well are discarded.

    Raises:
        ValueError: If ``infections`` is neither boolean nor categorical.

    """
    strain_names = virus_strains["names"]
    # Objects without a ``dtype`` attribute can never be categorical.
    dtype = getattr(infections, "dtype", None)

    # Use the public ``pd.api.types`` check and an ``isinstance`` test instead of
    # the private ``pd.core`` path and the deprecated ``is_categorical_dtype``.
    if pd.api.types.is_bool_dtype(infections):
        values, _ = _factorize_boolean_infections(infections, strain_names)
    elif isinstance(dtype, pd.CategoricalDtype):
        values, _ = factorize_categorical_infections(infections, strain_names)
    else:
        raise ValueError(
            "Unknown dtype of infections. Can only handle 'bool' and 'category'"
        )
    return values
def _factorize_boolean_infections(
    infected: Union[pd.Series, np.ndarray], names: List[str]
) -> Tuple[np.ndarray, np.ndarray]:
    """Factorize boolean infection.

    Args:
        infected: Boolean indicator of infections.
        names: Names of the virus strains. Must contain exactly one name because
            a boolean cannot distinguish between several strains.

    Returns:
        A tuple ``(values, categories)``. ``values`` has dtype
        ``DTYPE_VIRUS_STRAIN`` and is 0 where ``infected`` is true and -1
        elsewhere. ``categories`` is an array holding the single strain name.

    Raises:
        ValueError: If ``names`` does not contain exactly one strain or if
            ``infected`` does not have a bool dtype.

    """
    # An empty list must be rejected too: code 0 would refer to no strain at all.
    if len(names) != 1:
        raise ValueError(
            f"Boolean infections must correspond to one virus strain, but got {names}."
        )

    if infected.dtype.name != "bool":
        raise ValueError("Infections must have a bool dtype.")

    # Start from "not infected" everywhere and mark the infected entries.
    values = np.full(len(infected), -1, dtype=DTYPE_VIRUS_STRAIN)
    values[infected] = 0

    categories = np.array(names[:1])
    return values, categories
def factorize_categorical_infections(
    virus_strain: pd.Series, names: List[str]
) -> Tuple[np.ndarray, pd.Index]:
    """Factorize a categorical variable indicating virus strains.

    The categories are reordered to follow ``names`` so that the resulting codes
    agree with the positions of the strains in ``names``.

    Args:
        virus_strain: Categorical series of virus strains.
        names: Names of the virus strains in the desired order.

    Returns:
        A tuple ``(codes, categories)``: the category codes converted to
        ``DTYPE_VIRUS_STRAIN`` and the reordered categories.

    Raises:
        ValueError: If the categories of ``virus_strain`` cannot be reordered to
            ``names``.

    """
    try:
        reordered = virus_strain.cat.reorder_categories(names)
    except ValueError as e:
        raise ValueError(
            "Infections do not align with the passed virus strains:\n\n"
            f"virus_strains: {names}\ninfections: {virus_strain.cat.categories}"
        ) from e

    codes = reordered.cat.codes.to_numpy(DTYPE_VIRUS_STRAIN)
    return codes, reordered.cat.categories