[ ]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display

from epimodels.continuous.models import SISLogistic
from epimodels.fitting import (
    Dataset,
    DataSeries,
    ParameterSpec,
    ModelFitter,
    FittingResult,
    fit_model,
    SumOfSquaredErrors,
    WeightedSSE,
    PoissonLikelihood,
    NegativeBinomialLikelihood,
    NormalLikelihood,
    HuberLoss,
    ScipyOptimizer,
    MultiStartOptimizer,
)

np.random.seed(42)
plt.style.use('seaborn-v0_8-whitegrid')
%matplotlib inline
[2]:
TRUE_BETA = 0.5
TRUE_GAMMA = 0.2
TRUE_R = 0.1
TRUE_K = 10000

INITIAL_INFECTED = 100
INITIAL_SUSCEPTIBLE = 5000

sis_model = SISLogistic()
sis_model(
    inits=[INITIAL_SUSCEPTIBLE, INITIAL_INFECTED],
    trange=[0, 100],
    totpop=INITIAL_SUSCEPTIBLE + INITIAL_INFECTED,
    params={"beta": TRUE_BETA, "gamma": TRUE_GAMMA, "r": TRUE_R, "k": TRUE_K,
    },
)

times = sis_model.traces["time"]
true_S = sis_model.traces["S"]
true_I = sis_model.traces["I"]

print("True parameters:")
print(f"beta={TRUE_BETA}, gamma={TRUE_GAMMA}, r={TRUE_R}, k={TRUE_K}")
print(f"R0 = {TRUE_BETA / TRUE_GAMMA:.2f}")
True parameters:
beta=0.5, gamma=0.2, r=0.1, k=10000
R0 = 2.50
[3]:
observed_S = np.random.poisson(lam=true_S).astype(float)
observed_I = np.random.poisson(lam=true_I).astype(float)

observed_S = np.maximum(observed_S, 0)
observed_I = np.maximum(observed_I, 0)
[4]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

axes[0].plot(times, true_S, 'g-', label='Ground Truth S', linewidth=2)
axes[0].plot(times, observed_S, 'mo', label='Observed S', markersize=3, alpha=0.7)
axes[0].set_xlabel('Time (days)')
axes[0].set_ylabel('Number of Susceptible')
axes[0].set_title('Susceptible Population')
axes[0].legend()

axes[1].plot(times, true_I, 'b-', label='Ground Truth I', linewidth=2)
axes[1].plot(times, observed_I, 'ro', label='Observed I', markersize=3, alpha=0.7)
axes[1].set_xlabel('Time (days)')
axes[1].set_ylabel('Number of Infected')
axes[1].set_title('Infectious Population')
axes[1].legend()

plt.tight_layout()
plt.show()
../_images/Examples_SISLogistic_fitting_3_0.png
[5]:
model = SISLogistic()

dataset = Dataset(model)

dataset.register(
    name='infected',
    values=observed_I,
    times=times,
    state_variable='I',
    time_unit='days',
)

validation_result = dataset.validate(total_population=INITIAL_SUSCEPTIBLE + INITIAL_INFECTED)
print(f"Dataset valid: {validation_result.is_valid}")
print(f"Time range: {dataset.time_range}")
print(dataset)
Dataset valid: True
Time range: (0.0, 100.0)
Dataset(n_series=1, variables=['I'], time_range=(0.0, 100.0))
[6]:
param_specs = [
    ParameterSpec(
        name='beta',
        bounds=(0.1, 1.0),
        initial=0.7,
    ),
    ParameterSpec(
        name='gamma',
        bounds=(0.01, 0.5),
        initial=0.3,
    ),
    ParameterSpec(
        name='r',
        bounds=(0.01, 0.5),
        initial=0.4,
    ),
    ParameterSpec(
        name='k',
        bounds=(1000, 20000),
        initial=5000,
    ),
]

print("Parameters to fit:")
for spec in param_specs:
    print(f"  {spec.name}: bounds={spec.bounds}, initial={spec.initial}")
Parameters to fit:
  beta: bounds=(0.1, 1.0), initial=0.7
  gamma: bounds=(0.01, 0.5), initial=0.3
  r: bounds=(0.01, 0.5), initial=0.4
  k: bounds=(1000, 20000), initial=5000
[7]:
fitter = ModelFitter(
    model=model,
    dataset=dataset,
    parameters_to_fit=param_specs,
    total_population=INITIAL_SUSCEPTIBLE + INITIAL_INFECTED,
    optimizer=ScipyOptimizer(method='L-BFGS-B', max_iterations=200),
)

result = fitter.fit()

print("\n" + "="*50)
print("FITTING RESULTS")
print("="*50)
print(f"Convergence: {result.convergence}")
print(f"Number of evaluations: {result.n_evaluations}")
print(f"Final loss: {result.best_loss:.2f}")

print("\nFitted parameters:")

true_values = {
    "beta": TRUE_BETA,
    "gamma": TRUE_GAMMA,
    "r": TRUE_R,
    "k": TRUE_K,
}

for param, value in result.best_params.items():
    true_val = true_values[param]
    error = abs(value - true_val) / true_val * 100

    print(f"  {param}: {value:.4f} (true: {true_val}, error: {error:.1f}%)")
C:\Users\darla\epimodels\epimodels\fitting\base.py:200: UserWarning: Series 'infected': values exceed total population
  warnings.warn(warning, UserWarning)

==================================================
FITTING RESULTS
==================================================
Convergence: True
Number of evaluations: 330
Final loss: 45545.77

Fitted parameters:
  beta: 0.6176 (true: 0.5, error: 23.5%)
  gamma: 0.2948 (true: 0.2, error: 47.4%)
  r: 0.1048 (true: 0.1, error: 4.8%)
  k: 11449.9407 (true: 10000, error: 14.5%)
[18]:
fitted_model = result.fitted_model

fig, ax = plt.subplots(figsize=(12, 6))

ax.plot(times, observed_I, 'ro', label='Observed I', markersize=4, alpha=0.7)

ax.plot(times, true_I, 'b-', label='Ground Truth', linewidth=2, alpha=0.5)

if fitted_model is not None and fitted_model.traces:
    ax.plot(fitted_model.traces['time'], fitted_model.traces['I'],
            'g--', label='Fitted', linewidth=2)

ax.set_xlabel('Time (days)', fontsize=12)
ax.set_ylabel('Number of Infected', fontsize=12)
ax.set_title('Model Fitting Results', fontsize=14)
ax.legend(fontsize=11)
plt.tight_layout()
plt.show()
../_images/Examples_SISLogistic_fitting_7_0.png
[20]:
model2 = SISLogistic()
dataset2 = Dataset(model2)

dataset2.register(
    name='susceptible',
    values=observed_S,
    times=times,
    state_variable='S',
).register(
    name='infected',
    values=observed_I,
    times=times,
    state_variable='I',
)

print(f"Registered series: {list(dataset2.series.keys())}")
print(f"State variables mapped: {list(set(s.state_variable for s in dataset2.series.values()))}")
Registered series: ['susceptible', 'infected']
State variables mapped: ['S', 'I']
[21]:
from epimodels.fitting import DataValidationError

model_test = SISLogistic()
dataset_test = Dataset(model_test)

dataset_test.register(
    name='invalid',
    values=observed_I,
    times=times,
    state_variable='X',
)

validation = dataset_test.validate()
print(f"Valid: {validation.is_valid}")
print(f"Errors: {validation.errors}")
Valid: False
Errors: ["Series 'invalid': state variable 'X' not found in model. Available: ['S', 'I']", "Data mapped to non-existent variables: ['X']"]
[22]:
model3 = SISLogistic()
dataset3 = Dataset(model3)

weeks = times / 7.0

dataset3.register(
    name='infected_weekly',
    values=observed_I,
    times=weeks,
    state_variable='I',
    time_unit='weeks',
)

print(f"Dataset time unit: {dataset3.time_unit}")
print(f"Series time unit: {dataset3.series['infected_weekly'].time_unit}")
print(f"Time range: {dataset3.time_range}")
Dataset time unit: weeks
Series time unit: weeks
Time range: (0.0, 14.285714285714286)
[23]:
import pandas as pd

df = pd.DataFrame({
    'day': times,
    'susceptible': observed_S,
    'infected': observed_I,
})

print("Sample data:")
display(df.head(10))

model_df = SISLogistic()
dataset_df = Dataset(model_df)

dataset_df.register_from_dataframe(
    df=df,
    time_column='day',
    mapping={'susceptible': 'S', 'infected': 'I'},
    time_unit='days',
)

print(f"\nRegistered from DataFrame: {list(dataset_df.series.keys())}")
Sample data:
day susceptible infected
0 0.000000 4974.0 86.0
1 0.136966 5079.0 96.0
2 1.506630 5228.0 147.0
3 5.500993 5939.0 468.0
4 10.728546 5859.0 1654.0
5 17.141654 4371.0 3670.0
6 20.590730 3917.0 4286.0
7 24.039805 3726.0 4647.0
8 29.779266 3690.0 5134.0
9 35.469539 3672.0 5419.0

Registered from DataFrame: ['susceptible', 'infected']
[8]:
loss_functions = {
    'SSE': SumOfSquaredErrors(),
    'Poisson': PoissonLikelihood(),
    'NegBinom': NegativeBinomialLikelihood(dispersion=5.0),
    'Normal': NormalLikelihood(),
    'Huber': HuberLoss(delta=10.0),
}

print("Available loss functions:")
for name, lf in loss_functions.items():
    print(f"  - {name}: {lf.__class__.__name__}")
Available loss functions:
  - SSE: SumOfSquaredErrors
  - Poisson: PoissonLikelihood
  - NegBinom: NegativeBinomialLikelihood
  - Normal: NormalLikelihood
  - Huber: HuberLoss
[9]:
results_comparison = {}

for name, loss_fn in loss_functions.items():
    print(f"Fitting with {name}...")

    fitter_comp = ModelFitter(
        model=SISLogistic(),
        dataset=dataset,
        parameters_to_fit=param_specs,
        total_population=INITIAL_SUSCEPTIBLE + INITIAL_INFECTED,
        loss_fn=loss_fn,
        optimizer=ScipyOptimizer(method='L-BFGS-B', max_iterations=100),
    )

    result = fitter_comp.fit()
    results_comparison[name] = result
Fitting with SSE...
Fitting with Poisson...
Fitting with NegBinom...
Fitting with Normal...
Fitting with Huber...
[10]:
print("\n" + "="*90)
print("LOSS FUNCTION COMPARISON (SIS Logistic)")
print("="*90)

print(f"{'Loss':<12} {'beta':>8} {'gamma':>8} {'r':>8} {'k':>10} {'R0':>8} {'LossVal':>12}")
print("-"*90)

print(f"{'TRUE':<12} {TRUE_BETA:>8.3f} {TRUE_GAMMA:>8.3f} {TRUE_R:>8.3f} {TRUE_K:>10.0f} {TRUE_BETA/TRUE_GAMMA:>8.2f} {'-':>12}")
print("-"*90)

for name, result in results_comparison.items():
    beta = result.best_params['beta']
    gamma = result.best_params['gamma']
    r = result.best_params['r']
    k = result.best_params['k']
    r0 = beta / gamma

    print(f"{name:<12} {beta:>8.3f} {gamma:>8.3f} {r:>8.3f} {k:>10.0f} {r0:>8.2f} {result.best_loss:>12.2f}")

==========================================================================================
LOSS FUNCTION COMPARISON (SIS Logistic)
==========================================================================================
Loss             beta    gamma        r          k       R0      LossVal
------------------------------------------------------------------------------------------
TRUE            0.500    0.200    0.100      10000     2.50            -
------------------------------------------------------------------------------------------
SSE             0.618    0.295    0.105      11450     2.10     45545.77
Poisson         0.391    0.083    0.034      10939     4.69       193.59
NegBinom        0.434    0.111    0.406       5000     3.92       256.63
Normal          0.336    0.010    0.010       5000    33.59       124.37
Huber           0.311    0.010    0.010       5022    31.12     62016.12
[12]:
scipy_methods = ['L-BFGS-B', 'Nelder-Mead', 'Powell', 'differential_evolution']
optimizer_results = {}

for method in scipy_methods:
    print(f"Testing {method}...")

    optimizer = ScipyOptimizer(method=method, max_iterations=100)

    fitter_opt = ModelFitter(
        model=SISLogistic(),
        dataset=dataset,
        parameters_to_fit=param_specs,
        total_population=INITIAL_SUSCEPTIBLE + INITIAL_INFECTED,
        optimizer=optimizer,
    )

    result = fitter_opt.fit()
    optimizer_results[method] = result
Testing L-BFGS-B...
Testing Nelder-Mead...
Testing Powell...
Testing differential_evolution...
[13]:
print("\n" + "="*100)
print("OPTIMIZER COMPARISON (SIS Logistic)")
print("="*100)

print(f"{'Method':<25} {'beta':>8} {'gamma':>8} {'r':>8} {'k':>10} {'R0':>8} {'Evals':>8} {'Conv':>8}")
print("-"*100)

for method, result in optimizer_results.items():
    beta = result.best_params['beta']
    gamma = result.best_params['gamma']
    r = result.best_params['r']
    k = result.best_params['k']
    r0 = beta / gamma

    print(f"{method:<25} {beta:>8.3f} {gamma:>8.3f} {r:>8.3f} {k:>10.0f} {r0:>8.2f} {result.n_evaluations:>8} {str(result.convergence):>8}")

====================================================================================================
OPTIMIZER COMPARISON (SIS Logistic)
====================================================================================================
Method                        beta    gamma        r          k       R0    Evals     Conv
----------------------------------------------------------------------------------------------------
L-BFGS-B                     0.618    0.295    0.105      11450     2.10      330     True
Nelder-Mead                  0.618    0.295    0.105      11450     2.10      330     True
Powell                       0.618    0.295    0.105      11450     2.10      330     True
differential_evolution       0.608    0.286    0.104      11315     2.13     6105    False
[17]:
base_optimizer = ScipyOptimizer(method='L-BFGS-B', max_iterations=50)

multi_start = MultiStartOptimizer(
    base_optimizer=base_optimizer,
    n_starts=5,
    sampling_method='latin_hypercube',
    seed=42,
)

fitter_ms = ModelFitter(
    model=SISLogistic(),
    dataset=dataset,
    parameters_to_fit=param_specs,
    total_population=INITIAL_SUSCEPTIBLE + INITIAL_INFECTED,
    optimizer=multi_start,
)

result_ms = fitter_ms.fit()

print(f"Multi-start optimization results:")

print(f"  beta: {result_ms.best_params['beta']:.4f}")
print(f"  gamma: {result_ms.best_params['gamma']:.4f}")
print(f"  r: {result_ms.best_params['r']:.4f}")
print(f"  k: {result_ms.best_params['k']:.0f}")

print(f"  Total evaluations: {result_ms.n_evaluations}")
print(f"  Converged: {result_ms.convergence}")
Multi-start optimization results:
  beta: 0.6176
  gamma: 0.2948
  r: 0.1048
  k: 11450
  Total evaluations: 1440
  Converged: True
[21]:
fitter_fixed = ModelFitter(
    model=SISLogistic(),
    dataset=dataset,
    parameters_to_fit=[
        ParameterSpec(name='beta', bounds=(0.1, 1.0), initial=0.7),
        ParameterSpec(name='gamma',bounds=(0.01, 0.5), initial=0.3)
    ],
    total_population=INITIAL_SUSCEPTIBLE + INITIAL_INFECTED,
    fixed_params={'r': TRUE_R, 'k': TRUE_K},
)

result_fixed = fitter_fixed.fit()

print("Fitting with fixed gamma:")
print(f"  Fitted beta: {result_fixed.best_params['beta']:.4f} (true: {TRUE_BETA})")
print(f"  Fitted gamma: {result_fixed.best_params['gamma']:.4f} (true: {TRUE_GAMMA})")
print(f"  Fixed r: {TRUE_R}")
print(f"  Fixed k: {TRUE_K}")
Fitting with fixed gamma:
  Fitted beta: 0.5215 (true: 0.5)
  Fitted gamma: 0.2090 (true: 0.2)
  Fixed r: 0.1
  Fixed k: 10000
C:\Users\darla\epimodels\epimodels\fitting\base.py:200: UserWarning: Series 'infected': values exceed total population
  warnings.warn(warning, UserWarning)
[27]:
param_specs_log = [
    ParameterSpec(
        name='beta',
        bounds=(0.01, 1.0),
        initial=0.3,
        log_scale=True,
    ),
    ParameterSpec(
        name='gamma',
        bounds=(0.01, 0.5),
        initial=0.1,
        log_scale=True,
    ),
    ParameterSpec(
        name='r',
        bounds=(0.01, 0.5),
        initial=0.05,
        log_scale=True,
    ),
    ParameterSpec(
        name='k',
        bounds=(1000, 20000),
        initial=8000,
        log_scale=True,
    ),
]

print("Log-scale parameter specs:")
for spec in param_specs_log:
    print(f"  {spec.name}: bounds={spec.bounds}, log_scale={spec.log_scale}")
Log-scale parameter specs:
  beta: bounds=(0.01, 1.0), log_scale=True
  gamma: bounds=(0.01, 0.5), log_scale=True
  r: bounds=(0.01, 0.5), log_scale=True
  k: bounds=(1000, 20000), log_scale=True
[30]:
profile_result = fitter.profile_likelihood(
    param_name='gamma',
    n_points=20,
    threshold=3.84,
)

print("Profile likelihood analysis for gamma:")
print(f"  Minimum loss: {profile_result['min_loss']:.2f}")
print(f"  95% CI threshold: {profile_result['threshold_loss']:.2f}")
print(f"  Confidence interval: {profile_result['confidence_interval']}")
Profile likelihood analysis for gamma:
  Minimum loss: 45528.81
  95% CI threshold: 45530.73
  Confidence interval: (np.float64(0.29368421052631577), np.float64(0.29368421052631577))
[31]:
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(profile_result['values'], profile_result['losses'], 'b-', linewidth=2)
ax.axhline(y=profile_result['threshold_loss'], color='r', linestyle='--',
           label=f"95% CI threshold")
ax.axvline(x=TRUE_GAMMA, color='g', linestyle=':', label=f"True gamma = {TRUE_GAMMA}")

ci = profile_result['confidence_interval']
if ci[0] is not None and ci[1] is not None:
    ax.axvspan(ci[0], ci[1], alpha=0.2, color='blue', label=f"95% CI: [{ci[0]:.3f}, {ci[1]:.3f}]")

ax.set_xlabel('gamma', fontsize=12)
ax.set_ylabel('Loss', fontsize=12)
ax.set_title('Profile Likelihood for gamma', fontsize=14)
ax.legend(fontsize=10)
plt.tight_layout()
plt.show()
../_images/Examples_SISLogistic_fitting_21_0.png