Running Multiple Replications in SimPy: Statistical Confidence
One simulation run proves nothing. Run it again—different results. Which is right? Both. Neither. You need replications.
Why Multiple Replications?
A single simulation run is one sample from a distribution of possible outcomes. You wouldn't draw conclusions from one data point. Don't draw them from one simulation.
Multiple replications give you: - Estimate of the mean - Estimate of the variance - Confidence intervals - Statistical validity
Basic Replication Pattern
import simpy
import random
import numpy as np
def run_single_replication(seed, run_time=1000):
    """Run one simulation replication."""
    # Seed the global RNG so this replication is fully reproducible.
    random.seed(seed)
    env = simpy.Environment()
    # ... setup and run simulation
    # NOTE(review): this is a skeleton — the elided setup code must define
    # `mean_wait_time` (the replication's output statistic) before this
    # return, otherwise it raises NameError.
    return mean_wait_time
def run_replications(n_replications, base_seed=42, run_time=1000):
    """Run multiple independent replications.

    Args:
        n_replications: number of replications to run.
        base_seed: seed of the first replication; replication i uses
            base_seed + i, so runs are distinct but reproducible.
        run_time: simulated time horizon, forwarded to each replication
            (previously hard-coded inside run_single_replication's default).

    Returns:
        List with one result value per replication.
    """
    results = []
    for i in range(n_replications):
        # A distinct seed per replication gives independent samples.
        result = run_single_replication(seed=base_seed + i, run_time=run_time)
        results.append(result)
        print(f"Replication {i+1}: {result:.2f}")
    return results
# Demo: 30 replications, then summarise across the runs.
results = run_replications(30)
print(f"\nOverall mean: {np.mean(results):.2f}")
# ddof=1 -> sample standard deviation (unbiased for the replication sample).
print(f"Std deviation: {np.std(results, ddof=1):.2f}")
How Many Replications?
Rule of thumb: at least 10-30.
More precisely: run enough replications to reach your target precision — for example, a desired confidence-interval half-width.
def required_replications(pilot_results, target_half_width, confidence=0.95):
    """Estimate how many replications achieve a target CI half-width.

    Uses the pilot sample's standard deviation s and the two-sided
    Student-t critical value t: the half-width formula rearranges to
    n >= (t * s / h)^2.
    """
    from scipy import stats

    sample_sd = np.std(pilot_results, ddof=1)
    dof = len(pilot_results) - 1
    # Two-sided critical value at the requested confidence level.
    critical_t = stats.t.ppf((1 + confidence) / 2, df=dof)
    estimate = (critical_t * sample_sd / target_half_width) ** 2
    return int(np.ceil(estimate))
# Pilot study: a small batch of runs to estimate variability, then
# size the full experiment from that estimate.
pilot = run_replications(10)
needed = required_replications(pilot, target_half_width=0.5)
print(f"Need approximately {needed} replications")
Confidence Intervals
from scipy import stats
def confidence_interval(data, confidence=0.95):
    """Return (mean, lower, upper) — a t-based CI on the mean of data."""
    n = len(data)
    centre = np.mean(data)
    # Standard error of the mean, then the two-sided t critical value.
    std_err = stats.sem(data)
    critical = stats.t.ppf((1 + confidence) / 2, df=n - 1)
    half_width = critical * std_err
    return centre, centre - half_width, centre + half_width
# Report the point estimate together with its 95% confidence interval.
results = run_replications(30)
mean, lower, upper = confidence_interval(results)
print(f"Mean: {mean:.2f}")
print(f"95% CI: [{lower:.2f}, {upper:.2f}]")
Parallel Replications
Speed up with parallel processing:
from multiprocessing import Pool
def run_single_replication_wrapper(args):
    """Adapter so Pool.map (single-argument) can pass a (seed, run_time) pair."""
    rep_seed, horizon = args
    return run_single_replication(rep_seed, horizon)
def run_parallel_replications(n_replications, base_seed=42, n_workers=4,
                              run_time=1000):
    """Run replications across a process pool.

    Args:
        n_replications: number of replications to run.
        base_seed: seed of the first replication; replication i uses
            base_seed + i for reproducible, distinct runs.
        n_workers: number of worker processes in the Pool.
        run_time: simulated time horizon for each replication
            (previously hard-coded to 1000 in the args list).

    Returns:
        List of results in replication order (Pool.map preserves order).
    """
    # One (seed, run_time) pair per replication.
    args = [(base_seed + i, run_time) for i in range(n_replications)]
    with Pool(n_workers) as pool:
        results = pool.map(run_single_replication_wrapper, args)
    return results
# Much faster for large replication counts
# NOTE: multiprocessing requires run_single_replication to be importable
# (defined at module top level); each replication re-seeds its own RNG.
results = run_parallel_replications(100, n_workers=8)
Comparing Scenarios
Run replications for each scenario:
def run_scenario(scenario_config, n_replications=30, base_seed=42):
    """Run n_replications of one scenario and return the per-run results.

    scenario_config is forwarded as keyword arguments to the
    simulation function; replication i uses seed base_seed + i.
    """
    return [
        run_single_replication(seed=base_seed + rep, **scenario_config)
        for rep in range(n_replications)
    ]
# Compare 2 servers vs 3 servers.
# Both calls use the same default base_seed, so the scenarios share
# seeds replication-for-replication (common random numbers).
scenario_a = run_scenario({'num_servers': 2})
scenario_b = run_scenario({'num_servers': 3})
print(f"2 servers: {np.mean(scenario_a):.2f} ± {np.std(scenario_a, ddof=1):.2f}")
print(f"3 servers: {np.mean(scenario_b):.2f} ± {np.std(scenario_b, ddof=1):.2f}")
Paired Comparisons
For a fair comparison, drive both scenarios with the same random numbers — the common random numbers (CRN) variance-reduction technique:
def run_paired_comparison(config_a, config_b, n_replications=30, base_seed=42):
    """Run paired replications using common random numbers.

    Each pair of runs shares one seed, so both configurations see the
    same random stream; differencing the results then cancels much of
    the shared noise, tightening the comparison.

    Args:
        config_a, config_b: keyword-argument dicts for the two scenarios.
        n_replications: number of paired runs.
        base_seed: seed of the first pair (previously hard-coded to 42);
            pair i uses base_seed + i.

    Returns:
        List of per-pair differences (result_a - result_b).
    """
    differences = []
    for i in range(n_replications):
        seed = base_seed + i  # same seed for both configs -> a true pair
        result_a = run_single_replication(seed=seed, **config_a)
        result_b = run_single_replication(seed=seed, **config_b)
        differences.append(result_a - result_b)
    return differences
diffs = run_paired_comparison({'servers': 2}, {'servers': 3})
mean_diff, lower, upper = confidence_interval(diffs)
# Differences are A - B, so a CI entirely above zero means scenario A's
# metric is significantly higher; entirely below zero means B's is.
if lower > 0:
    print("Scenario A is significantly higher")
elif upper < 0:
    print("Scenario B is significantly higher")
else:
    print("No significant difference")
Complete Replication Manager
class ReplicationManager:
    """Runs named batches of replications, then summarises and compares them.

    The wrapped simulation function must accept a `seed` keyword plus any
    scenario-specific keyword arguments and return a single numeric result.
    """

    def __init__(self, simulation_func, base_seed=42):
        self.simulation_func = simulation_func
        self.base_seed = base_seed
        # Maps scenario name -> list of per-replication result values.
        self.results = {}

    def run(self, scenario_name, config, n_replications=30):
        """Run replications for a scenario."""
        collected = []
        for i in range(n_replications):
            # Replication i uses base_seed + i: distinct yet reproducible.
            result = self.simulation_func(seed=self.base_seed + i, **config)
            collected.append(result)
            print(f"{scenario_name} rep {i+1}/{n_replications}: {result:.3f}")
        self.results[scenario_name] = collected
        return collected

    def summary(self, scenario_name):
        """Get summary statistics for a scenario."""
        values = self.results[scenario_name]
        centre, lo, hi = confidence_interval(values)
        summary_stats = {'n': len(values), 'mean': centre}
        summary_stats['std'] = np.std(values, ddof=1)
        summary_stats['ci_lower'] = lo
        summary_stats['ci_upper'] = hi
        summary_stats['min'] = min(values)
        summary_stats['max'] = max(values)
        return summary_stats

    def compare(self, scenario_a, scenario_b):
        """Compare two scenarios with a paired t-test on their results."""
        from scipy import stats

        sample_a = self.results[scenario_a]
        sample_b = self.results[scenario_b]
        # Paired test is appropriate because runs share seeds pairwise.
        t_stat, p_value = stats.ttest_rel(sample_a, sample_b)
        mean_a = np.mean(sample_a)
        mean_b = np.mean(sample_b)
        return {
            'mean_a': mean_a,
            'mean_b': mean_b,
            'difference': mean_a - mean_b,
            't_statistic': t_stat,
            'p_value': p_value,
            'significant': p_value < 0.05,
        }
# Usage
# NOTE(review): this passes a `servers` kwarg, but the skeleton
# run_single_replication above only accepts (seed, run_time) — confirm
# the real simulation function's signature matches.
manager = ReplicationManager(run_single_replication)
manager.run('baseline', {'servers': 2}, n_replications=30)
manager.run('improved', {'servers': 3}, n_replications=30)
print("\nBaseline:", manager.summary('baseline'))
print("Improved:", manager.summary('improved'))
print("Comparison:", manager.compare('baseline', 'improved'))
Storing Results
Save results for later analysis:
import json
import pandas as pd
def save_results(results, filename):
    """Save replication results as a long-format CSV.

    One row per replication with columns: scenario, replication, value.
    `results` maps scenario name -> list of per-replication values.
    """
    rows = []
    for scenario, values in results.items():
        for i, value in enumerate(values):
            rows.append({'scenario': scenario, 'replication': i, 'value': value})
    pd.DataFrame(rows).to_csv(filename, index=False)
def load_results(filename):
    """Load replication results saved by save_results.

    Returns a dict mapping scenario name -> list of result values,
    reconstructed from the long-format CSV.
    """
    frame = pd.read_csv(filename)
    return {
        name: frame.loc[frame['scenario'] == name, 'value'].tolist()
        for name in frame['scenario'].unique()
    }
Summary
Multiple replications: - Run at least 10-30 per scenario - Use different seeds for each replication - Calculate confidence intervals - Use paired comparisons for scenarios - Parallelise for speed
One run tells you nothing. Many runs tell you everything.
Next Steps
Discover the Power of Simulation
Want to become a go-to expert in simulation with Python? The Complete Simulation Bootcamp will show you how simulation can transform your career and your projects.
Explore the Bootcamp