SimPy with NumPy: Better Randomness and Faster Analysis
Python's random module is fine. NumPy's random is better. More distributions. Better seeding. Faster analysis.
Why NumPy?
- More probability distributions
- Better random number generation
- Reproducibility with modern API
- Fast array operations for analysis
NumPy's Random Generator
import numpy as np

# Modern API (recommended): a Generator from default_rng, seeded for
# reproducibility. Prefer this over the legacy np.random.* module functions.
rng = np.random.default_rng(seed=42)

# Generate random numbers
rng.random()              # Uniform [0, 1)
rng.uniform(5, 10)        # Uniform [5, 10) -- half-open, high excluded
rng.exponential(5)        # Exponential, mean 5 (argument is the scale)
rng.normal(10, 2)         # Normal, mean 10, std 2
rng.poisson(5)            # Poisson, lambda 5
rng.gamma(2, 2)           # Gamma, shape 2, scale 2 (mean = shape * scale)
rng.lognormal(1, 0.5)     # Lognormal (mean/sigma of the underlying normal)
rng.triangular(2, 5, 10)  # Triangular: argument order is (min, MODE, max)
rng.integers(1, 10)       # Integer in [1, 10) -- high endpoint excluded
Distributions for Simulation
Interarrival Times
# Poisson process (exponential interarrivals): gaps between arrivals are
# memoryless; mean_interarrival = 1 / arrival_rate
rng.exponential(mean_interarrival)
# Erlang-k (sum of k exponentials): same mean, lower variance; scale is
# chosen so shape * scale == mean
rng.gamma(shape=k, scale=mean/k)
Service Times
# Exponential: memoryless, highly variable service durations
rng.exponential(mean_service)
# Lognormal (common for human tasks): right-skewed, strictly positive;
# mu and sigma parameterise the underlying normal, not the lognormal itself
rng.lognormal(mu, sigma)
# Triangular (when you have min, max, mode).
# NOTE: NumPy's argument order is (left, mode, right) == (min, MODE, max);
# passing (low, high, mode) raises ValueError whenever high > mode.
rng.triangular(low, mode, high)
# Gamma (flexible shape): mean = shape * scale
rng.gamma(shape, scale)
Discrete Choices
# Choose one option uniformly at random
options = ['A', 'B', 'C']
rng.choice(options)
# Weighted choice -- p gives per-option probabilities and must sum to 1
weights = [0.5, 0.3, 0.2]
rng.choice(options, p=weights)
Per-Process Random Streams
class Simulation:
    """Simulation whose stochastic processes each own an independent RNG stream.

    Sub-seeds are drawn from a single master generator, so changing one
    master seed reproducibly re-randomises every stream at once while the
    streams stay decoupled from one another.
    """

    def __init__(self, master_seed):
        seeder = np.random.default_rng(master_seed)
        # Five sub-seeds drawn; three used now, leaving room to grow.
        sub_seeds = seeder.integers(0, 2**31, size=5)
        arrival_seed, service_seed, failure_seed = sub_seeds[:3]
        # One dedicated generator per process.
        self.arrival_rng = np.random.default_rng(arrival_seed)
        self.service_rng = np.random.default_rng(service_seed)
        self.failure_rng = np.random.default_rng(failure_seed)

    def next_arrival(self):
        """Time until the next arrival (exponential, mean 5)."""
        return self.arrival_rng.exponential(5)

    def next_service(self):
        """Duration of one service (lognormal, mu=1.5, sigma=0.5)."""
        return self.service_rng.lognormal(1.5, 0.5)

    def next_failure(self):
        """Time until the next failure (exponential, mean 100)."""
        return self.failure_rng.exponential(100)
Array Operations for Analysis
# Collect wait times as numpy array (append to a list during the run,
# convert once for analysis)
wait_times = np.array(stats['wait_times'])
# Fast statistics -- each computed in C, not a Python loop
np.mean(wait_times)
np.median(wait_times)
np.std(wait_times)
np.percentile(wait_times, [50, 90, 95, 99])
np.min(wait_times)
np.max(wait_times)
# Histogram: per-bin counts plus bin edges (len(bins) == len(counts) + 1)
counts, bins = np.histogram(wait_times, bins=30)
# Correlation matrix; the [0, 1] entry is the wait/service correlation
np.corrcoef(wait_times, service_times)
Generating Correlated Random Variables
def generate_correlated(rng, mean1, mean2, correlation, n):
    """Generate two correlated exponential samples via a Gaussian copula.

    Parameters
    ----------
    rng : numpy.random.Generator
    mean1, mean2 : float
        Means of the two exponential marginals.
    correlation : float
        Target correlation of the underlying normals, in [-1, 1].
    n : int
        Number of sample pairs.

    Returns
    -------
    (x1, x2) : two length-n non-negative arrays with exponential marginals.

    The earlier draft mixed two uniforms linearly; the mix is neither
    uniform nor confined to (0, 1), so -log() of it could be undefined
    (log of 0) or positive (values > 1), yielding negative "exponential"
    samples. Correlating standard normals and mapping them through the
    normal CDF keeps both marginals exactly uniform before the
    exponential inverse-CDF transform.
    """
    from scipy.special import ndtr  # vectorised standard normal CDF

    # Correlated standard normals: corr(z1, z2) == correlation exactly.
    z1 = rng.standard_normal(n)
    z2 = correlation * z1 + np.sqrt(1.0 - correlation**2) * rng.standard_normal(n)

    # Map to (0, 1); the clip guards against u hitting 0 or 1 in the
    # extreme tails, which would make the log below infinite.
    tiny = 1e-12
    u1 = np.clip(ndtr(z1), tiny, 1.0 - tiny)
    u2 = np.clip(ndtr(z2), tiny, 1.0 - tiny)

    # Exponential inverse CDF: x = -mean * ln(1 - u).
    x1 = -mean1 * np.log1p(-u1)
    x2 = -mean2 * np.log1p(-u2)
    return x1, x2
Batch Statistics
def batch_means(data, batch_size):
    """Estimate a mean and its standard error with the batch-means method.

    Splits `data` into consecutive non-overlapping batches of `batch_size`
    observations (any trailing partial batch is dropped) and treats the
    batch means as approximately independent samples -- the standard way
    to build confidence intervals from autocorrelated simulation output.

    Parameters
    ----------
    data : array-like of numbers
    batch_size : int
        Observations per batch; must be positive.

    Returns
    -------
    dict with keys 'overall_mean', 'std_error', 'batch_means'.

    Raises
    ------
    ValueError
        If `data` does not contain at least one full batch.
    """
    data = np.asarray(data, dtype=float)
    n_batches = len(data) // batch_size
    if n_batches == 0:
        raise ValueError(
            f"need at least {batch_size} observations, got {len(data)}"
        )
    # Drop the incomplete tail, then average within each batch.
    means = data[:n_batches * batch_size].reshape(n_batches, batch_size).mean(axis=1)
    return {
        'overall_mean': means.mean(),
        # ddof=1: sample std of the batch means. The original used the
        # population std (ddof=0), which biases the standard error low.
        'std_error': means.std(ddof=1) / np.sqrt(n_batches),
        'batch_means': means,
    }
Empirical Distribution
class EmpiricalDistribution:
    """Draw samples directly from a set of historical observations.

    Rather than fitting a parametric distribution, resample the observed
    values themselves: every historical value is equally likely on each
    draw (sampling with replacement).
    """

    def __init__(self, data, rng=None):
        self.data = np.array(data)
        # Fall back to an unseeded generator when no stream is supplied.
        self.rng = np.random.default_rng() if rng is None else rng

    def sample(self):
        """Return one value drawn uniformly from the data."""
        return self.rng.choice(self.data)

    def sample_n(self, n):
        """Return n values drawn with replacement from the data."""
        return self.rng.choice(self.data, size=n, replace=True)
# Usage with historical data ('...' here is a prose placeholder for the
# rest of the observed sample, not runnable code)
historical_service_times = np.array([4.2, 5.1, 3.8, 6.2, 4.9, ...])
service_dist = EmpiricalDistribution(historical_service_times, rng)
# SimPy process: each customer's service time is resampled from history
def customer(env):
yield env.timeout(service_dist.sample())
Fitting Distributions
from scipy import stats

# Fit several candidate distributions to the observed service times and
# keep whichever minimises the AIC (lower = better fit after penalising
# the number of fitted parameters).
data = np.array(service_times)
candidates = ['expon', 'gamma', 'lognorm', 'weibull_min']

best_fit = None
best_aic = float('inf')
for name in candidates:
    dist = getattr(stats, name)
    params = dist.fit(data)  # maximum-likelihood parameter estimates
    # Kolmogorov-Smirnov goodness-of-fit test (statistic and p-value)
    ks_stat, p_value = stats.kstest(data, name, params)
    # Akaike information criterion: 2k - 2*ln(L)
    log_likelihood = np.sum(dist.logpdf(data, *params))
    aic = 2 * len(params) - 2 * log_likelihood
    if aic < best_aic:
        best_aic, best_fit = aic, (name, params)
print(f"Best fit: {best_fit[0]} with params {best_fit[1]}")
Vectorised Simulation Analysis
# Instead of loops for statistics
wait_times = np.array([...])
service_times = np.array([...])
# Vectorised calculations
total_times = wait_times + service_times
efficiency = service_times / total_times
above_threshold = np.sum(wait_times > 10)
# Boolean indexing
long_waits = wait_times[wait_times > 10]
Common Random Numbers
def run_with_crn(config_a, config_b, n_samples):
    """Compare two configurations using common random numbers (CRN).

    Both scenarios consume identical pre-drawn arrival and service
    sequences, so any difference in the results is attributable to the
    configurations rather than to sampling noise -- a classic
    variance-reduction technique for paired comparisons.
    """
    rng = np.random.default_rng(42)
    # Draw the shared random inputs exactly once.
    arrivals = rng.exponential(5, n_samples)
    services = rng.lognormal(1.5, 0.5, n_samples)
    # Feed the very same numbers to both scenarios.
    result_a = run_scenario(config_a, arrivals, services)
    result_b = run_scenario(config_b, arrivals, services)
    # Paired differences: average effect and its spread.
    deltas = result_a - result_b
    return deltas.mean(), deltas.std()
Complete Integration
import simpy
import numpy as np
class NumpySimulation:
    """SimPy queueing model wired to NumPy random streams and analysis.

    Two independent generators (arrivals, services) are derived from one
    master seed; observations accumulate in plain lists during the run and
    are converted to arrays once, at analysis time.
    """

    def __init__(self, env, master_seed):
        self.env = env
        # Derive one independent stream per stochastic process.
        master = np.random.default_rng(master_seed)
        seeds = master.integers(0, 2**31, size=3)
        self.arrival_rng = np.random.default_rng(seeds[0])
        self.service_rng = np.random.default_rng(seeds[1])
        # Two parallel servers sharing one queue.
        self.server = simpy.Resource(env, capacity=2)
        # Raw observations; converted to numpy arrays in analyse().
        self.wait_times = []
        self.service_times = []

    def customer(self, cid):
        """One customer: queue for a server, then hold it for service."""
        arrived_at = self.env.now
        with self.server.request() as req:
            yield req
            waited = self.env.now - arrived_at
            duration = self.service_rng.lognormal(1.5, 0.5)
            yield self.env.timeout(duration)
            # Only customers who finish service are recorded.
            self.wait_times.append(waited)
            self.service_times.append(duration)

    def arrivals(self):
        """Poisson arrival process: exponential gaps with mean 3."""
        cid = 0
        while True:
            yield self.env.timeout(self.arrival_rng.exponential(3))
            self.env.process(self.customer(cid))
            cid += 1

    def run(self, duration):
        """Start the arrival process and advance the clock to `duration`."""
        self.env.process(self.arrivals())
        self.env.run(until=duration)

    def analyse(self):
        """Summarise completed customers as a dict of scalar statistics."""
        waits = np.array(self.wait_times)
        services = np.array(self.service_times)
        return {
            'n_customers': len(waits),
            'mean_wait': waits.mean(),
            'std_wait': waits.std(),
            'median_wait': np.median(waits),
            'p90_wait': np.percentile(waits, 90),
            'p95_wait': np.percentile(waits, 95),
            'max_wait': waits.max(),
            'mean_service': services.mean(),
            'correlation': np.corrcoef(waits, services)[0, 1]
        }
# Run the model for 1000 time units and print the summary statistics
env = simpy.Environment()
sim = NumpySimulation(env, master_seed=42)  # fixed seed -> reproducible run
sim.run(1000)
print(sim.analyse())
Summary
NumPy for SimPy:
- Modern random API with default_rng()
- More distributions than random
- Separate streams for reproducibility
- Fast array operations for analysis
- Vectorised calculations
Better randomness. Faster analysis.
Next Steps
Discover the Power of Simulation
Want to become a go-to expert in simulation with Python? The Complete Simulation Bootcamp will show you how simulation can transform your career and your projects.
Explore the Bootcamp