SimPy with NumPy: Better Randomness and Faster Analysis

Python's built-in random module is fine for simple cases, but NumPy's random subsystem is better: more distributions, better seeding, and faster analysis.

Why NumPy?

NumPy's Random Generator

import numpy as np

# Modern API (recommended)
rng = np.random.default_rng(seed=42)

# Generate random numbers
rng.random()                    # Uniform [0, 1)
rng.uniform(5, 10)              # Uniform [5, 10)
rng.exponential(5)              # Exponential, mean 5
rng.normal(10, 2)               # Normal, mean 10, std 2
rng.poisson(5)                  # Poisson, lambda 5
rng.gamma(2, 2)                 # Gamma, shape 2, scale 2
rng.lognormal(1, 0.5)           # Lognormal
rng.triangular(2, 5, 10)        # Triangular: left 2, mode 5, right 10
rng.integers(1, 10)             # Integer [1, 10)

Distributions for Simulation

Interarrival Times

# Poisson process (exponential interarrivals)
# NOTE: mean_interarrival is the average time between arrivals -- must be
# defined by the caller before this runs.
rng.exponential(mean_interarrival)

# Erlang (sum of exponentials)
# Erlang-k is a Gamma with integer shape k; scale = mean/k preserves the
# overall mean while reducing variance as k grows.
rng.gamma(shape=k, scale=mean/k)

Service Times

# Exponential
rng.exponential(mean_service)

# Lognormal (common for human tasks)
rng.lognormal(mu, sigma)

# Triangular (when you have min, max, mode)
# NumPy's signature is triangular(left, mode, right) -- the mode goes in
# the middle, not last.
rng.triangular(low, mode, high)

# Gamma (flexible shape)
rng.gamma(shape, scale)

Discrete Choices

# Choose from options (uniform over the list)
options = ['A', 'B', 'C']
rng.choice(options)

# Weighted choice
# p must have the same length as options and sum to 1.
weights = [0.5, 0.3, 0.2]
rng.choice(options, p=weights)

Per-Process Random Streams

class Simulation:
    """Simulation with one independent random stream per stochastic process.

    Deriving each stream's seed from a single master seed keeps the whole
    run reproducible while keeping the streams statistically independent,
    so changing one process (e.g. failures) never perturbs the others.
    """

    def __init__(self, master_seed):
        master_rng = np.random.default_rng(master_seed)

        # One sub-seed per stream (the original drew 5 but used only 3).
        seeds = master_rng.integers(0, 2**31, size=3)

        # Separate streams
        self.arrival_rng = np.random.default_rng(seeds[0])
        self.service_rng = np.random.default_rng(seeds[1])
        self.failure_rng = np.random.default_rng(seeds[2])

    def next_arrival(self):
        """Next interarrival time: exponential with mean 5."""
        return self.arrival_rng.exponential(5)

    def next_service(self):
        """Next service duration: lognormal(mu=1.5, sigma=0.5)."""
        return self.service_rng.lognormal(1.5, 0.5)

    def next_failure(self):
        """Next time-to-failure: exponential with mean 100."""
        return self.failure_rng.exponential(100)

Array Operations for Analysis

# Collect wait times as numpy array
# NOTE: assumes stats['wait_times'] is a list of numbers collected during
# the run -- verify against the simulation that populates it.
wait_times = np.array(stats['wait_times'])

# Fast statistics (computed in C, far faster than pure-Python loops)
np.mean(wait_times)
np.median(wait_times)
np.std(wait_times)
np.percentile(wait_times, [50, 90, 95, 99])  # several percentiles in one call
np.min(wait_times)
np.max(wait_times)

# Histogram
counts, bins = np.histogram(wait_times, bins=30)

# Correlation
# service_times must be an array of the same length as wait_times.
np.corrcoef(wait_times, service_times)

Generating Correlated Random Variables

def generate_correlated(rng, mean1, mean2, correlation, n):
    """Generate two correlated exponential samples via a Gaussian copula.

    Parameters
    ----------
    rng : numpy.random.Generator
        Source of randomness.
    mean1, mean2 : float
        Means of the two exponential marginals.
    correlation : float
        Correlation of the underlying Gaussian copula, in [-1, 1].  The
        correlation of the exponential outputs has the same sign but a
        somewhat smaller magnitude.
    n : int
        Number of sample pairs.

    Returns
    -------
    tuple of ndarray
        (x1, x2), each of length n, with exponential marginals.

    Notes
    -----
    The original version mixed uniforms linearly, which could push the
    "uniform" above 1 (log of a value > 1 gives negative times) and took
    log(u) with u possibly 0.  Correlating standard normals and mapping
    them through the normal CDF keeps both marginals valid.
    """
    from scipy.stats import norm  # local import: scipy is used only here

    # Correlated standard normals.
    z1 = rng.standard_normal(n)
    z2 = correlation * z1 + np.sqrt(1 - correlation**2) * rng.standard_normal(n)

    # Map to uniforms in (0, 1) via the normal CDF (the Gaussian copula).
    u1 = norm.cdf(z1)
    u2 = norm.cdf(z2)

    # Inverse-CDF transform to exponentials; log1p(-u) avoids log(0).
    x1 = -mean1 * np.log1p(-u1)
    x2 = -mean2 * np.log1p(-u2)

    return x1, x2

Batch Statistics

def batch_means(data, batch_size):
    """Batch-means estimate of the mean and its standard error.

    Splits *data* into consecutive non-overlapping batches of
    *batch_size*, discarding the trailing remainder.  Batch means are
    approximately independent even for autocorrelated simulation output,
    so their spread yields a usable standard error for confidence
    intervals.

    Returns a dict with 'overall_mean', 'std_error' and 'batch_means'.

    Raises
    ------
    ValueError
        If fewer than two complete batches are available (a standard
        error needs at least two points).
    """
    data = np.asarray(data)
    n_batches = len(data) // batch_size
    if n_batches < 2:
        raise ValueError("need at least two complete batches")

    # Drop the remainder, then reshape to one row per batch.
    means = data[:n_batches * batch_size].reshape(n_batches, batch_size).mean(axis=1)

    return {
        'overall_mean': means.mean(),
        # Sample std (ddof=1) is the conventional choice for a std error.
        'std_error': means.std(ddof=1) / np.sqrt(n_batches),
        'batch_means': means,
    }

Empirical Distribution

class EmpiricalDistribution:
    """Draw values directly from observed (historical) data.

    Resampling the observations sidesteps choosing a parametric
    distribution: the empirical distribution *is* the model.
    """

    def __init__(self, data, rng=None):
        # Keep an array copy of the observations and a dedicated generator.
        self.data = np.array(data)
        if rng is None:
            rng = np.random.default_rng()
        self.rng = rng

    def sample(self):
        """Return a single value drawn uniformly from the data."""
        return self.rng.choice(self.data)

    def sample_n(self, n):
        """Return *n* values drawn with replacement from the data."""
        return self.rng.choice(self.data, size=n, replace=True)

# Usage with historical data
# NOTE: the literal `...` below is a placeholder for the remaining
# observations -- substitute real data before running this snippet.
historical_service_times = np.array([4.2, 5.1, 3.8, 6.2, 4.9, ...])
service_dist = EmpiricalDistribution(historical_service_times, rng)

def customer(env):
    # Service time resampled from the empirical distribution above.
    yield env.timeout(service_dist.sample())

Fitting Distributions

from scipy import stats

# Fit distribution to data
# NOTE: service_times must be defined earlier (collected simulation output).
data = np.array(service_times)

# Try several distributions, keeping the one with the lowest AIC.
distributions = ['expon', 'gamma', 'lognorm', 'weibull_min']
best_fit = None
best_aic = float('inf')

for dist_name in distributions:
    dist = getattr(stats, dist_name)
    # Maximum-likelihood fit; returns the (shape..., loc, scale) tuple.
    params = dist.fit(data)

    # Kolmogorov-Smirnov test
    # Goodness-of-fit check: a small p_value indicates a poor fit.
    ks_stat, p_value = stats.kstest(data, dist_name, params)

    # AIC
    # Lower is better: penalises extra parameters (k) against likelihood.
    log_likelihood = np.sum(dist.logpdf(data, *params))
    k = len(params)
    aic = 2 * k - 2 * log_likelihood

    if aic < best_aic:
        best_aic = aic
        best_fit = (dist_name, params)

print(f"Best fit: {best_fit[0]} with params {best_fit[1]}")

Vectorised Simulation Analysis

# Instead of loops for statistics
# (the [...] literals are placeholders for collected results)
wait_times = np.array([...])
service_times = np.array([...])

# Vectorised calculations
total_times = wait_times + service_times
efficiency = service_times / total_times  # fraction of total time in service
above_threshold = np.sum(wait_times > 10)  # count of waits over 10

# Boolean indexing
long_waits = wait_times[wait_times > 10]

Common Random Numbers

def run_with_crn(config_a, config_b, n_samples):
    """Compare two configurations with common random numbers (CRN).

    Both scenarios consume the exact same arrival and service draws, so
    any difference in outcomes reflects the configurations rather than
    sampling noise -- a classic variance-reduction technique.
    """
    stream = np.random.default_rng(42)

    # Draw every random quantity once, up front.
    interarrivals = stream.exponential(5, n_samples)
    durations = stream.lognormal(1.5, 0.5, n_samples)

    # Feed the identical draws to both scenarios.
    outcome_a = run_scenario(config_a, interarrivals, durations)
    outcome_b = run_scenario(config_b, interarrivals, durations)

    # Paired comparison: mean and spread of the per-sample gaps.
    paired = outcome_a - outcome_b
    return paired.mean(), paired.std()

Complete Integration

import simpy
import numpy as np

class NumpySimulation:
    """SimPy queueing simulation wired to NumPy random streams.

    A single master seed derives one independent sub-stream per
    stochastic process, keeping runs reproducible; results are collected
    in lists and converted to NumPy arrays for analysis.
    """

    def __init__(self, env, master_seed):
        self.env = env

        # Random streams: one sub-seed per process (the original drew
        # 3 seeds but used only 2).
        master = np.random.default_rng(master_seed)
        seeds = master.integers(0, 2**31, size=2)

        self.arrival_rng = np.random.default_rng(seeds[0])
        self.service_rng = np.random.default_rng(seeds[1])

        # Resources
        self.server = simpy.Resource(env, capacity=2)

        # Results (converted to numpy arrays in analyse())
        self.wait_times = []
        self.service_times = []

    def customer(self, cid):
        """One customer: queue for the server, get served, record stats.

        Stats are appended only after service completes, so customers
        still in service when the run ends are not counted.
        """
        arrival = self.env.now

        with self.server.request() as req:
            yield req
            wait = self.env.now - arrival

            service = self.service_rng.lognormal(1.5, 0.5)
            yield self.env.timeout(service)

        self.wait_times.append(wait)
        self.service_times.append(service)

    def arrivals(self):
        """Spawn customers with exponential(3) interarrival times, forever."""
        cid = 0
        while True:
            yield self.env.timeout(self.arrival_rng.exponential(3))
            self.env.process(self.customer(cid))
            cid += 1

    def run(self, duration):
        """Start the arrival process and run the simulation for *duration*."""
        self.env.process(self.arrivals())
        self.env.run(until=duration)

    def analyse(self):
        """Summarise the collected wait/service statistics as a dict."""
        waits = np.array(self.wait_times)
        services = np.array(self.service_times)

        return {
            'n_customers': len(waits),
            'mean_wait': waits.mean(),
            'std_wait': waits.std(),
            'median_wait': np.median(waits),
            'p90_wait': np.percentile(waits, 90),
            'p95_wait': np.percentile(waits, 95),
            'max_wait': waits.max(),
            'mean_service': services.mean(),
            'correlation': np.corrcoef(waits, services)[0, 1]
        }

# Run
# Build the SimPy environment, simulate for 1000 time units with a fixed
# master seed (reproducible), then print the summary statistics dict.
env = simpy.Environment()
sim = NumpySimulation(env, master_seed=42)
sim.run(1000)
print(sim.analyse())

Summary

NumPy for SimPy: a modern random API via `default_rng()`, more distributions than the standard `random` module, separate streams for reproducibility, fast array operations for analysis, and vectorised calculations.

Better randomness. Faster analysis.

Next Steps


Discover the Power of Simulation

Want to become a go-to expert in simulation with Python? The Complete Simulation Bootcamp will show you how simulation can transform your career and your projects.

Explore the Bootcamp