SimPy with NumPy: Better Randomness and Faster Analysis
Python's random module is fine. NumPy's random is better. More distributions. Better seeding. Faster analysis.
Why NumPy?
- More probability distributions
- Better random number generation
- Reproducibility with modern API
- Fast array operations for analysis
NumPy's Random Generator
import numpy as np

# Modern API (recommended): a Generator from default_rng, seeded for
# reproducibility. Prefer this over the legacy np.random.* module functions.
rng = np.random.default_rng(seed=42)

# Generate random numbers
rng.random()              # Uniform [0, 1)
rng.uniform(5, 10)        # Uniform [5, 10) -- half-open, high excluded
rng.exponential(5)        # Exponential, mean 5 (argument is the scale)
rng.normal(10, 2)         # Normal, mean 10, std 2
rng.poisson(5)            # Poisson, lambda 5
rng.gamma(2, 2)           # Gamma, shape 2, scale 2 (mean = shape * scale)
rng.lognormal(1, 0.5)     # Lognormal (mean/sigma of the underlying normal)
rng.triangular(2, 5, 10)  # Triangular: argument order is (min, MODE, max)
rng.integers(1, 10)       # Integer in [1, 10) -- high endpoint excluded
Distributions for Simulation
Interarrival Times
# Poisson process (exponential interarrivals): gaps between arrivals are
# memoryless; mean_interarrival = 1 / arrival_rate
rng.exponential(mean_interarrival)
# Erlang-k (sum of k exponentials): same mean, lower variance; scale is
# chosen so shape * scale == mean
rng.gamma(shape=k, scale=mean/k)
Service Times
# Exponential: memoryless, highly variable service durations
rng.exponential(mean_service)
# Lognormal (common for human tasks): right-skewed, strictly positive;
# mu and sigma parameterise the underlying normal, not the lognormal itself
rng.lognormal(mu, sigma)
# Triangular (when you have min, max, mode).
# NOTE: NumPy's argument order is (left, mode, right) == (min, MODE, max);
# passing (low, high, mode) raises ValueError whenever high > mode.
rng.triangular(low, mode, high)
# Gamma (flexible shape): mean = shape * scale
rng.gamma(shape, scale)
Discrete Choices
# Choose one option uniformly at random
options = ['A', 'B', 'C']
rng.choice(options)
# Weighted choice -- p gives per-option probabilities and must sum to 1
weights = [0.5, 0.3, 0.2]
rng.choice(options, p=weights)
Per-Process Random Streams
class Simulation:
    """Simulation whose stochastic processes each own an independent RNG stream.

    Sub-seeds are drawn from a single master generator, so changing one
    master seed reproducibly re-randomises every stream at once while the
    streams stay decoupled from one another.
    """

    def __init__(self, master_seed):
        seeder = np.random.default_rng(master_seed)
        # Five sub-seeds drawn; three used now, leaving room to grow.
        sub_seeds = seeder.integers(0, 2**31, size=5)
        arrival_seed, service_seed, failure_seed = sub_seeds[:3]
        # One dedicated generator per process.
        self.arrival_rng = np.random.default_rng(arrival_seed)
        self.service_rng = np.random.default_rng(service_seed)
        self.failure_rng = np.random.default_rng(failure_seed)

    def next_arrival(self):
        """Time until the next arrival (exponential, mean 5)."""
        return self.arrival_rng.exponential(5)

    def next_service(self):
        """Duration of one service (lognormal, mu=1.5, sigma=0.5)."""
        return self.service_rng.lognormal(1.5, 0.5)

    def next_failure(self):
        """Time until the next failure (exponential, mean 100)."""
        return self.failure_rng.exponential(100)
Array Operations for Analysis
# Collect wait times as numpy array (append to a list during the run,
# convert once for analysis)
wait_times = np.array(stats['wait_times'])
# Fast statistics -- each computed in C, not a Python loop
np.mean(wait_times)
np.median(wait_times)
np.std(wait_times)
np.percentile(wait_times, [50, 90, 95, 99])
np.min(wait_times)
np.max(wait_times)
# Histogram: per-bin counts plus bin edges (len(bins) == len(counts) + 1)
counts, bins = np.histogram(wait_times, bins=30)
# Correlation matrix; the [0, 1] entry is the wait/service correlation
np.corrcoef(wait_times, service_times)
Generating Correlated Random Variables
def generate_correlated(rng, mean1, mean2, correlation, n):
    """Generate two correlated exponential samples via a Gaussian copula.

    Parameters
    ----------
    rng : numpy.random.Generator
    mean1, mean2 : float
        Means of the two exponential marginals.
    correlation : float
        Target correlation of the underlying normals, in [-1, 1].
    n : int
        Number of sample pairs.

    Returns
    -------
    (x1, x2) : two length-n non-negative arrays with exponential marginals.

    The earlier draft mixed two uniforms linearly; the mix is neither
    uniform nor confined to (0, 1), so -log() of it could be undefined
    (log of 0) or positive (values > 1), yielding negative "exponential"
    samples. Correlating standard normals and mapping them through the
    normal CDF keeps both marginals exactly uniform before the
    exponential inverse-CDF transform.
    """
    from scipy.special import ndtr  # vectorised standard normal CDF

    # Correlated standard normals: corr(z1, z2) == correlation exactly.
    z1 = rng.standard_normal(n)
    z2 = correlation * z1 + np.sqrt(1.0 - correlation**2) * rng.standard_normal(n)

    # Map to (0, 1); the clip guards against u hitting 0 or 1 in the
    # extreme tails, which would make the log below infinite.
    tiny = 1e-12
    u1 = np.clip(ndtr(z1), tiny, 1.0 - tiny)
    u2 = np.clip(ndtr(z2), tiny, 1.0 - tiny)

    # Exponential inverse CDF: x = -mean * ln(1 - u).
    x1 = -mean1 * np.log1p(-u1)
    x2 = -mean2 * np.log1p(-u2)
    return x1, x2
Batch Statistics
def batch_means(data, batch_size):
    """Estimate a mean and its standard error with the batch-means method.

    Splits `data` into consecutive non-overlapping batches of `batch_size`
    observations (any trailing partial batch is dropped) and treats the
    batch means as approximately independent samples -- the standard way
    to build confidence intervals from autocorrelated simulation output.

    Parameters
    ----------
    data : array-like of numbers
    batch_size : int
        Observations per batch; must be positive.

    Returns
    -------
    dict with keys 'overall_mean', 'std_error', 'batch_means'.

    Raises
    ------
    ValueError
        If `data` does not contain at least one full batch.
    """
    data = np.asarray(data, dtype=float)
    n_batches = len(data) // batch_size
    if n_batches == 0:
        raise ValueError(
            f"need at least {batch_size} observations, got {len(data)}"
        )
    # Drop the incomplete tail, then average within each batch.
    means = data[:n_batches * batch_size].reshape(n_batches, batch_size).mean(axis=1)
    return {
        'overall_mean': means.mean(),
        # ddof=1: sample std of the batch means. The original used the
        # population std (ddof=0), which biases the standard error low.
        'std_error': means.std(ddof=1) / np.sqrt(n_batches),
        'batch_means': means,
    }
Empirical Distribution
class EmpiricalDistribution:
    """Draw samples directly from a set of historical observations.

    Rather than fitting a parametric distribution, resample the observed
    values themselves: every historical value is equally likely on each
    draw (sampling with replacement).
    """

    def __init__(self, data, rng=None):
        self.data = np.array(data)
        # Fall back to an unseeded generator when no stream is supplied.
        self.rng = np.random.default_rng() if rng is None else rng

    def sample(self):
        """Return one value drawn uniformly from the data."""
        return self.rng.choice(self.data)

    def sample_n(self, n):
        """Return n values drawn with replacement from the data."""
        return self.rng.choice(self.data, size=n, replace=True)
# Usage with historical data ('...' here is a prose placeholder for the
# rest of the observed sample, not runnable code)
historical_service_times = np.array([4.2, 5.1, 3.8, 6.2, 4.9, ...])
service_dist = EmpiricalDistribution(historical_service_times, rng)
# SimPy process: each customer's service time is resampled from history
def customer(env):
yield env.timeout(service_dist.sample())
Fitting Distributions
from scipy import stats

# Fit several candidate distributions to the observed service times and
# keep whichever minimises the AIC (lower = better fit after penalising
# the number of fitted parameters).
data = np.array(service_times)
candidates = ['expon', 'gamma', 'lognorm', 'weibull_min']

best_fit = None
best_aic = float('inf')
for name in candidates:
    dist = getattr(stats, name)
    params = dist.fit(data)  # maximum-likelihood parameter estimates
    # Kolmogorov-Smirnov goodness-of-fit test (statistic and p-value)
    ks_stat, p_value = stats.kstest(data, name, params)
    # Akaike information criterion: 2k - 2*ln(L)
    log_likelihood = np.sum(dist.logpdf(data, *params))
    aic = 2 * len(params) - 2 * log_likelihood
    if aic < best_aic:
        best_aic, best_fit = aic, (name, params)
print(f"Best fit: {best_fit[0]} with params {best_fit[1]}")
Vectorised Simulation Analysis
# Instead of loops for statistics
wait_times = np.array([...])
service_times = np.array([...])
# Vectorised calculations
total_times = wait_times + service_times
efficiency = service_times / total_times
above_threshold = np.sum(wait_times > 10)
# Boolean indexing
long_waits = wait_times[wait_times > 10]
Common Random Numbers
def run_with_crn(config_a, config_b, n_samples):
    """Compare two configurations using common random numbers (CRN).

    Both scenarios consume identical pre-drawn arrival and service
    sequences, so any difference in the results is attributable to the
    configurations rather than to sampling noise -- a classic
    variance-reduction technique for paired comparisons.
    """
    rng = np.random.default_rng(42)
    # Draw the shared random inputs exactly once.
    arrivals = rng.exponential(5, n_samples)
    services = rng.lognormal(1.5, 0.5, n_samples)
    # Feed the very same numbers to both scenarios.
    result_a = run_scenario(config_a, arrivals, services)
    result_b = run_scenario(config_b, arrivals, services)
    # Paired differences: average effect and its spread.
    deltas = result_a - result_b
    return deltas.mean(), deltas.std()
Complete Integration
import simpy
import numpy as np
class NumpySimulation:
    """SimPy queueing model wired to NumPy random streams and analysis.

    Two independent generators (arrivals, services) are derived from one
    master seed; observations accumulate in plain lists during the run and
    are converted to arrays once, at analysis time.
    """

    def __init__(self, env, master_seed):
        self.env = env
        # Derive one independent stream per stochastic process.
        master = np.random.default_rng(master_seed)
        seeds = master.integers(0, 2**31, size=3)
        self.arrival_rng = np.random.default_rng(seeds[0])
        self.service_rng = np.random.default_rng(seeds[1])
        # Two parallel servers sharing one queue.
        self.server = simpy.Resource(env, capacity=2)
        # Raw observations; converted to numpy arrays in analyse().
        self.wait_times = []
        self.service_times = []

    def customer(self, cid):
        """One customer: queue for a server, then hold it for service."""
        arrived_at = self.env.now
        with self.server.request() as req:
            yield req
            waited = self.env.now - arrived_at
            duration = self.service_rng.lognormal(1.5, 0.5)
            yield self.env.timeout(duration)
            # Only customers who finish service are recorded.
            self.wait_times.append(waited)
            self.service_times.append(duration)

    def arrivals(self):
        """Poisson arrival process: exponential gaps with mean 3."""
        cid = 0
        while True:
            yield self.env.timeout(self.arrival_rng.exponential(3))
            self.env.process(self.customer(cid))
            cid += 1

    def run(self, duration):
        """Start the arrival process and advance the clock to `duration`."""
        self.env.process(self.arrivals())
        self.env.run(until=duration)

    def analyse(self):
        """Summarise completed customers as a dict of scalar statistics."""
        waits = np.array(self.wait_times)
        services = np.array(self.service_times)
        return {
            'n_customers': len(waits),
            'mean_wait': waits.mean(),
            'std_wait': waits.std(),
            'median_wait': np.median(waits),
            'p90_wait': np.percentile(waits, 90),
            'p95_wait': np.percentile(waits, 95),
            'max_wait': waits.max(),
            'mean_service': services.mean(),
            'correlation': np.corrcoef(waits, services)[0, 1]
        }
# Run the model for 1000 time units and print the summary statistics
env = simpy.Environment()
sim = NumpySimulation(env, master_seed=42)  # fixed seed -> reproducible run
sim.run(1000)
print(sim.analyse())
Summary
NumPy for SimPy:
- Modern random API with default_rng()
- More distributions than random
- Separate streams for reproducibility
- Fast array operations for analysis
- Vectorised calculations
Better randomness. Faster analysis.
Next Steps
Discover the Power of Simulation
Want to become a go-to expert in simulation with Python? The Complete Simulation Bootcamp will show you how simulation can transform your career and your projects.
Explore the Bootcamp