SimPy Performance Optimization: Making Simulations Faster
Your simulation works. But it's slow. Here's how to fix that.
Profile First
Don't guess. Measure:
import cProfile
import pstats

import simpy  # required by run_simulation(); missing from the original snippet


def run_simulation():
    """Build the model and run it for 10,000 time units (setup elided)."""
    env = simpy.Environment()
    # ... setup ...
    env.run(until=10000)


# Profile: collect call statistics while the simulation runs.
profiler = cProfile.Profile()
profiler.enable()
run_simulation()
profiler.disable()

# Analyse: rank functions by cumulative time and show the worst offenders.
stats = pstats.Stats(profiler)
stats.sort_stats('cumulative')
stats.print_stats(20)  # Top 20 time consumers
Find the bottleneck before optimising.
Common Bottlenecks
1. Too Many Events
Every timeout creates an event. Reduce them:
# SLOW - event every 0.1 time units
def monitor(env, resource):
    """Poll `resource` forever; the 0.1 interval floods the event queue."""
    while True:
        record_stats(resource)  # NOTE(review): record_stats defined elsewhere in the model
        yield env.timeout(0.1)  # 10,000 events per 1,000 time


# FASTER - event every 10 time units
def monitor(env, resource):
    """Same polling loop, but 100x fewer scheduled events."""
    while True:
        record_stats(resource)
        yield env.timeout(10)  # 100 events per 1,000 time
2. Expensive Logging
# SLOW - string formatting on every event
def customer(env, server, cid):
    # The f-string (and len(server.queue)) is evaluated even when DEBUG
    # logging is disabled.
    logger.debug(f"Customer {cid} arrived at {env.now} with queue {len(server.queue)}")
    # ... rest of process


# FASTER - check level first
def customer(env, server, cid):
    # Skip the formatting work entirely unless DEBUG is actually enabled.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Customer {cid} arrived at {env.now}")
    # ... rest of process


# FASTEST - no logging in production
DEBUG = False  # flip to True locally when debugging


def customer(env, server, cid):
    if DEBUG:
        print(f"Customer {cid} arrived")
3. Slow Random Number Generation
# SLOW - constructing a fresh Generator object on every single call
def get_service_time():
    generator = numpy.random.default_rng()
    return generator.exponential(5)


# FAST - build the generator once at module level and reuse it
rng = numpy.random.default_rng(42)


def get_service_time():
    draw = rng.exponential(5)
    return draw
4. List Append Overhead
# SLOW - list grows unbounded
wait_times = []


def customer(env, server):
    # ...
    # NOTE(review): `wait` is computed in the elided part of the process
    wait_times.append(wait)  # O(1) amortised, but memory grows


# FASTER - use deque with maxlen for rolling stats
from collections import deque

wait_times = deque(maxlen=10000)  # Keep last 10,000
# FASTEST - compute statistics incrementally
class IncrementalStats:
    """Streaming mean/variance accumulator: O(1) time and memory per sample.

    Keeps only the running count, sum, and sum of squares instead of
    storing every observation.
    """

    def __init__(self):
        self.count = 0   # number of samples seen
        self.sum = 0     # running sum of samples
        self.sum_sq = 0  # running sum of squared samples

    def add(self, value):
        """Fold one sample into the running totals."""
        self.count += 1
        self.sum += value
        self.sum_sq += value * value

    @property
    def mean(self):
        """Sample mean, or 0 when no samples have been added."""
        return self.sum / self.count if self.count else 0

    @property
    def variance(self):
        """Unbiased sample variance (0 for fewer than two samples).

        Clamped at 0: the sum-of-squares formula can go slightly negative
        through floating-point cancellation when all samples are nearly equal.
        """
        if self.count < 2:
            return 0
        return max(0.0, (self.sum_sq - self.sum ** 2 / self.count) / (self.count - 1))
Reduce Object Creation
# SLOW - new dict every customer
def customer(env, server, cid):
    # A fresh dict (keys hashed, storage allocated) on every arrival.
    data = {
        'arrival': env.now,
        'cid': cid,
        'wait': 0,
        'service': 0
    }
    # ...
# FASTER - use named tuple or dataclass
from dataclasses import dataclass


@dataclass
class CustomerData:
    arrival: float
    cid: int
    wait: float = 0
    service: float = 0


# Or slots for less memory: no per-instance __dict__ is allocated at all
class CustomerData:
    __slots__ = ['arrival', 'cid', 'wait', 'service']

    def __init__(self, arrival, cid):
        # Record the two known values, zero the rest until measured.
        self.arrival, self.cid = arrival, cid
        self.wait = 0
        self.service = 0
Batch Processing
# SLOW - process one at a time
def arrivals(env, server):
    """Spawn one customer per exponential inter-arrival gap (one event each)."""
    while True:
        yield env.timeout(random.expovariate(1))
        env.process(customer(env, server))  # NOTE(review): customer defined elsewhere


# FASTER - batch arrivals when appropriate
def batch_arrivals(env, server, batch_size=10):
    """Spawn `batch_size` customers every 10 time units: one timeout event per batch."""
    while True:
        # Wait for batch interval
        yield env.timeout(10)
        # Create batch
        for _ in range(batch_size):
            env.process(customer(env, server))
Use PyPy
PyPy can be 5-10x faster for pure Python:
# Install PyPy
# Then install SimPy in PyPy environment
pypy -m pip install simpy
# Run simulation
pypy simulation.py
Parallel Replications
Run replications in parallel:
from multiprocessing import Pool
import simpy


def run_single_replication(seed):
    """Run one replication."""
    # NOTE(review): assumes `import random` at module level; seed makes each
    # replication independent and reproducible.
    random.seed(seed)
    env = simpy.Environment()
    # ... setup ...
    env.run(until=1000)
    return calculate_statistics()  # defined elsewhere in the full model


# Sequential (slow)
results = [run_single_replication(i) for i in range(100)]

# Parallel (fast) - each replication runs in its own worker process
with Pool() as pool:
    results = pool.map(run_single_replication, range(100))
Simplify the Model
Sometimes the fastest code is code you don't run:
# DETAILED - every step modelled
def detailed_customer(env, reception, triage, doctor, pharmacy):
    """Walk the patient through all four stations (visit_* helpers defined elsewhere)."""
    yield from visit_reception(env, reception)
    yield from visit_triage(env, triage)
    yield from visit_doctor(env, doctor)
    yield from visit_pharmacy(env, pharmacy)


# SIMPLIFIED - aggregate what you don't need
def simplified_customer(env, system):
    """Collapse reception+triage into one sampled delay; model only the doctor queue."""
    # Model reception + triage as single step
    yield env.timeout(random.triangular(2, 5, 10))
    # Model doctor (what we care about)
    with system.request() as req:
        yield req
        yield env.timeout(random.expovariate(1/15))
Pre-generate Random Numbers
# SLOW - generate on demand
def customer(env, server):
    # One Python-level RNG call per customer.
    service_time = random.expovariate(1/5)
    yield env.timeout(service_time)
# FAST - pre-generate
class Simulation:
    """Draws all service times up front in one vectorised NumPy call."""

    def __init__(self, n_customers):
        # Keep the generator so the pool can be refilled if it runs out.
        self._rng = numpy.random.default_rng(42)
        self.service_times = self._rng.exponential(5, size=n_customers)
        self.current = 0

    def next_service_time(self):
        """Return the next pre-generated service time.

        Refills the pool when exhausted (the original raised IndexError
        past n_customers), so the run can exceed the initial estimate.
        """
        if self.current >= len(self.service_times):
            self.service_times = self._rng.exponential(
                5, size=max(1, len(self.service_times)))
            self.current = 0
        time = self.service_times[self.current]
        self.current += 1
        return time
Avoid String Operations
# SLOW
def customer(env, server, cid):
    name = f"customer_{cid}"  # String created
    log_arrival(name, env.now)  # More string operations
    # NOTE(review): log_arrival defined elsewhere in the model


# FAST - use IDs
def customer(env, server, cid):
    log_arrival(cid, env.now)  # Just integers
Memory Management
# SLOW - keep everything
class Simulation:
    """Anti-example: stores a record per customer, so memory grows without bound."""

    def __init__(self):
        self.all_customers = []

    def customer(self, env, server, cid):
        # Arguments match CustomerData(arrival, cid): arrival time first.
        # (The original snippet passed (cid, env.now), i.e. swapped.)
        data = CustomerData(env.now, cid)
        self.all_customers.append(data)  # Memory grows forever


# FAST - keep only what you need
class Simulation:
    """Keeps O(1) incremental stats plus a bounded window of recent waits."""

    def __init__(self):
        self.stats = IncrementalStats()
        self.recent = deque(maxlen=1000)

    def customer(self, env, server, cid):
        # Update stats, don't store everything
        # NOTE(review): wait_time is computed in the elided part of the process.
        self.stats.add(wait_time)
        self.recent.append((env.now, wait_time))
Optimised Example
import simpy
import numpy as np
from collections import deque
from dataclasses import dataclass
class FastSimulation:
    """Two-server queue simulation tuned for speed.

    Statistics are accumulated incrementally (count / sum / sum-of-squares)
    so memory stays O(1) regardless of run length.
    """

    def __init__(self, seed=42):
        self.rng = np.random.default_rng(seed)  # one generator, reused everywhere
        self.env = simpy.Environment()
        self.server = simpy.Resource(self.env, capacity=2)
        # Pre-allocate statistics
        self.wait_count = 0
        self.wait_sum = 0.0
        self.wait_sum_sq = 0.0
        self.wait_max = 0.0

    def record_wait(self, wait):
        """Incremental statistics - O(1) time, O(1) space."""
        self.wait_count += 1
        self.wait_sum += wait
        self.wait_sum_sq += wait * wait
        if wait > self.wait_max:
            self.wait_max = wait

    def customer(self):
        """Process: queue for a server, record the wait, then hold for service."""
        arrival = self.env.now
        with self.server.request() as req:
            yield req
            wait = self.env.now - arrival
            self.record_wait(wait)
            yield self.env.timeout(self.rng.exponential(5))

    def arrivals(self):
        """Process: spawn a new customer after each exponential gap (mean 3)."""
        while True:
            yield self.env.timeout(self.rng.exponential(3))
            self.env.process(self.customer())

    def run(self, duration):
        """Start the arrival process and run the environment until `duration`."""
        self.env.process(self.arrivals())
        self.env.run(until=duration)

    def results(self):
        """Return {'customers', 'mean_wait', 'std_wait', 'max_wait'}.

        The variance is clamped at 0 before the square root: floating-point
        cancellation in sum_sq - sum**2/n can produce a tiny negative value,
        and a negative float base with ** 0.5 yields a complex number.
        """
        n = self.wait_count
        mean = self.wait_sum / n if n else 0
        var = (self.wait_sum_sq - self.wait_sum ** 2 / n) / (n - 1) if n > 1 else 0
        var = max(var, 0.0)
        return {
            'customers': n,
            'mean_wait': mean,
            'std_wait': var ** 0.5,
            'max_wait': self.wait_max
        }
# Run a single long replication and print the summary statistics.
sim = FastSimulation()
sim.run(100000)
print(sim.results())
Summary
Speed up SimPy:

1. Profile first - find the real bottleneck
2. Reduce events - fewer timeouts
3. Cut logging - check the level before formatting
4. Reuse objects - generators, dataclasses
5. Compute incrementally - don't store everything
6. Parallelise replications - use multiprocessing
7. Simplify the model - don't model what you don't need
Measure. Optimise. Measure again.
Next Steps
Strengthen Your Python Skills
If you're finding Python tricky, get up to speed quickly with the 10-Day Python Bootcamp. It's designed to give you the confidence and skills to write clean, effective code.
Start the Python Bootcamp