SimPy with pandas: Powerful Data Analysis for Simulation Results

Published on February 23, 2026

SimPy generates data. pandas makes sense of it. Together, they're unstoppable.

Why pandas?

SimPy gives you raw data—lists, dictionaries, timestamps. pandas gives you: - DataFrames for structured analysis - Aggregations, groupings, pivots - Time series analysis - Easy export to CSV, Excel, databases

Collecting Data for pandas

Structure your data from the start:

import simpy
import random
import pandas as pd

class Simulation:
    def __init__(self, env):
        self.env = env
        self.records = []  # Will become DataFrame

    def customer(self, customer_id, server):
        arrival = self.env.now

        with server.request() as req:
            yield req
            wait = self.env.now - arrival

            service = random.expovariate(1/5)
            yield self.env.timeout(service)

        self.records.append({
            'customer_id': customer_id,
            'arrival': arrival,
            'wait': wait,
            'service': service,
            'departure': self.env.now
        })

    def to_dataframe(self):
        return pd.DataFrame(self.records)

Basic Analysis

# Run simulation
sim = Simulation(env)
# ... run ...
df = sim.to_dataframe()

# Summary statistics
print(df.describe())

# Specific metrics
print(f"Mean wait: {df['wait'].mean():.2f}")
print(f"Median wait: {df['wait'].median():.2f}")
print(f"90th percentile: {df['wait'].quantile(0.9):.2f}")
print(f"Max wait: {df['wait'].max():.2f}")

Time-Based Analysis

# Add time bins
df['hour'] = (df['arrival'] / 60).astype(int)

# Hourly statistics
hourly = df.groupby('hour').agg({
    'customer_id': 'count',
    'wait': ['mean', 'max'],
    'service': 'mean'
}).round(2)

print(hourly)

# Rolling average
df['rolling_wait'] = df['wait'].rolling(window=50).mean()

Segmentation Analysis

# If you have customer types
df['customer_type'] = ['VIP' if x % 10 == 0 else 'Regular' for x in df['customer_id']]

# Compare segments
segment_stats = df.groupby('customer_type').agg({
    'wait': ['count', 'mean', 'std', 'max'],
    'service': 'mean'
})
print(segment_stats)

Time Series from Monitor

class TimeSeriesCollector:
    def __init__(self, env, resources):
        self.env = env
        self.resources = resources
        self.records = []

    def monitor(self, interval=1):
        while True:
            record = {'time': self.env.now}
            for name, resource in self.resources.items():
                record[f'{name}_queue'] = len(resource.queue)
                record[f'{name}_busy'] = resource.count
            self.records.append(record)
            yield self.env.timeout(interval)

    def to_dataframe(self):
        return pd.DataFrame(self.records).set_index('time')

# Usage
collector = TimeSeriesCollector(env, {'server': server})
env.process(collector.monitor())
# ... run ...

ts_df = collector.to_dataframe()
print(ts_df.head())

Resampling and Aggregation

# Time series resampling (assuming time in minutes)
ts_df.index = pd.to_timedelta(ts_df.index, unit='m')

# 10-minute averages
resampled = ts_df.resample('10T').mean()

# Or custom aggregation
hourly = ts_df.resample('60T').agg({
    'server_queue': ['mean', 'max'],
    'server_busy': 'mean'
})

Exporting Results

# To CSV
df.to_csv('simulation_results.csv', index=False)

# To Excel with multiple sheets
with pd.ExcelWriter('results.xlsx') as writer:
    df.to_excel(writer, sheet_name='Raw Data', index=False)
    summary.to_excel(writer, sheet_name='Summary')
    hourly.to_excel(writer, sheet_name='Hourly')

# To SQL database
from sqlalchemy import create_engine
engine = create_engine('sqlite:///simulation.db')
df.to_sql('runs', engine, if_exists='replace', index=False)

Scenario Comparison

def run_scenario(config, seed):
    random.seed(seed)
    env = simpy.Environment()
    sim = Simulation(env, config)
    env.run(until=1000)
    df = sim.to_dataframe()
    df['scenario'] = config['name']
    df['seed'] = seed
    return df

# Run multiple scenarios
scenarios = [
    {'name': '2_servers', 'servers': 2},
    {'name': '3_servers', 'servers': 3},
    {'name': '4_servers', 'servers': 4}
]

all_results = []
for config in scenarios:
    for seed in range(10):  # 10 replications
        df = run_scenario(config, seed)
        all_results.append(df)

combined = pd.concat(all_results, ignore_index=True)

# Compare scenarios
comparison = combined.groupby('scenario')['wait'].agg(['mean', 'std', 'max'])
print(comparison)

Pivot Tables

# Customer type by hour
pivot = df.pivot_table(
    values='wait',
    index='hour',
    columns='customer_type',
    aggfunc='mean'
)
print(pivot)

Statistical Tests

from scipy import stats

# Compare two scenarios
scenario_a = combined[combined['scenario'] == '2_servers']['wait']
scenario_b = combined[combined['scenario'] == '3_servers']['wait']

t_stat, p_value = stats.ttest_ind(scenario_a, scenario_b)
print(f"t-statistic: {t_stat:.4f}, p-value: {p_value:.4f}")

Complete pandas Integration

import simpy
import random
import pandas as pd
import numpy as np

class PandasSimulation:
    def __init__(self, env, config):
        self.env = env
        self.config = config
        self.server = simpy.Resource(env, capacity=config['servers'])

        # Data collectors
        self.entity_data = []
        self.time_series = []

    def customer(self, cid):
        arrival = self.env.now

        with self.server.request() as req:
            yield req
            wait = self.env.now - arrival
            service = random.expovariate(1/self.config['service_rate'])
            yield self.env.timeout(service)

        self.entity_data.append({
            'id': cid,
            'arrival': arrival,
            'wait': wait,
            'service': service,
            'departure': self.env.now,
            'system_time': self.env.now - arrival
        })

    def arrivals(self):
        cid = 0
        while True:
            yield self.env.timeout(random.expovariate(self.config['arrival_rate']))
            self.env.process(self.customer(cid))
            cid += 1

    def monitor(self, interval=1):
        while True:
            self.time_series.append({
                'time': self.env.now,
                'queue': len(self.server.queue),
                'busy': self.server.count,
                'utilisation': self.server.count / self.server.capacity
            })
            yield self.env.timeout(interval)

    def run(self, duration):
        self.env.process(self.arrivals())
        self.env.process(self.monitor())
        self.env.run(until=duration)

    def get_entity_df(self):
        return pd.DataFrame(self.entity_data)

    def get_timeseries_df(self):
        return pd.DataFrame(self.time_series)

    def summary(self):
        df = self.get_entity_df()
        return {
            'customers': len(df),
            'mean_wait': df['wait'].mean(),
            'median_wait': df['wait'].median(),
            'p90_wait': df['wait'].quantile(0.9),
            'max_wait': df['wait'].max(),
            'mean_system_time': df['system_time'].mean(),
            'throughput': len(df) / self.env.now
        }

# Run
random.seed(42)
env = simpy.Environment()
sim = PandasSimulation(env, {
    'servers': 2,
    'arrival_rate': 0.5,
    'service_rate': 0.3
})
sim.run(1000)

# Analyse
entity_df = sim.get_entity_df()
ts_df = sim.get_timeseries_df()
print(sim.summary())

Summary

SimPy + pandas: - Structure data collection from the start - Use DataFrames for analysis - Group, aggregate, pivot - Compare scenarios easily - Export to any format

Collect smart. Analyse smarter.

Next Steps

Discover the Power of Simulation

Want to become a go-to expert in simulation with Python? The Complete Simulation Bootcamp will show you how simulation can transform your career and your projects.

Explore the Bootcamp