Source code for simcraft.statistics.tally

"""
Tally for collecting discrete observations.

Collects individual observations and provides statistical summaries.
"""

from __future__ import annotations
import math
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, List, Optional, Tuple

if TYPE_CHECKING:
    from simcraft.core.simulation import Simulation


[docs] @dataclass class Tally: """ Collects discrete observations and calculates statistics. Efficiently calculates running statistics without storing all observations (unless keep_history is True). Parameters ---------- sim : Optional[Simulation] Parent simulation name : str Tally name keep_history : bool Whether to store all observations Examples -------- >>> tally = Tally(name="service_time") >>> for time in service_times: ... tally.observe(time) >>> print(tally.mean, tally.std) """ name: str = "" keep_history: bool = False _sim: Optional["Simulation"] = field(default=None, repr=False) # Running statistics (Welford's algorithm) _count: int = field(default=0, repr=False) _mean: float = field(default=0.0, repr=False) _m2: float = field(default=0.0, repr=False) # Sum of squared deviations _min: float = field(default=float("inf"), repr=False) _max: float = field(default=float("-inf"), repr=False) _sum: float = field(default=0.0, repr=False) # Optional history _history: List[Tuple[float, float]] = field(default_factory=list, repr=False) @property def count(self) -> int: """Get number of observations.""" return self._count @property def sum(self) -> float: """Get sum of observations.""" return self._sum @property def mean(self) -> float: """Get mean of observations.""" return self._mean @property def variance(self) -> float: """Get sample variance.""" if self._count < 2: return 0.0 return self._m2 / (self._count - 1) @property def std(self) -> float: """Get sample standard deviation.""" return math.sqrt(self.variance) @property def min(self) -> float: """Get minimum observation.""" if self._count == 0: return 0.0 return self._min @property def max(self) -> float: """Get maximum observation.""" if self._count == 0: return 0.0 return self._max @property def range(self) -> float: """Get range (max - min).""" if self._count == 0: return 0.0 return self._max - self._min
[docs] def observe(self, value: float) -> None: """ Record an observation. Parameters ---------- value : float Observed value """ self._count += 1 self._sum += value # Welford's online algorithm for variance delta = value - self._mean self._mean += delta / self._count delta2 = value - self._mean self._m2 += delta * delta2 # Update min/max self._min = min(self._min, value) self._max = max(self._max, value) # Store history if requested if self.keep_history: time = self._sim.now if self._sim else 0.0 self._history.append((time, value))
[docs] def observe_batch(self, values: List[float]) -> None: """ Record multiple observations. Parameters ---------- values : List[float] Observed values """ for value in values: self.observe(value)
[docs] def percentile(self, p: float) -> float: """ Calculate percentile from stored history. Parameters ---------- p : float Percentile (0-100) Returns ------- float Percentile value Raises ------ ValueError If history not kept or empty """ if not self.keep_history or not self._history: raise ValueError("Percentile requires keep_history=True and observations") values = sorted(v for _, v in self._history) k = (len(values) - 1) * p / 100 f = math.floor(k) c = math.ceil(k) if f == c: return values[int(k)] return values[int(f)] * (c - k) + values[int(c)] * (k - f)
[docs] def confidence_interval(self, confidence: float = 0.95) -> Tuple[float, float]: """ Calculate confidence interval for the mean. Parameters ---------- confidence : float Confidence level (e.g., 0.95 for 95%) Returns ------- Tuple[float, float] (lower, upper) confidence bounds """ if self._count < 2: return (self._mean, self._mean) # t-distribution critical value approximation # Using normal approximation for large samples import math alpha = 1 - confidence if self._count >= 30: # Normal approximation z = {0.90: 1.645, 0.95: 1.96, 0.99: 2.576}.get(confidence, 1.96) else: # Rough t-distribution approximation z = 2.0 + 0.5 * (30 - self._count) / 30 se = self.std / math.sqrt(self._count) margin = z * se return (self._mean - margin, self._mean + margin)
[docs] def get_history(self) -> List[Tuple[float, float]]: """ Get observation history. Returns ------- List[Tuple[float, float]] List of (time, value) tuples """ return self._history.copy()
[docs] def reset(self) -> None: """Reset all statistics.""" self._count = 0 self._mean = 0.0 self._m2 = 0.0 self._min = float("inf") self._max = float("-inf") self._sum = 0.0 self._history.clear()
[docs] def summary(self) -> dict: """ Get summary statistics. Returns ------- dict Dictionary of statistics """ return { "count": self._count, "mean": self.mean, "std": self.std, "min": self.min, "max": self.max, "sum": self._sum, }
def __repr__(self) -> str: """Return detailed representation.""" return ( f"Tally(name={self.name!r}, count={self._count}, " f"mean={self.mean:.4f}, std={self.std:.4f})" )
[docs] class BatchTally(Tally): """ Tally that groups observations into batches for analysis. Useful for batch means analysis and detecting initialization bias. Parameters ---------- batch_size : int Number of observations per batch name : str Tally name """
[docs] def __init__( self, batch_size: int = 100, name: str = "", sim: Optional["Simulation"] = None, ) -> None: """Initialize batch tally.""" super().__init__(name=name, _sim=sim) self._batch_size = batch_size self._current_batch: List[float] = [] self._batch_means: List[float] = []
@property def batch_count(self) -> int: """Get number of completed batches.""" return len(self._batch_means) @property def batch_means(self) -> List[float]: """Get list of batch means.""" return self._batch_means.copy()
[docs] def observe(self, value: float) -> None: """Record an observation.""" super().observe(value) self._current_batch.append(value) if len(self._current_batch) >= self._batch_size: batch_mean = sum(self._current_batch) / len(self._current_batch) self._batch_means.append(batch_mean) self._current_batch.clear()
[docs] def batch_variance(self) -> float: """ Calculate variance of batch means. Useful for estimating variance of the overall mean. """ if len(self._batch_means) < 2: return 0.0 mean_of_means = sum(self._batch_means) / len(self._batch_means) ss = sum((m - mean_of_means) ** 2 for m in self._batch_means) return ss / (len(self._batch_means) - 1)
[docs] def reset(self) -> None: """Reset all statistics.""" super().reset() self._current_batch.clear() self._batch_means.clear()