Source code for simcraft.statistics.tally

"""
Tally for collecting discrete observations.

Collects individual observations and provides statistical summaries.
"""

from __future__ import annotations
import math
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, List, Optional, Tuple

if TYPE_CHECKING:
    from simcraft.core.simulation import Simulation



[docs]
@dataclass
class Tally:
    """
    Collects discrete observations and calculates statistics.

    Efficiently calculates running statistics without storing
    all observations (unless keep_history is True).

    Parameters
    ----------
    sim : Optional[Simulation]
        Parent simulation
    name : str
        Tally name
    keep_history : bool
        Whether to store all observations

    Examples
    --------
    >>> tally = Tally(name="service_time")
    >>> for time in service_times:
    ...     tally.observe(time)
    >>> print(tally.mean, tally.std)
    """

    name: str = ""
    keep_history: bool = False
    _sim: Optional["Simulation"] = field(default=None, repr=False)

    # Running statistics (Welford's algorithm)
    _count: int = field(default=0, repr=False)
    _mean: float = field(default=0.0, repr=False)
    _m2: float = field(default=0.0, repr=False)  # Sum of squared deviations
    _min: float = field(default=float("inf"), repr=False)
    _max: float = field(default=float("-inf"), repr=False)
    _sum: float = field(default=0.0, repr=False)

    # Optional history
    _history: List[Tuple[float, float]] = field(default_factory=list, repr=False)

    @property
    def count(self) -> int:
        """Get number of observations."""
        return self._count

    @property
    def sum(self) -> float:
        """Get sum of observations."""
        return self._sum

    @property
    def mean(self) -> float:
        """Get mean of observations."""
        return self._mean

    @property
    def variance(self) -> float:
        """Get sample variance."""
        if self._count < 2:
            return 0.0
        return self._m2 / (self._count - 1)

    @property
    def std(self) -> float:
        """Get sample standard deviation."""
        return math.sqrt(self.variance)

    @property
    def min(self) -> float:
        """Get minimum observation."""
        if self._count == 0:
            return 0.0
        return self._min

    @property
    def max(self) -> float:
        """Get maximum observation."""
        if self._count == 0:
            return 0.0
        return self._max

    @property
    def range(self) -> float:
        """Get range (max - min)."""
        if self._count == 0:
            return 0.0
        return self._max - self._min


[docs]
    def observe(self, value: float) -> None:
        """
        Record an observation.

        Parameters
        ----------
        value : float
            Observed value
        """
        self._count += 1
        self._sum += value

        # Welford's online algorithm for variance
        delta = value - self._mean
        self._mean += delta / self._count
        delta2 = value - self._mean
        self._m2 += delta * delta2

        # Update min/max
        self._min = min(self._min, value)
        self._max = max(self._max, value)

        # Store history if requested
        if self.keep_history:
            time = self._sim.now if self._sim else 0.0
            self._history.append((time, value))



[docs]
    def observe_batch(self, values: List[float]) -> None:
        """
        Record multiple observations.

        Parameters
        ----------
        values : List[float]
            Observed values
        """
        for value in values:
            self.observe(value)



[docs]
    def percentile(self, p: float) -> float:
        """
        Calculate percentile from stored history.

        Parameters
        ----------
        p : float
            Percentile (0-100)

        Returns
        -------
        float
            Percentile value

        Raises
        ------
        ValueError
            If history not kept or empty
        """
        if not self.keep_history or not self._history:
            raise ValueError("Percentile requires keep_history=True and observations")

        values = sorted(v for _, v in self._history)
        k = (len(values) - 1) * p / 100
        f = math.floor(k)
        c = math.ceil(k)

        if f == c:
            return values[int(k)]

        return values[int(f)] * (c - k) + values[int(c)] * (k - f)



[docs]
    def confidence_interval(self, confidence: float = 0.95) -> Tuple[float, float]:
        """
        Calculate confidence interval for the mean.

        Parameters
        ----------
        confidence : float
            Confidence level (e.g., 0.95 for 95%)

        Returns
        -------
        Tuple[float, float]
            (lower, upper) confidence bounds
        """
        if self._count < 2:
            return (self._mean, self._mean)

        # t-distribution critical value approximation
        # Using normal approximation for large samples
        import math

        alpha = 1 - confidence
        if self._count >= 30:
            # Normal approximation
            z = {0.90: 1.645, 0.95: 1.96, 0.99: 2.576}.get(confidence, 1.96)
        else:
            # Rough t-distribution approximation
            z = 2.0 + 0.5 * (30 - self._count) / 30

        se = self.std / math.sqrt(self._count)
        margin = z * se

        return (self._mean - margin, self._mean + margin)



[docs]
    def get_history(self) -> List[Tuple[float, float]]:
        """
        Get observation history.

        Returns
        -------
        List[Tuple[float, float]]
            List of (time, value) tuples
        """
        return self._history.copy()



[docs]
    def reset(self) -> None:
        """Reset all statistics."""
        self._count = 0
        self._mean = 0.0
        self._m2 = 0.0
        self._min = float("inf")
        self._max = float("-inf")
        self._sum = 0.0
        self._history.clear()



[docs]
    def summary(self) -> dict:
        """
        Get summary statistics.

        Returns
        -------
        dict
            Dictionary of statistics
        """
        return {
            "count": self._count,
            "mean": self.mean,
            "std": self.std,
            "min": self.min,
            "max": self.max,
            "sum": self._sum,
        }


    def __repr__(self) -> str:
        """Return detailed representation."""
        return (
            f"Tally(name={self.name!r}, count={self._count}, "
            f"mean={self.mean:.4f}, std={self.std:.4f})"
        )




[docs]
class BatchTally(Tally):
    """
    Tally that groups observations into batches for analysis.

    Useful for batch means analysis and detecting initialization bias.

    Parameters
    ----------
    batch_size : int
        Number of observations per batch
    name : str
        Tally name
    """


[docs]
    def __init__(
        self,
        batch_size: int = 100,
        name: str = "",
        sim: Optional["Simulation"] = None,
    ) -> None:
        """Initialize batch tally."""
        super().__init__(name=name, _sim=sim)
        self._batch_size = batch_size
        self._current_batch: List[float] = []
        self._batch_means: List[float] = []


    @property
    def batch_count(self) -> int:
        """Get number of completed batches."""
        return len(self._batch_means)

    @property
    def batch_means(self) -> List[float]:
        """Get list of batch means."""
        return self._batch_means.copy()


[docs]
    def observe(self, value: float) -> None:
        """Record an observation."""
        super().observe(value)
        self._current_batch.append(value)

        if len(self._current_batch) >= self._batch_size:
            batch_mean = sum(self._current_batch) / len(self._current_batch)
            self._batch_means.append(batch_mean)
            self._current_batch.clear()



[docs]
    def batch_variance(self) -> float:
        """
        Calculate variance of batch means.

        Useful for estimating variance of the overall mean.
        """
        if len(self._batch_means) < 2:
            return 0.0

        mean_of_means = sum(self._batch_means) / len(self._batch_means)
        ss = sum((m - mean_of_means) ** 2 for m in self._batch_means)
        return ss / (len(self._batch_means) - 1)



[docs]
    def reset(self) -> None:
        """Reset all statistics."""
        super().reset()
        self._current_batch.clear()
        self._batch_means.clear()