Skip to content

Simulation results

A simulation result works with the following logic:

  • each iteration (step) is a step_result instance.
  • a series result aggregates all the step results produced before it.

SimulationStepResult dataclass

Result of a simulation step.

The comparison matrix is stored as a symmetric float32 2D array so that any distance metric (binary, Jaccard, etc.) can be used.

Source code in src/vote_simulation/models/results/step_result.py
@dataclass(slots=True)
class SimulationStepResult:
    """Result of a simulation step.

    The comparison matrix is stored as a symmetric ``float32`` 2D array so that
    any distance metric (binary, Jaccard, etc.) can be used.
    """

    data_source: str
    winners_by_rule: dict[str, list[str]] = field(default_factory=dict)
    distance_metric: Distance = field(default_factory=JaccardDistance)
    config: ResultConfig = field(default_factory=ResultConfig)
    # Derived caches kept in sync by add_method_result / _append_rule.
    _rule_order: list[str] = field(default_factory=list, init=False, repr=False)
    _rule_index: dict[str, int] = field(default_factory=dict, init=False, repr=False)
    _winner_sets_by_rule: dict[str, frozenset[str]] = field(default_factory=dict, init=False, repr=False)
    _distance_matrix: np.ndarray = field(
        default_factory=lambda: np.zeros((0, 0), dtype=np.float32),
        init=False,
        repr=False,
    )
    _metrics_by_rule: dict[str, WinnerMetrics] = field(default_factory=dict, init=False, repr=False)

    def __post_init__(self) -> None:
        """Normalize any pre-populated data and build the matrix once."""

        # Snapshot constructor-supplied winners, then replay them through
        # add_method_result so normalization and matrix growth happen once.
        initial_items = list(self.winners_by_rule.items())
        self.winners_by_rule = {}
        self._rule_order = []
        self._rule_index = {}
        self._winner_sets_by_rule = {}
        self._distance_matrix = np.zeros((0, 0), dtype=np.float32)
        self._metrics_by_rule = {}

        for rule_code, winners in initial_items:
            self.add_method_result(rule_code, winners)

    @property
    def rule_codes(self) -> list[str]:
        """Ordered rule codes matching the matrix axes."""

        return list(self._rule_order)

    @property
    def dist_matrix(self) -> np.ndarray:
        """Read-only 2D matrix of pairwise distances between rules."""

        # A non-writeable view protects callers from accidental mutation
        # without copying the underlying buffer.
        matrix = self._distance_matrix.view()
        matrix.flags.writeable = False
        return matrix

    @property
    def distance_matrix_frame(self) -> pd.DataFrame:
        """Distance matrix as a labeled DataFrame for display and analysis."""

        idx = pd.Index(self._rule_order)
        return pd.DataFrame(self._distance_matrix, index=idx, columns=idx, copy=False)

    def add_method_result(self, rule_code: str, winners: list[str]) -> None:
        """Add or update winners for one voting method in this step.

        Args:
            rule_code: str - Code of the voting method (e.g., "STV", "IRV", "Borda")
            winners: list[str] - List of winner labels for the given method. Can be multiple in case of ties.
        """

        normalized_code = rule_code.strip().upper()
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        normalized_winners = list(dict.fromkeys(winners))
        winner_set = frozenset(normalized_winners)

        self.winners_by_rule[normalized_code] = normalized_winners
        self._winner_sets_by_rule[normalized_code] = winner_set

        if normalized_code in self._rule_index:
            # Known rule: only its row/column needs recomputing.
            # NOTE(review): any metrics previously stored for this rule are
            # kept as-is even though the winners changed — confirm intended.
            self._refresh_rule_distances(normalized_code)
            return

        self._append_rule(normalized_code)

    def add_method_result_with_metrics(
        self,
        rule_code: str,
        winners: list[str],
        metrics: WinnerMetrics,
    ) -> None:
        """Add winners *and* pre-computed :class:`WinnerMetrics` for one rule.

        This is the enriched variant of :meth:`add_method_result` used by the
        simulation engine so that winner-quality metrics can be aggregated
        across iterations with no extra recomputation.

        Args:
            rule_code: Voting rule code (e.g. ``"COPE"``).
            winners: List of co-winner labels.
            metrics: Pre-computed :class:`WinnerMetrics` for this step.
        """
        self.add_method_result(rule_code, winners)
        normalized_code = rule_code.strip().upper()
        self._metrics_by_rule[normalized_code] = metrics

    @property
    def metrics_by_rule(self) -> dict[str, WinnerMetrics]:
        """Mapping from rule code to its :class:`WinnerMetrics` for this step.

        Only rules registered via :meth:`add_method_result_with_metrics` will
        appear here.  Rules loaded from disk (without metrics) return an empty
        dict for their key.
        """
        return dict(self._metrics_by_rule)

    @property
    def metrics_frame(self) -> pd.DataFrame:
        """Metrics for all rules in this step as a tidy DataFrame.

        Returns a DataFrame indexed by ``rule`` with one column per metric
        field (see :data:`~vote_simulation.models.rules.winner_metrics.METRIC_FIELDS`).
        Rules without metrics are omitted.
        """
        if not self._metrics_by_rule:
            return pd.DataFrame(columns=pd.Index(np.asarray(list(METRIC_FIELDS), dtype=object)))
        rows = [{"rule": code, **m.to_dict()} for code, m in self._metrics_by_rule.items()]
        return pd.DataFrame(rows).set_index("rule")

    # ------------------------------------------------------------------
    # Distance metrics
    # ------------------------------------------------------------------

    @property
    def mean_distance(self) -> float:
        """Mean of all off-diagonal pairwise distances (one vectorized numpy sum)."""
        n = len(self._rule_order)
        if n < 2:
            return 0.0
        # The full-matrix sum counts each symmetric pair twice, matching the
        # n*(n-1) off-diagonal cell count; the diagonal is 0 so no correction.
        total = float(np.sum(self._distance_matrix))
        return total / (n * (n - 1))

    @property
    def most_distant_rules(self) -> tuple[str, str, float]:
        """Pair of rules with the maximum distance.

        Returns:
            ``(rule_a, rule_b, distance)`` or ``("", "", 0.0)`` if fewer
            than two rules are present.
        """
        n = len(self._rule_order)
        if n < 2:
            return ("", "", 0.0)
        # argmax on the flattened matrix; divmod recovers the 2D indices.
        idx = int(np.argmax(self._distance_matrix))
        i, j = divmod(idx, n)
        return (self._rule_order[i], self._rule_order[j], float(self._distance_matrix[i, j]))

    # Persistence

    def save_to_file(self, file_path: str) -> None:
        """Save the step result to a parquet file.

        Configuration metadata is stored via pyarrow schema metadata so that
        the payload columns remain compact ("Rule" + "Winner" only).

        Args:
            file_path: Path to the output parquet file.
        """
        # Imported lazily so pyarrow is only required when persistence is used.
        import pyarrow as pa
        import pyarrow.parquet as pq

        # One (rule, winner) row per co-winner — long/tidy layout.
        rows = [(rule, winner) for rule, winners in self.winners_by_rule.items() for winner in winners]
        df = pd.DataFrame(rows, columns=pd.Index(["Rule", "Winner"]))
        table = pa.Table.from_pandas(df, preserve_index=False)

        # Inject config into schema metadata (prefixed to avoid collisions).
        existing_meta = table.schema.metadata or {}
        config_meta = {f"vote_sim:{k}".encode(): v.encode() for k, v in self.config.to_dict().items()}
        table = table.replace_schema_metadata({**existing_meta, **config_meta})

        os.makedirs(os.path.dirname(os.path.abspath(file_path)), exist_ok=True)
        pq.write_table(table, file_path)

    def load_from_file(self, file_path: str) -> None:
        """Load the step result from a parquet file.

        Reads configuration metadata from the parquet schema when available.

        Args:
            file_path: Path to the parquet file containing the step result.
        """
        import pyarrow.parquet as pq

        table = pq.read_table(file_path)
        meta = table.schema.metadata or {}
        # Only "vote_sim:"-prefixed keys belong to us (see save_to_file).
        config_dict = {
            k.decode().removeprefix("vote_sim:"): v.decode()
            for k, v in meta.items()
            if k.decode().startswith("vote_sim:")
        }
        self.config = ResultConfig.from_dict(config_dict) if config_dict else ResultConfig()

        df = table.to_pandas()
        # NOTE(review): groupby sorts keys by default, so rule order after a
        # load is alphabetical rather than the order in which it was saved.
        loaded_winners = df.groupby("Rule")["Winner"].apply(list).to_dict()

        # Reset all derived state, then replay winners through add_method_result.
        self.winners_by_rule = {}
        self._rule_order = []
        self._rule_index = {}
        self._winner_sets_by_rule = {}
        self._distance_matrix = np.zeros((0, 0), dtype=np.float32)
        self._metrics_by_rule = {}

        for rule_code, winners in loaded_winners.items():
            self.add_method_result(str(rule_code), winners)

    @staticmethod
    def delete_file(file_path: str) -> bool:
        """Delete a saved step result file.

        Returns:
            ``True`` if the file existed and was deleted, ``False`` otherwise.
        """
        try:
            os.remove(file_path)
            return True
        except FileNotFoundError:
            return False

    def format_distance_matrix(self) -> str:
        """Return a printable matrix with row and column labels."""

        if self._distance_matrix.size == 0:
            return "<empty matrix>"

        return self.distance_matrix_frame.to_string()

    def __str__(self) -> str:
        """String representation with a readable matrix block."""

        # One bullet per rule; placeholder when no rule has been added yet.
        winners_str = (
            "\n".join(f"- {rule}: {', '.join(winners)}" for rule, winners in self.winners_by_rule.items()) or "- <none>"
        )

        header = f"Data Source: {self.data_source}"
        if self.config:
            header += f"\nConfig: {self.config.description}"

        # Distance summary is only meaningful with at least two rules.
        n = len(self._rule_order)
        metrics = ""
        if n >= 2:
            r1, r2, d = self.most_distant_rules
            metrics = f"\nMean distance: {self.mean_distance:.4f}\nMost distant: {r1} <-> {r2} ({d:.4f})"

        mat_str = f"\nDistance Matrix:\n{indent(self.format_distance_matrix(), '  ')}" if self._rule_order else ""
        return f"{header}\nWinners by rule:\n{indent(winners_str, '  ')}{metrics}{mat_str}\n"

    def compute_distance_matrix(self) -> np.ndarray:
        """Rebuild the full distance matrix from winners and return it.

        Metrics registered via :meth:`add_method_result_with_metrics` are
        preserved: rebuilding distances does not invalidate per-rule metrics.
        (Previously they were silently discarded here.)
        """

        ordered_items = [(rule_code, self.winners_by_rule[rule_code]) for rule_code in self._rule_order]
        preserved_metrics = dict(self._metrics_by_rule)
        self.winners_by_rule = {}
        self._rule_order = []
        self._rule_index = {}
        self._winner_sets_by_rule = {}
        self._distance_matrix = np.zeros((0, 0), dtype=np.float32)
        # Restore metrics instead of wiping them (bug fix).
        self._metrics_by_rule = preserved_metrics

        for rule_code, winners in ordered_items:
            self.add_method_result(rule_code, winners)

        return self.dist_matrix

    def plot_distance_matrix(
        self,
        ax: Any | None = None,
        save_path: str | None = None,
        *,
        annotate: bool = True,
        show: bool = True,
    ) -> Any:
        """Plot the distance matrix as a heatmap.

        When *save_path* is given the plot is written to disk.  If *save_path*
        is a **directory**, the filename is derived automatically from the
        attached :attr:`config`.
        """

        if not self._rule_order:
            raise ValueError("Cannot plot an empty distance matrix.")

        # Prefer the config description as subtitle; fall back to data source.
        subtitle = self.config.description if self.config else ""
        title = "Rule distance matrix"
        if subtitle:
            title += f"\n{subtitle}"
        else:
            title += f"\n{self.data_source}"

        resolved_save: str | None = None
        if save_path is not None:
            resolved_save = self._resolve_save_path(save_path, "step_distance_matrix.png")

        return _plot_heatmap(
            self._distance_matrix,
            self._rule_order,
            title=title,
            ax=ax,
            vmin=0,
            vmax=1,
            annotate=annotate,
            annotation_fmt=".2f",
            colorbar_label="Distance",
            show=show,
            save_path=resolved_save,
        )

    # Path helpers

    def _resolve_save_path(self, path: str, default_filename: str) -> str:
        """If *path* is a directory, append a config-based filename."""
        if os.path.isdir(path) or path.endswith(os.sep):
            subdir = self.config.label if self.config else "unknown"
            out = os.path.join(path, subdir, default_filename)
        else:
            out = path
        os.makedirs(os.path.dirname(os.path.abspath(out)), exist_ok=True)
        return out

    def _append_rule(self, rule_code: str) -> None:
        """Append a new rule and update only the new row/column."""

        previous_size = len(self._rule_order)
        new_size = previous_size + 1
        # Grow by reallocating and copying the old block into the top-left.
        new_matrix = np.zeros((new_size, new_size), dtype=np.float32)

        if previous_size:
            new_matrix[:previous_size, :previous_size] = self._distance_matrix

        self._rule_order.append(rule_code)
        self._rule_index[rule_code] = previous_size
        self._distance_matrix = new_matrix
        self._refresh_rule_distances(rule_code)

    def _refresh_rule_distances(self, rule_code: str) -> None:
        """Refresh only one rule row/column in the symmetric matrix."""

        row_index = self._rule_index[rule_code]
        self._distance_matrix[row_index, row_index] = 0.0
        winner_set = self._winner_sets_by_rule[rule_code]
        metric = self.distance_metric

        for other_rule, other_index in self._rule_index.items():
            if other_rule == rule_code:
                continue

            # Write both cells to keep the matrix symmetric.
            distance = metric.compute(winner_set, self._winner_sets_by_rule[other_rule])
            self._distance_matrix[row_index, other_index] = distance
            self._distance_matrix[other_index, row_index] = distance

dist_matrix property

Read-only 2D matrix of pairwise distances between rules.

distance_matrix_frame property

Distance matrix as a labeled DataFrame for display and analysis.

mean_distance property

Mean of all off-diagonal pairwise distances (O(1) numpy ops).

metrics_by_rule property

Mapping from rule code to its :class:WinnerMetrics for this step.

Only rules registered via :meth:add_method_result_with_metrics will appear here. Rules loaded from disk (without metrics) return an empty dict for their key.

metrics_frame property

Metrics for all rules in this step as a tidy DataFrame.

Returns a DataFrame indexed by rule with one column per metric field (see :data:~vote_simulation.models.rules.winner_metrics.METRIC_FIELDS). Rules without metrics are omitted.

most_distant_rules property

Pair of rules with the maximum distance.

Returns:

Type Description
tuple[str, str, float]

(rule_a, rule_b, distance), or ("", "", 0.0) if fewer than two rules are present.

rule_codes property

Ordered rule codes matching the matrix axes.

__post_init__()

Normalize any pre-populated data and build the matrix once.

Source code in src/vote_simulation/models/results/step_result.py
def __post_init__(self) -> None:
    """Normalize any pre-populated data and build the matrix once."""

    # Snapshot constructor-supplied winners, then replay them through
    # add_method_result so normalization and matrix growth happen once.
    initial_items = list(self.winners_by_rule.items())
    # Reset every derived cache before replaying the registrations.
    self.winners_by_rule = {}
    self._rule_order = []
    self._rule_index = {}
    self._winner_sets_by_rule = {}
    self._distance_matrix = np.zeros((0, 0), dtype=np.float32)
    self._metrics_by_rule = {}

    for rule_code, winners in initial_items:
        self.add_method_result(rule_code, winners)

__str__()

String representation with a readable matrix block.

Source code in src/vote_simulation/models/results/step_result.py
def __str__(self) -> str:
    """String representation with a readable matrix block."""

    # One bullet per rule; placeholder when no rule has been added yet.
    winners_str = (
        "\n".join(f"- {rule}: {', '.join(winners)}" for rule, winners in self.winners_by_rule.items()) or "- <none>"
    )

    header = f"Data Source: {self.data_source}"
    if self.config:
        header += f"\nConfig: {self.config.description}"

    # Distance summary is only meaningful with at least two rules.
    n = len(self._rule_order)
    metrics = ""
    if n >= 2:
        r1, r2, d = self.most_distant_rules
        metrics = f"\nMean distance: {self.mean_distance:.4f}\nMost distant: {r1} <-> {r2} ({d:.4f})"

    mat_str = f"\nDistance Matrix:\n{indent(self.format_distance_matrix(), '  ')}" if self._rule_order else ""
    return f"{header}\nWinners by rule:\n{indent(winners_str, '  ')}{metrics}{mat_str}\n"

add_method_result(rule_code, winners)

Add or update winners for one voting method in this step.

Parameters:

Name Type Description Default
rule_code str

str - Code of the voting method (e.g., "STV", "IRV", "Borda")

required
winners list[str]

list[str] - List of winner labels for the given method. Can be multiple in case of ties.

required
Source code in src/vote_simulation/models/results/step_result.py
def add_method_result(self, rule_code: str, winners: list[str]) -> None:
    """Register (or refresh) the winners produced by one voting method.

    Args:
        rule_code: str - Code of the voting method (e.g., "STV", "IRV", "Borda")
        winners: list[str] - List of winner labels for the given method. Can be multiple in case of ties.
    """

    code = rule_code.strip().upper()
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    unique_winners = list(dict.fromkeys(winners))

    self.winners_by_rule[code] = unique_winners
    self._winner_sets_by_rule[code] = frozenset(unique_winners)

    if code not in self._rule_index:
        # First time this rule is seen: grow the matrix by one row/column.
        self._append_rule(code)
    else:
        # Known rule: only its row/column needs recomputing.
        self._refresh_rule_distances(code)

add_method_result_with_metrics(rule_code, winners, metrics)

Add winners and pre-computed :class:WinnerMetrics for one rule.

This is the enriched variant of :meth:add_method_result used by the simulation engine so that winner-quality metrics can be aggregated across iterations with no extra recomputation.

Parameters:

Name Type Description Default
rule_code str

Voting rule code (e.g. "COPE").

required
winners list[str]

List of co-winner labels.

required
metrics WinnerMetrics

Pre-computed :class:WinnerMetrics for this step.

required
Source code in src/vote_simulation/models/results/step_result.py
def add_method_result_with_metrics(
    self,
    rule_code: str,
    winners: list[str],
    metrics: WinnerMetrics,
) -> None:
    """Add winners together with their pre-computed :class:`WinnerMetrics`.

    Enriched variant of :meth:`add_method_result` used by the simulation
    engine: storing the metrics alongside the winners lets winner-quality
    figures be aggregated across iterations without recomputation.

    Args:
        rule_code: Voting rule code (e.g. ``"COPE"``).
        winners: List of co-winner labels.
        metrics: Pre-computed :class:`WinnerMetrics` for this step.
    """
    self.add_method_result(rule_code, winners)
    # Key the metrics under the same normalized code used by winners_by_rule.
    self._metrics_by_rule[rule_code.strip().upper()] = metrics

compute_distance_matrix()

Rebuild the full distance matrix from winners and return it.

Source code in src/vote_simulation/models/results/step_result.py
def compute_distance_matrix(self) -> np.ndarray:
    """Rebuild the full distance matrix from winners and return it.

    Bug fix: this method used to clear ``_metrics_by_rule`` as a side
    effect, silently dropping metrics registered via
    ``add_method_result_with_metrics``.  Rebuilding distances does not
    invalidate per-rule metrics, so they are now preserved.
    """

    ordered_items = [(rule_code, self.winners_by_rule[rule_code]) for rule_code in self._rule_order]
    preserved_metrics = dict(self._metrics_by_rule)
    self.winners_by_rule = {}
    self._rule_order = []
    self._rule_index = {}
    self._winner_sets_by_rule = {}
    self._distance_matrix = np.zeros((0, 0), dtype=np.float32)
    # Restore metrics instead of wiping them.
    self._metrics_by_rule = preserved_metrics

    for rule_code, winners in ordered_items:
        self.add_method_result(rule_code, winners)

    return self.dist_matrix

delete_file(file_path) staticmethod

Delete a saved step result file.

Returns:

Type Description
bool

True if the file existed and was deleted, False otherwise.

Source code in src/vote_simulation/models/results/step_result.py
@staticmethod
def delete_file(file_path: str) -> bool:
    """Delete a saved step result file.

    Returns:
        ``True`` if the file existed and was deleted, ``False`` otherwise.
    """
    try:
        os.remove(file_path)
        return True
    except FileNotFoundError:
        return False

format_distance_matrix()

Return a printable matrix with row and column labels.

Source code in src/vote_simulation/models/results/step_result.py
def format_distance_matrix(self) -> str:
    """Render the distance matrix as labeled text, or a placeholder if empty."""

    if self._distance_matrix.size:
        return self.distance_matrix_frame.to_string()
    return "<empty matrix>"

load_from_file(file_path)

Load the step result from a parquet file.

Reads configuration metadata from the parquet schema when available.

Parameters:

Name Type Description Default
file_path str

Path to the parquet file containing the step result.

required
Source code in src/vote_simulation/models/results/step_result.py
def load_from_file(self, file_path: str) -> None:
    """Load the step result from a parquet file.

    Reads configuration metadata from the parquet schema when available.

    Args:
        file_path: Path to the parquet file containing the step result.
    """
    # Imported lazily so pyarrow is only required when persistence is used.
    import pyarrow.parquet as pq

    table = pq.read_table(file_path)
    meta = table.schema.metadata or {}
    # Only "vote_sim:"-prefixed keys belong to us (see save_to_file).
    config_dict = {
        k.decode().removeprefix("vote_sim:"): v.decode()
        for k, v in meta.items()
        if k.decode().startswith("vote_sim:")
    }
    self.config = ResultConfig.from_dict(config_dict) if config_dict else ResultConfig()

    df = table.to_pandas()
    # NOTE(review): DataFrame.groupby sorts keys by default, so the rule
    # order after a load is alphabetical rather than the saved insertion
    # order — confirm this is acceptable for matrix-axis comparisons.
    loaded_winners = df.groupby("Rule")["Winner"].apply(list).to_dict()

    # Reset all derived state, then replay winners through add_method_result.
    self.winners_by_rule = {}
    self._rule_order = []
    self._rule_index = {}
    self._winner_sets_by_rule = {}
    self._distance_matrix = np.zeros((0, 0), dtype=np.float32)
    self._metrics_by_rule = {}

    for rule_code, winners in loaded_winners.items():
        self.add_method_result(str(rule_code), winners)

plot_distance_matrix(ax=None, save_path=None, *, annotate=True, show=True)

Plot the distance matrix as a heatmap.

When save_path is given the plot is written to disk. If save_path is a directory, the filename is derived automatically from the attached :attr:config.

Source code in src/vote_simulation/models/results/step_result.py
def plot_distance_matrix(
    self,
    ax: Any | None = None,
    save_path: str | None = None,
    *,
    annotate: bool = True,
    show: bool = True,
) -> Any:
    """Plot the distance matrix as a heatmap.

    When *save_path* is given the plot is written to disk.  If *save_path*
    is a **directory**, the filename is derived automatically from the
    attached :attr:`config`.

    Raises:
        ValueError: If no rules have been added yet (empty matrix).
    """

    if not self._rule_order:
        raise ValueError("Cannot plot an empty distance matrix.")

    # Prefer the config description as subtitle; fall back to data source.
    subtitle = self.config.description if self.config else ""
    title = "Rule distance matrix"
    if subtitle:
        title += f"\n{subtitle}"
    else:
        title += f"\n{self.data_source}"

    resolved_save: str | None = None
    if save_path is not None:
        resolved_save = self._resolve_save_path(save_path, "step_distance_matrix.png")

    # Distances are bounded [0, 1], so pin the color scale accordingly.
    return _plot_heatmap(
        self._distance_matrix,
        self._rule_order,
        title=title,
        ax=ax,
        vmin=0,
        vmax=1,
        annotate=annotate,
        annotation_fmt=".2f",
        colorbar_label="Distance",
        show=show,
        save_path=resolved_save,
    )

save_to_file(file_path)

Save the step result to a parquet file.

Configuration metadata is stored via pyarrow schema metadata so that the payload columns remain compact ("Rule" + "Winner" only).

Parameters:

Name Type Description Default
file_path str

Path to the output parquet file.

required
Source code in src/vote_simulation/models/results/step_result.py
def save_to_file(self, file_path: str) -> None:
    """Save the step result to a parquet file.

    Configuration metadata is stored via pyarrow schema metadata so that
    the payload columns remain compact ("Rule" + "Winner" only).

    Args:
        file_path: Path to the output parquet file.
    """
    # Imported lazily so pyarrow is only required when persistence is used.
    import pyarrow as pa
    import pyarrow.parquet as pq

    # One (rule, winner) row per co-winner — long/tidy layout.
    rows = [(rule, winner) for rule, winners in self.winners_by_rule.items() for winner in winners]
    df = pd.DataFrame(rows, columns=pd.Index(["Rule", "Winner"]))
    table = pa.Table.from_pandas(df, preserve_index=False)

    # Inject config into schema metadata (prefixed to avoid collisions).
    existing_meta = table.schema.metadata or {}
    config_meta = {f"vote_sim:{k}".encode(): v.encode() for k, v in self.config.to_dict().items()}
    table = table.replace_schema_metadata({**existing_meta, **config_meta})

    # Ensure the parent directory exists before writing.
    os.makedirs(os.path.dirname(os.path.abspath(file_path)), exist_ok=True)
    pq.write_table(table, file_path)

Data models for simulation outputs across multiple iterations.

SimulationSeriesResult dataclass

Aggregation of simulation steps.

Maintains a running float64 sum of per-step distance matrices so that the mean can be computed with a single division at any time, regardless of how many iterations have been added.

The aggregated :attr:config is automatically updated on each :meth:add_step call and reflects the union of all per-step configs.

Source code in src/vote_simulation/models/results/series_result.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
@dataclass(slots=True)
class SimulationSeriesResult:
    """Aggregation of simulation steps.

    Maintains a running ``float64`` sum of per-step distance matrices so
    that the mean can be computed with a single division at any time, regardless
    of how many iterations have been added.

    The aggregated :attr:`config` is automatically updated on each
    :meth:`add_step` call and reflects the union of all per-step configs.
    """

    steps: list[SimulationStepResult] = field(default_factory=list)
    # Accumulator fields
    _rule_order: list[str] = field(default_factory=list, init=False, repr=False)
    _rule_index: dict[str, int] = field(default_factory=dict, init=False, repr=False)
    _matrix_sum: np.ndarray = field(
        default_factory=lambda: np.zeros((0, 0), dtype=np.float64),
        init=False,
        repr=False,
    )
    _iteration_count: int = field(default=0, init=False, repr=False)
    _config: ResultConfig = field(default_factory=ResultConfig, init=False, repr=False)
    # Per-rule metric accumulators (sum and sum-of-squares for online mean/std)
    _metrics_sum: dict[str, np.ndarray] = field(default_factory=dict, init=False, repr=False)
    _metrics_sum_sq: dict[str, np.ndarray] = field(default_factory=dict, init=False, repr=False)
    _metrics_count: dict[str, int] = field(default_factory=dict, init=False, repr=False)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def add_step(self, step_result: SimulationStepResult) -> None:
        """Add one step result to the series and accumulate its distance matrix."""

        self.steps.append(step_result)
        self._accumulate_step(step_result)
        if step_result.config:
            self._config = self._config.merge(step_result.config)

    def add_rules_to_steps(self, new_rule_codes: list[str]) -> None:
        """Apply additional rules to all existing steps and update the series.

        Does not re-run existing rules, only computes distances for new rules.
        Rebuilds the accumulated distance matrix with all rules (old + new).

        Args:
            new_rule_codes: List of additional rule codes to apply to each step.

        Raises:
            ImportError: If ``vote_simulation.models.rules`` is not available.
        """
        if not new_rule_codes:
            return

        from vote_simulation.models.data_generation.data_instance import DataInstance
        from vote_simulation.models.rules import get_rule_builder

        # Apply new rules to each step
        for step in self.steps:
            if not step.data_source:
                print("Warning: Step without data_source, skipping rule application")
                continue

            try:
                di = DataInstance(step.data_source)
                profile = di.profile

                for code in new_rule_codes:
                    normalized = code.strip().upper()
                    if normalized in step.winners_by_rule:
                        continue  # Skip if rule already exists

                    try:
                        builder = get_rule_builder(normalized)
                        rule = builder(profile, None)
                        winners = rule.cowinners_
                        try:
                            metrics = rule.compute_metrics()
                            step.add_method_result_with_metrics(normalized, winners, metrics)
                        except Exception:
                            # Metrics are best-effort; fall back to winners only.
                            step.add_method_result(normalized, winners)
                    except Exception as e:
                        print(f"Error applying rule '{normalized}' to step: {e}")
                        step.add_method_result(normalized, [f"ERROR: {e}"])
            except Exception as e:
                print(f"Error loading data source '{step.data_source}': {e}")

        # Rebuild the aggregated distance matrix and metric accumulators
        self._rule_order = []
        self._rule_index = {}
        self._matrix_sum = np.zeros((0, 0), dtype=np.float64)
        self._iteration_count = 0
        self._metrics_sum = {}
        self._metrics_sum_sq = {}
        self._metrics_count = {}

        for step in self.steps:
            self._accumulate_step(step)

        # Update config to include new rules (new_rule_codes is guaranteed
        # non-empty here by the early return above).
        new_rules = frozenset(c.strip().upper() for c in new_rule_codes)
        self._config = ResultConfig(
            gen_models=self._config.gen_models,
            n_voters=self._config.n_voters,
            n_candidates=self._config.n_candidates,
            rules_codes=self._config.rules_codes | new_rules,
            n_iterations=self._config.n_iterations,
        )

    @property
    def config(self) -> ResultConfig:
        """Aggregated configuration across all added steps."""
        return self._config

    @config.setter
    def config(self, value: ResultConfig) -> None:
        self._config = value

    @property
    def step_count(self) -> int:
        """Number of recorded steps (equals the iteration count)."""
        return self._iteration_count

    @property
    def mean_distance_matrix(self) -> np.ndarray:
        """Mean pairwise distance matrix over all accumulated steps.

        Returns a ``float32`` array of shape ``(n_rules, n_rules)``.
        Values are in ``[0, 100]``: 0 means every step agreed, 100 means they never did.
        """
        if self._iteration_count == 0:
            return np.zeros((0, 0), dtype=np.float32)
        return (100.0 * self._matrix_sum / self._iteration_count).astype(np.float32)

    @property
    def mean_distance_matrix_frame(self) -> pd.DataFrame:
        """Mean distance matrix as a labeled DataFrame."""
        matrix = self.mean_distance_matrix
        idx = pd.Index(self._rule_order)
        return pd.DataFrame(matrix, index=idx, columns=idx)

    @property
    def metrics_summary_frame(self) -> pd.DataFrame:
        """Per-rule winner-metric statistics aggregated across all iterations.

        Returns a :class:`~pandas.DataFrame` indexed by ``rule`` with two
        columns per metric field — one for the mean and one for the standard
        deviation across all accumulated steps:

        ``<field>_mean``, ``<field>_std``  for each field in
        :data:`~vote_simulation.models.rules.winner_metrics.METRIC_FIELDS`.

        Rules for which no metrics were recorded (e.g. loaded from a parquet
        file without metrics) are omitted from the frame.

        An empty DataFrame is returned when no metrics have been accumulated.
        """
        if not self._metrics_sum:
            col_names = [f"{f}_{s}" for f in METRIC_FIELDS for s in ("mean", "std")]
            return pd.DataFrame(columns=pd.Index(np.asarray(col_names, dtype=object)))

        rows = []
        for rule in self._rule_order:
            if rule not in self._metrics_sum:
                continue
            count = self._metrics_count[rule]
            mean_arr = self._metrics_sum[rule] / count
            mean_sq_arr = self._metrics_sum_sq[rule] / count
            # population std — safe against floating precision below zero
            std_arr = np.sqrt(np.maximum(0.0, mean_sq_arr - mean_arr**2))
            row: dict[str, object] = {"rule": rule}
            for i, field_name in enumerate(METRIC_FIELDS):
                row[f"{field_name}_mean"] = float(mean_arr[i])
                row[f"{field_name}_std"] = float(std_arr[i])
            rows.append(row)

        if not rows:
            col_names = [f"{f}_{s}" for f in METRIC_FIELDS for s in ("mean", "std")]
            return pd.DataFrame(columns=pd.Index(np.asarray(col_names, dtype=object)))
        return pd.DataFrame(rows).set_index("rule")

    # ------------------------------------------------------------------
    # Distance metrics
    # ------------------------------------------------------------------

    @property
    def mean_distance(self) -> float:
        """Scalar mean of all off-diagonal cells in the mean distance matrix.

        Value in ``[0, 100]``.
        """
        n = len(self._rule_order)
        if n < 2 or self._iteration_count == 0:
            return 0.0
        mean_mat = self.mean_distance_matrix
        total = float(np.sum(mean_mat))  # diag is 0
        return total / (n * (n - 1))

    @property
    def most_distant_rules(self) -> tuple[str, str, float]:
        """Pair of rules with the maximum mean distance.

        Returns:
            ``(rule_a, rule_b, distance)`` or ``("", "", 0.0)`` when fewer
            than two rules are present.
        """
        n = len(self._rule_order)
        if n < 2 or self._iteration_count == 0:
            return ("", "", 0.0)
        mean_mat = self.mean_distance_matrix
        idx = int(np.argmax(mean_mat))
        i, j = divmod(idx, n)
        return (self._rule_order[i], self._rule_order[j], float(mean_mat[i, j]))

    # ------------------------------------------------------------------
    # Plotting
    # ------------------------------------------------------------------

    def _build_title(self, prefix: str) -> str:
        """Build a plot title from *prefix*, config description and iteration count."""
        desc = self._config.description if self._config else ""
        iters = f"{self._iteration_count} iterations"
        if desc:
            return f"{prefix}\n{desc} \u00b7 {iters}"
        return f"{prefix}\n({iters})"

    def _resolve_save_path(self, base_path: str, default_filename: str) -> str:
        """Derive a full save path, inserting a config‑based sub-directory."""
        if os.path.isdir(base_path) or base_path.endswith(os.sep):
            subdir = self._config.label if self._config else "unknown"
            out = os.path.join(base_path, subdir, default_filename)
        else:
            out = base_path
        os.makedirs(os.path.dirname(os.path.abspath(out)), exist_ok=True)
        return out

    def plot_mean_distance_matrix(
        self,
        ax: Any | None = None,
        folder_save_path: str | None = None,
        *,
        annotate: bool = True,
        show: bool = True,
    ) -> Any:
        """Plot the mean distance matrix as a heatmap.

        Cell values show the percentage of iterations where two rules disagreed.
        When multi‑config (several models / voter counts / candidate counts),
        the title mentions all of them.
        """
        if self._iteration_count == 0:
            raise ValueError("Cannot plot: no steps have been added yet.")

        save_path: str | None = None
        if folder_save_path is not None:
            save_path = self._resolve_save_path(
                folder_save_path,
                f"{self._iteration_count}_mean_distance_matrix.png",
            )

        result = _plot_heatmap(
            self.mean_distance_matrix,
            self._rule_order,
            self._build_title("Mean rule distance matrix"),
            ax,
            annotate=annotate,
            annotation_fmt=".1f",
            colorbar_label="Mean distance (%)",
            show=show,
            save_path=save_path,
        )

        # Auto-save series parquet alongside the plot
        if save_path is not None:
            parquet_path = os.path.join(
                os.path.dirname(save_path),
                f"{self._iteration_count}_series.parquet",
            )
            self.save_to_file(parquet_path)

        return result

    def map_rules_2d(self) -> MdsProjection:
        """Project rules into 2D using Multi-Dimensional Scaling (MDS).

        Uses the mean distance matrix as a precomputed dissimilarity matrix
        so that pairwise distances in the 2D plane approximate the original
        rule-to-rule distances.

        Returns:
            :class:`MdsProjection` with 2D coordinates and normalized stress.

        Raises:
            ValueError: If no steps have been added yet.
        """
        if self._iteration_count == 0:
            raise ValueError("Cannot project: no steps have been added yet.")

        from sklearn.manifold import MDS

        distance_matrix = self.mean_distance_matrix
        mds = MDS(
            n_components=2, metric="precomputed", random_state=42, normalized_stress="auto", n_init=4, init="random"
        )
        coords = mds.fit_transform(distance_matrix)
        return MdsProjection(coords=coords, stress=float(mds.stress_))

    def map_rules_3d(self) -> MdsProjection:
        """Project rules into 3D using Multi-Dimensional Scaling (MDS).

        Uses the mean distance matrix as a precomputed dissimilarity matrix
        so that pairwise distances in the 3D space approximate the original
        rule-to-rule distances.

        Returns:
            :class:`MdsProjection` with 3D coordinates and normalized stress.

        Raises:
            ValueError: If no steps have been added yet.
        """
        if self._iteration_count == 0:
            raise ValueError("Cannot project: no steps have been added yet.")

        from sklearn.manifold import MDS

        distance_matrix = self.mean_distance_matrix
        mds = MDS(
            n_components=3, metric="precomputed", random_state=42, normalized_stress="auto", n_init=4, init="random"
        )
        coords = mds.fit_transform(distance_matrix)
        return MdsProjection(coords=coords, stress=float(mds.stress_))

    def plot_rules_3d(
        self,
        ax: Any | None = None,
        *,
        show: bool = True,
        save_path: str | None = None,
    ) -> Any:
        """Plot rules as labeled points in a 3D MDS projection.

        Distances between points approximate mean pairwise rule distances.
        The normalized MDS stress is shown on the plot.

        Args:
            ax: Optional matplotlib Axes to draw on. A new figure is created
                when *None*.
            show: Whether to call ``plt.show()`` at the end.
            save_path: Optional path (file or directory) to save the plot."""

        import matplotlib.pyplot as plt
        from matplotlib.figure import Figure as MplFigure
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

        projection = self.map_rules_3d()
        coords, stress = projection.coords, projection.stress
        labels = self._rule_order

        if ax is None:
            fig = plt.figure(figsize=(8, 6), constrained_layout=True)
            ax = fig.add_subplot(111, projection="3d")
            fig.patch.set_facecolor("white")

        # scatter points
        ax.scatter(
            coords[:, 0],
            coords[:, 1],
            coords[:, 2],
            s=60,
            edgecolors="white",
            linewidths=0.6,
            zorder=3,
        )

        # label each point with its rule short code
        for i, label in enumerate(labels):
            ax.text(
                coords[i, 0],
                coords[i, 1],
                coords[i, 2],
                label,
                fontsize=8,
                fontweight="medium",
                color="#222222",
            )

        title = self._build_title("Rule proximity map (3D)")
        title += f"\nMDS stress: {stress:.4f}"
        ax.set_title(title, fontsize=11, pad=10)
        ax.set_xlabel("MDS 1", fontsize=9, color="#555555")
        ax.set_ylabel("MDS 2", fontsize=9, color="#555555")
        ax.set_zlabel("MDS 3", fontsize=9, color="#555555")
        ax.tick_params(labelsize=8, colors="#888888")
        # Axes3D has no 2D spines (its ``_gen_axes_spines`` returns an empty
        # mapping), so indexing ``ax.spines["top"]`` raises ``KeyError``, and
        # ``set_aspect("equal")`` is unsupported on 3D axes before mpl 3.6.
        # ``set_box_aspect`` provides the equivalent equal scaling instead.
        ax.set_box_aspect((1, 1, 1))
        ax.grid(True, linestyle="--", linewidth=0.4, alpha=0.5)

        if save_path is not None:
            resolved = self._resolve_save_path(
                save_path,
                f"{self._iteration_count}_rules_3d.png",
            )
            fig = ax.get_figure()
            if isinstance(fig, MplFigure):
                fig.savefig(resolved)
            # Auto-save series parquet alongside the plot
            parquet_path = os.path.join(
                os.path.dirname(resolved),
                f"{self._iteration_count}_series.parquet",
            )
            self.save_to_file(parquet_path)

        if show:
            plt.show()

        return ax

    def plot_rules_2d(
        self,
        ax: Any | None = None,
        *,
        show: bool = True,
        save_path: str | None = None,
    ) -> Any:
        """Plot rules as labeled points in a 2D MDS projection.

        Distances between points approximate mean pairwise rule distances.
        The normalized MDS stress is shown on the plot.

        Args:
            ax: Optional matplotlib Axes to draw on. A new figure is created
                when *None*.
            show: Whether to call ``plt.show()`` at the end.
            save_path: Optional path (file or directory) to save the plot.

        Returns:
            The matplotlib Axes used for plotting."""

        import matplotlib.pyplot as plt
        from matplotlib.figure import Figure as MplFigure

        projection = self.map_rules_2d()
        coords, stress = projection.coords, projection.stress
        labels = self._rule_order

        if ax is None:
            fig, ax = plt.subplots(figsize=(7, 7), constrained_layout=True)
            fig.patch.set_facecolor("white")

        # scatter points
        ax.scatter(
            coords[:, 0],
            coords[:, 1],
            s=60,
            edgecolors="white",
            linewidths=0.6,
            zorder=3,
        )

        # label each point with its rule short code
        for i, label in enumerate(labels):
            ax.annotate(
                label,
                (coords[i, 0], coords[i, 1]),
                textcoords="offset points",
                xytext=(6, 6),
                fontsize=8,
                fontweight="medium",
                color="#222222",
            )

        title = self._build_title("Rule proximity map")
        title += f"\nMDS stress: {stress:.4f}"
        ax.set_title(title, fontsize=11, pad=10)
        ax.set_xlabel("MDS 1", fontsize=9, color="#555555")
        ax.set_ylabel("MDS 2", fontsize=9, color="#555555")
        ax.tick_params(labelsize=8, colors="#888888")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.spines["left"].set_color("#CCCCCC")
        ax.spines["bottom"].set_color("#CCCCCC")
        ax.set_aspect("equal")
        ax.grid(True, linestyle="--", linewidth=0.4, alpha=0.5)

        if save_path is not None:
            resolved = self._resolve_save_path(
                save_path,
                f"{self._iteration_count}_rules_2d.png",
            )
            fig = ax.get_figure()
            if isinstance(fig, MplFigure):
                fig.savefig(resolved)
            # Auto-save series parquet alongside the plot
            parquet_path = os.path.join(
                os.path.dirname(resolved),
                f"{self._iteration_count}_series.parquet",
            )
            self.save_to_file(parquet_path)

        if show:
            plt.show()

        return ax

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def save_to_file(self, file_path: str) -> None:
        """Save the series result to a parquet file.

        Per-step config is stored in columns ``GenModel``, ``NVoters``,
        ``NCandidates`` so that each row is self-describing.  The aggregated
        series config is stored in schema metadata.

        Args:
            file_path: Path to the output parquet file.
        """
        import pyarrow as pa
        import pyarrow.parquet as pq

        rows: list[dict[str, str | int]] = []
        for step in self.steps:
            # Flatten per-step config to single values (or empty string / 0)
            gm = ",".join(sorted(step.config.gen_models)) if step.config.gen_models else ""
            nv = ",".join(str(v) for v in sorted(step.config.n_voters)) if step.config.n_voters else ""
            nc = ",".join(str(c) for c in sorted(step.config.n_candidates)) if step.config.n_candidates else ""
            for rule, winners in step.winners_by_rule.items():
                for winner in winners:
                    rows.append(
                        {
                            "DataSource": step.data_source,
                            "GenModel": gm,
                            "NVoters": nv,
                            "NCandidates": nc,
                            "Rule": rule,
                            "Winner": winner,
                        }
                    )

        # Explicit columns keep the schema stable even when there are no steps,
        # so ``load_from_file`` can always group by "DataSource" on round-trip.
        columns = ["DataSource", "GenModel", "NVoters", "NCandidates", "Rule", "Winner"]
        df = pd.DataFrame(rows, columns=columns)
        table = pa.Table.from_pandas(df, preserve_index=False)

        # Store aggregated config in schema metadata
        existing_meta = table.schema.metadata or {}
        config_meta = {f"vote_sim:{k}".encode(): v.encode() for k, v in self._config.to_dict().items()}
        table = table.replace_schema_metadata({**existing_meta, **config_meta})

        os.makedirs(os.path.dirname(os.path.abspath(file_path)), exist_ok=True)
        pq.write_table(table, file_path)

    def load_from_file(self, file_path: str) -> None:
        """Load the series result from a parquet file and rebuild the accumulator.

        Reads per-step config from row columns and aggregated config from
        schema metadata.  Backwards-compatible with files lacking config columns.

        Args:
            file_path: Path to the parquet file containing the series result.
        """
        import pyarrow.parquet as pq

        table = pq.read_table(file_path)

        # --- Aggregated config from metadata ---
        meta = table.schema.metadata or {}
        config_dict = {
            k.decode().removeprefix("vote_sim:"): v.decode()
            for k, v in meta.items()
            if k.decode().startswith("vote_sim:")
        }

        df = table.to_pandas()

        self.steps = []
        self._rule_order = []
        self._rule_index = {}
        self._matrix_sum = np.zeros((0, 0), dtype=np.float64)
        self._iteration_count = 0
        self._config = ResultConfig()
        self._metrics_sum = {}
        self._metrics_sum_sq = {}
        self._metrics_count = {}

        has_config_cols = {"GenModel", "NVoters", "NCandidates"}.issubset(df.columns)

        for data_source, group in df.groupby("DataSource", sort=False):
            step_config = ResultConfig()
            if has_config_cols:
                row0 = group.iloc[0]
                gm = str(row0["GenModel"]) if row0["GenModel"] else ""
                nv_str = str(row0["NVoters"]) if row0["NVoters"] else ""
                nc_str = str(row0["NCandidates"]) if row0["NCandidates"] else ""
                step_config = ResultConfig(
                    gen_models=frozenset(m for m in gm.split(",") if m),
                    n_voters=frozenset(int(v) for v in nv_str.split(",") if v),
                    n_candidates=frozenset(int(c) for c in nc_str.split(",") if c),
                )

            step_result = SimulationStepResult(
                data_source=str(data_source),
                config=step_config,
            )
            for rule, winners in group.groupby("Rule", sort=False)["Winner"]:
                step_result.add_method_result(str(rule), winners.tolist())
            self.add_step(step_result)

        # If schema metadata had config, prefer it (more complete for aggregates)
        if config_dict:
            self._config = ResultConfig.from_dict(config_dict)

    @staticmethod
    def delete_file(file_path: str) -> bool:
        """Delete a saved series result file.

        Returns:
            ``True`` if the file existed and was deleted, ``False`` otherwise.
        """
        try:
            os.remove(file_path)
            return True
        except FileNotFoundError:
            return False

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _accumulate_step(self, step: SimulationStepResult) -> None:
        """Add one step's distance matrix and winner metrics to the running sums.

        Rules not yet part of the canonical series order are appended to it and
        the accumulator matrix is grown with zero-padding, so a step may carry
        rules that earlier steps did not (missing cells contribute 0).
        """

        step_rules = step._rule_order
        if not step_rules:
            return

        # Register unseen rules and zero-pad the running sum to the new size.
        # This also covers the very first step, when the order is still empty.
        # (Previously a step carrying a rule absent from the first step raised
        # KeyError in the permutation lookup below.)
        unseen = [code for code in step_rules if code not in self._rule_index]
        if unseen:
            for code in unseen:
                self._rule_index[code] = len(self._rule_order)
                self._rule_order.append(code)
            n = len(self._rule_order)
            grown = np.zeros((n, n), dtype=np.float64)
            prev = self._matrix_sum
            grown[: prev.shape[0], : prev.shape[1]] = prev
            self._matrix_sum = grown

        # Build permutation mapping step column index → series column index.
        # This handles the (rare) case where a step's rules are in a different order.
        perm = np.array([self._rule_index[r] for r in step_rules], dtype=np.intp)
        self._matrix_sum[np.ix_(perm, perm)] += step._distance_matrix
        self._iteration_count += 1

        # Accumulate winner metrics (only for rules that carry metrics)
        n_fields = len(METRIC_FIELDS)
        for rule_code, wm in step._metrics_by_rule.items():
            arr = metrics_to_array(wm)
            if rule_code not in self._metrics_sum:
                self._metrics_sum[rule_code] = np.zeros(n_fields, dtype=np.float64)
                self._metrics_sum_sq[rule_code] = np.zeros(n_fields, dtype=np.float64)
                self._metrics_count[rule_code] = 0
            self._metrics_sum[rule_code] += arr
            self._metrics_sum_sq[rule_code] += arr * arr
            self._metrics_count[rule_code] += 1

config property writable

Aggregated configuration across all added steps.

mean_distance property

Scalar mean of all off-diagonal cells in the mean distance matrix.

Value in [0, 100].

mean_distance_matrix property

Mean pairwise distance matrix over all accumulated steps.

Returns a float32 array of shape (n_rules, n_rules). Values are in [0, 100]: 0 means every step agreed, 100 means they never did.

mean_distance_matrix_frame property

Mean distance matrix as a labeled DataFrame.

metrics_summary_frame property

Per-rule winner-metric statistics aggregated across all iterations.

Returns a :class:~pandas.DataFrame indexed by rule with two columns per metric field — one for the mean and one for the standard deviation across all accumulated steps:

<field>_mean, <field>_std for each field in :data:~vote_simulation.models.rules.winner_metrics.METRIC_FIELDS.

Rules for which no metrics were recorded (e.g. loaded from a parquet file without metrics) are omitted from the frame.

An empty DataFrame is returned when no metrics have been accumulated.

most_distant_rules property

Pair of rules with the maximum mean distance.

Returns:

Type Description
tuple[str, str, float]

(rule_a, rule_b, distance), or ("", "", 0.0) when fewer than two rules are present.

step_count property

Number of recorded steps (equals the iteration count).

add_rules_to_steps(new_rule_codes)

Apply additional rules to all existing steps and update the series.

Does not re-run existing rules, only computes distances for new rules. Rebuilds the accumulated distance matrix with all rules (old + new).

Parameters:

Name Type Description Default
new_rule_codes list[str]

List of additional rule codes to apply to each step.

required

Raises:

Type Description
ImportError

If vote_simulation.models.rules is not available.

Source code in src/vote_simulation/models/results/series_result.py
def add_rules_to_steps(self, new_rule_codes: list[str]) -> None:
    """Apply additional rules to all existing steps and update the series.

    Does not re-run existing rules, only computes distances for new rules.
    Rebuilds the accumulated distance matrix with all rules (old + new).

    Args:
        new_rule_codes: List of additional rule codes to apply to each step.

    Raises:
        ImportError: If ``vote_simulation.models.rules`` is not available.
    """
    if not new_rule_codes:
        return

    from vote_simulation.models.data_generation.data_instance import DataInstance
    from vote_simulation.models.rules import get_rule_builder

    # Apply new rules to each step
    for step in self.steps:
        if not step.data_source:
            print("Warning: Step without data_source, skipping rule application")
            continue

        try:
            di = DataInstance(step.data_source)
            profile = di.profile

            for code in new_rule_codes:
                normalized = code.strip().upper()
                if normalized in step.winners_by_rule:
                    continue  # Skip if rule already exists

                try:
                    builder = get_rule_builder(normalized)
                    rule = builder(profile, None)
                    winners = rule.cowinners_
                    try:
                        metrics = rule.compute_metrics()
                        step.add_method_result_with_metrics(normalized, winners, metrics)
                    except Exception:
                        step.add_method_result(normalized, winners)
                except Exception as e:
                    print(f"Error applying rule '{normalized}' to step: {e}")
                    step.add_method_result(normalized, [f"ERROR: {e}"])
        except Exception as e:
            print(f"Error loading data source '{step.data_source}': {e}")

    # Rebuild the aggregated distance matrix and metric accumulators
    self._rule_order = []
    self._rule_index = {}
    self._matrix_sum = np.zeros((0, 0), dtype=np.float64)
    self._iteration_count = 0
    self._metrics_sum = {}
    self._metrics_sum_sq = {}
    self._metrics_count = {}

    for step in self.steps:
        self._accumulate_step(step)

    # Update config to include new rules
    if new_rule_codes:
        new_rules = frozenset(c.strip().upper() for c in new_rule_codes)
        self._config = ResultConfig(
            gen_models=self._config.gen_models,
            n_voters=self._config.n_voters,
            n_candidates=self._config.n_candidates,
            rules_codes=self._config.rules_codes | new_rules,
            n_iterations=self._config.n_iterations,
        )

add_step(step_result)

Add one step result to the series and accumulate its distance matrix.

Source code in src/vote_simulation/models/results/series_result.py
def add_step(self, step_result: SimulationStepResult) -> None:
    """Add one step result to the series and accumulate its distance matrix."""

    self.steps.append(step_result)
    self._accumulate_step(step_result)
    if step_result.config:
        self._config = self._config.merge(step_result.config)

delete_file(file_path) staticmethod

Delete a saved series result file.

Returns:

Type Description
bool

True if the file existed and was deleted, False otherwise.

Source code in src/vote_simulation/models/results/series_result.py
@staticmethod
def delete_file(file_path: str) -> bool:
    """Delete a saved series result file.

    Returns:
        ``True`` if the file existed and was deleted, ``False`` otherwise.
    """
    try:
        os.remove(file_path)
        return True
    except FileNotFoundError:
        return False

load_from_file(file_path)

Load the series result from a parquet file and rebuild the accumulator.

Reads per-step config from row columns and aggregated config from schema metadata. Backwards-compatible with files lacking config columns.

Parameters:

Name Type Description Default
file_path str

Path to the parquet file containing the series result.

required
Source code in src/vote_simulation/models/results/series_result.py
def load_from_file(self, file_path: str) -> None:
    """Load the series result from a parquet file and rebuild the accumulator.

    Reads per-step config from row columns and aggregated config from
    schema metadata.  Backwards-compatible with files lacking config columns.

    Args:
        file_path: Path to the parquet file containing the series result.
    """
    import pyarrow.parquet as pq

    table = pq.read_table(file_path)

    # --- Aggregated config from metadata ---
    # Parquet schema metadata keys/values are bytes; only entries written
    # by save_to_file (prefixed with "vote_sim:") are collected here.
    meta = table.schema.metadata or {}
    config_dict = {
        k.decode().removeprefix("vote_sim:"): v.decode()
        for k, v in meta.items()
        if k.decode().startswith("vote_sim:")
    }

    df = table.to_pandas()

    # Reset all accumulator state before replaying the rows as steps.
    self.steps = []
    self._rule_order = []
    self._rule_index = {}
    self._matrix_sum = np.zeros((0, 0), dtype=np.float64)
    self._iteration_count = 0
    self._config = ResultConfig()
    self._metrics_sum = {}
    self._metrics_sum_sq = {}
    self._metrics_count = {}

    # Older files were written without the per-step config columns.
    has_config_cols = {"GenModel", "NVoters", "NCandidates"}.issubset(df.columns)

    # One step per DataSource group; sort=False preserves file row order.
    for data_source, group in df.groupby("DataSource", sort=False):
        step_config = ResultConfig()
        if has_config_cols:
            # Config columns repeat the same value on every row of a step,
            # so the first row is representative.
            # NOTE(review): the truthiness tests below assume missing values
            # are empty strings rather than NaN (NaN is truthy) — confirm
            # the writer never emits nulls for these columns.
            row0 = group.iloc[0]
            gm = str(row0["GenModel"]) if row0["GenModel"] else ""
            nv_str = str(row0["NVoters"]) if row0["NVoters"] else ""
            nc_str = str(row0["NCandidates"]) if row0["NCandidates"] else ""
            step_config = ResultConfig(
                gen_models=frozenset(m for m in gm.split(",") if m),
                n_voters=frozenset(int(v) for v in nv_str.split(",") if v),
                n_candidates=frozenset(int(c) for c in nc_str.split(",") if c),
            )

        step_result = SimulationStepResult(
            data_source=str(data_source),
            config=step_config,
        )
        # Winners are re-added rule by rule so the step rebuilds its own
        # distance matrix exactly as during the original simulation.
        for rule, winners in group.groupby("Rule", sort=False)["Winner"]:
            step_result.add_method_result(str(rule), winners.tolist())
        self.add_step(step_result)

    # If schema metadata had config, prefer it (more complete for aggregates)
    if config_dict:
        self._config = ResultConfig.from_dict(config_dict)

map_rules_2d()

Project rules into 2D using Multi-Dimensional Scaling (MDS).

Uses the mean distance matrix as a precomputed dissimilarity matrix so that pairwise distances in the 2D plane approximate the original rule-to-rule distances.

Returns:

Type Description
MdsProjection

``MdsProjection`` with 2D coordinates and normalized stress.

Raises:

Type Description
ValueError

If no steps have been added yet.

Source code in src/vote_simulation/models/results/series_result.py
def map_rules_2d(self) -> MdsProjection:
    """Project rules into 2D using Multi-Dimensional Scaling (MDS).

    Uses the mean distance matrix as a precomputed dissimilarity matrix
    so that pairwise distances in the 2D plane approximate the original
    rule-to-rule distances.

    Returns:
        :class:`MdsProjection` with 2D coordinates and normalized stress.

    Raises:
        ValueError: If no steps have been added yet.
    """
    if self._iteration_count == 0:
        raise ValueError("Cannot project: no steps have been added yet.")

    from sklearn.manifold import MDS

    distance_matrix = self.mean_distance_matrix
    # BUG FIX: the precomputed matrix must be declared via the
    # ``dissimilarity`` parameter; ``metric`` is a boolean selecting metric
    # vs. non-metric MDS.  Passing "precomputed" to ``metric`` either raises
    # (sklearn >= 1.2 parameter validation) or silently treats the matrix
    # rows as raw feature vectors.  ``init`` is also not an ``MDS.__init__``
    # argument (it belongs to ``fit``/``fit_transform``); random init is the
    # default, so it is dropped here.
    mds = MDS(
        n_components=2,
        dissimilarity="precomputed",
        random_state=42,
        normalized_stress="auto",
        n_init=4,
    )
    coords = mds.fit_transform(distance_matrix)
    return MdsProjection(coords=coords, stress=float(mds.stress_))

map_rules_3d()

Project rules into 3D using Multi-Dimensional Scaling (MDS).

Uses the mean distance matrix as a precomputed dissimilarity matrix so that pairwise distances in the 3D space approximate the original rule-to-rule distances.

Returns:

Type Description
MdsProjection

``MdsProjection`` with 3D coordinates and normalized stress.

Raises:

Type Description
ValueError

If no steps have been added yet.

Source code in src/vote_simulation/models/results/series_result.py
def map_rules_3d(self) -> MdsProjection:
    """Project rules into 3D using Multi-Dimensional Scaling (MDS).

    Uses the mean distance matrix as a precomputed dissimilarity matrix
    so that pairwise distances in the 3D space approximate the original
    rule-to-rule distances.

    Returns:
        :class:`MdsProjection` with 3D coordinates and normalized stress.

    Raises:
        ValueError: If no steps have been added yet.
    """
    if self._iteration_count == 0:
        raise ValueError("Cannot project: no steps have been added yet.")

    from sklearn.manifold import MDS

    distance_matrix = self.mean_distance_matrix
    # BUG FIX: the precomputed matrix must be declared via the
    # ``dissimilarity`` parameter; ``metric`` is a boolean selecting metric
    # vs. non-metric MDS.  Passing "precomputed" to ``metric`` either raises
    # (sklearn >= 1.2 parameter validation) or silently treats the matrix
    # rows as raw feature vectors.  ``init`` is also not an ``MDS.__init__``
    # argument (it belongs to ``fit``/``fit_transform``); random init is the
    # default, so it is dropped here.
    mds = MDS(
        n_components=3,
        dissimilarity="precomputed",
        random_state=42,
        normalized_stress="auto",
        n_init=4,
    )
    coords = mds.fit_transform(distance_matrix)
    return MdsProjection(coords=coords, stress=float(mds.stress_))

plot_mean_distance_matrix(ax=None, folder_save_path=None, *, annotate=True, show=True)

Plot the mean distance matrix as a heatmap.

Cell values show the percentage of iterations where two rules disagreed. When multi‑config (several models / voter counts / candidate counts), the title mentions all of them.

Source code in src/vote_simulation/models/results/series_result.py
def plot_mean_distance_matrix(
    self,
    ax: Any | None = None,
    folder_save_path: str | None = None,
    *,
    annotate: bool = True,
    show: bool = True,
) -> Any:
    """Render the mean distance matrix as a heatmap.

    Each cell shows the percentage of iterations on which the two rules
    disagreed.  With a multi-config series (several models / voter counts /
    candidate counts) the title lists all of them.
    """
    if self._iteration_count == 0:
        raise ValueError("Cannot plot: no steps have been added yet.")

    png_path: str | None = None
    if folder_save_path is not None:
        png_path = self._resolve_save_path(
            folder_save_path,
            f"{self._iteration_count}_mean_distance_matrix.png",
        )

    heatmap = _plot_heatmap(
        self.mean_distance_matrix,
        self._rule_order,
        self._build_title("Mean rule distance matrix"),
        ax,
        annotate=annotate,
        annotation_fmt=".1f",
        colorbar_label="Mean distance (%)",
        show=show,
        save_path=png_path,
    )

    # Persist the series data next to the saved image so the plot can be
    # reproduced later from the raw results.
    if png_path is not None:
        self.save_to_file(
            os.path.join(
                os.path.dirname(png_path),
                f"{self._iteration_count}_series.parquet",
            )
        )

    return heatmap

plot_rules_2d(ax=None, *, show=True, save_path=None)

Plot rules as labeled points in a 2D MDS projection.

Distances between points approximate mean pairwise rule distances. The normalized MDS stress is shown on the plot.

Parameters:

Name Type Description Default
ax Any | None

Optional matplotlib Axes to draw on. A new figure is created when None.

None
show bool

Whether to call plt.show() at the end.

True
save_path str | None

Optional path (file or directory) to save the plot.

None

Returns:

Type Description
Any

The matplotlib Axes used for plotting.

Source code in src/vote_simulation/models/results/series_result.py
def plot_rules_2d(
    self,
    ax: Any | None = None,
    *,
    show: bool = True,
    save_path: str | None = None,
) -> Any:
    """Plot rules as labeled points in a 2D MDS projection.

    Distances between points approximate mean pairwise rule distances.
    The normalized MDS stress is shown on the plot.

    Args:
        ax: Optional matplotlib Axes to draw on. A new figure is created
            when *None*.
        show: Whether to call ``plt.show()`` at the end.
        save_path: Optional path (file or directory) to save the plot.

    Returns:
        The matplotlib Axes used for plotting.
    """
    import matplotlib.pyplot as plt
    from matplotlib.figure import Figure as MplFigure

    proj = self.map_rules_2d()
    xy = proj.coords

    if ax is None:
        fig, ax = plt.subplots(figsize=(7, 7), constrained_layout=True)
        fig.patch.set_facecolor("white")

    # One point per rule.
    ax.scatter(
        xy[:, 0],
        xy[:, 1],
        s=60,
        edgecolors="white",
        linewidths=0.6,
        zorder=3,
    )

    # Tag each point with the rule's short code, offset so it doesn't
    # overlap the marker.
    for idx, code in enumerate(self._rule_order):
        ax.annotate(
            code,
            (xy[idx, 0], xy[idx, 1]),
            textcoords="offset points",
            xytext=(6, 6),
            fontsize=8,
            fontweight="medium",
            color="#222222",
        )

    ax.set_title(
        self._build_title("Rule proximity map") + f"\nMDS stress: {proj.stress:.4f}",
        fontsize=11,
        pad=10,
    )
    ax.set_xlabel("MDS 1", fontsize=9, color="#555555")
    ax.set_ylabel("MDS 2", fontsize=9, color="#555555")
    ax.tick_params(labelsize=8, colors="#888888")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_color("#CCCCCC")
    ax.spines["bottom"].set_color("#CCCCCC")
    # Equal aspect so on-screen distances are comparable in both directions.
    ax.set_aspect("equal")
    ax.grid(True, linestyle="--", linewidth=0.4, alpha=0.5)

    if save_path is not None:
        resolved = self._resolve_save_path(
            save_path,
            f"{self._iteration_count}_rules_2d.png",
        )
        fig = ax.get_figure()
        if isinstance(fig, MplFigure):
            fig.savefig(resolved)
        # Keep a parquet copy of the series alongside the image.
        self.save_to_file(
            os.path.join(
                os.path.dirname(resolved),
                f"{self._iteration_count}_series.parquet",
            )
        )

    if show:
        plt.show()

    return ax

plot_rules_3d(ax=None, *, show=True, save_path=None)

Plot rules as labeled points in a 3D MDS projection.

Distances between points approximate mean pairwise rule distances. The normalized MDS stress is shown on the plot.

Parameters:

Name Type Description Default
ax Any | None

Optional matplotlib Axes to draw on. A new figure is created when None.

None
show bool

Whether to call plt.show() at the end.

True
save_path str | None

Optional path (file or directory) to save the plot.

None
Source code in src/vote_simulation/models/results/series_result.py
def plot_rules_3d(
    self,
    ax: Any | None = None,
    *,
    show: bool = True,
    save_path: str | None = None,
) -> Any:
    """Plot rules as labeled points in a 3D MDS projection.

    Distances between points approximate mean pairwise rule distances.
    The normalized MDS stress is shown on the plot.

    Args:
        ax: Optional matplotlib Axes to draw on. A new figure is created
            when *None*.
        show: Whether to call ``plt.show()`` at the end.
        save_path: Optional path (file or directory) to save the plot.

    Returns:
        The matplotlib Axes used for plotting.
    """
    import matplotlib.pyplot as plt
    from matplotlib.figure import Figure as MplFigure
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    proj = self.map_rules_3d()
    xyz = proj.coords

    if ax is None:
        fig = plt.figure(figsize=(8, 6), constrained_layout=True)
        ax = fig.add_subplot(111, projection="3d")
        fig.patch.set_facecolor("white")

    # One point per rule.
    ax.scatter(
        xyz[:, 0],
        xyz[:, 1],
        xyz[:, 2],
        s=60,
        edgecolors="white",
        linewidths=0.6,
        zorder=3,
    )

    # Tag each point with the rule's short code.
    for idx, code in enumerate(self._rule_order):
        ax.text(
            xyz[idx, 0],
            xyz[idx, 1],
            xyz[idx, 2],
            code,
            fontsize=8,
            fontweight="medium",
            color="#222222",
        )

    ax.set_title(
        self._build_title("Rule proximity map (3D)") + f"\nMDS stress: {proj.stress:.4f}",
        fontsize=11,
        pad=10,
    )
    ax.set_xlabel("MDS 1", fontsize=9, color="#555555")
    ax.set_ylabel("MDS 2", fontsize=9, color="#555555")
    ax.set_zlabel("MDS 3", fontsize=9, color="#555555")
    ax.tick_params(labelsize=8, colors="#888888")
    # NOTE(review): spine styling appears copied from the 2D plot; on a 3D
    # axes these spines are presumably not rendered — confirm before removing.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_color("#CCCCCC")
    ax.spines["bottom"].set_color("#CCCCCC")
    # NOTE(review): 'equal' aspect on 3D axes requires matplotlib >= 3.6 —
    # older versions raise NotImplementedError here; confirm the pinned
    # matplotlib version.
    ax.set_aspect("equal")
    ax.grid(True, linestyle="--", linewidth=0.4, alpha=0.5)

    if save_path is not None:
        resolved = self._resolve_save_path(
            save_path,
            f"{self._iteration_count}_rules_3d.png",
        )
        fig = ax.get_figure()
        if isinstance(fig, MplFigure):
            fig.savefig(resolved)
        # Keep a parquet copy of the series alongside the image.
        self.save_to_file(
            os.path.join(
                os.path.dirname(resolved),
                f"{self._iteration_count}_series.parquet",
            )
        )

    if show:
        plt.show()

    return ax

save_to_file(file_path)

Save the series result to a parquet file.

Per-step config is stored in columns GenModel, NVoters, NCandidates so that each row is self-describing. The aggregated series config is stored in schema metadata.

Parameters:

Name Type Description Default
file_path str

Path to the output parquet file.

required
Source code in src/vote_simulation/models/results/series_result.py
def save_to_file(self, file_path: str) -> None:
    """Save the series result to a parquet file.

    Per-step config is stored in columns ``GenModel``, ``NVoters``,
    ``NCandidates`` so that each row is self-describing.  The aggregated
    series config is stored in schema metadata.

    Args:
        file_path: Path to the output parquet file.
    """
    import pyarrow as pa
    import pyarrow.parquet as pq

    # One output row per (step, rule, winner) triple; the per-step config
    # is flattened to comma-joined strings so every row is self-describing.
    records: list[dict[str, str | int]] = []
    for step in self.steps:
        cfg = step.config
        gen_models = ",".join(sorted(cfg.gen_models)) if cfg.gen_models else ""
        voters = ",".join(str(v) for v in sorted(cfg.n_voters)) if cfg.n_voters else ""
        candidates = ",".join(str(c) for c in sorted(cfg.n_candidates)) if cfg.n_candidates else ""
        for rule, winners in step.winners_by_rule.items():
            records.extend(
                {
                    "DataSource": step.data_source,
                    "GenModel": gen_models,
                    "NVoters": voters,
                    "NCandidates": candidates,
                    "Rule": rule,
                    "Winner": winner,
                }
                for winner in winners
            )

    table = pa.Table.from_pandas(pd.DataFrame(records), preserve_index=False)

    # Merge the aggregated config into any metadata pandas already attached.
    merged_meta = dict(table.schema.metadata or {})
    for key, value in self._config.to_dict().items():
        merged_meta[f"vote_sim:{key}".encode()] = value.encode()
    table = table.replace_schema_metadata(merged_meta)

    os.makedirs(os.path.dirname(os.path.abspath(file_path)), exist_ok=True)
    pq.write_table(table, file_path)