Skip to content

API Reference

Simulation package

Simulation package public API.

SimulationConfig dataclass

Validated simulation configuration.

Source code in src/vote_simulation/simulation/configuration.py
@dataclass(slots=True)
class SimulationConfig:
    """Validated simulation configuration.

    Instances are normally produced by ``load_simulation_config``, which
    normalizes rule/model codes to upper case and resolves relative paths.
    """

    rule_codes: list[str]  # normalized (upper-case) voting-rule short codes
    candidates: list[int] | None = None  # candidate counts to sweep, or None
    voters: list[int] | None = None  # voter counts to sweep, or None
    iterations: int = 1  # number of profiles per (model, n_v, n_c) combination
    seed: int = 0  # base RNG seed (combined with the iteration index downstream)
    generative_models: list[str] = field(default_factory=list)  # e.g. ["UNI", "IC"]
    output_base_path: str = "data"  # root folder for gen/ and sim_result/
    input_folder_path: str | None = None  # folder with pre-existing vote files for batch mode
    generator_params: dict[str, dict[str, object]] = field(default_factory=dict)  # per-model extra params

generate_data(config_path, show_progress=True)

Generate (or retrieve cached) profiles for every combination defined in the config.

Returns:

Type Description
list[str]

List of file paths of generated/cached parquet files.

Source code in src/vote_simulation/simulation/simulation.py
def generate_data(config_path: str, show_progress: bool = True) -> list[str]:
    """Generate (or retrieve cached) profiles for every combination defined in the config.

    Args:
        config_path: Path to the TOML configuration file.
        show_progress: Whether to display a progress bar.

    Returns:
        List of file paths of generated/cached parquet files.
    """
    config = load_simulation_config(config_path)
    _validate_generation_config(config)

    voter_counts = config.voters or []
    candidate_counts = config.candidates or []
    n_jobs = len(config.generative_models) * len(voter_counts) * len(candidate_counts) * config.iterations

    file_paths: list[str] = []
    with tqdm(total=n_jobs, desc="Generating profiles", disable=not show_progress) as progress:
        for model_code in config.generative_models:
            model_extra = config.generator_params.get(model_code, {})
            for voters in voter_counts:
                for cands in candidate_counts:
                    for iteration in range(config.iterations):
                        instance = obtain_data_instance(
                            model=model_code,
                            n_v=voters,
                            n_c=cands,
                            iteration=iteration,
                            seed=config.seed,
                            base_path=config.output_base_path,
                            extra_params=model_extra,
                        )
                        file_paths.append(instance.file_path)
                        progress.update(1)
    print(f"Generated / loaded {len(file_paths)} profiles.")
    return file_paths

load_simulation_config(config_path=DEFAULT_CONFIG_PATH)

Load and validate a simulation config file.

Source code in src/vote_simulation/simulation/configuration.py
def _is_strict_int(value: object) -> bool:
    """Return True only for genuine integers.

    TOML booleans parse to Python ``bool``, which is a subclass of ``int``,
    so without this guard settings such as ``iterations = true`` or
    ``seed = false`` would silently pass the integer validation below.
    """
    return isinstance(value, int) and not isinstance(value, bool)


def load_simulation_config(config_path: str | Path = DEFAULT_CONFIG_PATH) -> SimulationConfig:
    """Load and validate a simulation config file.

    Args:
        config_path: Path to a TOML file containing a ``[simulation]`` section.

    Returns:
        A fully validated :class:`SimulationConfig` with codes upper-cased and
        relative paths resolved against the config file's directory.

    Raises:
        ValueError: If the file is missing or any setting is invalid.
    """
    path = Path(config_path)

    # Check path
    if not path.is_file():
        raise ValueError(f"Configuration file not found: {path}")

    with path.open("rb") as handle:
        payload = tomllib.load(handle)

    simulation = payload.get("simulation")
    # simulation section must exist and be a dict
    if not isinstance(simulation, dict):
        raise ValueError("Invalid configuration: missing [simulation] section")

    # Check validity of rules codes
    rule_codes = simulation.get("rule_codes")
    if not isinstance(rule_codes, list) or not rule_codes:
        raise ValueError("Invalid configuration: simulation.rule_codes must be a non-empty list")

    # Normalize codes; drop blank entries so e.g. ["", " "] is rejected below.
    normalized_rule_codes = [str(code).strip().upper() for code in rule_codes if str(code).strip()]
    if not normalized_rule_codes:
        raise ValueError("Invalid configuration: simulation.rule_codes cannot be empty")

    # Check validity of candidates, voters, iterations
    candidates = simulation.get("candidates")
    if candidates is not None:
        if not isinstance(candidates, list) or not candidates:
            raise ValueError("Invalid configuration: simulation.candidates must be a non-empty list")
        if not all(_is_strict_int(c) and c > 0 for c in candidates):
            raise ValueError("Invalid configuration: all simulation.candidates must be positive integers")

    voters = simulation.get("voters")
    if voters is not None:
        if not isinstance(voters, list) or not voters:
            raise ValueError("Invalid configuration: simulation.voters must be a non-empty list")
        if not all(_is_strict_int(v) and v > 0 for v in voters):
            raise ValueError("Invalid configuration: all simulation.voters must be positive integers")

    iterations = simulation.get("iterations", 1)
    if not _is_strict_int(iterations) or iterations <= 0:
        raise ValueError("Invalid configuration: simulation.iterations must be a positive integer")

    # Check validity of seed
    seed = simulation.get("seed", 0)
    if not _is_strict_int(seed) or seed < 0:
        raise ValueError("Invalid configuration: simulation.seed must be a non-negative integer")

    # --- Generative models ---
    raw_gen_models = simulation.get("generative_models")
    generative_models: list[str] = []
    if raw_gen_models is not None:
        if not isinstance(raw_gen_models, list):
            raise ValueError("Invalid configuration: simulation.generative_models must be a list")
        generative_models = [str(m).strip().upper() for m in raw_gen_models if str(m).strip()]

    # --- Output base path ---
    output_base_path = simulation.get("output_base_path", "data")
    if not isinstance(output_base_path, str) or not output_base_path.strip():
        output_base_path = "data"
    # Relative paths are resolved against the config file's directory.
    if not Path(output_base_path).is_absolute():
        output_base_path = str((path.parent / output_base_path).resolve())

    # --- Per-model generator params (optional TOML sub-tables) ---
    generator_params: dict[str, dict[str, object]] = {}
    gen_params_section = payload.get("generator_params")
    if isinstance(gen_params_section, dict):
        for model_key, params in gen_params_section.items():
            if isinstance(params, dict):
                generator_params[model_key.strip().upper()] = dict(params)

    # --- Input folder path (optional, for batch mode) ---
    raw_input_folder = simulation.get("input_folder_path")
    input_folder_path: str | None = None
    if raw_input_folder is not None:
        input_folder_path = str(raw_input_folder).strip() or None
        if input_folder_path and not Path(input_folder_path).is_absolute():
            input_folder_path = str((path.parent / input_folder_path).resolve())

    return SimulationConfig(
        rule_codes=normalized_rule_codes,
        candidates=candidates,
        voters=voters,
        iterations=iterations,
        seed=seed,
        generative_models=generative_models,
        output_base_path=output_base_path,
        input_folder_path=input_folder_path,
        generator_params=generator_params,
    )

obtain_data_instance(model, n_v, n_c, *, iteration=0, seed=161, base_path='data', extra_params=None)

Load a cached profile or generate + persist it.

If the parquet file already exists the profile is loaded from disk; otherwise it is generated and saved for future reuse.

Parameters:

Name Type Description Default
model str

Generative model code (e.g. "UNI", "IC").

required
n_v int

Number of voters.

required
n_c int

Number of candidates.

required
iteration int

Iteration index.

0
seed int

Random seed for generation (will be combined with iteration index for variability).

161
base_path str

Root folder for generated data (see config.output_base_path).

'data'
extra_params dict[str, object] | None

Optional dict of extra parameters to pass to the generator (per-model).

None
Source code in src/vote_simulation/simulation/simulation.py
def obtain_data_instance(
    model: str,
    n_v: int,
    n_c: int,
    *,
    iteration: int = 0,
    seed: int = 161,
    base_path: str = "data",
    extra_params: dict[str, object] | None = None,
) -> DataInstance:
    """Return a cached profile from disk, or generate and persist a new one.

    If the parquet file already exists the profile is loaded from disk;
    otherwise it is generated and saved for future reuse.

    Args:
        model: Generative model code (e.g. "UNI", "IC").
        n_v: Number of voters.
        n_c: Number of candidates.
        iteration: Iteration index.
        seed: Random seed for generation (combined with the iteration index for variability).
        base_path: Root folder for generated data (see config.output_base_path).
        extra_params: Optional dict of extra per-model parameters for the generator.
    """
    cache_file = _gen_dir(base_path, model, n_v, n_c) / _iter_filename(iteration)

    # Cache hit: reuse the previously persisted parquet file.
    if cache_file.is_file():
        return DataInstance(str(cache_file))

    # Cache miss: generate in memory, then persist for future runs.
    params = dict(extra_params) if extra_params else {}
    instance = DataInstance.from_generator(
        model_code=model,
        n_v=n_v,
        n_c=n_c,
        seed=seed,
        iteration=iteration,
        **params,
    )
    instance.save_parquet(str(cache_file))
    instance.file_path = str(cache_file)
    return instance

sim(file_path, rule_code)

Execute a single rule on a single file.

Source code in src/vote_simulation/simulation/simulation.py
def sim(file_path: str, rule_code: str) -> None:
    """Execute a single rule on a single file and print the co-winners.

    Args:
        file_path: Path to a CSV/Parquet profile file loadable by ``DataInstance``.
        rule_code: Rule short code (case-insensitive; normalized internally).
    """
    data_instance = DataInstance(file_path)
    profile = data_instance.profile
    rule_code = rule_code.strip().upper()

    try:
        rule_builder = get_rule_builder(rule_code)
        rule: RuleResult = rule_builder(profile, None)
        # Sanity check: the builder should return an svvamp-like rule object
        # exposing at least one of the usual winner attributes.
        if not hasattr(rule, "w_") and not hasattr(rule, "winner_indices_") and not hasattr(rule, "winner_"):
            raise TypeError(f"Unexpected rule type for '{rule_code}': {type(rule)!r}")
        # rule_code is already upper-cased above, so no further normalization needed.
        print(f"{rule_code} winner: {rule.cowinners_}")
    except Exception as e:
        # Best-effort CLI helper: report the failure instead of crashing.
        print(f"Error building rule '{rule_code}': {e}")

simulation_from_config(config_path, show_progress=True, *, compute_metrics=True)

Full pipeline: generate profiles, apply rules, save results.

For every (model, n_voters, n_candidates, iteration) combination: 1. Obtain (generate or load) the profile. 2. Run all requested rules. 3. Save the result in sim_result/<MODEL>_v<NV>_c<NC>/iter_XXXX.parquet.

Parameters:

Name Type Description Default
config_path str

Path to the TOML configuration file (see docs for the template).

required
show_progress bool

Whether to display a progress bar.

True
compute_metrics bool

Whether to compute :class:~vote_simulation.models.rules.WinnerMetrics for each rule. Defaults to True.

True
Source code in src/vote_simulation/simulation/simulation.py
def simulation_from_config(config_path: str, show_progress: bool = True, *, compute_metrics: bool = True) -> None:
    """Full pipeline: generate profiles, apply rules, save results.

    For every ``(model, n_voters, n_candidates, iteration)`` combination:
    1. Obtain (generate or load) the profile.
    2. Run all requested rules.
    3. Save the result in ``sim_result/<MODEL>_v<NV>_c<NC>/iter_XXXX.parquet``.

    Args:
        config_path: Path to the TOML configuration file (see docs for the template).
        show_progress: Whether to display a progress bar.
        compute_metrics: Whether to compute :class:`~vote_simulation.models.rules.WinnerMetrics`
            for each rule.  Defaults to ``True``.
    """
    config = load_simulation_config(config_path)
    _validate_generation_config(config)

    voter_counts = config.voters or []
    candidate_counts = config.candidates or []
    total = len(config.generative_models) * len(voter_counts) * len(candidate_counts) * config.iterations
    print(f"Running full simulation: {total} profile(s) × {len(config.rule_codes)} rule(s)")

    with tqdm(total=total, desc="Simulating", disable=not show_progress) as progress:
        for model_code in config.generative_models:
            model_extra = config.generator_params.get(model_code, {})
            for voters in voter_counts:
                for cands in candidate_counts:
                    # One ResultConfig is shared across all iterations of this combination.
                    combo_cfg = ResultConfig.single(
                        gen_model=model_code,
                        n_voters=voters,
                        n_candidates=cands,
                        rules_codes=config.rule_codes,
                    )
                    for iteration in range(config.iterations):
                        # 1) Obtain (generate or load) the profile.
                        instance = obtain_data_instance(
                            model=model_code,
                            n_v=voters,
                            n_c=cands,
                            iteration=iteration,
                            seed=config.seed,
                            base_path=config.output_base_path,
                            extra_params=model_extra,
                        )

                        # 2) Apply every requested rule to the profile.
                        step = run_rules_on_instance(instance, config.rule_codes, config=combo_cfg, compute_metrics=compute_metrics)

                        # 3) Persist the per-iteration result file.
                        target = _sim_dir(config.output_base_path, model_code, voters, cands) / _iter_filename(iteration)
                        target.parent.mkdir(parents=True, exist_ok=True)
                        step.save_to_file(str(target))

                        progress.update(1)

    print("Full simulation completed.")

Simulation result models

Rule registry

Rule index for mapping short codes to svvamp rule factories.

RuleResult

Bases: Protocol

Protocol for rule results that have been post-processed to include co-winners.

Source code in src/vote_simulation/models/rules/registry.py
class RuleResult(Protocol):
    """Protocol for rule results that have been post-processed to include co-winners.

    Structural typing: any object that exposes ``cowinners_`` and
    ``compute_metrics`` satisfies this protocol, no inheritance needed.
    """

    # Labels of the tied winning candidates, attached by the registry's post-processing.
    cowinners_: list[str]

    def compute_metrics(self) -> Any: ...

get_all_rules_codes()

Return a list of all registered rule codes.

Source code in src/vote_simulation/models/rules/registry.py
def get_all_rules_codes() -> list[str]:
    """Return every registered rule code, sorted alphabetically."""
    codes = list(_RULE_BUILDERS)
    codes.sort()
    return codes

get_rule_builder(code)

Return rule builder from code

Parameters:

Name Type Description Default
code str

rule encoding (detailed index in documentation)

required

Raises:

Type Description
ValueError

if wrong code

Returns:

Name Type Description
RuleBuilder RuleBuilder

rule applied

Source code in src/vote_simulation/models/rules/registry.py
def get_rule_builder(code: str) -> RuleBuilder:
    """Look up the rule builder registered under a short code.

    Args:
        code (str): rule encoding (detailed index in documentation)

    Raises:
        ValueError: if wrong code

    Returns:
        RuleBuilder: rule applied
    """
    key = code.strip().upper()
    try:
        builder = _RULE_BUILDERS[key]
    except KeyError as error:
        known = ", ".join(sorted(_RULE_BUILDERS))
        raise ValueError(f"Unknown rule code: '{code}'. Available codes: {known}") from error
    return builder

make_rule_builder(rule_factory)

Create a public RuleBuilder from a Profile -> rule result factory.

This helper is intended for external users who want to register custom rules while reusing the registry's profile conversion and co-winner post-processing.

Parameters:

Name Type Description Default
rule_factory Callable[[Profile], Any]

Callable that takes a svvamp.Profile and returns a rule result.

required

Returns:

Type Description
RuleBuilder

A RuleBuilder that can be registered in the registry.

Source code in src/vote_simulation/models/rules/registry.py
def make_rule_builder(rule_factory: Callable[[Profile], Any]) -> RuleBuilder:
    """Create a public `RuleBuilder` from a `Profile -> rule result` factory.

    This helper is intended for external users who want to register custom rules
    while reusing the registry's profile conversion and co-winner post-processing.
    It simply delegates to the private ``_build_with_rule`` wrapper.

    Args:
        rule_factory: Callable that takes a `svvamp.Profile` and returns a rule result.

    Returns:
        A `RuleBuilder` that can be registered in the registry (see ``register_rule``).
    """
    return _build_with_rule(rule_factory)

register_rule(code, builder)

Register a rule builder under a short code.

Source code in src/vote_simulation/models/rules/registry.py
def register_rule(code: str, builder: RuleBuilder) -> None:
    """Register *builder* under the given short code (normalized to upper case).

    Registering the same code twice overwrites the previous builder.
    """
    _RULE_BUILDERS[code.strip().upper()] = builder

Generator registry

Generator registry mapping short codes to svvamp GeneratorProfile factories.

Usage examples:

> from vote_simulation.models.data_generation.generator_registry import get_generator_builder
> builder = get_generator_builder("UNI")
> profile = builder(n_v=100, n_c=5, seed=42)

GeneratorBuilder = Callable[..., Profile] module-attribute

Signature: (n_v, n_c, seed=0, **extra) -> svvamp.Profile

get_generator_builder(code)

Return the generator builder for the given code.

Raises:

Type Description
ValueError

If code is not registered.

Source code in src/vote_simulation/models/data_generation/generator_registry.py
def get_generator_builder(code: str) -> GeneratorBuilder:
    """Return the generator builder for the given code.

    Raises:
        ValueError: If code is not registered.
    """
    key = code.strip().upper()
    try:
        builder = _GENERATOR_BUILDERS[key]
    except KeyError as missing:
        known = ", ".join(sorted(_GENERATOR_BUILDERS))
        raise ValueError(f"Unknown generator code: '{code}'. Available: {known}") from missing
    return builder

list_generator_codes()

Return sorted list of all registered generator codes.

Source code in src/vote_simulation/models/data_generation/generator_registry.py
def list_generator_codes() -> list[str]:
    """Return every registered generator code, in sorted order."""
    return sorted(_GENERATOR_BUILDERS.keys())

make_generator_builder(generator_factory, **default_kwargs)

Create a public GeneratorBuilder from a generator factory.

This helper is intended for external users who want to register custom generators while reusing the registry's seeding and relabeling logic.

Parameters:

Name Type Description Default
generator_factory Callable[..., Any]

Callable (n_v, n_c, **kw) -> svvamp generator that, when called, returns an svvamp generator object.

required
**default_kwargs object

Default keyword arguments forwarded to the factory.

{}

Returns:

Type Description
GeneratorBuilder

A GeneratorBuilder that can be registered in the registry.

Example::

from svvamp import GeneratorProfileEuclideanBox
builder = make_generator_builder(
    GeneratorProfileEuclideanBox,
    box_dimensions=[1.0, 1.0, 1.0],
)
register_generator("MY_EUCLID_3D", builder)
Source code in src/vote_simulation/models/data_generation/generator_registry.py
def make_generator_builder(
    generator_factory: Callable[..., Any],
    **default_kwargs: object,
) -> GeneratorBuilder:
    """Create a public `GeneratorBuilder` from a generator factory.

    This helper is intended for external users who want to register custom
    generators while reusing the registry's seeding and relabeling logic.

    Args:
        generator_factory: Callable ``(n_v, n_c, **kw) -> svvamp generator``
            that, when called, returns an svvamp generator object.
        **default_kwargs: Default keyword arguments forwarded to the factory.

    Returns:
        A `GeneratorBuilder` that can be registered in the registry.

    Example::

        from svvamp import GeneratorProfileEuclideanBox
        builder = make_generator_builder(
            GeneratorProfileEuclideanBox,
            box_dimensions=[1.0, 1.0, 1.0],
        )
        register_generator("MY_EUCLID_3D", builder)
    """

    def _seeded_builder(
        n_v: int,
        n_c: int,
        *,
        seed: int = 0,
        iteration: int = 0,
        **overrides: object,
    ) -> Profile:
        # Seed first so the generated profile is reproducible per (seed, iteration).
        _seed(seed, iteration)
        # Call-site kwargs take precedence over the factory defaults.
        params = dict(default_kwargs)
        params.update(overrides)
        generator = generator_factory(n_v=n_v, n_c=n_c, **params)
        # Run the generator and post-process the profile via _relabel.
        return _relabel(generator(), n_c)

    return _seeded_builder

normalize_between_0_and_1(profile)

Return a new Profile with utilities normalized to [0, 1].

Source code in src/vote_simulation/models/data_generation/generator_registry.py
def normalize_between_0_and_1(profile: Profile) -> Profile:
    """Return a new Profile with utilities min-max scaled into [0, 1].

    A constant utility matrix (zero spread) maps to all zeros.
    NOTE(review): DataInstance._normalize maps the degenerate case to 0.5
    instead of 0.0 — both encode total indifference, but confirm whether the
    two helpers should agree.
    """
    utilities = profile.preferences_ut
    lo = np.min(utilities)
    hi = np.max(utilities)
    spread = hi - lo
    if spread > 0:
        scaled = (utilities - lo) / spread
    else:
        scaled = np.zeros_like(utilities)
    return Profile(preferences_ut=scaled, labels_candidates=profile.labels_candidates)

register_generator(code, builder)

Register a generator builder under a short code.

Parameters:

Name Type Description Default
code str

Short code - case-insensitive, will be normalized.

required
builder GeneratorBuilder

Callable (n_v, n_c, seed=0, **extra) -> Profile.

required
Source code in src/vote_simulation/models/data_generation/generator_registry.py
def register_generator(code: str, builder: GeneratorBuilder) -> None:
    """Register a generator builder under a short code.

    Args:
        code: Short code - case-insensitive, will be normalized.
        builder: Callable ``(n_v, n_c, seed=0, **extra) -> Profile``.
    """
    normalized = code.strip().upper()
    _GENERATOR_BUILDERS[normalized] = builder

Data instances

Load or generate election profiles and persist them.

DataInstance

Encapsulates an election profile (utility matrix + candidate labels).

A ``DataInstance`` can be created in three ways:

From an existing file (CSV or Parquet)::

   di = DataInstance("path/to/data.csv")

From a generator (wraps svvamp GeneratorProfile*)::

   di = DataInstance.from_generator(
       model_code="UNI", n_v=101, n_c=5, seed=42, iteration=0
   )

From a raw Profile::

   di = DataInstance.from_profile(profile)
Source code in src/vote_simulation/models/data_generation/data_instance.py
(source lines 26-428)
class DataInstance:
    """Encapsulates an election profile (utility matrix + candidate labels).

    `DataInstance`` can be created in three ways:

     **From an existing file** (CSV or Parquet)::

           di = DataInstance("path/to/data.csv")

     **From a generator** (wraps svvamp ``GeneratorProfile*``)::

           di = DataInstance.from_generator(
               model_code="UNI", n_v=101, n_c=5, seed=42, iteration=0
           )

     **From a raw Profile**::

           di = DataInstance.from_profile(profile)
    """

    def __init__(self, file_path: str):
        try:
            self.candidates, raw = self.get_data(file_path)
            self.data, self._orig_min, self._orig_max = self._normalize(raw)
            self.profile = self.build_profile(self.candidates, self.data)
            self.file_path = file_path
            self.model = None
        except Exception as e:
            raise ValueError(f"Error initializing DataInstance: {e}") from e

    # -------------------------------------------------- normalization helpers

    @staticmethod
    def _normalize(data: np.ndarray) -> tuple[np.ndarray, float, float]:
        """Min-max normalize a utility matrix to [0, 1].

        The transformation is a global affine map that preserves every
        relative difference in the original data, making it fully
        reversible via :meth:`denormalize`.

        Args:
            data: 2-D array of shape ``(n_voters, n_candidates)``.

        Returns:
            A tuple ``(normalized, orig_min, orig_max)``.
        """
        dmin: float = float(data.min())
        dmax: float = float(data.max())
        spread = dmax - dmin
        if spread > 0.0:
            normalized = (data - dmin) * (1.0 / spread)  # one division
        else:
            # all utilities identical → perfect indifference
            normalized = np.full_like(data, 0.5)
        return normalized, dmin, dmax

    def denormalize(self) -> np.ndarray:
        """Restore the original (pre-normalization) utility values.

        Returns:
            2-D array with the same shape as ``self.data`` containing
            the utilities on their original scale.
        """
        spread = self._orig_max - self._orig_min
        if spread > 0.0:
            return self.data * spread + self._orig_min
        return np.full_like(self.data, self._orig_min)

    # --------------------------------------------------------- class methods

    @classmethod
    def from_generator(
        cls,
        model_code: str,
        n_v: int,
        n_c: int,
        *,
        seed: int = 0,
        iteration: int = 0,
        **extra_params: object,
    ) -> DataInstance:
        """Generate an election profile using a registered generator.

        Args:
            model_code: Registered generator short code (e.g. ``"UNI"``).
            n_v: Number of voters.
            n_c: Number of candidates.
            seed: Base random seed for reproducibility.
            iteration: Iteration index (added to *seed*).
            **extra_params: Model-specific keyword arguments forwarded to
                the generator builder.

        Returns:
            A new ``DataInstance`` whose profile was generated in-memory.
        """
        from vote_simulation.models.data_generation.generator_registry import (
            get_generator_builder,
        )

        builder = get_generator_builder(model_code)
        profile: Profile = builder(n_v, n_c, seed=seed, iteration=iteration, **extra_params)

        instance = object.__new__(cls)
        instance.candidates = np.asarray(profile.labels_candidates, dtype=str)
        raw = np.asarray(profile.preferences_ut, dtype=np.float64)
        instance.data, instance._orig_min, instance._orig_max = cls._normalize(raw)
        instance.profile = Profile(
            preferences_ut=instance.data,
            labels_candidates=profile.labels_candidates,
        )
        instance.file_path = ""  # not loaded from disk
        return instance

    @classmethod
    def from_profile(cls, profile: Profile, file_path: str = "") -> DataInstance:
        """Wrap an existing ``svvamp.Profile`` into a ``DataInstance``.

        Args:
            profile: An existing ``svvamp.Profile`` object.
            file_path: Optional file path associated with the profile.

        Returns:
            A new ``DataInstance`` wrapping the provided profile.
        """
        instance = object.__new__(cls)
        instance.candidates = np.asarray(profile.labels_candidates, dtype=str)
        raw = np.asarray(profile.preferences_ut, dtype=np.float64)
        instance.data, instance._orig_min, instance._orig_max = cls._normalize(raw)
        instance.profile = Profile(
            preferences_ut=instance.data,
            labels_candidates=profile.labels_candidates,
        )
        instance.file_path = file_path
        return instance

    # loaders

    def get_csv(self, file_path: str) -> tuple[np.ndarray, np.ndarray]:
        """Load candidate labels and utility matrix from a CSV file.

        Args:
            file_path: Path to the CSV file.

        Returns:
            A tuple containing:
                - candidates: 1-D array of candidate names.
                - data: 2-D array of shape (n_voters, n_candidates) with utility values.
        """
        try:
            candidates_list: list[str] = []
            rows: list[list[float]] = []

            with open(file_path, encoding="utf-8", newline="") as fh:
                csv_reader = reader(fh)
                next(csv_reader, None)

                for row in csv_reader:
                    if len(row) < 2:
                        raise ValueError("CSV file must contain at least one data column.")
                    candidates_list.append(row[0].strip('"'))
                    rows.append([float(value) for value in row[1:]])

            if not rows:
                raise ValueError("CSV file must contain at least one row.")

            candidates = np.asarray(candidates_list, dtype=str)
            data = np.asarray(rows, dtype=np.float64).T  # rows = voters, columns = candidates

        except Exception as e:
            raise ValueError(f"Error reading the file : {e}") from e

        return candidates, data

    def get_parquet(self, file_path: str) -> tuple[np.ndarray, np.ndarray]:
        """Load candidate labels and utility matrix from a Parquet file.

        The Parquet file is expected to have one column per candidate
        (column name = candidate label) and one row per voter.

        Args:
            file_path: Path to the Parquet file.

        Returns:
            A tuple containing:
                - candidates: 1-D array of candidate names.
                - data: 2-D array of shape (n_voters, n_candidates) with utility values.
        """
        try:
            df = pd.read_parquet(file_path)
            if df.empty:
                raise ValueError("Parquet file is empty.")
            candidates = np.asarray(df.columns.tolist(), dtype=str)
            data = df.to_numpy(dtype=np.float64)  # (n_voters, n_candidates)
        except Exception as e:
            raise ValueError(f"Error reading parquet file: {e}") from e
        return candidates, data

    def get_data(self, file_path: str) -> tuple[np.ndarray, np.ndarray]:
        """Load data from a CSV or Parquet file.

        Args:
            file_path: Path to the data file.

        Returns:
            candidates: 1-D array of candidate names.
            data: 2-D array of shape ``(n_voters, n_candidates)``.
        """
        if not os.path.isfile(file_path):
            raise ValueError("Invalid file path. Please provide a valid file path.")

        if file_path.endswith(".csv"):
            return self.get_csv(file_path)

        if file_path.endswith(".parquet"):
            return self.get_parquet(file_path)

        raise ValueError("Unable to load data from provided file path.")

    # profile builder

    def build_profile(self, candidates: np.ndarray, data: np.ndarray) -> Profile:
        """Build a ``svvamp.Profile`` from candidate labels and utility matrix."""
        return Profile(preferences_ut=data, labels_candidates=candidates.tolist())

    def save_parquet(self, file_path: str) -> str:
        """Write the utility matrix to a Parquet file.

        Parent directories are created when missing. The output contains
        one column per candidate and one row per voter.

        Args:
            file_path: Destination path (should end in ``.parquet``).

        Returns:
            The resolved absolute path of the written file.
        """
        destination = Path(file_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        frame = pd.DataFrame(self.data, columns=self.candidates.tolist())
        frame.to_parquet(str(destination), index=False)
        return str(destination.resolve())

    def save_csv(self, file_path: str) -> str:
        """Persist the utility matrix to a CSV file.

        Creates parent directories if needed. The file contains one column
        per candidate and one row per voter (the same layout as
        ``save_parquet``).

        NOTE(review): this layout is NOT the one ``get_csv`` expects —
        ``get_csv`` reads one row per candidate (label in the first
        column) and transposes. A file written here cannot be round-
        tripped through ``get_csv``; confirm which layout is intended.

        Args:
            file_path: Destination path (should end in ``.csv``).

        Returns:
            The resolved absolute path of the written file.
        """
        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)
        df = pd.DataFrame(self.data, columns=self.candidates.tolist())
        df.to_csv(str(path), index=False)
        return str(path.resolve())

    @property
    def n_voters(self) -> int:
        """Number of voters (rows of the utility matrix)."""
        rows = self.data.shape[0]
        return int(rows)

    @property
    def n_candidates(self) -> int:
        """Number of candidates (columns of the utility matrix)."""
        cols = self.data.shape[1]
        return int(cols)

    @staticmethod
    def _cluster_order(matrix: np.ndarray, axis: int, method: str = "average", metric: str = "euclidean") -> np.ndarray:
        """Return the reordered indices of rows or columns via hierarchical clustering.

        Args:
            matrix: 2-D array ``(n_voters, n_candidates)``.
            axis: 0 = cluster rows (voters), 1 = cluster columns (candidates).
            method: Linkage method passed to ``scipy.cluster.hierarchy.linkage``.
            metric: Distance metric passed to ``scipy.cluster.hierarchy.linkage``.

        Returns:
            1-D array of reordered indices.
        """
        n_items = matrix.shape[axis]
        if n_items <= 1 or not _HAS_SCIPY:
            return np.arange(n_items)

        # For column clustering, transpose so rows = candidates
        data = matrix.T if axis == 1 else matrix
        lnk = linkage(data, method=method, metric=metric)
        return leaves_list(lnk)



    def plot_heatmap(
        self,
        *,
        cluster_columns: bool = False,
        cluster_rows: bool = True,
        method: str = "average",
        metric: str = "euclidean",
        cmap: str = "viridis",
        title: str | None = None,
        save_path: str | None = None,
        show: bool = True,
    ) -> dict:
        """Visualize the utility matrix as a heatmap with optional hierarchical clustering.

        Values are already in [0, 1]. Rows (voters) are reordered by
        hierarchical clustering by default so that voters with similar
        preference profiles appear next to each other; candidate (column)
        clustering can be enabled via ``cluster_columns``.

        Args:
            cluster_columns: Reorder candidates by hierarchical clustering (default False).
            cluster_rows: Reorder voters by hierarchical clustering (default True).
            method: Linkage method (``"average"``, ``"ward"``, ``"complete"``, …).
            metric: Distance metric (``"euclidean"``, ``"cosine"``, …).
            cmap: Matplotlib colormap name.
            title: Figure title. Defaults to model code if available.
            save_path: If provided, save the figure to this path.
            show: Whether to call ``plt.show()``.

        Returns:
            Dict with keys ``ordered_matrix``, ``row_order``, ``col_order``.

        Raises:
            ImportError: If matplotlib is not installed.
        """
        if not _HAS_MPL:
            raise ImportError("matplotlib is required for plot_heatmap(). Install it with: pip install matplotlib")

        if not _HAS_SCIPY and (cluster_columns or cluster_rows):
            print("[Warning] scipy not found. Clustering disabled. Install with: pip install scipy")

        matrix = self.data  # already in [0, 1]

        # _cluster_order falls back to the identity order when scipy is
        # missing, so no extra guard is needed here.
        row_order: np.ndarray = (
            self._cluster_order(matrix, axis=0, method=method, metric=metric)
            if cluster_rows
            else np.arange(matrix.shape[0])
        )
        col_order: np.ndarray = (
            self._cluster_order(matrix, axis=1, method=method, metric=metric)
            if cluster_columns
            else np.arange(matrix.shape[1])
        )

        ordered = matrix[row_order][:, col_order]
        ordered_candidates = self.candidates[col_order]

        # --- figure sizing: scale with matrix dimensions, with sane minimums
        fig_w = max(8, ordered.shape[1] * 0.5)
        fig_h = max(5, ordered.shape[0] * 0.08)

        fig, ax = plt.subplots(figsize=(fig_w, fig_h))

        im = ax.imshow(
            ordered,
            aspect="auto",
            interpolation="nearest",
            cmap=cmap,
            vmin=0.0,
            vmax=1.0,
        )

        # --- axes labels
        if title is None:
            title = f"Profiles heatmap — {self.model_code}" if self.model_code else "Profiles heatmap"

        xlabel = "Candidates"
        if cluster_columns and _HAS_SCIPY:
            xlabel += " (clustered)"
        ylabel = "Voters"
        if cluster_rows and _HAS_SCIPY:
            ylabel += " (clustered)"

        ax.set_title(title, fontsize=14, fontweight="bold")
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)

        # candidate labels on x-axis (readable even with many candidates)
        ax.set_xticks(np.arange(len(ordered_candidates)))
        ax.set_xticklabels(ordered_candidates, rotation=45, ha="right", fontsize=8)

        # hide individual voter ticks when there are many
        if ordered.shape[0] <= 30:
            ax.set_yticks(np.arange(ordered.shape[0]))
            ax.set_yticklabels(row_order, fontsize=7)
        else:
            ax.set_yticks([])

        plt.colorbar(im, ax=ax, label="Normalized utility [0, 1]")
        plt.tight_layout()

        if save_path:
            fig.savefig(save_path, dpi=200, bbox_inches="tight")
            print(f"Figure saved to: {save_path}")

        if show:
            plt.show()

        return {
            "ordered_matrix": ordered,
            "row_order": row_order,
            "col_order": col_order,
        }

n_candidates property

Number of candidates in this instance.

n_voters property

Number of voters in this instance.

build_profile(candidates, data)

Build a svvamp.Profile from candidate labels and utility matrix.

Source code in src/vote_simulation/models/data_generation/data_instance.py
def build_profile(self, candidates: np.ndarray, data: np.ndarray) -> Profile:
    """Build a ``svvamp.Profile`` from candidate labels and utility matrix."""
    return Profile(preferences_ut=data, labels_candidates=candidates.tolist())

denormalize()

Restore the original (pre-normalization) utility values.

Returns:

Type Description
ndarray

2-D array with the same shape as self.data containing

ndarray

the utilities on their original scale.

Source code in src/vote_simulation/models/data_generation/data_instance.py
def denormalize(self) -> np.ndarray:
    """Restore the original (pre-normalization) utility values.

    Returns:
        2-D array with the same shape as ``self.data`` containing
        the utilities on their original scale.
    """
    spread = self._orig_max - self._orig_min
    if spread > 0.0:
        return self.data * spread + self._orig_min
    return np.full_like(self.data, self._orig_min)

from_generator(model_code, n_v, n_c, *, seed=0, iteration=0, **extra_params) classmethod

Generate an election profile using a registered generator.

Parameters:

Name Type Description Default
model_code str

Registered generator short code (e.g. "UNI").

required
n_v int

Number of voters.

required
n_c int

Number of candidates.

required
seed int

Base random seed for reproducibility.

0
iteration int

Iteration index (added to seed).

0
**extra_params object

Model-specific keyword arguments forwarded to the generator builder.

{}

Returns:

Type Description
DataInstance

A new DataInstance whose profile was generated in-memory.

Source code in src/vote_simulation/models/data_generation/data_instance.py
@classmethod
def from_generator(
    cls,
    model_code: str,
    n_v: int,
    n_c: int,
    *,
    seed: int = 0,
    iteration: int = 0,
    **extra_params: object,
) -> DataInstance:
    """Generate an election profile using a registered generator.

    Args:
        model_code: Registered generator short code (e.g. ``"UNI"``).
        n_v: Number of voters.
        n_c: Number of candidates.
        seed: Base random seed for reproducibility.
        iteration: Iteration index (added to *seed*).
        **extra_params: Model-specific keyword arguments forwarded to
            the generator builder.

    Returns:
        A new ``DataInstance`` whose profile was generated in-memory.
    """
    from vote_simulation.models.data_generation.generator_registry import (
        get_generator_builder,
    )

    builder = get_generator_builder(model_code)
    profile: Profile = builder(n_v, n_c, seed=seed, iteration=iteration, **extra_params)

    instance = object.__new__(cls)
    instance.candidates = np.asarray(profile.labels_candidates, dtype=str)
    raw = np.asarray(profile.preferences_ut, dtype=np.float64)
    instance.data, instance._orig_min, instance._orig_max = cls._normalize(raw)
    instance.profile = Profile(
        preferences_ut=instance.data,
        labels_candidates=profile.labels_candidates,
    )
    instance.file_path = ""  # not loaded from disk
    return instance

from_profile(profile, file_path='') classmethod

Wrap an existing svvamp.Profile into a DataInstance.

Parameters:

Name Type Description Default
profile Profile

An existing svvamp.Profile object.

required
file_path str

Optional file path associated with the profile.

''

Returns:

Type Description
DataInstance

A new DataInstance wrapping the provided profile.

Source code in src/vote_simulation/models/data_generation/data_instance.py
@classmethod
def from_profile(cls, profile: Profile, file_path: str = "") -> DataInstance:
    """Wrap an existing ``svvamp.Profile`` into a ``DataInstance``.

    Args:
        profile: An existing ``svvamp.Profile`` object.
        file_path: Optional file path associated with the profile.

    Returns:
        A new ``DataInstance`` wrapping the provided profile.
    """
    instance = object.__new__(cls)
    instance.candidates = np.asarray(profile.labels_candidates, dtype=str)
    raw = np.asarray(profile.preferences_ut, dtype=np.float64)
    instance.data, instance._orig_min, instance._orig_max = cls._normalize(raw)
    instance.profile = Profile(
        preferences_ut=instance.data,
        labels_candidates=profile.labels_candidates,
    )
    instance.file_path = file_path
    return instance

get_csv(file_path)

Load candidate labels and utility matrix from a CSV file.

Parameters:

Name Type Description Default
file_path str

Path to the CSV file.

required

Returns:

Type Description
tuple[ndarray, ndarray]

A tuple containing: - candidates: 1-D array of candidate names. - data: 2-D array of shape (n_voters, n_candidates) with utility values.

Source code in src/vote_simulation/models/data_generation/data_instance.py
def get_csv(self, file_path: str) -> tuple[np.ndarray, np.ndarray]:
    """Load candidate labels and utility matrix from a CSV file.

    Args:
        file_path: Path to the CSV file.

    Returns:
        A tuple containing:
            - candidates: 1-D array of candidate names.
            - data: 2-D array of shape (n_voters, n_candidates) with utility values.
    """
    try:
        candidates_list: list[str] = []
        rows: list[list[float]] = []

        with open(file_path, encoding="utf-8", newline="") as fh:
            csv_reader = reader(fh)
            next(csv_reader, None)

            for row in csv_reader:
                if len(row) < 2:
                    raise ValueError("CSV file must contain at least one data column.")
                candidates_list.append(row[0].strip('"'))
                rows.append([float(value) for value in row[1:]])

        if not rows:
            raise ValueError("CSV file must contain at least one row.")

        candidates = np.asarray(candidates_list, dtype=str)
        data = np.asarray(rows, dtype=np.float64).T  # rows = voters, columns = candidates

    except Exception as e:
        raise ValueError(f"Error reading the file : {e}") from e

    return candidates, data

get_data(file_path)

Load data from a CSV or Parquet file.

Parameters:

Name Type Description Default
file_path str

Path to the data file.

required

Returns:

Name Type Description
candidates ndarray

1-D array of candidate names.

data ndarray

2-D array of shape (n_voters, n_candidates).

Source code in src/vote_simulation/models/data_generation/data_instance.py
def get_data(self, file_path: str) -> tuple[np.ndarray, np.ndarray]:
    """Load data from a CSV or Parquet file.

    Args:
        file_path: Path to the data file.

    Returns:
        candidates: 1-D array of candidate names.
        data: 2-D array of shape ``(n_voters, n_candidates)``.
    """
    if not os.path.isfile(file_path):
        raise ValueError("Invalid file path. Please provide a valid file path.")

    if file_path.endswith(".csv"):
        return self.get_csv(file_path)

    if file_path.endswith(".parquet"):
        return self.get_parquet(file_path)

    raise ValueError("Unable to load data from provided file path.")

get_parquet(file_path)

Load candidate labels and utility matrix from a Parquet file.

The Parquet file is expected to have one column per candidate (column name = candidate label) and one row per voter.

Parameters:

Name Type Description Default
file_path str

Path to the Parquet file.

required

Returns:

Type Description
tuple[ndarray, ndarray]

A tuple containing: - candidates: 1-D array of candidate names. - data: 2-D array of shape (n_voters, n_candidates) with utility values.

Source code in src/vote_simulation/models/data_generation/data_instance.py
def get_parquet(self, file_path: str) -> tuple[np.ndarray, np.ndarray]:
    """Load candidate labels and utility matrix from a Parquet file.

    The Parquet file is expected to have one column per candidate
    (column name = candidate label) and one row per voter.

    Args:
        file_path: Path to the Parquet file.

    Returns:
        A tuple containing:
            - candidates: 1-D array of candidate names.
            - data: 2-D array of shape (n_voters, n_candidates) with utility values.
    """
    try:
        df = pd.read_parquet(file_path)
        if df.empty:
            raise ValueError("Parquet file is empty.")
        candidates = np.asarray(df.columns.tolist(), dtype=str)
        data = df.to_numpy(dtype=np.float64)  # (n_voters, n_candidates)
    except Exception as e:
        raise ValueError(f"Error reading parquet file: {e}") from e
    return candidates, data

plot_heatmap(*, cluster_columns=False, cluster_rows=True, method='average', metric='euclidean', cmap='viridis', title=None, save_path=None, show=True)

Visualize the utility matrix as a heatmap with optional hierarchical clustering.

Values are already in [0, 1]. Rows (voters) are reordered by hierarchical clustering by default so that voters with similar preference profiles appear next to each other; candidate (column) clustering can be enabled with cluster_columns.

Parameters:

Name Type Description Default
cluster_columns bool

Reorder candidates by hierarchical clustering (default False).

False
cluster_rows bool

Reorder voters by hierarchical clustering (default True).

True
method str

Linkage method ("average", "ward", "complete", …).

'average'
metric str

Distance metric ("euclidean", "cosine", …).

'euclidean'
cmap str

Matplotlib colormap name.

'viridis'
title str | None

Figure title. Defaults to model code if available.

None
save_path str | None

If provided, save the figure to this path.

None
show bool

Whether to call plt.show().

True

Returns:

Type Description
dict

Dict with keys ordered_matrix, row_order, col_order.

Raises:

Type Description
ImportError

If matplotlib is not installed.

Source code in src/vote_simulation/models/data_generation/data_instance.py
def plot_heatmap(
    self,
    *,
    cluster_columns: bool = False,
    cluster_rows: bool = True,
    method: str = "average",
    metric: str = "euclidean",
    cmap: str = "viridis",
    title: str | None = None,
    save_path: str | None = None,
    show: bool = True,
) -> dict:
    """Visualize the utility matrix as a heatmap with optional hierarchical clustering.

    Values are already in [0, 1]. Rows (voters) are reordered by
    hierarchical clustering by default so that voters with similar
    preference profiles appear next to each other.

    Args:
        cluster_columns: Reorder candidates by hierarchical clustering (default False).
        cluster_rows: Reorder voters by hierarchical clustering (default True).
        method: Linkage method (``"average"``, ``"ward"``, ``"complete"``, …).
        metric: Distance metric (``"euclidean"``, ``"cosine"``, …).
        cmap: Matplotlib colormap name.
        title: Figure title. Defaults to model code if available.
        save_path: If provided, save the figure to this path.
        show: Whether to call ``plt.show()``.

    Returns:
        Dict with keys ``ordered_matrix``, ``row_order``, ``col_order``.

    Raises:
        ImportError: If matplotlib is not installed.
    """
    if not _HAS_MPL:
        raise ImportError("matplotlib is required for plot_heatmap(). Install it with: pip install matplotlib")

    if not _HAS_SCIPY and (cluster_columns or cluster_rows):
        print("[Warning] scipy not found. Clustering disabled. Install with: pip install scipy")

    matrix = self.data  # already in [0, 1]

    row_order: np.ndarray = (
        self._cluster_order(matrix, axis=0, method=method, metric=metric)
        if cluster_rows
        else np.arange(matrix.shape[0])
    )
    col_order: np.ndarray = (
        self._cluster_order(matrix, axis=1, method=method, metric=metric)
        if cluster_columns
        else np.arange(matrix.shape[1])
    )

    ordered = matrix[row_order][:, col_order]
    ordered_candidates = self.candidates[col_order]

    # --- figure sizing
    fig_w = max(8, ordered.shape[1] * 0.5)
    fig_h = max(5, ordered.shape[0] * 0.08)

    fig, ax = plt.subplots(figsize=(fig_w, fig_h))

    im = ax.imshow(
        ordered,
        aspect="auto",
        interpolation="nearest",
        cmap=cmap,
        vmin=0.0,
        vmax=1.0,
    )

    # --- axes labels
    if title is None:
        title = f"Profiles heatmap — {self.model_code}" if self.model_code else "Profiles heatmap"

    xlabel = "Candidates"
    if cluster_columns and _HAS_SCIPY:
        xlabel += " (clustered)"
    ylabel = "Voters"
    if cluster_rows and _HAS_SCIPY:
        ylabel += " (clustered)"

    ax.set_title(title, fontsize=14, fontweight="bold")
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # candidate labels on x-axis (readable even with many candidates)
    ax.set_xticks(np.arange(len(ordered_candidates)))
    ax.set_xticklabels(ordered_candidates, rotation=45, ha="right", fontsize=8)

    # hide individual voter ticks when there are many
    if ordered.shape[0] <= 30:
        ax.set_yticks(np.arange(ordered.shape[0]))
        ax.set_yticklabels(row_order, fontsize=7)
    else:
        ax.set_yticks([])

    plt.colorbar(im, ax=ax, label="Normalized utility [0, 1]")
    plt.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=200, bbox_inches="tight")
        print(f"Figure saved to: {save_path}")

    if show:
        plt.show()

    return {
        "ordered_matrix": ordered,
        "row_order": row_order,
        "col_order": col_order,
    }

save_csv(file_path)

Persist the utility matrix to a CSV file (same layout as input).

Parameters:

Name Type Description Default
file_path str

Destination path (should end in .csv).

required

Returns:

Type Description
str

The resolved absolute path of the written file.

Source code in src/vote_simulation/models/data_generation/data_instance.py
def save_csv(self, file_path: str) -> str:
    """Persist the utility matrix to a CSV file (same layout as input).

    Args:
        file_path: Destination path (should end in ``.csv``).

    Returns:
        The resolved absolute path of the written file.
    """
    path = Path(file_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame(self.data, columns=self.candidates.tolist())
    df.to_csv(str(path), index=False)
    return str(path.resolve())

save_parquet(file_path)

Persist the utility matrix to a Parquet file.

Creates parent directories if needed. The file contains one column per candidate and one row per voter.

Parameters:

Name Type Description Default
file_path str

Destination path (should end in .parquet).

required

Returns:

Type Description
str

The resolved absolute path of the written file.

Source code in src/vote_simulation/models/data_generation/data_instance.py
def save_parquet(self, file_path: str) -> str:
    """Persist the utility matrix to a Parquet file.

    Creates parent directories if needed. The file contains one column
    per candidate and one row per voter.

    Args:
        file_path: Destination path (should end in ``.parquet``).

    Returns:
        The resolved absolute path of the written file.
    """
    path = Path(file_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame(self.data, columns=self.candidates.tolist())
    df.to_parquet(str(path), index=False)
    return str(path.resolve())