Source code for pmecg.utils.data

from __future__ import annotations

import warnings
from collections import Counter
from collections.abc import Sequence
from typing import NamedTuple

import numpy as np
import pandas as pd

from pmecg.types import ConfigurationDataType, ECGDataType, LeadSegment

# Canonical names of the 12 standard ECG leads: the six limb leads followed by
# the six chest leads. This order is also the default column order used when
# no lead names are supplied and the output order of ``expand_to_12_leads``.
SUPPORTED_LEADS = ("I", "II", "III", "aVR", "aVL", "aVF", "V1", "V2", "V3", "V4", "V5", "V6")
# Names of the built-in layout templates. Each name has a matching entry in
# ``_TEMPLATE_CONFIGURATIONS``; a name reads as
# ``<leads per row>x<rows>[+<extra full-width rows>]``.
SUPPORTED_TEMPLATES = (
    # One lead per row.
    "1x1",
    "1x2",
    "1x3",
    "1x4",
    "1x6",
    "1x8",
    "1x12",
    # Several leads concatenated in each row.
    "2x4",
    "2x6",
    "4x3",
    # Same grids followed by one full-width row.
    "2x4+1",
    "2x6+1",
    "4x3+1",
    # Same grids followed by three full-width rows.
    "2x4+3",
    "2x6+3",
    "4x3+3",
)

# Maps each standard limb lead name (as it appears in a built-in template)
# to the Cabrera-format lead name that should replace it.
# The value "-aVR" names the negated-aVR column that ``cabrera_factory`` creates;
# it is emitted verbatim and is never looked up in a leads map.
_CABRERA_SUBSTITUTION: dict[str, str] = {
    "I": "aVL",
    "II": "I",
    "III": "-aVR",
    "aVR": "II",
    "aVL": "aVF",
    "aVF": "III",
}

# Limb leads that a template must reference before ``cabrera_factory`` accepts it.
_CABRERA_LIMB_LEADS = frozenset(("I", "II", "III", "aVR", "aVL", "aVF"))
# Error text raised by ``_validate_configuration_row_definition`` when a row entry
# is neither a string, a LeadSegment, nor a list.
_CONFIGURATION_ENTRY_ERROR = (
    "configuration list must contain either strings (lead names), sub-lists of strings (lead names for each row), "
    "LeadSegment objects, or sub-lists of LeadSegment objects; "
    "all rows must use the same type (all string-based or all LeadSegment-based)"
)


class _LeadsMapBase(NamedTuple):
    """Internal NamedTuple backing :class:`LeadsMap`. Use :class:`LeadsMap` directly.

    There is one optional field per canonical lead, declared in the same order
    as ``SUPPORTED_LEADS``. A field holds the caller's own name for that lead,
    or ``None`` when no override is given.
    """

    # Limb leads.
    I: str | None = None  # noqa: E741
    II: str | None = None
    III: str | None = None
    aVR: str | None = None
    aVL: str | None = None
    aVF: str | None = None
    # Chest leads.
    V1: str | None = None
    V2: str | None = None
    V3: str | None = None
    V4: str | None = None
    V5: str | None = None
    V6: str | None = None



[docs]
class LeadsMap(_LeadsMapBase):
    """Optional mapping from canonical leads to input lead names.

    All 12 fields default to ``None``; only the leads that differ from their
    canonical names need to be specified.  For example, if the input ECG data
    uses ``"LI"`` for lead I and ``"-aVR"`` for lead aVR::

        LeadsMap(I="LI", aVR="-aVR")

    This allows the built-in templates to be resolved correctly even when the
    input data uses non-canonical lead names.

    When both a canonical keyword (e.g. ``aVR``) and its deprecated uppercase
    twin (``AVR``) are supplied, the canonical value wins; the deprecation
    warning is still emitted.

    .. deprecated::
        Keyword arguments ``AVR``, ``AVL``, ``AVF`` (uppercase) are accepted
        for backward compatibility but will be removed in a future release.
        Use ``aVR``, ``aVL``, ``aVF`` instead.
    """

    # Without an empty ``__slots__`` a namedtuple subclass gains a per-instance
    # ``__dict__``, so a stray assignment such as ``m.AVR = "x"`` would be
    # silently accepted yet never show up in ``_asdict()``.
    __slots__ = ()

    def __new__(
        cls,
        I: str | None = None,  # noqa: E741
        II: str | None = None,
        III: str | None = None,
        aVR: str | None = None,
        aVL: str | None = None,
        aVF: str | None = None,
        V1: str | None = None,
        V2: str | None = None,
        V3: str | None = None,
        V4: str | None = None,
        V5: str | None = None,
        V6: str | None = None,
        *,
        AVR: str | None = None,
        AVL: str | None = None,
        AVF: str | None = None,
    ) -> LeadsMap:
        """Build the mapping, folding the deprecated uppercase keywords in.

        Each deprecated keyword that is not ``None`` triggers a
        ``DeprecationWarning`` and is used only when the matching canonical
        keyword was left at ``None``.
        """
        augmented = {"aVR": aVR, "aVL": aVL, "aVF": aVF}
        legacy_keywords = (("aVR", "AVR", AVR), ("aVL", "AVL", AVL), ("aVF", "AVF", AVF))

        # The loop must stay in this frame: ``stacklevel=2`` has to point at
        # the caller of ``LeadsMap(...)``.
        for field, legacy, legacy_value in legacy_keywords:
            if legacy_value is None:
                continue
            warnings.warn(
                f"LeadsMap({legacy}=...) is deprecated; use LeadsMap({field}=...) instead",
                DeprecationWarning,
                stacklevel=2,
            )
            if augmented[field] is None:
                augmented[field] = legacy_value

        return _LeadsMapBase.__new__(
            cls,
            I,
            II,
            III,
            augmented["aVR"],
            augmented["aVL"],
            augmented["aVF"],
            V1,
            V2,
            V3,
            V4,
            V5,
            V6,
        )



# Row layout of every built-in template, keyed by template name and written with
# canonical lead names. A plain string is a full-width row; a list of strings is
# a row in which those leads are concatenated. ``_template_configuration`` hands
# out copies of these entries.
_TEMPLATE_CONFIGURATIONS: dict[str, ConfigurationDataType] = {
    # One lead per row.
    "1x1": ["I"],
    "1x2": ["I", "II"],
    "1x3": ["I", "II", "V2"],
    "1x4": ["I", "II", "III", "V2"],
    "1x6": ["I", "II", "III", "aVR", "aVL", "aVF"],
    "1x8": ["I", "II", "V1", "V2", "V3", "V4", "V5", "V6"],
    "1x12": ["I", "II", "III", "aVR", "aVL", "aVF", "V1", "V2", "V3", "V4", "V5", "V6"],
    # Grids, each optionally followed by one full-width "II" row.
    "2x4": [["I", "V3"], ["II", "V4"], ["III", "V5"], ["aVR", "V6"]],
    "2x4+1": [["I", "V3"], ["II", "V4"], ["III", "V5"], ["aVR", "V6"], "II"],
    "2x6": [["I", "V1"], ["II", "V2"], ["III", "V3"], ["aVR", "V4"], ["aVL", "V5"], ["aVF", "V6"]],
    "2x6+1": [["I", "V1"], ["II", "V2"], ["III", "V3"], ["aVR", "V4"], ["aVL", "V5"], ["aVF", "V6"], "II"],
    "4x3": [["I", "aVR", "V1", "V4"], ["II", "aVL", "V2", "V5"], ["III", "aVF", "V3", "V6"]],
    "4x3+1": [["I", "aVR", "V1", "V4"], ["II", "aVL", "V2", "V5"], ["III", "aVF", "V3", "V6"], "II"],
    # Grids followed by three full-width rows: "II", "V1" and "V5".
    "2x4+3": [["I", "V3"], ["II", "V4"], ["III", "V5"], ["aVR", "V6"], "II", "V1", "V5"],
    "2x6+3": [["I", "V1"], ["II", "V2"], ["III", "V3"], ["aVR", "V4"], ["aVL", "V5"], ["aVF", "V6"], "II", "V1", "V5"],
    "4x3+3": [["I", "aVR", "V1", "V4"], ["II", "aVL", "V2", "V5"], ["III", "aVF", "V3", "V6"], "II", "V1", "V5"],
}


def _normalize_canonical_lead_name(lead_name: str) -> str:
    """Return ``lead_name`` unchanged if it is exactly one of ``SUPPORTED_LEADS``.

    The match is case-sensitive and no rewriting is performed.

    Raises
    ------
    ValueError
        If ``lead_name`` is not a supported standard lead name.
    """
    if lead_name in SUPPORTED_LEADS:
        return lead_name
    raise ValueError(f"Lead name '{lead_name}' is not supported. Supported leads are: {SUPPORTED_LEADS}")


def _numpy_to_dataframe(ecg_data: np.ndarray | list[np.ndarray], lead_names: list[str] | None = None) -> pd.DataFrame:
    """Convert ECG data in numpy array format to a pandas DataFrame.

    Parameters
    ----------
    ecg_data : numpy.ndarray | list[numpy.ndarray]
        The ECG data to be converted. It should either be a numpy array with shape
        (n_samples, n_leads) or a list of numpy arrays, each with shape (n_samples,).
    lead_names : list[str] | None, defaults to None
        The names of the leads corresponding to the number of leads. If None, the
        function will use the standard 12 leads as default.

    Returns
    -------
    pandas.DataFrame
        A pandas DataFrame containing the ECG data, where each column corresponds to
        a lead and the column names are the names of the leads.
    """
    if isinstance(ecg_data, np.ndarray):
        assert ecg_data.ndim == 2, "ecg_data must be a 2D numpy array with shape (n_samples, n_leads)"
        assert ecg_data.shape[1] <= len(SUPPORTED_LEADS), (
            f"lead_names should be specified when the number of leads is not {len(SUPPORTED_LEADS)}"
        )
        if lead_names is None:
            assert ecg_data.shape[1] == len(SUPPORTED_LEADS), (
                "If lead_names is not provided, ecg_data must have the same number of leads as the standard 12 leads"
            )
            # Deep copy of leads names
            lead_names = [str(lead) for lead in SUPPORTED_LEADS]
        else:
            assert ecg_data.shape[1] == len(lead_names), (
                "The number of leads in ecg_data must match the number of lead names provided in lead_names"
            )

    elif isinstance(ecg_data, list) and len(ecg_data) > 0 and all(isinstance(row, np.ndarray) for row in ecg_data):
        if lead_names is None:
            assert len(ecg_data) == len(SUPPORTED_LEADS), (
                "If lead_names is not provided, ecg_data must have the same number of leads as the standard 12 leads"
            )
            # Deep copy of leads names
            lead_names = [str(lead) for lead in SUPPORTED_LEADS]
        else:
            assert len(lead_names) == len(ecg_data), (
                "The number of leads in ecg_data must match the number of lead names provided in lead_names"
            )

        assert all(arr.shape[0] == ecg_data[0].shape[0] for arr in ecg_data), "All arrays in ecg_data must have the same length"
        ecg_data = np.stack([ecg.flatten() for ecg in ecg_data], axis=1)

    else:
        raise ValueError("ecg_data must be a numpy array or list of numpy arrays")

    # At this stage, ecg_data is a 2D numpy array with shape (n_samples, n_leads)
    # and lead_names is a list of strings with length equal to the number of leads.
    return pd.DataFrame(ecg_data, columns=[str(name) for name in lead_names])


def _validate_input_lead_names(lead_names: Sequence[str]) -> None:
    """Check that the input lead names are all non-empty and pairwise distinct.

    Names are compared after conversion with ``str``.

    Raises
    ------
    ValueError
        If any name is the empty string, or if a name occurs more than once
        (the message lists the repeated names in sorted order).
    """
    names = [str(raw) for raw in lead_names]
    if any(not name for name in names):
        raise ValueError("Lead names must be non-empty strings")

    seen: set[str] = set()
    repeated: set[str] = set()
    for name in names:
        if name in seen:
            repeated.add(name)
        else:
            seen.add(name)

    if repeated:
        listing = ", ".join(map(repr, sorted(repeated)))
        raise ValueError(f"Duplicate lead names are not allowed: {listing}")


def _validate_and_resolve_leads_map(
    leads_map: LeadsMap | None,
    input_leads: Sequence[str],
) -> dict[str, str]:
    """Turn a :class:`LeadsMap` into a checked canonical-to-input lookup table.

    The input lead names are validated first, even when ``leads_map`` is ``None``.

    Parameters
    ----------
    leads_map : LeadsMap | None
        Optional mapping; each field that is not ``None`` gives the input
        column name used for that canonical lead.
    input_leads : Sequence[str]
        Lead names available in the ECG input.

    Returns
    -------
    dict[str, str]
        Canonical lead name to input lead name, e.g. ``{"I": "LI", "V1": "Chest-1"}``.
        Empty when ``leads_map`` is ``None`` or has no field set.

    Raises
    ------
    ValueError
        If the input lead names are invalid, if a mapped name is absent from
        ``input_leads``, or if two canonical leads map to the same input name.
    """
    _validate_input_lead_names(input_leads)

    resolved: dict[str, str] = {}
    if leads_map is None:
        return resolved

    known_columns = set(input_leads)
    explicit_fields = ((field, value) for field, value in leads_map._asdict().items() if value is not None)

    for field, value in explicit_fields:
        canonical = _normalize_canonical_lead_name(str(field))
        column = str(value)

        if column not in known_columns:
            raise ValueError(
                f"Leads map value '{column}' for conventional lead '{canonical}' is not present in the input data"
            )
        if canonical in resolved:
            raise ValueError(f"Duplicate conventional lead mapping for '{canonical}' is not allowed")
        if column in resolved.values():
            raise ValueError(f"Duplicate custom lead name '{column}' in leads_map is not allowed")

        resolved[canonical] = column

    return resolved


def _resolve_template_lead(
    template: str,
    canonical_name: str,
    canonical_to_custom: dict[str, str],
    available_input_leads: set[str],
) -> str:
    """Find the input column that supplies one canonical lead of a built-in template.

    An explicit entry in ``canonical_to_custom`` takes precedence; otherwise
    the canonical name itself is used if the input contains it.

    Returns
    -------
    str
        The input column name for this lead.

    Raises
    ------
    ValueError
        If ``canonical_name`` is not a supported lead, or if it is neither
        mapped nor present in the input. ``template`` is only used in the message.
    """
    canonical_key = _normalize_canonical_lead_name(canonical_name)

    try:
        return canonical_to_custom[canonical_key]
    except KeyError:
        pass  # no explicit mapping; fall back to the canonical name

    if canonical_key not in available_input_leads:
        raise ValueError(
            f"Template '{template}' requires conventional lead '{canonical_key}', "
            "but it is missing from leads_map and the input data"
        )
    return canonical_key


def _validate_configuration_row_definition(
    entry: list[str] | str | list[LeadSegment] | LeadSegment,
    available_input_leads: Sequence[str] | None = None,
) -> list[str] | str | list[LeadSegment] | LeadSegment:
    """Check a single row of a user configuration and hand it back.

    Accepted shapes:

    - ``str``: one lead on a full-width row
    - ``list[str]``: several leads concatenated on one row
    - :class:`~pmecg.types.LeadSegment`: one lead with an explicit range
    - ``list[LeadSegment]``: several explicit ranges on one row

    A list must be non-empty and homogeneous (all strings or all LeadSegments).
    If ``available_input_leads`` is given, every referenced lead name must be
    contained in it.

    Returns
    -------
    list[str] | str | list[LeadSegment] | LeadSegment
        The entry in the same shape it came in; list entries are returned as
        a new list.

    Raises
    ------
    ValueError
        If the entry has an unsupported type, is an empty list, mixes strings
        with LeadSegments, or names a lead missing from ``available_input_leads``.
    """
    check_presence = available_input_leads is not None

    def _require_present(name: str) -> None:
        if name not in available_input_leads:
            raise ValueError(f"Lead name '{name}' in configuration is not present in the input data")

    # Scalar entries.
    if isinstance(entry, str):
        if check_presence:
            _require_present(entry)
        return entry
    if isinstance(entry, LeadSegment):
        if check_presence:
            _require_present(entry.lead)
        return entry

    # Everything else has to be a non-empty, homogeneous list.
    if not isinstance(entry, list):
        raise ValueError(_CONFIGURATION_ENTRY_ERROR)
    if len(entry) == 0:
        raise ValueError("configuration row must not be an empty list")

    if all(isinstance(item, str) for item in entry):
        names = list(entry)
        if check_presence:
            for name in names:
                _require_present(name)
        return names

    if all(isinstance(item, LeadSegment) for item in entry):
        if check_presence:
            for segment in entry:
                _require_present(segment.lead)
        return list(entry)

    raise ValueError(
        "Within a configuration row, all entries must be the same type: all strings or all LeadSegment objects"
    )


def _template_configuration(template: str) -> ConfigurationDataType:
    """Look up the built-in layout for ``template`` and return a copy of it.

    The result is in the public configuration format: a string for a
    full-width row, a ``list[str]`` for a concatenated row. Both the outer
    list and every inner list are fresh objects, so callers may mutate them.

    Raises
    ------
    ValueError
        If ``template`` is not a key of the built-in template table.
    """
    try:
        rows = _TEMPLATE_CONFIGURATIONS[template]
    except KeyError as exc:
        raise ValueError(f"Template '{template}' is not supported. Supported templates are: {SUPPORTED_TEMPLATES}") from exc

    copied_rows = []
    for row in rows:
        copied_rows.append(list(row) if isinstance(row, list) else row)
    return copied_rows


def _extract_input_leads(ecg_data: ECGDataType) -> list[str]:
    """List the lead names carried by ``ecg_data`` as plain strings.

    For a DataFrame these are its column labels; for any other input the
    names are read from the item at index 1.
    """
    raw_names = ecg_data.columns if isinstance(ecg_data, pd.DataFrame) else ecg_data[1]
    return list(map(str, raw_names))



[docs]
def template_factory(template: str, ecg_data: ECGDataType, leads_map: LeadsMap | None) -> ConfigurationDataType:
    """Expand a built-in template into a configuration that names columns of ``ecg_data``.

    Parameters
    ----------
    template : str
        Name of the built-in template to expand. Supported values:
        ``'1x1'``, ``'1x2'``, ``'1x3'``, ``'1x4'``, ``'1x6'``, ``'1x8'``,
        ``'1x12'``, ``'2x4'``, ``'2x6'``, ``'4x3'``, ``'2x4+1'``, ``'2x6+1'``,
        ``'4x3+1'``, ``'2x4+3'``, ``'2x6+3'``, ``'4x3+3'``.
    ecg_data : ECGDataType
        The ECG input whose lead names the template is resolved against. It
        should be the same object (or one with the same columns/lead names)
        later passed to :meth:`~pmecg.ECGPlotter.plot`.
    leads_map : LeadsMap | None
        Optional mapping from canonical lead names (``I``, ``II``, ``aVR``,
        ``aVL``, ``aVF``, ``V1``, …) to column names in ``ecg_data``. Pass
        ``None`` when the input already uses the canonical names.

    Returns
    -------
    ConfigurationDataType
        A list with one element per row: a string for a full-width row, or a
        list of strings for leads concatenated within the row.

    Raises
    ------
    ValueError
        If ``template`` is not supported, if a lead required by the template
        is missing from both ``leads_map`` and ``ecg_data``, or if
        ``leads_map`` holds invalid or duplicate mappings.
    """
    input_leads = _extract_input_leads(ecg_data)
    canonical_to_custom = _validate_and_resolve_leads_map(leads_map, input_leads)
    available = set(input_leads)

    def _lookup(canonical_name: str) -> str:
        return _resolve_template_lead(template, canonical_name, canonical_to_custom, available)

    resolved_rows: ConfigurationDataType = []
    for row in _template_configuration(template):
        resolved_rows.append([_lookup(name) for name in row] if isinstance(row, list) else _lookup(row))
    return resolved_rows




[docs]
def cabrera_factory(
    template: str,
    ecg_data: ECGDataType,
    leads_map: LeadsMap | None = None,
) -> tuple[ECGDataType, ConfigurationDataType]:
    """Produce Cabrera-format data and layout from a built-in template.

    In Cabrera format the six limb leads appear as aVL, I, -aVR, II, aVF, III
    (rather than I, II, III, aVR, aVL, aVF), with ``-aVR`` being the negated
    aVR lead.

    Parameters
    ----------
    template : str
        Name of a built-in template. It must reference all six limb leads
        (supported: ``'1x6'``, ``'1x12'``, ``'2x6'``, ``'4x3'``, ``'2x6+1'``,
        ``'4x3+1'``, ``'2x6+3'``, ``'4x3+3'``).
    ecg_data : ECGDataType
        ECG input, expected to include all six limb leads. Use ``leads_map``
        when the input uses non-canonical column names.
    leads_map : LeadsMap | None, optional
        Mapping from canonical lead names (``I``, ``II``, ``aVR``, …) to the
        column names in ``ecg_data``. By default ``None``, meaning the input
        already uses canonical names.

        If the column mapped to ``aVR`` has a name starting with ``'-'``
        (e.g. ``LeadsMap(aVR='-aVR')`` or ``LeadsMap(aVR='-AVR')``), its data
        are taken as already negated: the column is only renamed to
        ``'-aVR'`` and the sign is left alone.

    Returns
    -------
    tuple[ECGDataType, ConfigurationDataType]
        ``(modified_ecg_data, cabrera_configuration)``:

        - ``modified_ecg_data`` is a copy of the input in which the aVR
          column (or lead) is renamed to ``'-aVR'`` and, unless its source
          name already started with ``'-'``, sign-flipped.
        - ``cabrera_configuration`` is the template layout with limb leads in
          Cabrera order, expressed with the column names of
          ``modified_ecg_data``. In templates that mix list rows with string
          rows, the string rows (rhythm strips) are only passed through
          ``leads_map``, not reordered.

    Raises
    ------
    ValueError
        If the template is unknown or does not reference all six limb leads,
        if ``leads_map`` or the input lead names are invalid, or if the aVR
        lead is missing from the input data.

    Notes
    -----
    Only the aVR lead is checked for presence in the input here; the other
    leads named in the returned configuration are not verified.
    """
    # The template must mention every limb lead.
    config = _template_configuration(template)
    referenced: set[str] = set()
    for row in config:
        if isinstance(row, list):
            referenced.update(row)
        else:
            referenced.add(row)

    absent = _CABRERA_LIMB_LEADS - referenced
    if absent:
        raise ValueError(
            f"Cabrera format requires all six limb leads in the template. "
            f"Template '{template}' is missing: {', '.join(sorted(absent))}"
        )

    # Locate the input column that carries aVR.
    input_leads = _extract_input_leads(ecg_data)
    canonical_to_custom = _validate_and_resolve_leads_map(leads_map, input_leads)
    avr_col = canonical_to_custom.get("aVR", "aVR")
    if avr_col not in input_leads:
        raise ValueError("Cabrera format requires 'aVR' lead in the input data")

    # A leading '-' in the source name marks data that are already negated.
    flip_sign = not avr_col.startswith("-")

    if isinstance(ecg_data, pd.DataFrame):
        new_data: ECGDataType = ecg_data.copy().rename(columns={avr_col: "-aVR"})
        if flip_sign:
            new_data["-aVR"] = -new_data["-aVR"]
    else:
        array, names = ecg_data
        avr_idx = [str(name) for name in names].index(avr_col)
        new_names = ["-aVR" if position == avr_idx else name for position, name in enumerate(names)]
        if isinstance(array, np.ndarray):
            new_array: np.ndarray | list[np.ndarray] = array.copy()
            if flip_sign:
                new_array[:, avr_idx] = -new_array[:, avr_idx]
        else:
            # List of per-lead arrays: copy the list, replace only the aVR item.
            new_array = list(array)
            if flip_sign:
                new_array[avr_idx] = -new_array[avr_idx]
        new_data = (new_array, new_names)

    def _cabrera_column(canonical: str) -> str:
        """Apply the Cabrera substitution, then map to the input column name."""
        target = _CABRERA_SUBSTITUTION.get(canonical, canonical)
        if target == "-aVR":
            return target
        return canonical_to_custom.get(target, target)

    def _plain_column(canonical: str) -> str:
        """Map a canonical name to the input column name without substitution."""
        return canonical_to_custom.get(canonical, canonical)

    # A string row next to list rows is a rhythm strip and keeps its own lead;
    # in an all-string (1xN) template every row is remapped.
    has_list_rows = any(isinstance(row, list) for row in config)
    string_row_resolver = _plain_column if has_list_rows else _cabrera_column

    cabrera_config: ConfigurationDataType = []
    for row in config:
        if isinstance(row, list):
            cabrera_config.append([_cabrera_column(lead) for lead in row])
        else:
            cabrera_config.append(string_row_resolver(row))

    return new_data, cabrera_config




[docs]
def expand_to_12_leads(
    ecg_data: ECGDataType,
    leads_map: LeadsMap | None = None,
) -> pd.DataFrame:
    """Compute leads III, aVR, aVL and aVF from I and II and return all 12 leads.

    The input must provide leads I, II and V1–V6. The four remaining limb
    leads are computed as:

    .. math::

        \\text{III} &= \\text{II} - \\text{I} \\\\
        \\text{aVR} &= -\\tfrac{\\text{I} + \\text{II}}{2} \\\\
        \\text{aVL} &= \\text{I} - \\tfrac{\\text{II}}{2} \\\\
        \\text{aVF} &= \\text{II} - \\tfrac{\\text{I}}{2}

    Parameters
    ----------
    ecg_data : ECGDataType
        ECG input containing at least leads I, II, V1–V6. Any other column is
        dropped: the result holds only the 12 standard leads. III, aVR, aVL
        and aVF are always recomputed, even if the input carries them.
        Supply ``leads_map`` when the input uses non-canonical column names.
    leads_map : LeadsMap | None, optional
        Mapping from canonical lead names (``I``, ``II``, ``V1``, …) to the
        column names in ``ecg_data``. By default ``None``, meaning the input
        already uses canonical names.

    Returns
    -------
    pandas.DataFrame
        12-lead ECG with columns in standard order:
        I, II, III, aVR, aVL, aVF, V1, V2, V3, V4, V5, V6.

    Raises
    ------
    ValueError
        If any of the eight required leads (I, II, V1–V6) is missing from
        both the leads map and the input data.
    """
    required_leads = ("I", "II", "V1", "V2", "V3", "V4", "V5", "V6")

    input_leads = _extract_input_leads(ecg_data)
    canonical_to_custom = _validate_and_resolve_leads_map(leads_map, input_leads)
    present = set(input_leads)

    # Canonical name -> actual column; an explicit mapping wins over the plain name.
    column_of: dict[str, str] = {}
    for canonical in required_leads:
        if canonical in canonical_to_custom:
            column_of[canonical] = canonical_to_custom[canonical]
        elif canonical in present:
            column_of[canonical] = canonical
        else:
            raise ValueError(
                f"expand_to_12_leads requires lead '{canonical}', but it is missing from leads_map and the input data"
            )

    if isinstance(ecg_data, pd.DataFrame):
        frame = ecg_data
    else:
        frame = _numpy_to_dataframe(ecg_data[0], ecg_data[1])

    lead_i = frame[column_of["I"]].values
    lead_ii = frame[column_of["II"]].values

    computed = {
        "III": lead_ii - lead_i,
        "aVR": -(lead_i + lead_ii) / 2.0,
        "aVL": lead_i - lead_ii / 2.0,
        "aVF": lead_ii - lead_i / 2.0,
    }

    # Assemble in canonical order, taking measured leads from the input.
    columns: dict[str, np.ndarray] = {
        name: computed[name] if name in computed else frame[column_of[name]].values for name in SUPPORTED_LEADS
    }
    return pd.DataFrame(columns)



def _resolve_configuration(
    configuration: ConfigurationDataType | None,
    input_leads: Sequence[str],
) -> ConfigurationDataType | None:
    """Check a user configuration row by row against the input lead names.

    The input lead names are validated first, even when ``configuration`` is
    ``None``. Each row is then passed to
    :func:`_validate_configuration_row_definition` together with the input
    lead names.

    Returns
    -------
    ConfigurationDataType | None
        ``None`` when no configuration was given, otherwise a new list of
        validated rows.

    Raises
    ------
    ValueError
        If the input lead names are invalid, if ``configuration`` is neither
        ``None`` nor a list, or if a row fails validation.
    """
    _validate_input_lead_names(input_leads)

    if configuration is None:
        return None
    if not isinstance(configuration, list):
        raise ValueError("configuration must be a list containing lead names or lists of lead names")

    known_leads = list(input_leads)
    return [_validate_configuration_row_definition(row, known_leads) for row in configuration]


def _even_leads_split(entry: list[str] | str, total_samples: int) -> list[LeadSegment]:
    """Divide a recording evenly among the leads of a string-based row.

    Parameters
    ----------
    entry : list[str] | str
        One lead name, or the lead names sharing the row.
    total_samples : int
        Number of samples in the recording. Every lead is given
        ``total_samples // len(leads)`` consecutive samples.

    Returns
    -------
    list[LeadSegment]
        One :class:`LeadSegment` per lead, in input order; the i-th segment
        starts where segment i-1 ends.

    Warns
    -----
    UserWarning
        When ``total_samples`` is not a multiple of the number of leads; the
        leftover trailing samples are not assigned to any segment.
    """
    leads = [entry] if isinstance(entry, str) else list(entry)
    lead_count = len(leads)
    per_lead, leftover = divmod(total_samples, lead_count)

    if leftover:
        warnings.warn(
            f"total_samples ({total_samples}) is not evenly divisible by the "
            f"number of selected leads ({lead_count}). "
            "The last few samples will not be plotted.",
            stacklevel=3,
        )

    segments: list[LeadSegment] = []
    boundary = 0
    for lead in leads:
        segments.append(LeadSegment(lead=lead, start=boundary, end=boundary + per_lead))
        boundary += per_lead
    return segments


def _build_row_signal(
    df: pd.DataFrame,
    lead_configs: list[LeadSegment],
    disconnect_segments: bool = True,
) -> tuple[np.ndarray, list[str], list[int]]:
    """Lay the requested lead segments end to end in one 1-D array.

    Parameters
    ----------
    df : pandas.DataFrame
        ECG data with one column per lead.
    lead_configs : list[LeadSegment]
        The segments to concatenate, in row order. Each one names a column
        (``lead``) and a sample range ``[start:end]``.
    disconnect_segments : bool, optional
        When True, the final sample of every non-empty segment is replaced by
        NaN so that neighbouring segments are not joined when drawn.
        By default True.

    Returns
    -------
    tuple[numpy.ndarray, list[str], list[int]]
        The concatenated signal, the lead name of each segment, and the index
        in the concatenated signal at which each segment begins.

    Raises
    ------
    ValueError
        If a segment's ``start`` is not below, or its ``end`` exceeds, the
        number of rows in ``df``.
    """
    lengths = [segment.end - segment.start for segment in lead_configs]
    signal = np.full((sum(lengths),), np.nan)
    n_available = len(df)

    names: list[str] = []
    starts: list[int] = []
    cursor = 0

    for segment, length in zip(lead_configs, lengths):
        if segment.start >= n_available or segment.end > n_available:
            raise ValueError(
                f"LeadSegment for lead '{segment.lead}' requests samples [{segment.start}:{segment.end}], "
                f"but the DataFrame only has {n_available} samples."
            )

        stop = cursor + length
        signal[cursor:stop] = df[segment.lead].values[segment.start : segment.end]
        if disconnect_segments and length > 0:
            # Blank the segment's last sample to break the drawn line.
            signal[stop - 1] = np.nan

        starts.append(cursor)
        names.append(segment.lead)
        cursor = stop

    return signal, names, starts


def _apply_configuration(
    df: pd.DataFrame,
    configuration: ConfigurationDataType | None = None,
    disconnect_segments: bool = True,
) -> tuple[tuple[np.ndarray, list[str], list[int], list[LeadSegment]], ...]:
    """Turn a plotting configuration into per-row signals.

    Parameters
    ----------
    df : pandas.DataFrame
        ECG data with one column per lead, labelled with the lead names.
    configuration : ConfigurationDataType | None, optional
        Row layout. Each list element describes one row:

        - a string: that lead over its whole duration;
        - a list of strings: those leads concatenated, each given an equal
          share of the samples;
        - a LeadSegment: one lead with an explicit sample range;
        - a list of LeadSegments: several explicit ranges concatenated.

        All rows must be of the same family (string-based or
        :class:`~pmecg.types.LeadSegment`-based). ``None`` plots every column
        of ``df`` on its own full-width row. By default None.
    disconnect_segments : bool, optional
        Forwarded to :func:`_build_row_signal`. By default True.

    Returns
    -------
    tuple[tuple[numpy.ndarray, list[str], list[int], list[LeadSegment]], ...]
        One ``(signal, selected_leads, offsets, segments)`` tuple per row:
        ``signal`` is the row's concatenated samples, ``offsets[i]`` the index
        in ``signal`` where lead i starts, and ``segments[i]`` the
        :class:`~pmecg.types.LeadSegment` describing lead i's slice.

    Raises
    ------
    ValueError
        If ``configuration`` is not a list, mixes string-based and
        LeadSegment-based rows, or contains an invalid row.

    Warns
    -----
    UserWarning
        When the configuration has LeadSegment-based rows and the rows do not
        all have the same total number of samples.
    """
    if configuration is None:
        configuration = [[lead] for lead in df.columns]
    if not isinstance(configuration, list):
        raise ValueError("configuration must be a list containing lead names or lists of lead names")

    def _is_segment_entry(e: object) -> bool:
        if isinstance(e, LeadSegment):
            return True
        return isinstance(e, list) and len(e) > 0 and isinstance(e[0], LeadSegment)

    # Rows must all belong to one family; a list is classified by its first item.
    segment_flags = [_is_segment_entry(row) for row in configuration]
    has_segment = any(segment_flags)
    has_string = any(
        isinstance(row, (str, list)) and not is_segment for row, is_segment in zip(configuration, segment_flags)
    )
    if has_segment and has_string:
        raise ValueError(
            "configuration mixes string-based and LeadSegment-based rows; "
            "all rows must use the same type (all string-based or all LeadSegment-based)"
        )

    rows: list[tuple[np.ndarray, list[str], list[int], list[LeadSegment]]] = []
    saw_segment_row = False

    # Kept as a plain loop in this frame: the helpers below issue warnings
    # whose ``stacklevel`` is counted from here.
    for row in configuration:
        checked = _validate_configuration_row_definition(row)
        string_based = isinstance(checked, str) or (
            isinstance(checked, list) and len(checked) > 0 and isinstance(checked[0], str)
        )
        if string_based:
            segments = _even_leads_split(checked, df.shape[0])
        else:
            segments = [checked] if isinstance(checked, LeadSegment) else checked
            saw_segment_row = True

        signal, leads, offsets = _build_row_signal(df, segments, disconnect_segments)
        rows.append((signal, leads, offsets, segments))

    if saw_segment_row:
        row_lengths = [len(row_signal) for row_signal, _, _, _ in rows]
        if len(set(row_lengths)) > 1:
            warnings.warn(
                f"Rows have unequal total sample counts: {row_lengths}. "
                "For consistent visual output, each row should span the same number of samples.",
                stacklevel=2,
            )

    return tuple(rows)