# Source code for ramanlib.core

"""
Core container primitives for RamanLib.

This module defines :class:`GroupedSpectralContainer`, a thin wrapper around a
:class:`pandas.DataFrame` whose first-class column is ``"spectrum"`` containing
:mod:`ramanspy` ``Spectrum`` objects (one per row). All other columns are free-form
metadata (strings, numbers, categories, etc.) that stay aligned to each spectrum.

The class provides a small, opinionated surface for:
- Safe construction from lists of spectra plus metadata rows.
- Conversion to a :mod:`ramanspy` ``SpectralContainer`` when axes match.
- Simple grouped reductions (e.g., group-wise mean spectra and optional stats).
- Convenience plotting hooks that defer to :mod:`ramanlib.plot`.

Notes
-----
The design goal is to make typical dataset manipulations ergonomic while keeping
the full power of :class:`pandas.DataFrame` available via the ``.df`` attribute.
For transformations beyond the light helpers here, operate directly on
``GroupedSpectralContainer.df`` and rebuild a container with
:meth:`GroupedSpectralContainer.from_dataframe`.

"""

from __future__ import annotations

import ramanspy as rp
import pandas as pd
import numpy as np

from . import plot


class GroupedSpectralContainer:
    """
    A table of Raman spectra with aligned metadata.

    Each row contains a :mod:`ramanspy` ``Spectrum`` in the ``"spectrum"`` column,
    plus arbitrary metadata columns (e.g., ``"sample"``, ``"region"``, ``"label"``).
    The container exposes a minimal API; for advanced operations, use the
    underlying :class:`pandas.DataFrame` via :attr:`df`.

    Parameters
    ----------
    spectral_list : list of ramanspy.Spectrum
        One spectrum per row.
    metadata : list of dict
        One metadata mapping per spectrum. Each dict's keys become columns in
        the backing DataFrame. The length must match ``spectral_list``.

    Attributes
    ----------
    df : pandas.DataFrame
        Backing table with column ``"spectrum"`` and zero or more metadata
        columns. The ``"spectrum"`` column is present even when the container
        holds no rows.

    Raises
    ------
    TypeError
        If any element of ``spectral_list`` is not a :mod:`ramanspy` ``Spectrum``.
    ValueError
        If ``spectral_list`` and ``metadata`` lengths differ.

    See Also
    --------
    GroupedSpectralContainer.from_dataframe : Build from an existing DataFrame.
    GroupedSpectralContainer.to_spectral_container : Convert to ``rp.SpectralContainer``.
    GroupedSpectralContainer.mean : Group-wise mean spectra.
    GroupedSpectralContainer.plot_mean : Plot group means with CIs.
    GroupedSpectralContainer.plot_random : Plot random spectra per group.
    """

    def __init__(self, spectral_list, metadata):
        """
        Construct a container from spectra and per-row metadata.

        Parameters
        ----------
        spectral_list : list of ramanspy.Spectrum
            One spectrum per row.
        metadata : list of dict
            One mapping per spectrum; keys become metadata columns.

        Raises
        ------
        TypeError
            If any element of ``spectral_list`` is not a ``Spectrum``.
        ValueError
            If the two lists differ in length.

        Notes
        -----
        This initializer validates types/lengths and constructs :attr:`df`.
        For robustness with preexisting DataFrames, prefer
        :meth:`GroupedSpectralContainer.from_dataframe`.
        """
        if not all(isinstance(s, rp.Spectrum) for s in spectral_list):
            raise TypeError("All items in spectral_list must be RamanSPy Spectrum objects.")
        if len(spectral_list) != len(metadata):
            raise ValueError("spectral_list and metadata must be the same length.")

        rows = [{"spectrum": s, **meta} for s, meta in zip(spectral_list, metadata)]
        if rows:
            self.df = pd.DataFrame(rows)
        else:
            # pd.DataFrame([]) has no columns at all; keep the 'spectrum'
            # column so an empty container still satisfies the class invariant.
            self.df = pd.DataFrame(columns=["spectrum"])

    @classmethod
    def from_dataframe(cls, df) -> "GroupedSpectralContainer":
        """
        Build a container from an existing DataFrame.

        Validates that a ``'spectrum'`` column exists and that each entry is a
        :mod:`ramanspy` ``Spectrum``. All other columns are treated as metadata
        and preserved.

        Parameters
        ----------
        df : pandas.DataFrame
            Input table with a ``'spectrum'`` column of :mod:`ramanspy`
            ``Spectrum`` objects and any number of metadata columns.

        Returns
        -------
        GroupedSpectralContainer
            A new container whose table is rebuilt row by row from ``df``:
            ``'spectrum'`` is the first column, followed by the metadata
            columns in their original order, on a fresh default index.
            The ``Spectrum`` objects themselves are not copied.

        Raises
        ------
        ValueError
            If the DataFrame lacks a ``'spectrum'`` column.
        TypeError
            If any value in ``df['spectrum']`` is not a :mod:`ramanspy` ``Spectrum``.
        """
        if "spectrum" not in df.columns:
            raise ValueError("DataFrame must contain a 'spectrum' column.")
        if not all(isinstance(s, rp.Spectrum) for s in df["spectrum"]):
            raise TypeError("All entries in 'spectrum' column must be Spectrum objects.")

        spectra = df["spectrum"].tolist()
        metadata_df = df.drop(columns=["spectrum"])
        container = cls(spectra, metadata_df.to_dict(orient="records"))
        if not spectra:
            # With zero rows there are no record dicts to carry the metadata
            # column names, so restore the schema explicitly (e.g. after an
            # all-False boolean mask).
            container.df = pd.DataFrame(columns=["spectrum", *metadata_df.columns])
        return container

    def copy(self) -> "GroupedSpectralContainer":
        """
        Return a copy of the container.

        Returns
        -------
        GroupedSpectralContainer
            A new container built from a copy of :attr:`df`. The table is
            independent of the original, but the ``Spectrum`` objects stored in
            it are the same Python objects (they are not duplicated), and the
            new table carries a fresh default index.
        """
        return GroupedSpectralContainer.from_dataframe(self.df.copy())

    def to_spectral_container(self) -> "rp.SpectralContainer":
        """
        Convert to a :mod:`ramanspy` :class:`~ramanspy.SpectralContainer`.

        All spectra must share an identical spectral axis. The spectra are
        stacked in their current row order.

        Returns
        -------
        ramanspy.SpectralContainer
            A spectral container built by stacking the row spectra.

        Raises
        ------
        ValueError
            If the container is empty, or if any spectrum has a spectral axis
            different from the first row's (including a different length).
        """
        spectra = self.df["spectrum"].tolist()
        if not spectra:
            raise ValueError("Cannot convert an empty container to a SpectralContainer.")

        reference_axis = spectra[0].spectral_axis
        # np.array_equal compares shape as well as values. A plain ``==`` would
        # broadcast a length-1 axis or fail outright on mismatched lengths.
        if not all(np.array_equal(reference_axis, s.spectral_axis) for s in spectra[1:]):
            raise ValueError("All spectra must have the same spectral axis to convert to a SpectralContainer.")
        return rp.SpectralContainer.from_stack(spectra)

    def mean(
        self,
        by: str | list[str] | None = None,
        include_stats: bool = False,
        ddof: int = 1,
    ) -> "GroupedSpectralContainer":
        """
        Compute mean spectra per group.

        Groups the rows by the key(s) in ``by`` (or treats the whole table as a
        single group when ``by=None``), computes the mean spectrum per group,
        and returns a new container with one row per group. Optionally adds
        group-level statistics.

        Parameters
        ----------
        by : str or list of str or None, optional
            Column name(s) to group by, passed to
            :meth:`pandas.DataFrame.groupby` with ``dropna=False``. If ``None``
            (default), all rows form a single group and the result carries a
            ``'group'`` column with the value ``"all"``.
        include_stats : bool, optional
            If ``True``, append the following columns to the result: ``'n'``
            (group size), ``'var_vector'`` and ``'std_vector'`` (variance and
            standard deviation taken over the stacked spectra, ``axis=0``).
            Default is ``False``.
        ddof : int, optional
            Delta degrees of freedom used for variance/std (as in
            :func:`numpy.var`); ``ddof=1`` gives sample variance. Default is ``1``.

        Returns
        -------
        GroupedSpectralContainer
            A new container where each row holds the group's mean
            :mod:`ramanspy` ``Spectrum`` in ``'spectrum'`` and the group key(s)
            as metadata columns.

        Raises
        ------
        ValueError
            If there are no spectra to average (empty container).

        Notes
        -----
        Each group is stacked with ``rp.SpectralContainer.from_stack`` and the
        mean spectrum is read from the stacked container's ``mean`` attribute.
        ``'std_vector'`` is the square root of ``'var_vector'``.

        Examples
        --------
        >>> means = gsc.mean(by=["sample", "region"], include_stats=True)
        >>> list(means.df.columns)
        ['spectrum', 'sample', 'region', 'n', 'var_vector', 'std_vector']
        """
        if by is None:
            by_cols = None
            grouped = [("all", self.df)]
        else:
            by_cols = list(by) if isinstance(by, (list, tuple)) else [by]
            grouped = list(self.df.groupby(by, dropna=False))

        rows = []
        for key, gdf in grouped:
            if gdf.empty:
                continue

            container = rp.SpectralContainer.from_stack(gdf["spectrum"].tolist())

            # Rehydrate the group key(s) into metadata columns.
            if by_cols is None:
                meta = {"group": "all"}
            else:
                # pandas yields tuple keys for multi-column groups and may
                # yield a scalar for a single key; normalise to a tuple.
                key_vals = key if isinstance(key, tuple) else (key,)
                meta = dict(zip(by_cols, key_vals))

            meta["spectrum"] = container.mean

            if include_stats:
                var = np.var(container.spectral_data, axis=0, ddof=ddof)
                meta["n"] = container.shape[0]
                meta["var_vector"] = var
                meta["std_vector"] = np.sqrt(var)

            rows.append(meta)

        if not rows:
            # Without this guard the failure surfaces later as a misleading
            # "DataFrame must contain a 'spectrum' column." error.
            raise ValueError("Cannot compute mean spectra for an empty container.")

        return GroupedSpectralContainer.from_dataframe(pd.DataFrame(rows))

    def plot_mean(
        self,
        by: str | list[str] | None = None,
        interval: tuple[float, float] | None = None,
        plot_type: str = "separate",
        ci_z: float = 1.96,
        **kwargs,
    ):
        """
        Plot mean spectra per group.

        This is a thin wrapper around :func:`ramanlib.plot.mean_per_group`;
        all arguments are forwarded unchanged.

        Parameters
        ----------
        by : str or list of str or None, optional
            Grouping key(s). See :meth:`GroupedSpectralContainer.mean`.
        interval : tuple of (float, float) or None, optional
            Optional spectral axis range (min, max) to display.
        plot_type : {"single", "separate", "stacked", "single stacked"}, optional
            Plot style. ``"separate"`` draws one subplot per group;
            ``"single"`` overlays all groups; ``"stacked"`` creates separate
            plots stacked vertically; ``"single stacked"`` overlays spectra in
            a single plot with vertical offsets. Default is ``"separate"``.
        ci_z : float, optional
            Z-score for confidence intervals (e.g., 1.96 ≈ 95% CI).
            Default is 1.96.
        **kwargs
            Forwarded to the underlying plotting function/matplotlib.

        Returns
        -------
        matplotlib.axes.Axes or numpy.ndarray
            Axes object(s) produced by the plotting backend (RamanSPy).

        See Also
        --------
        ramanlib.plot.mean_per_group : Implementation of the plotting logic.
        """
        return plot.mean_per_group(
            self,
            by=by,
            interval=interval,
            plot_type=plot_type,
            ci_z=ci_z,
            **kwargs,
        )

    def plot_random(
        self,
        by: str | list[str] | None = None,
        n_samples: int = 3,
        plot_type: str = "single",
        seed: int | None = None,
        **kwargs,
    ):
        """
        Plot a random sample of spectra per group.

        This is a thin wrapper around :func:`ramanlib.plot.random_per_group`;
        all arguments are forwarded unchanged.

        Parameters
        ----------
        by : str or list of str or None, optional
            Grouping key(s). If ``None``, sample from all rows.
        n_samples : int, optional
            Number of spectra to sample per group. Default is ``3``.
        plot_type : {"single", "separate", "stacked", "single stacked"}, optional
            Plot style. ``"separate"`` draws one subplot per group;
            ``"single"`` overlays all groups; ``"stacked"`` creates separate
            plots stacked vertically; ``"single stacked"`` overlays spectra in
            a single plot with vertical offsets. Default is ``"single"``.
        seed : int or None, optional
            Random seed for reproducibility. Default is ``None``.
        **kwargs
            Forwarded to the underlying plotting function/matplotlib.

        Returns
        -------
        matplotlib.axes.Axes or numpy.ndarray
            Axes object(s) produced by the plotting backend.

        See Also
        --------
        ramanlib.plot.random_per_group : Implementation of the plotting logic.
        """
        return plot.random_per_group(
            self,
            by=by,
            n_samples=n_samples,
            plot_type=plot_type,
            seed=seed,
            **kwargs,
        )

    def apply_pipeline(self, pipeline) -> "GroupedSpectralContainer":
        """
        Apply a RamanSPy processing pipeline to each spectrum.

        Parameters
        ----------
        pipeline : object
            Any object exposing an ``.apply(Spectrum) -> Spectrum`` method
            (e.g., a :mod:`ramanspy` pipeline).

        Returns
        -------
        GroupedSpectralContainer
            A new container with transformed spectra and the same metadata.

        Notes
        -----
        ``pipeline.apply`` is called once per row. :attr:`df` of this container
        is not reassigned; the result is built from a new DataFrame.
        """
        processed = self.df["spectrum"].apply(pipeline.apply)
        return GroupedSpectralContainer.from_dataframe(self.df.assign(spectrum=processed))

    def __len__(self) -> int:
        """
        Number of rows (spectra) in the container.

        Returns
        -------
        int
            Row count of :attr:`df`.
        """
        return len(self.df)

    def __getitem__(self, key):
        """
        Column/row selection proxy to the underlying DataFrame.

        Parameters
        ----------
        key : Any
            Key accepted by :class:`pandas.DataFrame`'s ``__getitem__``
            (e.g., a column name, a boolean mask, or a list of columns).

        Returns
        -------
        GroupedSpectralContainer or pandas.Series or pandas.DataFrame
            If the selection yields a DataFrame that still has a ``'spectrum'``
            column, it is wrapped back into a
            :class:`GroupedSpectralContainer`. Otherwise the raw pandas object
            is returned (a Series for a single column, or a plain DataFrame for
            a metadata-only column selection).

        Notes
        -----
        This is a convenience to keep simple subsetting ergonomic. For complex
        indexing, slice :attr:`df` directly.
        """
        result = self.df[key]
        # A metadata-only selection (e.g. gsc[["sample", "region"]]) cannot be
        # a container; hand back the DataFrame instead of raising.
        if isinstance(result, pd.DataFrame) and "spectrum" in result.columns:
            return GroupedSpectralContainer.from_dataframe(result)
        return result

    def __repr__(self) -> str:
        """
        Debug-friendly representation with a small preview of the DataFrame.

        Returns
        -------
        str
            Human-readable summary including a head of :attr:`df`.
        """
        return f"GroupedSpectralContainer({len(self.df)} spectra)\n\n{self.df.head()}"