Source code for playNano.io.data_loaders

"""
Data loaders for AFM image stacks exported by **playNano**.

This module provides readers for serialized AFMImageStack bundles created
by the export routines (``.npz``, ``.h5``, and OME-TIFF). Each loader
reconstructs a :class:`~playNano.afm_stack.AFMImageStack` with correct
data, pixel size, channel name, and per-frame metadata (timestamps).
All loaders restore provenance; the ``.npz`` and ``.h5`` loaders also restore
any stored processing or mask layers.

Functions
---------
load_npz_bundle
    Load a `.npz` bundle into an :class:`~playNano.afm_stack.AFMImageStack`.
load_h5_bundle
    Load a `.h5` bundle into an :class:`~playNano.afm_stack.AFMImageStack`.
load_ome_tiff_stack
    Load an OME-TIFF bundle into an :class:`~playNano.afm_stack.AFMImageStack`.
"""

import json
import logging
from pathlib import Path

import h5py
import numpy as np
import tifffile

from playNano.afm_stack import AFMImageStack

logger = logging.getLogger(__name__)


def load_npz_bundle(path: Path, channel: str = "height_trace") -> AFMImageStack:
    """
    Load an :class:`~playNano.afm_stack.AFMImageStack` from a `.npz` bundle.

    The `.npz` file must contain the following keys:

    - ``data`` : ``float32`` array of shape ``(n_frames, H, W)``
    - ``pixel_size_nm`` : scalar ``float``
    - ``channel`` : ``str`` scalar
    - ``frame_metadata_json`` : JSON-encoded list of dicts
    - ``provenance_json`` : JSON-encoded dict
    - ``processed__<step>`` : optional processed frame arrays
    - ``masks__<mask>`` : optional boolean mask arrays
    - ``state_backups_json`` : optional JSON-encoded dict of saved states

    This is the structure produced by
    :func:`playNano.io.export_data.save_npz_bundle`.

    Parameters
    ----------
    path : pathlib.Path
        Path to the `.npz` file.
    channel : str, default="height_trace"
        Provided for API compatibility with
        :func:`~playNano.io.loader.load_afm_stack` but ignored when reading
        the bundle.

    Returns
    -------
    playNano.afm_stack.AFMImageStack
        Reconstructed AFM image stack with attributes populated:
        ``.processed``, ``.masks``, and ``.provenance``.

    Raises
    ------
    ValueError
        If required keys are missing or JSON blobs cannot be decoded.
    """
    arrs = np.load(str(path), allow_pickle=True)

    # Core data
    data = arrs["data"]
    pixel_size_nm = float(arrs["pixel_size_nm"].item())
    channel = str(arrs["channel"].item())

    # Metadata
    try:
        frame_metadata = json.loads(arrs["frame_metadata_json"].item())
    except KeyError:
        raise ValueError(f"{path} missing 'frame_metadata_json'") from None
    except Exception as e:
        raise ValueError(
            f"{path}: invalid JSON in 'frame_metadata_json': {e}"
        ) from None

    try:
        provenance = json.loads(arrs["provenance_json"].item())
    except KeyError:
        raise ValueError(f"{path} missing 'provenance_json'") from None
    except Exception as e:
        raise ValueError(f"{path}: invalid JSON in 'provenance_json': {e}") from None

    state_backups = None
    if "state_backups_json" in arrs:
        try:
            state_backups = json.loads(arrs["state_backups_json"].item())
        except Exception as e:
            raise ValueError(
                f"{path}: invalid JSON in 'state_backups_json': {e}"
            ) from None

    # Build stack
    stack = AFMImageStack(
        data=data,
        pixel_size_nm=pixel_size_nm,
        channel=channel,
        file_path=path,
        frame_metadata=frame_metadata,
    )
    if state_backups is not None:
        stack.state_backups = state_backups

    # Provenance
    saved_prov = provenance.copy()
    # annotate bundle info
    saved_prov.setdefault("bundle", {}).update(bundle_file=str(path), bundle_type="npz")
    # then replace stack.provenance wholesale
    stack.provenance = saved_prov

    # Processed and mask layers
    for key in arrs.files:
        if key.startswith("processed__"):
            step = key.split("__", 1)[1]
            stack.processed[step] = arrs[key].astype(np.float32)
        elif key.startswith("masks__"):
            mask = key.split("__", 1)[1]
            stack.masks[mask] = arrs[key].astype(bool)

    return stack
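
# A minimal usage sketch (not part of the module): round-tripping a bundle
# written by playNano.io.export_data.save_npz_bundle. The file path below is
# hypothetical.
#
#     from pathlib import Path
#     from playNano.io.data_loaders import load_npz_bundle
#
#     stack = load_npz_bundle(Path("exports/session.npz"))
#     print(stack.data.shape, stack.pixel_size_nm, stack.channel)
#     print(sorted(stack.processed), sorted(stack.masks))
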
def load_h5_bundle(path: Path, channel: str = "height_trace") -> AFMImageStack:
    """
    Load an :class:`~playNano.afm_stack.AFMImageStack` from an HDF5 bundle.

    Expected HDF5 structure
    -----------------------
    Datasets
        - ``/data`` : ``float32`` array of shape ``(n_frames, H, W)``
        - ``/processed/<step>`` : optional processed datasets
        - ``/masks/<mask>`` : optional boolean mask datasets
        - ``/frame_metadata_json`` : UTF-8 encoded JSON (list of dicts)
        - ``/provenance_json`` : UTF-8 encoded JSON (dict)
        - ``/state_backups_json`` : optional UTF-8 JSON (dict)
    Attributes
        - ``pixel_size_nm`` : scalar float
        - ``channel`` : string

    Files with this structure are produced by
    :func:`playNano.io.export_data.save_h5_bundle`.

    Parameters
    ----------
    path : pathlib.Path
        Path to the `.h5` file.
    channel : str, default="height_trace"
        Provided for API compatibility with
        :func:`~playNano.io.loader.load_afm_stack` but ignored when reading
        the bundle.

    Returns
    -------
    playNano.afm_stack.AFMImageStack
        Fully reconstructed AFM image stack with provenance, processed steps,
        and masks restored.

    Raises
    ------
    ValueError
        If required datasets (``frame_metadata_json`` or ``provenance_json``)
        are missing or contain invalid JSON.
    """
    with h5py.File(str(path), "r") as f:
        data = f["data"][()].astype(np.float32)
        pixel_size_nm = float(f.attrs["pixel_size_nm"])
        channel = str(f.attrs["channel"])

        processed = {
            n: ds[()].astype(np.float32) for n, ds in f.get("processed", {}).items()
        }
        masks = {n: ds[()].astype(bool) for n, ds in f.get("masks", {}).items()}

        if "frame_metadata_json" not in f:
            raise ValueError(f"{path} missing 'frame_metadata_json'")
        if "provenance_json" not in f:
            raise ValueError(f"{path} missing 'provenance_json'")

        try:
            frame_metadata = json.loads(f["frame_metadata_json"][()].decode("utf-8"))
            provenance = json.loads(f["provenance_json"][()].decode("utf-8"))
        except Exception as e:
            raise ValueError(f"{path}: invalid JSON metadata: {e}") from None

        state_backups = None
        if "state_backups_json" in f:
            try:
                state_backups = json.loads(
                    f["state_backups_json"][()].decode("utf-8")
                )
            except Exception as e:
                raise ValueError(
                    f"{path}: invalid 'state_backups_json': {e}"
                ) from None

    stack = AFMImageStack(
        data=data,
        pixel_size_nm=pixel_size_nm,
        channel=channel,
        file_path=path,
        frame_metadata=frame_metadata,
    )
    stack.processed = processed
    stack.masks = masks
    if state_backups is not None:
        stack.state_backups = state_backups

    # Attach provenance and mark as bundle
    saved_prov = provenance.copy()
    saved_prov.setdefault("bundle", {}).update(
        bundle_file=str(path), bundle_type="h5"
    )
    # then replace stack.provenance wholesale
    stack.provenance = saved_prov

    return stack
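
# A minimal usage sketch (hypothetical file name): reading a bundle written by
# playNano.io.export_data.save_h5_bundle and inspecting the restored layers
# and provenance.
#
#     from pathlib import Path
#     from playNano.io.data_loaders import load_h5_bundle
#
#     stack = load_h5_bundle(Path("exports/session.h5"))
#     print(stack.provenance["bundle"])   # {'bundle_file': ..., 'bundle_type': 'h5'}
#     print(list(stack.processed), list(stack.masks))
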
def load_ome_tiff_stack(path: Path, channel: str = "height_trace") -> AFMImageStack:
    """
    Load an OME-TIFF bundle into an :class:`~playNano.afm_stack.AFMImageStack`.

    Attempts to parse OME-XML and custom metadata tags to reconstruct pixel
    size, timestamps, and provenance. Falls back gracefully if certain
    metadata are unavailable.

    Parameters
    ----------
    path : pathlib.Path
        Path to the `.ome.tif` file created by
        :func:`~playNano.io.export_data.save_ome_tiff_stack`.
    channel : str, optional
        Fallback channel name if none is found in OME metadata.

    Returns
    -------
    playNano.afm_stack.AFMImageStack
        Reconstructed AFMImageStack with:

        - ``data`` : 3D ``float32`` array ``(T, H, W)``
        - ``pixel_size_nm`` : float, derived from OME physical size
        - ``channel`` : str, from OME Channel or fallback
        - ``frame_metadata`` : list of dicts containing timestamps
        - ``provenance`` : dict reconstructed from custom or embedded tags

    Raises
    ------
    ValueError
        If the image array shape is unsupported or essential metadata cannot
        be parsed.
    """
    import xml.etree.ElementTree as ET

    # read image + ome metadata
    with tifffile.TiffFile(path) as tif:
        img = tif.asarray()
        ome_xml = tif.ome_metadata
        description_tag = tif.pages[0].tags.get("ImageDescription")
        metadata_dict = {}
        if description_tag is not None:
            try:
                metadata_dict = json.loads(description_tag.value)
            except Exception:
                metadata_dict = {}

        # Try to read the custom tag (example tag 65000)
        custom_tag_id = 65000
        custom_tag_data = None
        if custom_tag_id in tif.pages[0].tags:
            try:
                custom_tag_data = json.loads(
                    tif.pages[0].tags[custom_tag_id].value.decode("utf-8")
                )
            except Exception as e:
                logger.warning(f"Could not decode custom tag {custom_tag_id}: {e}")

    # Normalize dimensions
    if img.ndim == 5:
        data = img[:, 0, 0, :, :].astype(np.float32)
    elif img.ndim == 3:
        data = img.astype(np.float32)
    else:
        raise ValueError(f"Unexpected OME-TIFF shape: {img.shape}")

    # Defaults
    ps_nm = 1.0
    timestamps = list(range(data.shape[0]))
    channel_name = channel

    # Parse OME-XML for physical sizes, timestamps, and channel name
    try:
        root = ET.fromstring(ome_xml)
        ns = {"ome": "http://www.openmicroscopy.org/Schemas/OME/2016-06"}
        pixels = root.find(".//ome:Pixels", namespaces=ns)
        if pixels is not None and pixels.attrib.get("PhysicalSizeX"):
            ps_nm = float(pixels.attrib["PhysicalSizeX"]) * 1e3  # µm → nm
        planes = root.findall(".//ome:Plane", namespaces=ns)
        time_points = [
            float(p.attrib.get("DeltaT", i)) for i, p in enumerate(planes)
        ]
        if time_points:
            timestamps = time_points
        channel_elem = root.find(".//ome:Channel", namespaces=ns)
        if channel_elem is not None and "Name" in channel_elem.attrib:
            channel_name = channel_elem.attrib["Name"]
    except Exception as e:
        logger.warning(f"Failed to parse OME-XML metadata for {path}: {e}")

    frame_metadata = [{"timestamp": t} for t in timestamps]

    stack = AFMImageStack(
        data=data,
        pixel_size_nm=ps_nm,
        channel=channel_name,
        file_path=path,
        frame_metadata=frame_metadata,
    )

    provenance_clean = {}
    if custom_tag_data is not None:
        provenance_clean = custom_tag_data
    elif "UserDataProvenance" in metadata_dict:
        try:
            provenance_clean = json.loads(metadata_dict["UserDataProvenance"])
        except Exception as e:
            logger.warning(f"Could not decode provenance from {path}: {e}")

    # Always add bundle info
    provenance_clean.setdefault("bundle", {}).update(
        bundle_file=str(path), bundle_type="ome-tiff"
    )
    stack.provenance = provenance_clean

    return stack
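
# A minimal usage sketch (hypothetical file name): reading an OME-TIFF written
# by playNano.io.export_data.save_ome_tiff_stack. If the OME-XML cannot be
# parsed, pixel size falls back to 1.0 nm and timestamps to frame indices.
#
#     from pathlib import Path
#     from playNano.io.data_loaders import load_ome_tiff_stack
#
#     stack = load_ome_tiff_stack(Path("exports/session.ome.tif"))
#     print(stack.pixel_size_nm)  # nm, from OME PhysicalSizeX when available
#     print([m["timestamp"] for m in stack.frame_metadata][:5])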