Source code for playnano.utils.io_utils

"""Utility functions for IO operations in playNano."""

import logging
from pathlib import Path

import numpy as np

# Characters that ``sanitize_output_name`` rejects in an output file name.
INVALID_CHARS = r'\/:*?"<>|'
# Characters that ``prepare_output_directory`` rejects in any path component.
# Unlike INVALID_CHARS this set omits ``\``, ``/`` and ``:``.
INVALID_FOLDER_CHARS = r'*?"<>|'

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Height unit strings, ordered from metres down to picometres.
# NOTE(review): "cm" is listed here but has no entry in the multiplier table of
# ``convert_height_units_to_nm`` — confirm whether that is intended.
height_units = ["m", "cm", "mm", "um", "nm", "pm"]


def pad_to_square(img: np.ndarray, border_color: int = 0) -> np.ndarray:
    """
    Pad an image to a square canvas by centring it.

    Parameters
    ----------
    img : np.ndarray
        Image whose first two axes are (height, width). Any trailing axes
        (for example colour channels) are carried over unchanged.
    border_color : int, optional
        Fill value used for the padded border. Default is 0.

    Returns
    -------
    np.ndarray
        New array of shape ``(size, size, *img.shape[2:])`` with the same
        dtype as `img`, where ``size = max(height, width)``.
    """
    h, w = img.shape[:2]
    size = max(h, w)
    # Keep trailing axes so multi-channel images can be padded too; for a
    # plain 2D image ``img.shape[2:]`` is empty and the canvas is (size, size).
    canvas = np.full((size, size, *img.shape[2:]), border_color, dtype=img.dtype)
    # Integer division puts any odd leftover pixel on the bottom/right side.
    y = (size - h) // 2
    x = (size - w) // 2
    canvas[y : y + h, x : x + w] = img  # noqa
    return canvas
def guess_height_data_units(stack: np.ndarray) -> str:
    """
    Guess the most likely units of AFM height data from the data range.

    Parameters
    ----------
    stack : np.ndarray
        AFM height data array, typically 2D or 3D. Any array-like input is
        accepted. Non-finite values (NaN or infinity) are ignored when
        determining the range.

    Returns
    -------
    str
        A string indicating the guessed unit of the height data, one of:
        'pm' (picometers), 'nm' (nanometers), 'um' (micrometers),
        'mm' (millimeters), or 'm' (meters).

    Raises
    ------
    ValueError
        If the input array contains no finite values.

    Notes
    -----
    The unit is estimated based on the numeric range of the data as follows:

    - Range > 1e4          : 'pm'
    - 1e-2 < Range <= 1e4  : 'nm'
    - 1e-4 < Range <= 1e-2 : 'um'
    - 1e-5 < Range <= 1e-4 : 'mm'
    - Range <= 1e-5        : 'm'

    Examples
    --------
    >>> import numpy as np
    >>> data = np.array([[0, 5e-9], [1e-8, 2e-8]])
    >>> guess_height_data_units(data)
    'm'
    >>> guess_height_data_units(np.array([[0.0, 5.0], [10.0, 20.0]]))
    'nm'
    """
    values = np.asarray(stack)
    finite = values[np.isfinite(values)]
    if finite.size == 0:
        raise ValueError("No finite values in data.")
    z_range = finite.max() - finite.min()

    # Thresholds are checked from the largest range downwards, so each test
    # only needs its lower bound.
    if z_range > 1e4:
        return "pm"
    if z_range > 1e-2:
        return "nm"
    if z_range > 1e-4:
        return "um"
    if z_range > 1e-5:
        return "mm"
    return "m"
def convert_height_units_to_nm(data: np.ndarray, unit: str) -> np.ndarray:
    """
    Rescale AFM height data from a given unit into nanometers.

    Parameters
    ----------
    data : np.ndarray
        Height data to rescale, typically 2D or 3D.
    unit : str
        Unit the data is currently expressed in. Must be one of:
        'pm', 'nm', 'um', 'mm', or 'm'.

    Returns
    -------
    np.ndarray
        `data` multiplied by the factor that maps `unit` to nanometers.

    Raises
    ------
    ValueError
        If `unit` is not one of the supported unit strings.
    """
    # Nanometers per one of each supported unit.
    nm_per_unit = {
        "pm": 1e-3,
        "nm": 1.0,
        "um": 1e3,
        "mm": 1e6,
        "m": 1e9,
    }
    factor = nm_per_unit.get(unit)
    if factor is None:
        supported = list(nm_per_unit)
        raise ValueError(f"Unrecognized unit '{unit}'. Must be one of: {supported}")
    return data * factor
def normalize_to_uint8(image: np.ndarray) -> np.ndarray:
    """
    Normalize an image to the uint8 [0, 255] range, handling NaNs and Infs.

    Parameters
    ----------
    image : np.ndarray
        Input image. Usually floats, possibly containing NaNs or infinite
        values; integer and boolean arrays are also accepted.

    Returns
    -------
    np.ndarray
        Normalized image as a uint8 NumPy array with values in the range
        [0, 255]. An empty or constant input yields an all-zero array of the
        same shape.
    """
    arr = np.asarray(image)
    # Integer/bool arithmetic can wrap around (e.g. int8: 100 - (-100)), so
    # promote those dtypes to float before computing the range.
    if arr.dtype.kind in "biu":
        arr = arr.astype(np.float64)

    # Replace NaNs and Infs with 0
    arr = np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0)

    # np.min/np.max raise on empty input; there is nothing to scale anyway.
    if arr.size == 0:
        return np.zeros(arr.shape, dtype=np.uint8)

    min_val = arr.min()
    span = arr.max() - min_val

    # Avoid division by zero for constant images
    if span == 0:
        return np.zeros(arr.shape, dtype=np.uint8)

    # Normalize to [0, 255]; the cast truncates towards zero.
    norm = (arr - min_val) / span * 255
    return norm.astype(np.uint8)
def sanitize_output_name(name: str, default: str) -> str:
    """
    Reduce a user-supplied output name to a clean base file name.

    Parameters
    ----------
    name : str
        Output file name given by the user.
    default : str
        Name returned when `name` is empty or None, or when no base name can
        be derived from it.

    Returns
    -------
    str
        The final path component of the stripped `name` with its last
        extension removed, or `default`.

    Raises
    ------
    ValueError
        If the resulting base name contains any character from
        ``INVALID_CHARS``.
    """
    if not name:
        return default

    trimmed = name.strip()
    try:
        # with_suffix("") drops the last extension; it raises ValueError when
        # the path has no file name component at all.
        without_ext = Path(trimmed).with_suffix("")
    except ValueError:
        return default

    base = without_ext.name
    for forbidden in INVALID_CHARS:
        if forbidden in base:
            raise ValueError(f"Invalid characters in output name: {INVALID_CHARS}")
    return base
def prepare_output_directory(folder: str | None, default: str = "output") -> Path:
    """
    Validate, resolve, and create the output directory if it doesn't exist.

    Parameters
    ----------
    folder : str or None
        User-provided output folder path. If None, empty, or made up only of
        whitespace, `default` is used. Non-string values are converted with
        ``str()``.
    default : str, optional
        Default folder name to use if `folder` is not specified.

    Returns
    -------
    Path
        A resolved Path object pointing to the created output directory.

    Raises
    ------
    ValueError
        If `folder` cannot be converted to a string, or if any part of the
        resolved folder path contains a character from
        ``INVALID_FOLDER_CHARS``.
    """
    if folder is None:
        folder = default
    elif not isinstance(folder, str):
        try:
            folder = str(folder)
        except Exception as e:
            raise ValueError(f"Invalid folder path: {e}") from e

    # A blank or whitespace-only value must fall back to the default; an empty
    # string would otherwise resolve to the current working directory.
    folder = folder.strip() or default

    folder_path = Path(folder).resolve()

    # Validate every component of the resolved path before touching the disk.
    for part in folder_path.parts:
        if any(c in part for c in INVALID_FOLDER_CHARS):
            raise ValueError(
                f"Invalid characters in output folder path: {INVALID_FOLDER_CHARS}"
            )

    folder_path.mkdir(parents=True, exist_ok=True)
    return folder_path
[docs] def compute_zscale_range( data: np.ndarray, zmin: float | str = "auto", zmax: float | str = "auto", lower_percentile: int = 1, upper_percentile: int = 99, ) -> tuple[float, float]: """ Compute robust Z-scale bounds (height or intensity range) for normalization. Parameters ---------- data : np.ndarray 2D or 3D array of AFM image data. zmin : float, "auto", or None Lower bound: "auto" uses percentile, None uses data min, float uses value. zmax : float or "auto" Upper bound: "auto" uses percentile, None uses data max, float uses value. lower_percentile : int, optional Percentile to use for lower bound when zmin == "auto". Default is 1. upper_percentile : int, optional Percentile to use for upper bound when zmax == "auto". Default is 99. Returns ------- (float, float) zmin and zmax values suitable for normalization. Raises ------ ValueError If zmin > zmax after processing or invalid input types. """ flat = data.ravel() flat = flat[np.isfinite(flat)] # Process zmin if zmin == "auto": zmin_val = np.percentile(flat, lower_percentile) elif zmin is None: zmin_val = np.min(flat) else: try: zmin_val = float(zmin) except (TypeError, ValueError): raise ValueError("zmin must be a float, 'auto', or None.") from None # Process zmax if zmax == "auto": zmax_val = np.percentile(flat, upper_percentile) elif zmax is None: zmax_val = np.max(flat) else: try: zmax_val = float(zmax) except (TypeError, ValueError): raise ValueError("zmax must be a float, 'auto', or None.") from None # Validation if zmin_val > zmax_val: raise ValueError("zmin must be less than or equal to zmax.") from None logger.debug(f"[Z-scaling] zmin={zmin_val:.3f} nm, zmax={zmax_val:.3f} nm") return zmin_val, zmax_val
def make_json_safe(obj):
    """
    Recursively convert NumPy types and non-JSON objects into serializable ones.

    Parameters
    ----------
    obj : Any
        Value to convert. Dicts, lists and tuples are traversed recursively.

    Returns
    -------
    Any
        - NumPy arrays become nested lists.
        - NumPy boolean, integer and floating scalars become ``bool``,
          ``int`` and ``float``.
        - Dict values are converted recursively; dict keys that are NumPy
          scalars are converted too.
        - Lists and tuples keep their container type.
        - Callables become their ``__name__``, or their ``repr()`` when they
          have no ``__name__``.
        - Anything else is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, dict):
        # NumPy scalar keys are rejected by json.dumps, so convert those as
        # well; ordinary keys are left untouched.
        return {
            (make_json_safe(k) if isinstance(k, np.generic) else k): make_json_safe(v)
            for k, v in obj.items()
        }
    if isinstance(obj, list):
        return [make_json_safe(v) for v in obj]
    if isinstance(obj, tuple):
        return tuple(make_json_safe(v) for v in obj)
    if callable(obj):
        # Convert functions to their names. Some callables (functools.partial,
        # instances defining __call__) have no __name__, so fall back to repr.
        name = getattr(obj, "__name__", None)
        return name if name is not None else repr(obj)
    return obj
[docs] def decode_hdf5_attr(attr: bytes | str) -> str: """ Decode an attribute that may be bytes or a string. Parameters ---------- attr : bytes or str The attribute to decode. Returns ------- str The decoded string. """ if isinstance(attr, bytes): return attr.decode("utf-8") return str(attr)