"""Utility functions for IO operations in playNano."""
import logging
from pathlib import Path
import numpy as np
# Characters rejected in an output file name (checked by `sanitize_output_name`).
INVALID_CHARS = r'\/:*?"<>|'
# Characters rejected in any component of an output folder path (checked by
# `prepare_output_directory`). Path separators and ':' are not in this set.
INVALID_FOLDER_CHARS = r'*?"<>|'
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Abbreviations of length units for height data, from metres down to picometres.
height_units = ["m", "cm", "mm", "um", "nm", "pm"]
[docs]
def pad_to_square(img: np.ndarray, border_color: int = 0) -> np.ndarray:
    """
    Pad an image to a square canvas by centring it.

    Parameters
    ----------
    img : np.ndarray
        Image whose first two axes are (height, width). Any trailing axes
        (for example colour channels) are carried over unchanged.
    border_color : int, optional
        Value used to fill the padded area. Default is 0.

    Returns
    -------
    np.ndarray
        New array with the same dtype as `img` and shape
        ``(size, size, *img.shape[2:])``, where ``size = max(height, width)``.
        When the padding is odd, the extra row/column goes to the
        bottom/right.
    """
    h, w = img.shape[:2]
    size = max(h, w)
    # Keep trailing axes so multi-channel images are padded rather than rejected.
    canvas = np.full((size, size) + img.shape[2:], border_color, dtype=img.dtype)
    y = (size - h) // 2
    x = (size - w) // 2
    canvas[y : y + h, x : x + w] = img  # noqa
    return canvas
[docs]
def guess_height_data_units(stack: np.ndarray) -> str:
    """
    Guess the most likely units of AFM height data from the data range.

    Parameters
    ----------
    stack : np.ndarray
        AFM height data array (or array-like), typically 2D or 3D.
        Non-finite values (NaN or infinity) are ignored when determining
        the range.

    Returns
    -------
    str
        A string indicating the guessed unit of the height data, one of:
        'pm' (picometers), 'nm' (nanometers), 'um' (micrometers),
        'mm' (millimeters), or 'm' (meters).

    Raises
    ------
    ValueError
        If the input array contains no finite values.

    Notes
    -----
    The unit is estimated based on the numeric range (max - min) of the
    finite values as follows:

    - Range > 1e4 : 'pm'
    - 1e-2 < Range <= 1e4 : 'nm'
    - 1e-4 < Range <= 1e-2 : 'um'
    - 1e-5 < Range <= 1e-4 : 'mm'
    - Range <= 1e-5 : 'm'

    Examples
    --------
    >>> import numpy as np
    >>> data = np.array([[0, 5e-9], [1e-8, 2e-8]])
    >>> guess_height_data_units(data)
    'm'
    >>> guess_height_data_units(np.array([[0.0, 5.0], [10.0, 20.0]]))
    'nm'
    """
    # asanyarray accepts plain sequences while leaving ndarray subclasses as-is.
    values = np.asanyarray(stack)
    finite = values[np.isfinite(values)]
    if finite.size == 0:
        raise ValueError("No finite values in data.")
    z_range = finite.max() - finite.min()
    # Thresholds are tested from largest to smallest, so each check only
    # needs the lower bound of its interval.
    if z_range > 1e4:
        return "pm"
    if z_range > 1e-2:
        return "nm"
    if z_range > 1e-4:
        return "um"
    if z_range > 1e-5:
        return "mm"
    return "m"
[docs]
def convert_height_units_to_nm(data: np.ndarray, unit: str) -> np.ndarray:
    """
    Convert AFM height data from the given unit to nanometers.

    Parameters
    ----------
    data : np.ndarray
        Input height data array, typically 2D or 3D.
    unit : str
        Unit of `data`, e.g. as returned by `guess_height_data_units`.
        Must be one of: 'pm', 'nm', 'um', 'mm', 'cm', or 'm'.

    Returns
    -------
    np.ndarray
        The input data converted to nanometers (a new array; `data` is not
        modified).

    Raises
    ------
    ValueError
        If the provided unit string is not recognized.
    """
    unit_to_multiplier = {
        "pm": 1e-3,  # 1 pm = 1e-3 nm
        "nm": 1.0,  # already in nm
        "um": 1e3,  # 1 µm = 1000 nm
        "mm": 1e6,  # 1 mm = 1e6 nm
        "cm": 1e7,  # 1 cm = 1e7 nm (listed in the module's height_units)
        "m": 1e9,  # 1 m = 1e9 nm
    }
    if unit not in unit_to_multiplier:
        raise ValueError(
            f"Unrecognized unit '{unit}'. Must be one of: {list(unit_to_multiplier)}"
        )
    return data * unit_to_multiplier[unit]
[docs]
def normalize_to_uint8(image: np.ndarray) -> np.ndarray:
    """
    Normalize an image to the uint8 [0, 255] range, handling NaNs and Infs.

    Parameters
    ----------
    image : np.ndarray
        Input image as a NumPy array. May contain NaNs or infinite values;
        these are replaced with 0 before the min/max are computed.

    Returns
    -------
    np.ndarray
        Normalized image as a uint8 NumPy array with values in the range
        [0, 255] and the same shape as the input. A constant image (or an
        empty one) yields an all-zero array. Fractional values are
        truncated, not rounded.
    """
    # Compute in float64: with narrow integer dtypes (e.g. int8) the
    # subtractions below would otherwise wrap around.
    cleaned = np.nan_to_num(
        np.asarray(image, dtype=np.float64), nan=0.0, posinf=0.0, neginf=0.0
    )
    # np.min/np.max raise on zero-size arrays; there is nothing to scale.
    if cleaned.size == 0:
        return np.zeros(cleaned.shape, dtype=np.uint8)
    min_val = cleaned.min()
    span = cleaned.max() - min_val
    # Avoid division by zero for constant images.
    if span == 0:
        return np.zeros(cleaned.shape, dtype=np.uint8)
    scaled = (cleaned - min_val) / span * 255
    return scaled.astype(np.uint8)
[docs]
def sanitize_output_name(name: str, default: str) -> str:
    """
    Reduce a user-supplied output name to a clean base name.

    Surrounding whitespace is stripped, any leading directory components
    are dropped, and the final extension (if any) is removed.

    Parameters
    ----------
    name : str
        The output file name provided by the user.
    default : str
        Name returned when `name` is empty or None, or when nothing usable
        remains after stripping.

    Returns
    -------
    str
        Sanitized base file name without extension.

    Raises
    ------
    ValueError
        If the resulting base name contains any character from
        ``INVALID_CHARS``.
    """
    if not name:
        return default

    stripped = name.strip()
    try:
        # with_suffix("") drops the last extension; .name drops directories.
        base = Path(stripped).with_suffix("").name
    except ValueError:
        # pathlib rejects paths with an empty name (e.g. whitespace-only input).
        return default

    for char in INVALID_CHARS:
        if char in base:
            raise ValueError(f"Invalid characters in output name: {INVALID_CHARS}")
    return base
[docs]
def prepare_output_directory(folder: str | None, default: str = "output") -> Path:
    """
    Validate, resolve, and create the output directory if it doesn't exist.

    Parameters
    ----------
    folder : str or None
        User-provided output folder path. If None, empty, or made up only
        of whitespace, `default` is used instead. Non-string values (such
        as ``Path`` objects) are converted with ``str()``.
    default : str, optional
        Default folder name to use if `folder` is not specified.

    Returns
    -------
    Path
        A resolved (absolute) Path object pointing to the created output
        directory.

    Raises
    ------
    ValueError
        If `folder` cannot be converted to a string, or if any part of the
        resolved folder path contains invalid characters.
    """
    if folder is None:
        folder = default
    elif not isinstance(folder, str):
        try:
            folder = str(folder)
        except Exception as e:
            raise ValueError(f"Invalid folder path: {e}") from e
    # Strip before falling back: a whitespace-only name would otherwise become
    # "" and resolve to the current working directory instead of `default`.
    folder = folder.strip() or default
    folder_path = Path(folder).resolve()
    # Every component of the resolved path is checked, not just the last one.
    for part in folder_path.parts:
        if any(c in part for c in INVALID_FOLDER_CHARS):
            raise ValueError(
                f"Invalid characters in output folder path: {INVALID_FOLDER_CHARS}"
            )
    folder_path.mkdir(parents=True, exist_ok=True)
    return folder_path
[docs]
def compute_zscale_range(
data: np.ndarray,
zmin: float | str = "auto",
zmax: float | str = "auto",
lower_percentile: int = 1,
upper_percentile: int = 99,
) -> tuple[float, float]:
"""
Compute robust Z-scale bounds (height or intensity range) for normalization.
Parameters
----------
data : np.ndarray
2D or 3D array of AFM image data.
zmin : float, "auto", or None
Lower bound: "auto" uses percentile, None uses data min, float uses value.
zmax : float or "auto"
Upper bound: "auto" uses percentile, None uses data max, float uses value.
lower_percentile : int, optional
Percentile to use for lower bound when zmin == "auto". Default is 1.
upper_percentile : int, optional
Percentile to use for upper bound when zmax == "auto". Default is 99.
Returns
-------
(float, float)
zmin and zmax values suitable for normalization.
Raises
------
ValueError
If zmin > zmax after processing or invalid input types.
"""
flat = data.ravel()
flat = flat[np.isfinite(flat)]
# Process zmin
if zmin == "auto":
zmin_val = np.percentile(flat, lower_percentile)
elif zmin is None:
zmin_val = np.min(flat)
else:
try:
zmin_val = float(zmin)
except (TypeError, ValueError):
raise ValueError("zmin must be a float, 'auto', or None.") from None
# Process zmax
if zmax == "auto":
zmax_val = np.percentile(flat, upper_percentile)
elif zmax is None:
zmax_val = np.max(flat)
else:
try:
zmax_val = float(zmax)
except (TypeError, ValueError):
raise ValueError("zmax must be a float, 'auto', or None.") from None
# Validation
if zmin_val > zmax_val:
raise ValueError("zmin must be less than or equal to zmax.") from None
logger.debug(f"[Z-scaling] zmin={zmin_val:.3f} nm, zmax={zmax_val:.3f} nm")
return zmin_val, zmax_val
[docs]
def make_json_safe(obj):
    """
    Recursively convert NumPy types and non-JSON objects into serializable ones.

    Parameters
    ----------
    obj : Any
        Object to convert. Dicts, lists and tuples are traversed recursively.

    Returns
    -------
    Any
        - ``np.ndarray`` -> nested list (via ``tolist()``)
        - NumPy bool / integer / floating scalars -> ``bool`` / ``int`` / ``float``
        - dict -> dict with converted values (NumPy scalar keys are converted too)
        - list / tuple -> list / tuple with converted items
        - callables -> their ``__name__`` (or ``repr`` when they have none)
        - anything else is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # np.bool_ is not a subclass of np.integer and json cannot encode it.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, dict):
        # json.dumps also rejects NumPy scalars used as keys.
        return {
            (make_json_safe(k) if isinstance(k, np.generic) else k): make_json_safe(v)
            for k, v in obj.items()
        }
    if isinstance(obj, list):
        return [make_json_safe(v) for v in obj]
    if isinstance(obj, tuple):
        return tuple(make_json_safe(v) for v in obj)
    if callable(obj):
        # Not every callable has __name__ (e.g. functools.partial objects).
        name = getattr(obj, "__name__", None)
        return name if name is not None else repr(obj)
    return obj
[docs]
def decode_hdf5_attr(attr: bytes | str) -> str:
"""
Decode an attribute that may be bytes or a string.
Parameters
----------
attr : bytes or str
The attribute to decode.
Returns
-------
str
The decoded string.
"""
if isinstance(attr, bytes):
return attr.decode("utf-8")
return str(attr)