Source code for cyto_dl.image.io.bioio_loader

import re
from typing import List

import numpy as np
from bioio import BioImage
from monai.data import MetaTensor
from monai.transforms import Transform

from cyto_dl.utils.arg_checking import get_dtype


[docs]class BioIOImageLoaderd(Transform):
    """Enumerates scenes and timepoints for dictionary with format.

    {path_key: path, channel_key: channel, scene_key: scene, timepoint_key: timepoint}.
    Differs from monai_bio_reader in that reading kwargs are passed in the dictionary, instead of fixed at
    initialization. The filepath will be saved in the dictionary as 'filename_or_obj' (with or without metadata depending on `include_meta_in_filename`).
    """

    def __init__(
        self,
        path_key: str = "path",
        scene_key: str = "scene",
        resolution_key: str = "resolution",
        kwargs_keys: List[str] = ["dimension_order_out", "C", "T"],
        out_key: str = "raw",
        allow_missing_keys=False,
        dtype: np.dtype = np.float16,
        dask_load: bool = True,
        include_meta_in_filename: bool = False,
    ):
        """
        Parameters
        ----------
        path_key : str = "path"
            Key for the path to the image
        scene_key : str = "scene"
            Key for the scene number
        kwargs_keys : List = ["dimension_order_out", "C", "T"]
            Keys for the kwargs to pass to BioImage.get_image_dask_data. Values in the csv can be comma separated list.
        out_key : str = "raw"
            Key for the output image
        allow_missing_keys : bool = False
            Whether to allow missing keys in the data dictionary
        dtype : np.dtype = np.float16
            Data type to cast the image to
        dask_load: bool = True
            Whether to use dask to load images. If False, full images are loaded into memory before extracting specified scenes/timepoints.
        include_meta_in_filename: bool = False
            Whether to include metadata in the filename. Useful when loading multi-dimensional images with different kwargs.
        """
        super().__init__()
        self.path_key = path_key
        self.kwargs_keys = kwargs_keys
        self.allow_missing_keys = allow_missing_keys
        self.out_key = out_key
        self.resolution_key = resolution_key
        self.scene_key = scene_key
        self.dtype = get_dtype(dtype)
        self.dask_load = dask_load
        self.include_meta_in_filename = include_meta_in_filename

[docs]    def split_args(self, arg):
        if isinstance(arg, str) and "," in arg:
            return list(map(int, arg.split(",")))
        return arg

    def _get_filename(self, path, kwargs):
        if self.include_meta_in_filename:
            path = path.split(".")[0] + "_" + "_".join([f"{k}_{v}" for k, v in kwargs.items()])
        # remove illegal characters from filename
        path = re.sub(r'[<>:"|?*]', "", path)
        return path

    def __call__(self, data):
        # copying prevents the dataset from being modified inplace - important when using partially cached datasets so that the memory use doesn't increase over time
        data = data.copy()
        if self.path_key not in data and not self.allow_missing_keys:
            raise KeyError(f"Missing key {self.path_key} in data dictionary")
        path = data[self.path_key]
        img = BioImage(path)
        if self.scene_key in data:
            img.set_scene(data[self.scene_key])
        if self.resolution_key in data:
            img.set_resolution_level(data[self.resolution_key])
        kwargs = {k: self.split_args(data[k]) for k in self.kwargs_keys if k in data}
        if self.dask_load:
            img = img.get_image_dask_data(**kwargs).compute()
        else:
            img = img.get_image_data(**kwargs)
        img = img.astype(self.dtype)
        if self.scene_key in data:
            kwargs["scene"] = data[self.scene_key]
        kwargs.update({"filename_or_obj": self._get_filename(path, kwargs)})

        data[self.out_key] = MetaTensor(img, meta=kwargs)
        return data