# Source code for aicsimageio.readers.array_like_reader

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
from typing import Any, Dict, List, Optional, Tuple, Union

import dask.array as da
import numpy as np
import xarray as xr

from .. import constants, exceptions
from ..dimensions import (
    DEFAULT_DIMENSION_ORDER,
    DEFAULT_DIMENSION_ORDER_WITH_SAMPLES,
    DimensionNames,
)
from ..metadata import utils as metadata_utils
from ..types import MetaArrayLike, PhysicalPixelSizes
from .reader import Reader

###############################################################################

# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)

###############################################################################


class ArrayLikeReader(Reader):
    """
    A catch all for numpy, dask, or xarray to Reader interface.

    See Notes for more details.

    Parameters
    ----------
    image: Union[List[MetaArrayLike], MetaArrayLike]
        A single numpy ndarray, dask Array, or xarray DataArray, or a list of
        many. If provided a list, each item in the list will be exposed through
        the scene API.

        For an xarray DataArray (alone or as an element of the list), dimension
        names and a channel coordinate that are already attached to the
        DataArray are kept: dim_order only replaces xarray's default "dim_N"
        names, and channel_names only fills in a missing channel coordinate.
        The two are handled independently of each other. Any metadata that is
        provided is still validated against the array shape.
    dim_order: Optional[Union[List[str], str]]
        A string of dimensions to be applied to all array(s) or a list of
        string dimension names to be mapped onto the list of arrays provided to
        image. I.E. "TYX".
        Default: None (guess dimensions for single array or multiple arrays)
    channel_names: Optional[Union[List[str], List[List[str]]]]
        A list of string channel names to be applied to all array(s) or a list
        of lists of string channel names to be mapped onto the list of arrays
        provided to image. Entries of a per-scene list may be None.
        Default: None (create OME channel IDs for names for single or multiple
        arrays)
    physical_pixel_sizes: Optional[
        Union[List[float], Dict[str, float], PhysicalPixelSizes]
    ]
        A specification of this image's physical pixel sizes. Can be provided
        as a list, dict or PhysicalPixelSizes object. If a list is passed, the
        assumed order is [Z, Y, X]. If a dict is passed, it must contain "Z",
        "Y" and "X" as keys.
        Default: None

    Raises
    ------
    exceptions.UnsupportedFileFormatError
        Raised when image (or any item of the image list) is not a numpy
        ndarray, dask Array, or xarray DataArray.
    exceptions.ConflictingArgumentsError
        Raised when the number of scenes provided is different from the number
        of items provided to the metadata parameters if as a list.
    exceptions.InvalidDimensionOrderingError
        Raised when dimensions have to be guessed for an array with more
        dimensions than there is a default order for.
    ValueError
        Provided dim_order string or channel_names are not the same length as
        the number of dimensions or the size of the channel dimension for the
        array at the matching index, or channel_names were provided for an
        array without a channel dimension.

    Notes
    -----
    If you want to combine multiple numpy ndarrays or dask arrays with xarray
    DataArrays and attach metadata to all non-xarray.DataArrays, you can do so.
    Metadata given for an xarray DataArray scene does not overwrite dimension
    names or channel coordinates the DataArray already carries. In such cases,
    it is recommended that you provide metadata for those scenes as None. I.E.

    >>> some_metadata_attached_xr = ...
    ... some_np = ...
    ... some_dask = ...
    ... reader = ArrayLikeReader(
    ...     image=[some_metadata_attached_xr, some_np, some_dask],
    ...     dim_order=[None, "CTYX", "ZTYX"],
    ...     channel_names=[None, ["A", "B", "C"], None],
    ... )

    Will create a three scene ArrayLikeReader with the metadata and coordinate
    information filled xarray DataArray as the first scene, a numpy array with
    CTYX as the dimensions and ["A", "B", "C"] as the channel names, and a dask
    array with ZTYX as the dimensions and no channel names (as there is no
    channel dimension).

    A DataArray that already has named dimensions is used as passed in (it is
    not copied first), so a generated channel coordinate and the unprocessed
    metadata attribute are written onto that object.
    """

    @staticmethod
    def _is_supported_image(  # type: ignore
        image: Union[List[MetaArrayLike], MetaArrayLike], *args: Any, **kwargs: Any
    ) -> bool:
        """
        Check that image is (or is a list made up only of) numpy ndarrays,
        dask Arrays, or xarray DataArrays.
        """
        supported_types = (np.ndarray, da.Array, xr.DataArray)
        if isinstance(image, list):
            return all(isinstance(scene, supported_types) for scene in image)

        return isinstance(image, supported_types)

    @staticmethod
    def _guess_dim_order(shape: Tuple[int, ...]) -> str:
        """
        Given an image shape attempts to guess the dimension order.

        Parameters
        ----------
        shape: Tuple[int, ...]
            Tuple of the image array's dimensions.

        Returns
        -------
        dim_order: str
            The guessed dimension order: the trailing len(shape) characters of
            the default order (or of the default order with samples when the
            shape is too long for the former).

        Raises
        ------
        exceptions.InvalidDimensionOrderingError
            Raised when the shape has more dimensions than the ArrayLikeReader
            has a default order available to support.
        """
        n_dims = len(shape)
        if n_dims > len(DEFAULT_DIMENSION_ORDER):
            if n_dims > len(DEFAULT_DIMENSION_ORDER_WITH_SAMPLES):
                raise exceptions.InvalidDimensionOrderingError(
                    "Unable to guess dimension order for array-like images with "
                    + f"more than {len(DEFAULT_DIMENSION_ORDER_WITH_SAMPLES)} "
                    + f"dimensions. Found {len(shape)} dimensions."
                )
            return DEFAULT_DIMENSION_ORDER_WITH_SAMPLES[
                len(DEFAULT_DIMENSION_ORDER_WITH_SAMPLES) - n_dims :
            ]

        # Explicit start index (not a negative slice) so that an empty shape
        # yields an empty string instead of the whole default order
        return DEFAULT_DIMENSION_ORDER[len(DEFAULT_DIMENSION_ORDER) - n_dims :]

    @staticmethod
    def _generate_channel_names(scene_index: int, n_channels: int) -> List[str]:
        """
        Create one OME channel ID per channel for the scene at scene_index.
        """
        image_id = metadata_utils.generate_ome_image_id(scene_index)
        return [
            metadata_utils.generate_ome_channel_id(
                image_id=image_id, channel_id=c_index
            )
            for c_index in range(n_channels)
        ]

    def __init__(
        self,
        image: Union[List[MetaArrayLike], MetaArrayLike],
        dim_order: Optional[Union[List[str], str]] = None,
        channel_names: Optional[Union[List[str], List[List[str]]]] = None,
        physical_pixel_sizes: Optional[
            Union[List[float], Dict[str, float], PhysicalPixelSizes]
        ] = None,
        **kwargs: Any,
    ):
        # Enforce valid image
        if not self._is_supported_image(image):
            raise exceptions.UnsupportedFileFormatError(
                self.__class__.__name__, str(type(image))
            )

        # General note
        # The result of all of this validation and pre-compute work is that at
        # the end of this init, we should have n-number of constructed xarray
        # objects created from the parameters provided that we then just
        # pass-through to for all other standard Reader operations

        # channel_names can be "flat" (one list of names shared by every scene)
        # or per scene (one entry per scene, each a list of names or None).
        # The first element tells the two apart; an empty list is treated as
        # flat so that it never has to be indexed.
        names_given = channel_names is not None
        names_are_flat = names_given and (
            len(channel_names) == 0  # type: ignore
            or isinstance(channel_names[0], str)  # type: ignore
        )
        names_are_nested = (
            names_given
            and len(channel_names) > 0  # type: ignore
            and isinstance(channel_names[0], list)  # type: ignore
        )

        if isinstance(image, list):
            # If metadata is attached as lists
            # Enforcing matching shape
            if isinstance(dim_order, list) and len(dim_order) != len(image):
                raise exceptions.ConflictingArgumentsError(
                    f"ArrayLikeReader received a list of arrays to use as scenes "
                    f"but the provided list of dimension order strings is of "
                    f"different length. "
                    f"Number of provided scenes: {len(image)}, "
                    f"Number of provided dimension order strings: "
                    f"{len(dim_order)}"
                )

            # Any non-flat channel_names is per scene, including lists whose
            # first entry is None, so all of them get the length check
            if (
                names_given
                and not names_are_flat
                and len(channel_names) != len(image)  # type: ignore
            ):
                raise exceptions.ConflictingArgumentsError(
                    f"ArrayLikeReader received a list of arrays to use as "
                    f"scenes but the provided list of channel names is "
                    f"of different length. "
                    f"Number of provided scenes: {len(image)}, "
                    f"Number of provided channel names: "
                    f"{len(channel_names)}"  # type: ignore
                )

            # If metadata is attached as singles
            # but many scenes provided, expand
            if dim_order is None or isinstance(dim_order, str):
                dim_order = [dim_order for _ in image]  # type: ignore
            if channel_names is None or names_are_flat:
                channel_names = [channel_names for _ in image]  # type: ignore

            # Shallow copy: scenes may be replaced by renamed DataArrays below
            # and that must not alter the list owned by the caller
            scenes = list(image)

        else:
            # Set all kwargs to lists for standard interface
            scenes = [image]
            if not isinstance(dim_order, list):
                dim_order = [dim_order]  # type: ignore
            # Wrap None or a flat list of names; a list of lists is already
            # in per scene form
            if not names_are_nested:
                channel_names = [channel_names]  # type: ignore

        # Store image(s)
        self._all_scenes = scenes

        # Validate and store dims
        self._scene_dims_list = []
        for i, dims_string in enumerate(dim_order):
            this_scene = self._all_scenes[i]

            # Provided None, guess
            if dims_string is None:
                scene_dims: Any = self._guess_dim_order(this_scene.shape)
                dims_origin = "guessed"

            # Provided Some, validate
            elif len(dims_string) == len(this_scene.shape):
                scene_dims = dims_string
                dims_origin = "provided"

            else:
                raise ValueError(
                    f"Provided dimension string does not have the same amount of "
                    f"dimensions as the matching provided array. "
                    f"Provided array shape: {this_scene.shape}, "
                    f"Provided dimension string: {dims_string}"
                )

            if isinstance(this_scene, xr.DataArray):
                # xarray names unnamed dimensions "dim_N"
                # http://xarray.pydata.org/en/stable/data-structures.html?highlight=dim_n#creating-a-dataarray
                if this_scene.dims[0] == "dim_0":
                    log.debug(
                        "Overrode the default xarray dimensions for AICSImage "
                        f"{dims_origin} dimensions."
                    )

                    # Rename the dimensions from "dim_N" to the guessed or
                    # provided names; rename returns a new DataArray so the
                    # stored scene is replaced
                    self._all_scenes[i] = this_scene.rename(
                        {
                            f"dim_{d_index}": d
                            for d_index, d in enumerate(scene_dims)
                        }
                    )

                # The DataArray has its own dimension names and they are left
                # untouched, so those are the names to record. Recording the
                # guess or kwarg instead would make the channel lookup below
                # search dimensions the array does not have.
                else:
                    scene_dims = tuple(this_scene.dims)

            self._scene_dims_list.append(scene_dims)

        # Validate and store channel_names
        self._scene_channel_names = []
        for s_index, this_scene_channel_names in enumerate(
            channel_names  # type: ignore
        ):
            this_scene = self._all_scenes[s_index]
            this_scene_dims = self._scene_dims_list[s_index]

            # Raise error when channel names were provided when they shouldn't
            # have been
            if DimensionNames.Channel not in this_scene_dims:
                if this_scene_channel_names is not None:
                    raise ValueError(
                        f"Received channel names for array without channel dimension. "
                        f"Provided array shape: {this_scene.shape}, "
                        f"Provided (or guessed) dimensions: {this_scene_dims}, "
                        f"Provided channel names: {this_scene_channel_names}"
                    )

                self._scene_channel_names.append(this_scene_channel_names)
                continue

            channel_dim_index = this_scene_dims.index(DimensionNames.Channel)
            n_channels = this_scene.shape[channel_dim_index]

            # Provided None, generate
            if this_scene_channel_names is None:
                resolved_names = self._generate_channel_names(s_index, n_channels)

            # Provided Some, validate
            elif len(this_scene_channel_names) != n_channels:
                raise ValueError(
                    f"Provided channel names list does not match the size of "
                    f"channel dimension for the provided array. "
                    f"Provided array shape: {this_scene.shape}, "
                    f"Channel dimension size: "
                    f"{n_channels}, "
                    f"Provided channel names: {this_scene_channel_names}"
                )

            else:
                resolved_names = this_scene_channel_names

            # Handle xarray missing channel names; an existing channel
            # coordinate is never overwritten. This runs after validation so a
            # bad names list fails before the DataArray is modified.
            if (
                isinstance(this_scene, xr.DataArray)
                and DimensionNames.Channel not in this_scene.coords
            ):
                this_scene.coords[DimensionNames.Channel] = resolved_names

            self._scene_channel_names.append(resolved_names)

        # Construct full xarrays
        # All data arrays in this list are dask Arrays
        self._xr_darrays = []
        for scene_data, dims, this_scene_channel_names in zip(
            self._all_scenes, self._scene_dims_list, self._scene_channel_names
        ):
            # Handle simple case of provided a DataArray
            if isinstance(scene_data, xr.DataArray):
                # Set metadata to point at existing attrs
                # (the attrs dict ends up holding a reference to itself)
                scene_data.attrs[constants.METADATA_UNPROCESSED] = scene_data.attrs

                # If the data backing the xarray is dask
                # just append
                if isinstance(scene_data.data, da.Array):
                    self._xr_darrays.append(scene_data)

                # If the data backing the xarray is numpy
                # copy the array but use dask from numpy
                else:
                    self._xr_darrays.append(
                        scene_data.copy(data=da.from_array(scene_data.data))
                    )

            # Handle non-xarray cases
            else:
                dims_list = list(dims)
                coords = {}
                if DimensionNames.Channel in dims_list:
                    coords[DimensionNames.Channel] = this_scene_channel_names

                # Wrap numpy data in dask so every scene is dask backed
                if isinstance(scene_data, np.ndarray):
                    scene_data = da.from_array(scene_data)

                # Append the dask backed array
                self._xr_darrays.append(
                    xr.DataArray(
                        data=scene_data,
                        dims=dims_list,
                        coords=coords,
                        attrs={constants.METADATA_UNPROCESSED: None},
                    )
                )

        # Normalize the accepted physical pixel size forms to PhysicalPixelSizes
        if isinstance(physical_pixel_sizes, PhysicalPixelSizes):
            self._physical_pixel_sizes = physical_pixel_sizes
        elif isinstance(physical_pixel_sizes, (list, tuple)):
            self._physical_pixel_sizes = PhysicalPixelSizes(*physical_pixel_sizes)
        elif isinstance(physical_pixel_sizes, dict):
            self._physical_pixel_sizes = PhysicalPixelSizes(**physical_pixel_sizes)
        else:
            self._physical_pixel_sizes = PhysicalPixelSizes(None, None, None)

    @property
    def scenes(self) -> Tuple[str, ...]:
        """
        Returns
        -------
        scenes: Tuple[str, ...]
            One generated OME image ID per stored scene, in scene order.
            Computed on first access and cached on the instance.
        """
        if self._scenes is None:
            self._scenes = tuple(
                metadata_utils.generate_ome_image_id(i)
                for i in range(len(self._all_scenes))
            )

        return self._scenes

    def _read_delayed(self) -> xr.DataArray:
        """Return the dask backed DataArray of the current scene."""
        return self._xr_darrays[self.current_scene_index]

    def _read_immediate(self) -> xr.DataArray:
        """
        Return a copy of the current scene's DataArray with its dask data
        computed.
        """
        delayed_scene = self._xr_darrays[self.current_scene_index]
        return delayed_scene.copy(data=delayed_scene.data.compute())

    @property
    def physical_pixel_sizes(self) -> PhysicalPixelSizes:
        """
        Returns
        -------
        sizes: PhysicalPixelSizes
            The physical pixel sizes given at construction for dimensions Z, Y,
            and X (all None when none were given).

        Notes
        -----
        We currently do not handle unit attachment to these values. Please see
        the file metadata for unit information.
        """
        return self._physical_pixel_sizes