Source code for cupy_xarray.cog3pio

"""
`cog3pio` backend for xarray to read TIFF files directly into CuPy arrays in GPU memory.
"""

import os
from collections.abc import Iterable

import cupy as cp
import numpy as np
import xarray as xr
from cog3pio import CudaCogReader
from xarray.backends import BackendEntrypoint


# %%
class Cog3pioBackendEntrypoint(BackendEntrypoint):
    """
    Xarray backend to read GeoTIFF files using 'cog3pio' engine.

    When using :py:func:`xarray.open_dataarray` with ``engine="cog3pio"``, the
    optional ``device_id`` parameter can be set to the CUDA GPU id to do the
    decoding on.

    Examples
    --------
    Read a GeoTIFF from a HTTP url into an :class:`xarray.DataArray`:

    >>> import xarray as xr
    >>> # Read GeoTIFF into an xarray.DataArray
    >>> dataarray: xr.DataArray = xr.open_dataarray(
    ...     filename_or_obj="https://github.com/OSGeo/gdal/raw/v3.11.0/autotest/gcore/data/byte.tif",
    ...     engine="cog3pio",
    ...     device_id=0,  # cuda:0
    ... )
    >>> dataarray.sizes
    Frozen({'band': 1, 'y': 20, 'x': 20})
    >>> dataarray.dtype
    dtype('uint8')
    """

    description = "Use .tif files in Xarray"
    open_dataset_parameters = ("filename_or_obj", "drop_variables", "device_id")
    url = "https://cog3pio.readthedocs.io/en/latest/"

    def open_dataset(
        self,
        filename_or_obj: str,
        *,
        drop_variables: str | Iterable[str] | None = None,
        device_id: int | None = None,
        # other backend specific keyword arguments
        # `chunks` and `cache` DO NOT go here, they are handled by xarray
        mask_and_scale=None,
    ) -> xr.Dataset:
        """
        Backend open_dataset method used by Xarray in :py:func:`xarray.open_dataset`.

        Parameters
        ----------
        filename_or_obj : str
            File path or url to a TIFF (.tif) image file that can be read by
            cog3pio's nvTIFF backend.
        drop_variables : str | Iterable[str] | None
            Name(s) of variables to leave out of the returned dataset. Names
            that are not present in the dataset are ignored. Default is None,
            which keeps every variable.
        device_id : int | None
            CUDA device ID on which to place the created cupy array. Default is
            None, which means device_id will be inferred via
            :py:func:`cupy.cuda.runtime.getDevice`.
        mask_and_scale : optional
            Accepted for signature compatibility; currently not used by this
            backend.

        Returns
        -------
        xarray.Dataset
            Dataset holding a single ``raster`` variable with dimensions
            ``(band, y, x)``, unless removed via ``drop_variables``.
        """
        if device_id is None:
            # Fall back to whichever CUDA device is current for this thread.
            device_id = cp.cuda.runtime.getDevice()

        # Run the decode on CuPy's per-thread default stream (ptds=True).
        with cp.cuda.Stream(ptds=True):
            cog = CudaCogReader(path=filename_or_obj, device_id=device_id)
            array_: cp.ndarray = cp.from_dlpack(cog)  # 1-D Array
            x_coords, y_coords = cog.xy_coords()  # TODO consider using rasterix
            height, width = (len(y_coords), len(x_coords))
            # The reader hands back a flat buffer, so the band count is
            # whatever remains after dividing out the pixel grid.
            channels: int = len(array_) // (height * width)
            # TODO make API to get proper 3-D shape directly, or use cuTENSOR
            array_ = array_.reshape(height, width, channels)  # HWC
            array = array_.transpose(2, 0, 1)  # CHW

        dataarray: xr.DataArray = xr.DataArray(
            data=array,
            coords={
                # NOTE(review): uint8 labels can only number bands 0..255.
                "band": np.arange(channels, dtype=np.uint8),
                "y": y_coords,
                "x": x_coords,
            },
            name=None,
            attrs=None,
        )
        dataset: xr.Dataset = dataarray.to_dataset(name="raster")

        # Xarray expects backends to honour `drop_variables`; unknown names are
        # skipped so a generic drop list can be shared across different files.
        if drop_variables is not None:
            dataset = dataset.drop_vars(drop_variables, errors="ignore")

        return dataset

    def guess_can_open(self, filename_or_obj) -> bool:
        """
        Guess whether this backend can open the given input.

        Parameters
        ----------
        filename_or_obj
            Candidate input passed to xarray. Anything that
            :py:func:`os.path.splitext` rejects with a ``TypeError`` (e.g. a
            file-like object) is reported as not openable.

        Returns
        -------
        bool
            True if the path ends in a ``.tif`` or ``.tiff`` extension
            (compared case-insensitively), False otherwise.
        """
        try:
            _, ext = os.path.splitext(filename_or_obj)
        except TypeError:
            return False
        # Compare case-insensitively so that e.g. "IMAGE.TIF" is recognised.
        return ext.lower() in {".tif", ".tiff"}