Source code for cupy_xarray.cog3pio
"""
`cog3pio` backend for xarray to read TIFF files directly into CuPy arrays in GPU memory.
"""
import os
from collections.abc import Iterable
import cupy as cp # type: ignore[import-untyped]
import numpy as np
import xarray as xr
from cog3pio import CudaCogReader
from xarray.backends import BackendEntrypoint
# %%
[docs]
class Cog3pioBackendEntrypoint(BackendEntrypoint):
    """
    Xarray backend to read GeoTIFF files using 'cog3pio' engine.

    When using :py:func:`xarray.open_dataarray` with ``engine="cog3pio"``, the optional
    ``device_id`` parameter can be set to the CUDA GPU id to do the decoding on.

    Examples
    --------
    Read a GeoTIFF from a HTTP url into an [xarray.DataArray][]:

    >>> import xarray as xr
    >>> # Read GeoTIFF into an xarray.DataArray
    >>> dataarray: xr.DataArray = xr.open_dataarray(
    ...     filename_or_obj="https://github.com/OSGeo/gdal/raw/v3.11.0/autotest/gcore/data/byte_zstd.tif",
    ...     engine="cog3pio",
    ...     device_id=0,  # cuda:0
    ... )
    >>> dataarray.sizes
    Frozen({'band': 1, 'y': 20, 'x': 20})
    >>> dataarray.dtype
    dtype('uint8')
    """

    description = "Use .tif files in Xarray"
    open_dataset_parameters = ("filename_or_obj", "drop_variables", "device_id")
    url = "https://github.com/weiji14/cog3pio"

    def open_dataset(  # type: ignore[override]
        self,
        filename_or_obj: str,
        *,
        drop_variables: str | Iterable[str] | None = None,
        device_id: int | None = None,
        # other backend specific keyword arguments
        # `chunks` and `cache` DO NOT go here, they are handled by xarray
        mask_and_scale=None,
    ) -> xr.Dataset:
        """
        Backend open_dataset method used by Xarray in [xarray.open_dataset][].

        Parameters
        ----------
        filename_or_obj : str
            File path or url to a TIFF (.tif) image file that can be read by the
            nvTIFF or image-tiff backend library.
        drop_variables : str | Iterable[str] | None
            Name(s) of variables to leave out of the returned dataset. Names
            that are not present in the dataset are ignored. Default is None
            (keep everything).
        device_id : int | None
            CUDA device ID on which to place the created cupy array. Default is None,
            which means device_id will be inferred via
            :py:func:`cupy.cuda.runtime.getDevice`.
        mask_and_scale : optional
            Accepted for signature compatibility only; this backend does not
            use it.

        Returns
        -------
        xarray.Dataset
            Dataset holding a single data variable named ``raster`` with
            dimensions ``(band, y, x)``, unless it was removed through
            ``drop_variables``.
        """
        if device_id is None:
            device_id = cp.cuda.runtime.getDevice()

        # NOTE(review): the indentation of the original listing was lost, so the
        # extent of this `with` block is a reconstruction (decode + reshape on
        # the per-thread default stream) - confirm against the upstream file.
        with cp.cuda.Stream(ptds=True):
            cog = CudaCogReader(path=filename_or_obj, device_id=device_id)
            array_: cp.ndarray = cp.from_dlpack(cog)  # 1-D Array
            x_coords, y_coords = cog.xy_coords()  # TODO consider using rasterix
            height, width = (len(y_coords), len(x_coords))
            channels: int = len(array_) // (height * width)
            # TODO make API to get proper 3-D shape directly, or use cuTENSOR
            array_ = array_.reshape(height, width, channels)  # HWC
            array = array_.transpose(2, 0, 1)  # CHW

        # Smallest unsigned dtype able to hold the largest band index: uint8 for
        # up to 256 bands (as before), wider beyond that so that band labels
        # never overflow uint8.
        band_dtype = np.min_scalar_type(max(channels - 1, 0))

        dataarray: xr.DataArray = xr.DataArray(
            data=array,
            # Name the dimensions explicitly instead of relying on them being
            # inferred from the insertion order of the `coords` mapping.
            dims=("band", "y", "x"),
            coords={
                "band": np.arange(channels, dtype=band_dtype),
                "y": y_coords,
                "x": x_coords,
            },
            name=None,
            attrs=None,
        )
        dataset: xr.Dataset = dataarray.to_dataset(name="raster")

        # Xarray hands `drop_variables` to the backend and expects the backend
        # to apply it.
        if drop_variables is not None:
            dataset = dataset.drop_vars(drop_variables, errors="ignore")
        return dataset

    def guess_can_open(self, filename_or_obj):
        """
        Tell Xarray whether this backend is likely able to open the input.

        Parameters
        ----------
        filename_or_obj
            Candidate input passed to :py:func:`xarray.open_dataset`.

        Returns
        -------
        bool
            True if the input has a ``.tif`` or ``.tiff`` extension (compared
            case-insensitively, so ``.TIF`` matches too), False otherwise,
            including when the input is not path-like.
        """
        try:
            _, ext = os.path.splitext(filename_or_obj)
        except TypeError:
            # Not a str / bytes / os.PathLike object (e.g. an open file handle).
            return False
        return ext.lower() in {".tif", ".tiff"}