Skip to content

processor module

The processor module

cellSize(input, unit='km', meta=None)

Calculate pixel size (area), the input has to be in the projection of 'EPSG:4326'. If not, it can be reprojected by "project" function

Parameters:

Name Type Description Default
input

rasterio image or data array

required
unit ~AnyStr

string, default is "km", the unit to calculate area

'km'
meta Optional[~AnyStr]

optional dict, metadata in case input is data array

None

Examples:

img = raster.rast('./Sample_data/temperature.tif') cellArea = processor.cellSize(img, unit='km')

Returns:

Type Description
raster

a raster of area in selected unit.

Source code in geonate/processor.py
def cellSize(input, unit: AnyStr='km', meta: Optional[AnyStr]=None):
    '''
    Calculate pixel size (area), the input has to be in the projection of 'EPSG:4326'. If not, it can be reprojected by "project" function

    Parameters:
        input: rasterio image or data array
        unit: string, default is "km", the unit to calculate area
        meta: optional dict, metadata in case input is data array

    Example:
       img = raster.rast('./Sample_data/temperature.tif')
       cellArea = processor.cellSize(img, unit='km')    

    Returns:
        raster: a raster of area in selected unit.

    '''
    import rasterio
    import numpy as np
    from .common import array2raster

    ### Check input data
    if isinstance(input, rasterio.DatasetReader):
        if len(input.shape) == 2:
            dataset = input.read()
            meta = input.meta
        else:
            dataset = input.read(1)
            meta = input.meta
    elif isinstance(input, np.ndarray):
        if meta is None:
            raise ValueError('Please provide input metadata')
        else:
            if len(input) > 2:
                dataset = input[0, :, : ]
            else:
                dataset = input
            meta = meta
    else:
        raise ValueError('Input data is not supported')

    ### Read metadata
    transform = meta['transform']
    pix_width = transform[0]
    upper_X = transform[2]
    upper_Y = transform[5]
    rows = meta['height']
    cols = meta['width']
    lower_X = upper_X + transform[0] * cols
    lower_Y = upper_Y + transform[4] * rows

    lats = np.linspace(upper_Y, lower_Y, rows + 1)

    a = 6378137.0  # Equatorial radius
    b = 6356752.3142  # Polar radius

    # Degrees to radians
    lats = lats * np.pi/180

    # Intermediate vars
    e = np.sqrt(1-(b/a)**2)
    sinlats = np.sin(lats)
    zm = 1 - e * sinlats
    zp = 1 + e * sinlats

    # Distance between meridians
    q = pix_width/360

    # Compute areas for each latitude in square km
    areas_to_equator = np.pi * b**2 * ((2*np.arctanh(e*sinlats) / (2*e) + sinlats / (zp*zm))) / 10**6
    areas_between_lats = np.diff(areas_to_equator)
    areas_cells = np.abs(areas_between_lats) * q

    # Create empty array to store output
    cellArea = np.zeros_like(dataset, dtype=np.float32)

    # Assign estimated cell area to every pixel
    if len(cellArea.shape) == 2:
        for i in range(0, cellArea.shape[1]):
            cellArea[:, i] = areas_cells.flatten()
    else:
        for i in range(0, cellArea.shape[2]):
            cellArea[:, :, i] = areas_cells.flatten()

    ### Update metadata
    meta.update({'dtype': np.float32, 'count': 1})

    ### Convert unit (if applicable)
    if (unit.lower() == 'km') or (unit.lower() == 'kilometer'):
        outArea = cellArea
    elif (unit.lower() == 'm') or (unit.lower() == 'meter'):
        outArea = cellArea * 1_000_000
    elif (unit.lower() == 'ha') or (unit.lower() == 'hectare'):
        outArea = cellArea * 10_000

    # Output

    outArea = array2raster(cellArea, meta)

    return outArea, meta

crop(input, reference, invert=False, nodata=True)

Crops a raster file based on a reference shapefile or raster file. Optionally inverts the crop.

Parameters:

Name Type Description Default
input raster

The input raster file.

required
reference shapefile | raster

The reference shapefile (GeoDataFrame) or raster file (DatasetReader) to define the crop boundary.

required
invert bool

If True, inverts the crop to mask out the area within the boundary. Defaults to False.

False
nodata bool

If True, handles nodata values by converting the input to float32 and setting nodata to NaN. Defaults to True.

True

Returns:

Type Description
A clipped raster (raster)

The cropped raster file.

Source code in geonate/processor.py
def crop(input, reference, invert=False, nodata=True):
    """
    Crops a raster file based on a reference shapefile or raster file. Optionally inverts the crop.

    Args:
        input (raster): The input raster file.
        reference (shapefile | raster): The reference shapefile (GeoDataFrame) or raster file (DatasetReader) to define the crop boundary.
        invert (bool, optional): If True, inverts the crop to mask out the area within the boundary. Defaults to False.
        nodata (bool, optional): If True, handles nodata values by converting the input to float32 and setting nodata to NaN. Defaults to True.

    Returns:
        A clipped raster (raster): The cropped raster file.

    """
    import rasterio
    import geopandas as gpd
    import numpy as np
    from rasterio.transform import Affine
    from shapely.geometry import mapping
    from shapely.geometry import box
    from .common import array2raster

    # Condition to process nodata
    if nodata is True:
        # Convert datatype of input to float32 to store NA value
        arr = input.read().astype(np.float32)
        meta = input.meta
        meta.update({'dtype': np.float32})
        input_image = array2raster(arr, meta)
    else: 
        input_image = input

    ### Define boundary
    # Reference is shapefile
    if isinstance(reference, gpd.GeoDataFrame):
        minx, miny, maxx, maxy = reference.total_bounds
        # define box
        bbox = box(minx, miny, maxx, maxy)
        poly_bound = gpd.GeoDataFrame({'geometry': [bbox]}, crs=reference.crs)

    # Reference is raster
    elif isinstance(reference, rasterio.DatasetReader):
        minx, miny, maxx, maxy = reference.bounds
        # define box
        bbox = box(minx, miny, maxx, maxy)
        poly_bound = gpd.GeoDataFrame({'geometry': [bbox]}, crs=reference.crs)

    # Others
    else:
        raise ValueError('Reference data is not supported')   

    ### Invert crop
    #### Condition for nodata
    if nodata is True:
        if invert is True:
            clipped, geotranform = rasterio.mask.mask(dataset=input_image, shapes= poly_bound.geometry.apply(mapping), invert=True, nodata= np.nan)
        else:
            clipped, geotranform = rasterio.mask.mask(dataset=input_image, shapes= poly_bound.geometry.apply(mapping), crop=True, invert=False, nodata= np.nan)

        # Update metadata
        meta  = input.meta
        meta.update({
            'height': clipped.shape[1],
            'width': clipped.shape[2],
            'transform': geotranform,
            'dtype': np.float32,
            'nodata': np.nan
            })
    #
    else:
        if invert is True:
            clipped, geotranform = rasterio.mask.mask(dataset=input_image, shapes= poly_bound.geometry.apply(mapping), invert=True, nodata= 0)
        else:
            clipped, geotranform = rasterio.mask.mask(dataset=input_image, shapes= poly_bound.geometry.apply(mapping), crop=True, invert=False, nodata= 0)

        # Update metadata
        meta  = input.meta
        meta.update({
            'height': clipped.shape[1],
            'width': clipped.shape[2],
            'transform': geotranform,
            'nodata': 0
            })

    # Convert array to raster
    clipped_raster = array2raster(clipped, meta)

    return clipped_raster

extractValues(input, roi, field, dataframe=True, names=None, prefix=None, tail=True)

Extract pixel values from a raster image based on regions of interest (ROI) defined in a shapefile.

Parameters:

Name Type Description Default
input raster

Rasterio image as input

required
roi shapefile

Shapefile where GCP points are located, read by geopandas.

required
field AnyStr

Field name in shapefile GCP to extract label values, e.g., 'class'. But this field must store number instead of string.

required
dataframe bool

Whether to return a dataframe or separate X, y arrays. Defaults to True.

True
names list

Expected names for each column in the dataframe. Defaults to None.

None
prefix AnyStr

Prefix for each band name. Defaults to None.

None
tail bool

Whether to place the class value at the end or front of the dataframe. Defaults to True.

True

Returns:

Type Description
DataFrame or Tuple

Dataframe if dataframe=True, otherwise X and y arrays for training a model.

Source code in geonate/processor.py
def extractValues(input, roi, field, dataframe: Optional[bool]=True, names: Optional[list]=None, prefix: Optional[AnyStr]=None, tail=True):
    """
    Extract pixel values from a raster image based on regions of interest (ROI) defined in a shapefile.

    Args:
        input (raster): Rasterio image as input
        roi (shapefile): Shapefile where GCP points are located, read by geopandas.
        field (AnyStr): Field name in shapefile GCP to extract label values, e.g., 'class'. **But this field must store number instead of string**.
        dataframe (bool, optional): Whether to return a dataframe or separate X, y arrays. Defaults to True.
        names (list, optional): Expected names for each column in the dataframe. Defaults to None.
        prefix (AnyStr, optional): Prefix for each band name. Defaults to None.
        tail (bool, optional): Whether to place the class value at the end or front of the dataframe. Defaults to True.

    Returns:
        DataFrame or Tuple: Dataframe if dataframe=True, otherwise X and y arrays for training a model.

    """
    import os
    import rasterio
    from rasterio.plot import reshape_as_image
    import numpy as np
    from shapely.geometry import mapping
    import pandas as pd
    from .common import array2raster

    # *****************************************
    # Define input image
    # Other data type
    if not isinstance(input, rasterio.DatasetReader):
        raise ValueError('Input data is not supported')
    # Input is raster
    else:
        # Convert datatype of input to float32 to store NA value
        arr = input.read().astype(np.float32)
        meta = input.meta
        meta.update({'dtype': np.float32})
        input_image = array2raster(arr, meta)

    # *****************************************
    # Convert shapefile to shapely geometry
    geoms = roi.geometry.values

    # Extract some metadata information
    nbands = input_image.count
    dtype_X = np.float32()
    dtype_y = np.float32()

    # Create empty array to contain X and y arrays
    X = np.array([], dtype= dtype_X).reshape(0, nbands)
    y = np.array([], dtype= dtype_y)

    # Run loop over each features in shapefile to extract pixel values
    for index, geom in enumerate(geoms):
        poly = [mapping(geom)]

        # Crop image based on feature
        cropped, transform = rasterio.mask.mask(input_image, poly, crop=True, nodata=np.nan)

        # Reshape dataset in form of (values, bands)
        cropped_reshape = reshape_as_image(cropped)
        reshapped = cropped_reshape.reshape(-1, nbands)

        # Append 1D array y
        y = np.append(y, [roi[field][index]] * reshapped.shape[0])

        # vertical stack 2D array X
        X = np.vstack((X, reshapped))

    # Remove NA value from data
    data = np.hstack((X, y.reshape(y.shape[0], 1))).astype(np.float32)
    data_na = data[~np.isnan(data).any(axis=1)]
    data_nodata = data_na[~(data_na == np.nan).any(axis=1)]

    X_na = data_nodata[ :, 0:nbands]
    y_na = data_nodata[ : , nbands]

    # return dataframe
    if dataframe is True:
        y_na_reshape = y_na.reshape(-1,1)

        # class tail
        if tail is True:
            arr = np.hstack([X_na, y_na_reshape])
        else:
            arr = np.hstack([y_na_reshape, X_na])

        # Name is not given
        if names is None:
            if prefix is None:
                names_band = [f'B{i}' for i in range(1, input_image.count +1)]
                name_class = [str(field)]
                if tail is True:
                    names_list = names_band + name_class
                else:
                    names_list = name_class + names_band
            else:
                names_band = [f'{prefix}{i}' for i in range(1, input_image.count +1)]
                name_class = [str(field)]
                if tail is True:
                    names_list = names_band + name_class
                else:
                    names_list = name_class + names_band
            data = pd.DataFrame(arr, columns=names_list)            
            return data

        # Name is given
        else:
            if len(names) != (nbands + 1):
                raise ValueError('Length of name should be equal to number of bands plus 1')
            else:
                if prefix is None:
                    names_list = names
                else:
                    names_list = [f'{prefix}{name_i}' for name_i in names]
            data = pd.DataFrame(arr, columns=names_list)            
            return data

    # Do not return dataframe
    else:
        return X_na, y_na

layestack(input)

Stacks multiple raster files or rasterio DatasetReader objects into a single multi-band raster.

Parameters:

Name Type Description Default
input list

List of file paths to the input raster files or rasterio DatasetReader objects.

required

Returns:

Type Description
Stacked raster (raster)

Stacked raster image.

Source code in geonate/processor.py
def layestack(input):
    """
    Stacks multiple raster files or rasterio DatasetReader objects into a single multi-band raster.

    Parameters:
        input (list): List of file paths to the input raster files or rasterio DatasetReader objects.

    Returns:
        Stacked raster (raster): Stacked raster image.

    """
    import numpy as np
    from .geonate import rast
    from .common import array2raster, check_datatype_consistency, check_extension_consistency

    # Initialize some parameters and variables
    file2stack = []
    stacked_array = []
    nbands = len(input)

    consistency, datatype = check_datatype_consistency(input)

    # If input is list of file paths
    if (consistency is True) and datatype == "<class 'str'>":
        consistency_ext, extension = check_extension_consistency(input)

        # If input is a list of tif files
        if (consistency_ext is True) and (extension == 'tif'):
            # Stack each band 
            for i, bandi in enumerate(input):
                tmp = rast(input[i])
                ds = tmp.read(1) # Read each raster and read data array
                meta = tmp.meta 
                file2stack.append(ds) # stack each band in a list of data
        # Other data extension
        else:
            raise ValueError('Data type is not supported')

    # If input is local raster files 
    elif (consistency is True) and datatype == "<class 'rasterio.io.DatasetReader'>":
        # Stack each band 
        for i, bandi in enumerate(input):
            ds = input[i].read(1) # Read each band
            meta = bandi.meta 
            file2stack.append(ds) # stack each band in a list of data
    else:
        raise ValueError('Data type is not supported')    

    # convert list to array and update nbands
    stacked_array = np.stack(file2stack, axis=0) 
    meta.update({'count': nbands})

    # Convert array to raster
    stacked_image = array2raster(stacked_array, meta)

    return stacked_image

mask(input, reference, invert=False, nodata=True)

Masks a raster file based on a reference shapefile or raster file. Optionally inverts the mask.

Parameters:

Name Type Description Default
input raster

The input raster file.

required
reference shapefile | raster

The reference shapefile (GeoDataFrame) or raster file (DatasetReader) to define the mask boundary.

required
invert bool

If True, inverts the mask to mask out the area within the boundary. Defaults to False.

False
nodata bool

If True, handles nodata values by converting the input to float32 and setting nodata to NaN. Defaults to True.

True

Returns:

Type Description
A clipped and masked raster (raster)

The cropped and masked raster file.

Source code in geonate/processor.py
def mask(input, reference, invert=False, nodata=True):
    """ 
    Masks a raster file based on a reference shapefile or raster file. Optionally inverts the mask.

    Args:
        input (raster): The input raster file.
        reference (shapefile | raster): The reference shapefile (GeoDataFrame) or raster file (DatasetReader) to define the mask boundary.
        invert (bool, optional): If True, inverts the mask to mask out the area within the boundary. Defaults to False.
        nodata (bool, optional): If True, handles nodata values by converting the input to float32 and setting nodata to NaN. Defaults to True.

    Returns:
        A clipped and masked raster (raster): The cropped and masked raster file.

    """
    import numpy as np
    import rasterio
    import shapely
    from shapely.geometry import mapping
    import geopandas as gpd
    from .common import array2raster

    ##########################################
    #### Define boundary 
    if (isinstance(reference, gpd.GeoDataFrame)):
        poly = reference
        transform_poly = reference.transform
        crs_poly = reference.crs

    # Raster format
    elif isinstance(reference, rasterio.DatasetReader):
        ds_reference = reference.read(1)                                        # Extract only first band, transform, and crs
        transform_poly = reference.meta['transform']
        crs_poly = reference.meta['crs']

        # Create mask from all value different from Nodata
        masked = np.where(np.isnan(ds_reference), np.nan, 1)            # Replace values different than NA by 1
        masked_convert = masked.astype(np.float32)                             # Redefine data type of float32 to store n.nan

        # Create generator that yields (geometry, value) pairs for each shape found in the masked_convert
        shp = rasterio.features.shapes(masked_convert, mask= ~np.isnan(masked_convert), transform= transform_poly)
        poly = []
        values = []

        # Iterate over mask = 1, convert to shapely geometry and append 
        for shape, value in shp:
            if value == 1:
                poly.append(shapely.geometry.shape(shape))                         # convert a GeoJSON-like dictionary object into a Shapely geometry object
                values.append(value)

        # Create poly from mask
        poly = gpd.GeoDataFrame({'geometry': poly, 'value': values})
        poly.set_crs(crs_poly.to_string(), inplace=True)

    else:
        raise ValueError('Reference data is not supported')

    ##########################################
    ### Define nodata
    if nodata is True:
        # Convert datatype of input to float32 to store NA value
        arr = input.read().astype(np.float32)
        meta = input.meta
        meta.update({'dtype': np.float32})
        input_image = array2raster(arr, meta)

        ### Invert mask
        if invert is True:
            masked_img, geotranform = rasterio.mask.mask(dataset=input_image, shapes= poly.geometry.apply(mapping), crop=True, invert=True, nodata=np.nan)
        else:
            masked_img, geotranform = rasterio.mask.mask(dataset=input_image, shapes= poly.geometry.apply(mapping), crop=True, nodata= np.nan)

        meta  = input_image.meta
        meta.update({
            'height': masked_img.shape[1],
            'width': masked_img.shape[2],
            'transform': geotranform,
            'dtype': np.float32,
            'nodata': np.nan})

    else: 
        input_image = input

        ### Invert mask
        if invert is True:
            masked_img, geotranform = rasterio.mask.mask(dataset=input_image, shapes= poly.geometry.apply(mapping), crop=True, invert=True, nodata= 0)
        else:
            masked_img, geotranform = rasterio.mask.mask(dataset=input_image, shapes= poly.geometry.apply(mapping), crop=True, nodata= 0)

        meta  = input_image.meta
        meta.update({
            'height': masked_img.shape[1],
            'width': masked_img.shape[2],
            'transform': geotranform,
            'nodata': 0})

    ### Convert array back to raster         
    masked_raster = array2raster(masked_img, meta)

    return masked_raster

match(input, reference, method='near', nodata=None, **kwargs)

Match input image to the reference image in terms of projection, resolution, and bound extent.

Parameters:

Name Type Description Default
input raster

Rasterio objective needs to match the reference.

required
reference raster

Rasterio object taken as reference to match the input image.

required
method AnyStr

String defines resampling method (if applicable) to resample if having different resolution (Method similar to resample). Defaults to 'near'.

'near'
**kwargs optional

All parameters that can be passed to rasterio.warp.reproject function.

{}

Returns:

Type Description
raster

Matched raster image with the same projection, resolution, and extent as the reference image.

Source code in geonate/processor.py
def match(input, reference, method='near', nodata=None, **kwargs):
    """
    Match input image to the reference image in terms of projection, resolution, and bound extent.

    Args:
        input (raster): Rasterio objective needs to match the reference.
        reference (raster): Rasterio object taken as reference to match the input image.
        method (AnyStr, optional): String defines resampling method (if applicable) to resample if having different resolution (Method similar to resample). Defaults to 'near'.
        **kwargs (optional): All parameters that can be passed to rasterio.warp.reproject function.

    Returns:
        raster: Matched raster image with the same projection, resolution, and extent as the reference image.

    """
    import rasterio
    from rasterio import warp
    from rasterio.transform import from_bounds
    import numpy as np
    from .common import get_extent_local, array2raster

    # *****************************************
    ### Define input image
    # input is raster
    if isinstance(input, rasterio.DatasetReader):
        input_image = input.read()
        src_meta = input.meta
        src_crs = input.crs
        src_count = input.count
        src_transform = input.transform
        src_dtype = input.dtypes[0]
    # Other input
    else:
        raise ValueError('Input data is not supported')

    # *****************************************
    ### Define reference image
    if isinstance(reference, rasterio.DatasetReader):
        reference_image = reference.read()
        reference_meta = reference.meta
        reference_transform = reference.transform
        reference_width = reference.width
        reference_height = reference.height
        reference_crs = reference.crs
        reference_profile = reference.profile.copy()
    # Other input
    else:
        raise ValueError('Input data is not supported')

    # *****************************************
    # Update profile for output
    output_meta = src_meta.copy()
    output_meta.update({
        'crs': reference_crs,
        'transform': reference_transform,
        'width': reference_width,
        'height': reference_height, 
        'compress': 'lzw',
        'dtype': src_dtype
        })

    # Allocate destination array
    dst_data = np.empty(shape=(src_count, reference_height, reference_width), dtype= input.dtypes[0])

    # *****************************************
    # Resampling method
    if method.lower() == 'near' or method.lower() == 'nearest':
        resampleAlg = warp.Resampling.nearest
    elif method.lower() == 'mean' or method.lower() == 'average':
        resampleAlg = warp.Resampling.average
    elif method.lower() == 'max':
        resampleAlg = warp.Resampling.max
    elif method.lower() == 'min':
        resampleAlg = warp.Resampling.min
    elif (method.lower() == 'median') or (method.lower() == 'med'):
        resampleAlg = warp.Resampling.med
    elif method.lower() == 'mode':
        resampleAlg = warp.Resampling.mode
    elif method.lower() == 'q1':
        resampleAlg = warp.Resampling.q1
    elif method.lower() == 'q3':
        resampleAlg = warp.Resampling.q3
    elif method.lower() == 'rsm':
        resampleAlg = warp.Resampling.rms
    elif method.lower() == 'sum':
        resampleAlg = warp.Resampling.sum
    elif method.lower() == 'cubic':
        resampleAlg = warp.Resampling.cubic
    elif method.lower() == 'spline':
        resampleAlg = warp.Resampling.cubic_spline
    elif method.lower() == 'bilinear':
        resampleAlg = warp.Resampling.bilinear
    elif method.lower() == 'gauss':
        resampleAlg = warp.Resampling.gauss
    elif method.lower() == 'lanczos':
        resampleAlg = warp.Resampling.lanczos
    else:
        raise ValueError('The resampling method is not supported, available methods raster.Resampling.')

    # *****************************************
    # Reproject each band
    for i in range(src_count):
        warp.reproject(
            source= input_image[i],
            destination= dst_data[i],
            src_transform= src_transform,
            src_crs= src_crs,
            dst_transform= reference_transform,
            dst_crs= reference_crs,
            resampling= resampleAlg, **kwargs
        )

        # *****************************************
        # Mask out other values
        if (nodata is None):
            dst_data = dst_data
            output_meta.update({
                'dtype': input.dtypes[0]
                })
        elif (isinstance(nodata, (int, float))):
            dst_data = np.where(dst_data == nodata, np.nan, dst_data)
            dst_data = dst_data.astype(np.float32)

            output_meta.update({
                'dtype': np.float32,
                'nodata': np.nan
                })            
        else: 
            raise ValueError('NoData is not supported (int or float)')        

        # *****************************************
        # Convert to raster        
        match_raster = array2raster(dst_data, output_meta)

        return match_raster

match_boundary(input, reference, method='near', **kwargs)

Match input image to the reference image to return an image within the bigger boundary.

Parameters:

Name Type Description Default
input raster

Rasterio objective needs to match the reference.

required
reference raster

Rasterio object taken as reference to match the input image.

required
method AnyStr

String defines resampling method (if applicable) to resample if having different resolution (Method similar to resample). Defaults to 'near'.

'near'
**kwargs optional

All parameters that can be passed to rasterio.warp.reproject function.

{}

Returns:

Type Description
raster

Matched raster image with the same projection, resolution, and extent as the reference image.

Source code in geonate/processor.py
def match_boundary(input, reference, method='near', **kwargs):
    """
    Match input image to the reference image to return an image within the bigger boundary.

    Args:
        input (raster): Rasterio objective needs to match the reference.
        reference (raster): Rasterio object taken as reference to match the input image.
        method (AnyStr, optional): String defines resampling method (if applicable) to resample if having different resolution (Method similar to resample). Defaults to 'near'.
        **kwargs (optional): All parameters that can be passed to rasterio.warp.reproject function.

    Returns:
        raster: Matched raster image with the same projection, resolution, and extent as the reference image.

    """
    import rasterio
    from rasterio import warp
    from rasterio.transform import from_bounds
    import numpy as np
    from .common import get_extent_local, array2raster

    # *****************************************
    ### Define input image
    # input is raster
    if isinstance(input, rasterio.DatasetReader):
        input_image = input.read()
        meta = input.meta
    # Other input
    else:
        raise ValueError('Input data is not supported')

    # *****************************************
    ### Define reference image
    if isinstance(reference, rasterio.DatasetReader):
        reference_image = reference.read()
        meta_reference = reference.meta
    # Other input
    else:
        raise ValueError('Input data is not supported')

    # *****************************************
    ### Check CRS and Resolution
    if (meta["crs"] != meta_reference['crs']) or (meta['transform'][0] != meta_reference['transform'][0]):
        raise ValueError('Input and reference images have different Projection and Resolution')
    # If having the same CRS and Resolution
    else:

        # *****************************************
        # Get general extent from two images
        ext_input = get_extent_local(input)[0]
        ext_reference = get_extent_local(reference)[0]

        ext = ext_input
        ext = (
            min(ext[0], ext_reference[0]),
            min(ext[1], ext_reference[1]),
            max(ext[2], ext_reference[2]),
            max(ext[3], ext_reference[3])
            )

        # *****************************************
        # Calculate new height & width and new transform
        resolution = meta_reference['transform'][0]    
        width_new = int((ext[2]  - ext[0]) / resolution)
        height_new = int((ext[3] - ext[1]) / resolution)

        transform_new = from_bounds(ext[0], ext[1], ext[2], ext[3], width_new, height_new)

        # *****************************************
        # Resampling method
        if method.lower() == 'near' or method.lower() == 'nearest':
            resampleAlg = warp.Resampling.nearest
        elif method.lower() == 'mean' or method.lower() == 'average':
            resampleAlg = warp.Resampling.average
        elif method.lower() == 'max':
            resampleAlg = warp.Resampling.max
        elif method.lower() == 'min':
            resampleAlg = warp.Resampling.min
        elif (method.lower() == 'median') or (method.lower() == 'med'):
            resampleAlg = warp.Resampling.med
        elif method.lower() == 'mode':
            resampleAlg = warp.Resampling.mode
        elif method.lower() == 'q1':
            resampleAlg = warp.Resampling.q1
        elif method.lower() == 'q3':
            resampleAlg = warp.Resampling.q3
        elif method.lower() == 'rsm':
            resampleAlg = warp.Resampling.rms
        elif method.lower() == 'sum':
            resampleAlg = warp.Resampling.sum
        elif method.lower() == 'cubic':
            resampleAlg = warp.Resampling.cubic
        elif method.lower() == 'spline':
            resampleAlg = warp.Resampling.cubic_spline
        elif method.lower() == 'bilinear':
            resampleAlg = warp.Resampling.bilinear
        elif method.lower() == 'gauss':
            resampleAlg = warp.Resampling.gauss
        elif method.lower() == 'lanczos':
            resampleAlg = warp.Resampling.lanczos
        else:
            raise ValueError('The resampling method is not supported, available methods raster.Resampling.')

        # *****************************************
        # Reproject to match
        if len(input_image.shape) > 2:
            nbands = input_image.shape[0]
        else:
            nbands = 1
        # Run over each band
        matched = np.empty((nbands, height_new, width_new), dtype=np.float32)
        for band in range(0, nbands):
            if nbands <= 1:
                ds = input_image
            else:
                ds = input_image[band, : , : ]
            warp.reproject(source=ds, destination=matched[band, :, :], src_transform= meta['transform'], dst_transform= transform_new, src_crs=meta['crs'], dst_crs=meta_reference['crs'], resampling= resampleAlg, **kwargs)

        # *****************************************
        # Mask out other values
        match_masked = np.where(matched == 0, np.nan, matched)
        match_masked = match_masked.astype(np.float32)

        # *****************************************
        # Update metadata and Convert to raster
        meta_update = meta.copy()
        meta_update.update({
            'crs': meta_reference['crs'],
            'transform': transform_new,
            'width': width_new,
            'height': height_new,
            'dtype': np.float32
        })
        match_raster = array2raster(match_masked, meta_update)

        return match_raster

merge(input)

Merges multiple raster files into a single raster file by computing the average values at overlapped areas.

Parameters:

Name Type Description Default
input list

List of input raster files.

required

Returns:

Type Description
A merged raster (raster)

The merged raster file.

Source code in geonate/processor.py
def merge(input: list):
    """
    Merges multiple raster files into a single raster file by computing the average values at overlapped areas.

    Args:
        input (list): List of input raster files.

    Returns:
        A merged raster (raster): The merged raster file.

    """
    from rasterio import merge 
    from .common import array2raster

    # Initialize empty list to store all input files and stack input into it 
    merged_files = []
    for tmp in input:
        merged_files.append(tmp)

    # Compute sum and count numbers of images, and average values at overlapped areas
    mosaic_sum, out_trans = merge.merge(merged_files, method= merge.copy_sum)
    mosaic_count, out_trans = merge.merge(merged_files, method= merge.copy_count)
    mosaic_average = mosaic_sum / mosaic_count

    # Update metadata with new transform and image dimensions
    meta = merged_files[0].meta
    meta.update({"driver": "GTiff",
                            "height": mosaic_average.shape[1],
                            "width": mosaic_average.shape[2],
                            "transform": out_trans})

    # Convert array to raster file
    merged_raster = array2raster(mosaic_average, meta)

    return merged_raster    

mergeVRT(input, output, compress=True, silent=True)

Merge multiple geotif files using gdal VRT for better performance speed

Parameters:

Name Type Description Default
input list

List of input geotif files

required
output AnyStr

Path of output tif file

required
compress bool

Whether compress the output data or not. Defaults to True.

True
silent bool

Show or do not show file processing log. Defaults to True.

True

Returns:

Type Description
None

The function does not return any local variable. It writes raster file to local drive.

Source code in geonate/processor.py
def mergeVRT(input: AnyStr, output: AnyStr, compress: bool=True, silent=True):
    """Merge multiple geotif files using gdal VRT for better performance speed

    Args:
        input (list): List of input geotif files
        output (AnyStr): Path of output tif file
        compress (bool, optional): Whether compress the output data or not. Defaults to True.
        silent (bool, optional): Show or do not show file processing log. Defaults to True.

    Return:
        None: The function does not return any local variable. It writes raster file to local drive.

    """
    import os
    from osgeo import gdal
    #  Create a temp vrt file
    vrt_file = 'merged.vrt'

    if compress is True:
        vrt_options = gdal.BuildVRTOptions()
        gdal.BuildVRT(vrt_file, input, options=vrt_options)
        gdal.Translate(output, vrt_file, format='GTiff', creationOptions=['COMPRESS=LZW'])

    else:
        gdal.BuildVRT(vrt_file, input)
        gdal.Translate(output, vrt_file)

    os.remove(vrt_file)
    if silent is True:
        pass
    else:
        print(f"Finished merge raster files, the output is at {output}")

normalized(input)

Normalize raster data to rearrange raster values from 0 to 1

Parameters:

Name Type Description Default
input DatasetReader | np.ndarray

Rasterio image or data array.

required

Returns:

Type Description
raster | data array

Data array or raster depends on the input files.

Source code in geonate/processor.py
def normalized(input):
    """
    Normalize raster data to rearrange raster values from 0 to 1

    Args:
        input (DatasetReader | np.ndarray): Rasterio image or data array.

    Returns:
        raster | data array: Data array or raster depends on the input files.

    """
    import rasterio
    import numpy as np
    import pandas as pd
    from .common import array2raster

    ### Check input data
    if isinstance(input, rasterio.DatasetReader):
        dataset = input.read()
        meta = input.meta
    elif isinstance(input, np.ndarray):
        dataset = input
        meta = meta
    else:
        raise ValueError('Input data is not supported')

    ### Find max min values
    maxValue = np.nanmax(dataset)
    minValue = np.nanmin(dataset)

    ### Create empty data array to store output
    normalized = np.zeros_like(dataset, dtype=np.float32)

    ### Run normalization for each 
    for i in range(0, dataset.shape[0]):
        band = dataset[i, : , : ]
        band_norm = (band.astype(float)  - minValue) / (maxValue  - minValue)
        normalized[i, : , : ] = band_norm
        band_norm = None        # set to None after the iteration

    ### Define output
    if isinstance(input, rasterio.DatasetReader):
        meta.update({'dtype': np.float32})
        normalized_raster = array2raster(normalized, meta)
    else:
        normalized_raster = normalized

    return normalized_raster

normalizedDifference(input, band1, band2)

Calculate normalized difference index

Parameters:

Name Type Description Default
input Raster | Array

Rasterio object or data array, input with multiple bands.

required
band1 numeric

Order of the first band in the input.

required
band2 numeric

Order of the second band in the input.

required

Returns:

Type Description
raster | dataArray

Normalized difference result in raster or data array depending on input, containing all image pixel values

Source code in geonate/processor.py
def normalizedDifference(input, band1, band2):
    """
    Calculate normalized difference index

    Args:
        input (Raster | Array): Rasterio object or data array, input with multiple bands.
        band1 (numeric): Order of the first band in the input.
        band2 (numeric): Order of the second band in the input.

    Returns:
        raster | dataArray: Normalized difference result in raster or data array depending on input, containing all image pixel values

    """
    import numpy as np
    import rasterio
    from .common import array2raster

    # *****************************************
    # Define data input
    # input is raster
    if isinstance(input, rasterio.DatasetReader):
        dataset = input.read()
        meta = input.meta
    # input is array
    elif isinstance(input, np.ndarray):
        dataset = input
        meta = None
    # Other input
    else:
        raise ValueError('Input data is not supported')

    # *****************************************
    # Extract band values
    ds_band1 = dataset[band1+1, : , : ]
    ds_band2 = dataset[band2+1, : , : ]

    # *****************************************
    # Calculate index and Remove outliers, also
    normalized_index  = (ds_band1.astype(float) - ds_band2.astype(float)) / (ds_band1 + ds_band2)
    normalized_index = normalized_index.astype(np.float32)

    normalized_index[(normalized_index < -1) | (normalized_index > 1)] = np.nan

    # *****************************************
    # Define output 
    if isinstance(input, rasterio.DatasetReader):
        meta.update({'dtype': np.float32, 'count': 1})                                                  #  update datatype in metadata 
        normalized_output = array2raster(normalized_index, meta)
    elif isinstance(input, np.ndarray):
        normalized_output = normalized_index
    else:
        normalized_output = []

    return normalized_output

pca(input, n_component=3, **kwargs)

Perform Principal Component Analysis (PCA) on multispectral data for data Dimension Reduction.

Parameters:

Name Type Description Default
input rasterio.DatasetReader or np.ndarray

Multispectral input data. Can be a raster image or a numpy array.

required
n_component int

Number of principal components to compute. Default is 3.

3
**kwargs optional

All parameters in sklearn.decomposition.PCA()

{}

Returns:

Type Description
np.ndarray or rasterio.DatasetReader

PCA-transformed data in the same format as the input.

Source code in geonate/processor.py
def pca(input, n_component: int=3, **kwargs):
    """
    Perform Principal Component Analysis (PCA) on multispectral data for data Dimension Reduction.

    Args:
        input (rasterio.DatasetReader or np.ndarray): Multispectral input data. Can be a raster image or a numpy array.
        n_component (int): Number of principal components to compute. Default is 3.
        **kwargs (optional): All parameters in sklearn.decomposition.PCA()

    Returns:
        np.ndarray or rasterio.DatasetReader: PCA-transformed data in the same format as the input.

    """
    import numpy as np
    import rasterio
    from sklearn.decomposition import PCA
    from .common import array2raster, reshape_raster

    # Identify datatype and define input data
    # Raster image
    if isinstance(input, rasterio.DatasetReader):
        arr = input.read()
        height, width = input.shape
        nbands =  input.count
        meta = input.meta
    # Data Array
    elif isinstance(input, np.ndarray):
        if len(input.shape) < 3:
            raise ValueError('Input must be multispectral data (multi-band)')
        else:
            arr = input
            nbands, height, width = input.shape

    else: 
        raise ValueError('Input is not supported')

    # Reshape from raster to image format, and from 3D to 2D
    ds = reshape_raster(arr, mode='image')
    ds_reshaped = ds.reshape((-1, nbands)) # -1 means this dim will be determined by other dims

    # Define PCA model and fit the PCA model
    pca_model = PCA(n_components= n_component, **kwargs)
    pca_fit = pca_model.fit_transform(ds_reshaped, **kwargs)

    # Reshape the result to the new shape
    pca_img = pca_fit.reshape((height, width, n_component))

    # Reshape from image to raster format 
    pca_raster = reshape_raster(pca_img, mode='raster')

    # Return output based on input similar to input
    if isinstance(input, np.ndarray):
        return pca_raster

    # Convert to rasterio object
    elif isinstance(input, rasterio.DatasetReader):
        meta.update({'count': n_component})
        pca_rast = array2raster(pca_raster, meta)
        return pca_rast

reclassify(input, breakpoints, classes)

Reclassify image with discrete or continuous values

Parameters:

Name Type Description Default
input raster | array

Raster or data array input

required
breakpoints list

Number list, defines a breakpoint value for reclassifcation, e.g., [ -1, 0, 1]

required
classes list

Number list, define classes, number of classes equal number of breakpoints minus 1

required

Returns:

Type Description
raster | dataArray

Reclassified result in raster or data array depending on input, containing all image pixel values

Source code in geonate/processor.py
def reclassify(input, breakpoints, classes):
    """
    Reclassify image with discrete or continuous values

    Args:
        input (raster | array): Raster or data array input
        breakpoints (list): Number list, defines a breakpoint value for reclassifcation, e.g., [ -1, 0, 1]
        classes (list): Number list, define classes, number of classes equal number of breakpoints minus 1

    Returns:
        raster | dataArray: Reclassified result in raster or data array depending on input, containing all image pixel values

    """
    import rasterio
    import numpy as np
    from .common import array2raster

    # *****************************************
    # Check input data
    # input is raster
    if isinstance(input, rasterio.DatasetReader):
        if len(input.shape) == 2:
            dataset = input.read()
            meta = input.meta
        elif len(input.shape) == 3:
            if  input.shape[0] > 1:
                raise ValueError('Input data has more than one band')
            else:
                dataset = input.read(1)
                meta = input.meta
    # input is array
    elif isinstance(input, np.ndarray):
        if (len(input.shape)) > 2 and (input.shape[0] > 1):
            raise ValueError('Input data has more than one band')
        else:
            dataset = input
    # Other input
    else:
        raise ValueError('Input data is not supported')

    # *****************************************
    # Create unique values and empty data array to store reclassified result 
    uniques = np.unique(dataset)
    reclassified = np.zeros_like(dataset)

    # *****************************************
    # If image has discrete values
    if len(uniques) == len(classes): 
        if len(breakpoints) == len(classes):
            for i in range(len(classes)):
                reclassified[dataset == breakpoints[i]] = classes[i]
        elif len(breakpoints) == (len(classes)-1):
            for i in range(len(classes)):
                reclassified[(dataset >= breakpoints[i]) & (dataset < breakpoints[i+1])] = classes[i]
        else:
            raise ValueError('Number of classes must be equal to number of breakpoints minus 1')

    # If image has continuous values
    else:
        if len(breakpoints) == (len(classes)+1):
            for i in range(len(classes)):
                reclassified[(dataset >= breakpoints[i]) & (dataset < breakpoints[i+1])] = classes[i]
        else:
            raise ValueError('Number of classes must be equal to number of breakpoints minus 1')

    # *****************************************
    # Define output
    if isinstance(input, rasterio.DatasetReader):
        reclassified_raster = array2raster(reclassified, meta)
    else:
        reclassified_raster = reclassified

    return reclassified_raster

reproject(input, reference, method='near', res=None, **kwargs)

Reprojects and resamples a given raster image to a specified coordinate reference system (CRS) and resolution.

Parameters:

Name Type Description Default
input raster

The input raster image to be reprojected.

required
reference raster | shapefile

The reference CRS for reprojection. It can be a string (e.g., 'EPSG:4326') or a rasterio DatasetReader object. If None, the input CRS is used.

required
method AnyStr

The resampling method to use. Default is 'near'. Supported methods include 'nearest', 'average', 'max', 'min', 'median', 'mode', 'q1', 'q3', 'rms', 'sum', 'cubic', 'cubic_spline', 'bilinear', 'gauss', 'lanczos'.

'near'
res numeric

The output resolution. If None, the input resolution is used.

None
**kwargs optional

All parameters that can be passed to rasterio.warp.reproject function.

{}

Returns:

Type Description
raster

The reprojected raster image

Source code in geonate/processor.py
def reproject(input, reference, method: Optional[AnyStr]='near', res: Optional[float]=None, **kwargs):
    """
    Reprojects and resamples a given raster image to a specified coordinate reference system (CRS) and resolution.

    Args:
        input (raster): The input raster image to be reprojected.
        reference (raster | shapefile): The reference CRS for reprojection. It can be a string (e.g., 'EPSG:4326') or a rasterio DatasetReader object. If None, the input CRS is used.
        method (AnyStr, optional): The resampling method to use. Default is 'near'. Supported methods include 'nearest', 'average', 'max', 'min', 'median', 'mode', 'q1', 'q3', 'rms', 'sum', 'cubic', 'cubic_spline', 'bilinear', 'gauss', 'lanczos'.
        res (numeric, optional): The output resolution. If None, the input resolution is used.
        **kwargs (optional): All parameters that can be passed to rasterio.warp.reproject function.

    Returns:
        raster: The reprojected raster image

    """
    import numpy as np
    import rasterio
    from rasterio import warp
    from .common import array2raster

    # *********************************************
    # Define input image
    input_image = input.read()
    meta = input.meta
    left, bottom, right, top = input.bounds

    # *********************************************
    # Determine parameters and new transform
    # Reference string of EPSG
    if isinstance(reference, str):
        dst_crs = reference
        if res is None:
            raise ValueError('Please provide output resolution')
        else:
            xsize, ysize = res, res
        # Transform to new transform
        transform_new, width_new, height_new = warp.calculate_default_transform(src_crs=meta['crs'], dst_crs=dst_crs, \
                                                                                                                                                height=meta['height'], width=meta['width'], \
                                                                                                                                                resolution=(xsize, ysize), \
                                                                                                                                                left=left, bottom=bottom, right=right, top=top)
    # Take all paras from reference image
    elif isinstance(reference, rasterio.DatasetReader):
        dst_crs = reference.crs
        if res is None:
            xsize, ysize = reference.res
        else:
            xsize, ysize = res, res
        # Transform to new transform
        transform_new, width_new, height_new = warp.calculate_default_transform(src_crs=meta['crs'], dst_crs=dst_crs, \
                                                                                                                                                height=meta['height'], width=meta['width'], \
                                                                                                                                                resolution=(xsize, ysize), \
                                                                                                                                                left=left, bottom=bottom, right=right, top=top)
    # Other cases
    else:
        raise ValueError('Please define correct reference, it is CRS string or an image reference')

    # *******************************************
    # Update metadata
    meta_update = meta.copy()
    meta_update.update({
        'crs': dst_crs,
        'transform': transform_new,
        'width': width_new,
        'height': height_new,
    })
    # *******************************************
    # Resampling method
    if method.lower() == 'near' or method.lower() == 'nearest':
        resampleAlg = warp.Resampling.nearest
    elif method.lower() == 'mean' or method.lower() == 'average':
        resampleAlg = warp.Resampling.average
    elif method.lower() == 'max':
        resampleAlg = warp.Resampling.max
    elif method.lower() == 'min':
        resampleAlg = warp.Resampling.min
    elif (method.lower() == 'median') or (method.lower() == 'med'):
        resampleAlg = warp.Resampling.med
    elif method.lower() == 'mode':
        resampleAlg = warp.Resampling.mode
    elif method.lower() == 'q1':
        resampleAlg = warp.Resampling.q1
    elif method.lower() == 'q3':
        resampleAlg = warp.Resampling.q3
    elif method.lower() == 'rsm':
        resampleAlg = warp.Resampling.rms
    elif method.lower() == 'sum':
        resampleAlg = warp.Resampling.sum
    elif method.lower() == 'cubic':
        resampleAlg = warp.Resampling.cubic
    elif method.lower() == 'spline':
        resampleAlg = warp.Resampling.cubic_spline
    elif method.lower() == 'bilinear':
        resampleAlg = warp.Resampling.bilinear
    elif method.lower() == 'gauss':
        resampleAlg = warp.Resampling.gauss
    elif method.lower() == 'lanczos':
        resampleAlg = warp.Resampling.lanczos
    else:
        raise ValueError('The resampling method is not supported, available methods rasterio.warp.Resampling')

    # ***************************************
    # Running reproject 
    projected_array = np.empty((input_image.shape[0], height_new, width_new), dtype= meta['dtype'])
    for band in range(0, input_image.shape[0]):
        ds = input_image[band, : , : ]
        warp.reproject(source=ds, destination=projected_array[(band), :, :], \
                       src_transform= meta['transform'], dst_transform=transform_new, \
                        src_crs=meta['crs'], dst_crs=dst_crs, \
                        resampling= resampleAlg,
                        **kwargs)

    # *****************************************
    # Convert array back to raster
    reprojected = array2raster(projected_array, meta_update)

    return reprojected

resample(input, factor, mode='aggregate', method='near', **kwargs)

Resample raster image based on factor

Parameters:

Name Type Description Default
input DatasetReader

Input rasterio image.

required
factor numeric

Resampling factor compared to original image (e.g., 2, 4, 6).

required
mode str

Resample mode ["aggregate", "disaggregate"]. Defaults to 'aggregate'.

'aggregate'
method str

Resampling method (e.g., 'nearest', 'cubic', 'bilinear', 'average'). Defaults to 'near'.

'near'
**kwargs optional

All parameters that can be passed to rasterio.warp.reproject function.

{}

Returns:

Type Description
raster

Resampled raster image.

Source code in geonate/processor.py
def resample(input, factor, mode='aggregate', method='near', **kwargs):
    """
    Resample raster image based on factor

    Args:
        input (DatasetReader): Input rasterio image.
        factor (numeric): Resampling factor compared to original image (e.g., 2, 4, 6).
        mode (str, optional): Resample mode ["aggregate", "disaggregate"]. Defaults to 'aggregate'.
        method (str, optional): Resampling method (e.g., 'nearest', 'cubic', 'bilinear', 'average'). Defaults to 'near'.
        **kwargs (optional): All parameters that can be passed to rasterio.warp.reproject function.

    Returns:
        raster: Resampled raster image.        

    """
    import rasterio
    from rasterio import warp
    import numpy as np
    from .common import array2raster

    # *****************************************
    ### Define input image
    # input is raster
    if isinstance(input, rasterio.DatasetReader):
        dataset = input.read()
        meta = input.meta
        left, bottom, right, top = input.bounds
        nbands = input.count
    # Other input
    else:
        raise ValueError('Input data is not supported')

    # *****************************************
    #### Calculate new rows and columns
    if (mode.lower() == 'aggregate') or (mode.lower() == 'agg') or (mode.lower() == 'a'):
        new_height = meta['height'] // factor
        new_width = meta['width'] // factor

    elif (mode.lower() == 'disaggregate') or (mode.lower() == 'disagg') or (mode.lower() == 'd'):
        new_height = meta['height'] * factor
        new_width = meta['width'] * factor

    else:
        raise ValueError('Resample method is not supported ["aggregate", "disaggregate"]')

    # *****************************************
    # Calculate new transform
    transform_new, width, height = warp.calculate_default_transform(src_crs=meta['crs'], dst_crs=meta['crs'], width=new_width, height=new_height, left=left, bottom=bottom, right=right, top=top)

    # *****************************************
    # Resampling method
    if method.lower() == 'near' or method.lower() == 'nearest':
        resampleAlg = warp.Resampling.nearest
    elif method.lower() == 'mean' or method.lower() == 'average':
        resampleAlg = warp.Resampling.average
    elif method.lower() == 'max':
        resampleAlg = warp.Resampling.max
    elif method.lower() == 'min':
        resampleAlg = warp.Resampling.min
    elif (method.lower() == 'median') or (method.lower() == 'med'):
        resampleAlg = warp.Resampling.med
    elif method.lower() == 'mode':
        resampleAlg = warp.Resampling.mode
    elif method.lower() == 'q1':
        resampleAlg = warp.Resampling.q1
    elif method.lower() == 'q3':
        resampleAlg = warp.Resampling.q3
    elif method.lower() == 'rsm':
        resampleAlg = warp.Resampling.rms
    elif method.lower() == 'sum':
        resampleAlg = warp.Resampling.sum
    elif method.lower() == 'cubic':
        resampleAlg = warp.Resampling.cubic
    elif method.lower() == 'spline':
        resampleAlg = warp.Resampling.cubic_spline
    elif method.lower() == 'bilinear':
        resampleAlg = warp.Resampling.bilinear
    elif method.lower() == 'gauss':
        resampleAlg = warp.Resampling.gauss
    elif method.lower() == 'lanczos':
        resampleAlg = warp.Resampling.lanczos
    else:
        raise ValueError('The resampling method is not supported, available methods raster.Resampling.')

    # *****************************************
    # Define and Update the metadata for the destination raster
    metadata = meta.copy()
    metadata.update({
        'transform': transform_new,
        'width': new_width,
        'height': new_height, 
        'dtype': np.float32
    })

    # *****************************************
    # Run Resampling for each band
    resampled = np.empty((nbands, new_height, new_width), dtype=np.float32)

    for band in range(0, nbands):
        if nbands <= 1:
            ds = dataset
        else:
            ds = dataset[band, : , : ]

        warp.reproject(source=ds, destination=resampled[band, :, :], \
                       src_transform= meta['transform'], dst_transform= transform_new, \
                        src_crs=meta['crs'], dst_crs=input.crs, resampling= resampleAlg, **kwargs)

    # *****************************************
    # Convert array to raster 
    resampled_raster = array2raster(resampled, metadata)

    return resampled_raster

values(input, na_rm=True, names=None, prefix=None)

Extract all pixel values of image and create dataframe from them, each band is a column

Parameters:

Name Type Description Default
input raster | array

Rasterio raster image or data array.

required
na_rm bool

Remove or do not remove NA value from output dataframe. Defaults to True.

True
names list

Given expected names for each column in the dataframe, if not, default name will be assigned. Defaults to None.

None
prefix AnyStr

Given character before each band name. Defaults to None.

None

Returns:

Type Description
DataFrame

Dataframe stores all pixel values across all image bands.

Source code in geonate/processor.py
def values(input, na_rm: Optional[bool]=True, names: Optional[list]=None, prefix: Optional[AnyStr]=None):
    """
    Extract all pixel values of image and create dataframe from them, each band is a column

    Args:
        input (raster | array): Rasterio raster image or data array.
        na_rm (bool, optional): Remove or do not remove NA value from output dataframe. Defaults to True.
        names (list, optional): Given expected names for each column in the dataframe, if not, default name will be assigned. Defaults to None.
        prefix (AnyStr, optional): Given character before each band name. Defaults to None.

    Returns:
        DataFrame: Dataframe stores all pixel values across all image bands.

    """
    import rasterio
    import numpy as np
    import pandas as pd

    # *****************************************
    # Check input data
    # input is raster
    if isinstance(input, rasterio.DatasetReader):
        dataset = input.read()
    # input is array
    elif isinstance(input, np.ndarray):
        dataset = input
    # Other input
    else:
        raise ValueError('Input data is not supported')

    # *****************************************
    # Define parameters
    nbands = dataset.shape[0]
    bands_array = [dataset[band, : , : ].flatten() for band in range(0, nbands)]

    # *****************************************
    # Assign column names in case name is given 
    if names is not None:
        if len(names) != nbands:
            raise ValueError('Length of name should be equal to number of bands')
        else:
            if prefix is None:
                data = pd.DataFrame(np.array(bands_array).T, columns=names)
            else:
                names_new = [f'{prefix}{name}' for name in names]
                data = pd.DataFrame(np.array(bands_array).T, columns=names_new)
    # If name is not given
    else:
        if prefix is None:
            data = pd.DataFrame(np.array(bands_array).T, columns=[f'B{i}' for i in range(1,nbands +1)])
        else:
            data = pd.DataFrame(np.array(bands_array).T, columns=[f'{prefix}{i}' for i in range(1, nbands +1)])

    # *****************************************
    # Remove NA values or not
    if na_rm is True: 
        data_out = data.dropna().reset_index(drop=True)
    else:
        data_out = data

    return data_out