classify module¶
Classification module.
Gaussian_Naive_Bayes
¶
A class to implement Gaussian Naive Bayes classifier for training, tuning, and classification tasks.
Source code in geonate/classify.py
class Gaussian_Naive_Bayes:
    """
    A class to implement a Gaussian Naive Bayes classifier for training,
    hyperparameter tuning, and raster-image classification tasks.

    Attributes:
        X_train (array-like): Training feature data.
        y_train (array-like): Training target data.
        X_test (array-like): Testing feature data.
        y_test (array-like): Testing target data.
        initial_gnb (GaussianNB): The initial (untuned) model, fitted on init.
        tuned_gnb: The fitted search object set by ``tune()``, or None.
    """

    def __init__(self, X_train, y_train, X_test, y_test):
        """
        Initialize the Gaussian_Naive_Bayes class with training and testing data.

        Args:
            X_train (array-like): Training feature data.
            y_train (array-like): Training target data.
            X_test (array-like): Testing feature data.
            y_test (array-like): Testing target data.
        """
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.initial_gnb = None
        self.tuned_gnb = None
        # Automatically train and validate the initial model.
        self.model()

    # Initial model and validation
    def model(self, **kwargs):
        """
        Train and validate the initial Gaussian Naive Bayes model.

        Args:
            **kwargs: Additional keyword arguments for GaussianNB.

        Returns:
            GaussianNB: The trained Gaussian Naive Bayes model.
        """
        from sklearn.naive_bayes import GaussianNB
        from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

        # Initialize and fit the model on the training split.
        gnb = GaussianNB(**kwargs)
        gnb.fit(self.X_train, self.y_train)
        # Validate on the held-out test split and store validation metrics.
        y_pred = gnb.predict(self.X_test)
        self.accuracy = accuracy_score(self.y_test, y_pred)
        self.confusion_matrix = confusion_matrix(self.y_test, y_pred)
        # Row-normalized confusion matrix, expressed as percentages.
        self.confusion_matrix_percent = (
            self.confusion_matrix.astype(float)
            / self.confusion_matrix.sum(axis=1, keepdims=True) * 100
        )
        self.classification_report = classification_report(self.y_test, y_pred)
        self.initial_gnb = gnb
        return self.initial_gnb

    # Tune the best parameters for the classifier using random or grid search
    def tune(self, method="random", var_smoothing=None, n_iter=5, cv=5, n_job=-1):
        """
        Tune the Gaussian Naive Bayes model using random search or grid search.

        Args:
            method (str): The tuning method to use ('random' or 'grid').
            var_smoothing (list, optional): var_smoothing values to try.
                Defaults to a log-spaced grid from 1e-15 to 1.
            n_iter (int): Number of iterations for random search.
            cv (int): Number of cross-validation folds.
            n_job (int): Number of jobs to run in parallel (-1 = all processors).

        Returns:
            The fitted search object wrapping the tuned model.

        Raises:
            ValueError: If ``method`` is not a supported search method.
        """
        from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
        from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

        # Default grid lives here rather than in the signature to avoid a
        # shared mutable default argument.
        if var_smoothing is None:
            var_smoothing = [1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,
                             1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e-0]
        paras = [{
            'var_smoothing': var_smoothing
        }]
        method_key = method.lower()
        if method_key in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
            # Bug fix: cv and n_job were previously ignored for random search.
            searcher = RandomizedSearchCV(estimator=self.initial_gnb,
                                          param_distributions=paras, n_iter=n_iter,
                                          cv=cv, scoring='accuracy', verbose=True,
                                          n_jobs=n_job)
        elif method_key in ('grid', 'gridsearch', 'gridsearchcv'):
            # Bug fix: n_jobs was previously hard-coded to -1 instead of n_job.
            searcher = GridSearchCV(estimator=self.initial_gnb, param_grid=paras,
                                    cv=cv, scoring='accuracy', verbose=True,
                                    n_jobs=n_job)
        else:
            raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
        searcher.fit(self.X_train, self.y_train)
        self.tuned_gnb = searcher
        # Validate the tuned model and store validation metrics.
        tuned_y_pred = searcher.predict(self.X_test)
        self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
        self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
        self.tuned_confusion_matrix_percent = (
            self.tuned_confusion_matrix.astype(float)
            / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
        )
        self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
        return self.tuned_gnb

    # Classify image
    def classify(self, src, model=None):
        """
        Classify an image using the Gaussian Naive Bayes model.

        Args:
            src (rasterio.DatasetReader): The source image to classify.
            model (GaussianNB, optional): The model to use for classification.
                If None, the tuned model (when available) or the initial model is used.

        Returns:
            The classified image as a single-band raster.

        Raises:
            ValueError: If ``src`` is not a rasterio DatasetReader.
        """
        import rasterio
        from geonate.common import reshape_raster, array2raster

        # Choose the estimator: explicit argument > tuned model > initial model.
        if model is not None:
            gnb_model = model
        else:
            gnb_model = self.tuned_gnb if self.tuned_gnb is not None else self.initial_gnb
        # Only rasterio datasets are accepted as input imagery.
        if not isinstance(src, rasterio.DatasetReader):
            raise ValueError('Source image is not supported')
        src_meta = src.meta
        nbands = src.count
        src_height = src.height
        src_width = src.width
        src_rast = src.read()
        # Reshape to (pixels, bands) so each pixel becomes one sample.
        src_img = reshape_raster(src_rast, mode='image')
        ds = src_img.reshape((-1, nbands))
        # Predict labels using the chosen model.
        pred_labels = gnb_model.predict(ds)
        # Restore the image shape and wrap the labels as a single-band raster.
        pred_result = pred_labels.reshape(src_height, src_width)
        src_meta.update({'count': 1})
        classified = array2raster(pred_result, metadata=src_meta)
        return classified
__init__(self, X_train, y_train, X_test, y_test)
special
¶
Initialize the Gaussian_Naive_Bayes class with training and testing data.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
X_train |
array-like |
Training feature data. |
required |
y_train |
array-like |
Training target data. |
required |
X_test |
array-like |
Testing feature data. |
required |
y_test |
array-like |
Testing target data. |
required |
Source code in geonate/classify.py
def __init__(self, X_train, y_train, X_test, y_test):
    """
    Initialize the Gaussian_Naive_Bayes class with training and testing data.

    Args:
        X_train (array-like): Training feature data.
        y_train (array-like): Training target data.
        X_test (array-like): Testing feature data.
        y_test (array-like): Testing target data.
    """
    # Store the train/test split on the instance.
    self.X_train, self.y_train = X_train, y_train
    self.X_test, self.y_test = X_test, y_test
    # No models exist yet; model() below creates the initial one.
    self.initial_gnb = None
    self.tuned_gnb = None
    # Fit and validate the initial model right away.
    self.model()
classify(self, src, model=None)
¶
Classify an image using the Gaussian Naive Bayes model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
rasterio.DatasetReader |
The source image to classify. |
required |
model |
GaussianNB |
The model to use for classification. If None, the tuned model or initial model will be used. |
None |
Returns:
| Type | Description |
|---|---|
The classified image. |
Source code in geonate/classify.py
def classify(self, src, model=None):
    """
    Classify an image using the Gaussian Naive Bayes model.

    Args:
        src (rasterio.DatasetReader): The source image to classify.
        model (GaussianNB, optional): The model to use for classification.
            If None, the tuned model or initial model will be used.

    Returns:
        The classified image.
    """
    import rasterio
    from geonate.common import reshape_raster, array2raster

    # Pick the estimator: caller-supplied model wins, then tuned, then initial.
    if model is None:
        estimator = self.tuned_gnb if self.tuned_gnb is not None else self.initial_gnb
    else:
        estimator = model
    # Only rasterio datasets are accepted as input imagery.
    if not isinstance(src, rasterio.DatasetReader):
        raise ValueError('Source image is not supported')
    meta = src.meta
    band_count = src.count
    height, width = src.height, src.width
    raster = src.read()
    # Flatten the image so every pixel is one (bands,) feature vector.
    image = reshape_raster(raster, mode='image')
    samples = image.reshape((-1, band_count))
    # Predict a label for every pixel.
    labels = estimator.predict(samples)
    # Fold the flat predictions back into image shape and wrap as a raster.
    result = labels.reshape(height, width)
    meta.update({'count': 1})
    return array2raster(result, metadata=meta)
model(self, **kwargs)
¶
Train and validate the initial Gaussian Naive Bayes model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
**kwargs |
Additional keyword arguments for GaussianNB. |
{} |
Returns:
| Type | Description |
|---|---|
GaussianNB |
The trained Gaussian Naive Bayes model. |
Source code in geonate/classify.py
def model(self, **kwargs):
    """
    Train and validate the initial Gaussian Naive Bayes model.

    Args:
        **kwargs: Additional keyword arguments for GaussianNB.

    Returns:
        GaussianNB: The trained Gaussian Naive Bayes model.
    """
    from sklearn.naive_bayes import GaussianNB
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    # Fit a fresh estimator on the training split.
    estimator = GaussianNB(**kwargs)
    estimator.fit(self.X_train, self.y_train)
    # Score on the held-out split and keep the metrics on the instance.
    predictions = estimator.predict(self.X_test)
    self.accuracy = accuracy_score(self.y_test, predictions)
    self.confusion_matrix = confusion_matrix(self.y_test, predictions)
    # Row-normalized confusion matrix, expressed as percentages.
    row_totals = self.confusion_matrix.sum(axis=1, keepdims=True)
    self.confusion_matrix_percent = self.confusion_matrix.astype(float) / row_totals * 100
    self.classification_report = classification_report(self.y_test, predictions)
    self.initial_gnb = estimator
    return self.initial_gnb
tune(self, method='random', var_smoothing=[1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1, 1.0], n_iter=5, cv=5, n_job=-1)
¶
Tune the Gaussian Naive Bayes model using random search or grid search methods.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
method |
str |
The tuning method to use ('random' or 'grid'). |
'random' |
var_smoothing |
list |
List of var_smoothing values to try. |
[1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1, 1.0] |
n_iter |
int |
Number of iterations for random search. |
5 |
cv |
int |
Number of cross-validation folds. |
5 |
n_job |
int |
Number of jobs to run in parallel. |
-1 |
Returns:
| Type | Description |
|---|---|
The tuned Gaussian Naive Bayes model. |
Source code in geonate/classify.py
def tune(self, method="random", var_smoothing=None, n_iter=5, cv=5, n_job=-1):
    """
    Tune the Gaussian Naive Bayes model using random search or grid search methods.

    Args:
        method (str): The tuning method to use ('random' or 'grid').
        var_smoothing (list, optional): var_smoothing values to try.
            Defaults to a log-spaced grid from 1e-15 to 1.
        n_iter (int): Number of iterations for random search.
        cv (int): Number of cross-validation folds.
        n_job (int): Number of jobs to run in parallel (-1 = all processors).

    Returns:
        The tuned Gaussian Naive Bayes model (fitted search object).

    Raises:
        ValueError: If ``method`` is not a supported search method.
    """
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    # Default grid kept out of the signature to avoid a mutable default argument.
    if var_smoothing is None:
        var_smoothing = [1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,
                         1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e-0]
    paras = [{
        'var_smoothing': var_smoothing
    }]
    method_key = method.lower()
    if method_key in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
        # Bug fix: cv and n_job were previously ignored for random search.
        searcher = RandomizedSearchCV(estimator=self.initial_gnb,
                                      param_distributions=paras, n_iter=n_iter,
                                      cv=cv, scoring='accuracy', verbose=True,
                                      n_jobs=n_job)
    elif method_key in ('grid', 'gridsearch', 'gridsearchcv'):
        # Bug fix: n_jobs was previously hard-coded to -1 instead of n_job.
        searcher = GridSearchCV(estimator=self.initial_gnb, param_grid=paras,
                                cv=cv, scoring='accuracy', verbose=True,
                                n_jobs=n_job)
    else:
        raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
    searcher.fit(self.X_train, self.y_train)
    self.tuned_gnb = searcher
    # Validate the tuned model and store validation metrics.
    tuned_y_pred = searcher.predict(self.X_test)
    self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix_percent = (
        self.tuned_confusion_matrix.astype(float)
        / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
    )
    self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
    return self.tuned_gnb
KNN
¶
A class that encapsulates a K-Nearest Neighbors (KNN) model for classification tasks, including model training, hyperparameter tuning using grid or random search, and classification of image data.
Attributes:
| Name | Type | Description |
|---|---|---|
X_train |
ndarray or DataFrame |
The training features for model fitting. |
y_train |
ndarray or Series |
The training labels for model fitting. |
X_test |
ndarray or DataFrame |
The test features for model validation. |
y_test |
ndarray or Series |
The test labels for model validation. |
initial_knn |
KNeighborsClassifier |
The initial KNN model (untuned). |
tuned_knn |
KNeighborsClassifier |
The KNN model after tuning using grid or random search. |
accuracy |
float |
Accuracy of the initial (naive) KNN model. |
confusion_matrix |
ndarray |
Confusion matrix of the initial (naive) KNN model. |
confusion_matrix_percent |
ndarray |
Percent-based confusion matrix for the initial (naive) model. |
tuned_accuracy |
float |
Accuracy of the tuned KNN model. |
tuned_confusion_matrix |
ndarray |
Confusion matrix of the tuned KNN model. |
tuned_confusion_matrix_percent |
ndarray |
Percent-based confusion matrix for the tuned model. |
Source code in geonate/classify.py
class KNN:
    """
    A class that encapsulates a K-Nearest Neighbors (KNN) model for classification
    tasks, including model training, hyperparameter tuning using grid or random
    search, and classification of image data.

    Attributes:
        X_train (ndarray or DataFrame): The training features for model fitting.
        y_train (ndarray or Series): The training labels for model fitting.
        X_test (ndarray or DataFrame): The test features for model validation.
        y_test (ndarray or Series): The test labels for model validation.
        initial_knn (KNeighborsClassifier, optional): The initial KNN model (untuned).
        tuned_knn (optional): The fitted search object after tuning.
        accuracy (float, optional): Accuracy of the initial (naive) KNN model.
        confusion_matrix (ndarray, optional): Confusion matrix of the initial (naive) KNN model.
        confusion_matrix_percent (ndarray, optional): Percent-based confusion matrix for the initial (naive) model.
        tuned_accuracy (float, optional): Accuracy of the tuned KNN model.
        tuned_confusion_matrix (ndarray, optional): Confusion matrix of the tuned KNN model.
        tuned_confusion_matrix_percent (ndarray, optional): Percent-based confusion matrix for the tuned model.
    """

    def __init__(self, X_train, y_train, X_test, y_test):
        """
        Initializes the KNN class with the provided training and testing data.

        Args:
            X_train (ndarray or DataFrame): The training features for model fitting.
            y_train (ndarray or Series): The training labels for model fitting.
            X_test (ndarray or DataFrame): The test features for model validation.
            y_test (ndarray or Series): The test labels for model validation.
        """
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.initial_knn = None
        self.tuned_knn = None
        # Automatically train and validate the initial model.
        self.model()

    # Initial model and validation
    def model(self, **kwargs):
        """
        Trains and validates the initial KNN model.

        Args:
            **kwargs: Additional keyword arguments for the KNeighborsClassifier model.

        Returns:
            KNeighborsClassifier: The trained KNN model.
        """
        from sklearn.neighbors import KNeighborsClassifier
        from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

        # Initialize and fit the model on the training split.
        knn = KNeighborsClassifier(**kwargs)
        knn.fit(self.X_train, self.y_train)
        # Validate on the held-out test split and store validation metrics.
        y_pred = knn.predict(self.X_test)
        self.accuracy = accuracy_score(self.y_test, y_pred)
        self.confusion_matrix = confusion_matrix(self.y_test, y_pred)
        # Row-normalized confusion matrix, expressed as percentages.
        self.confusion_matrix_percent = (
            self.confusion_matrix.astype(float)
            / self.confusion_matrix.sum(axis=1, keepdims=True) * 100
        )
        self.classification_report = classification_report(self.y_test, y_pred)
        self.initial_knn = knn
        return self.initial_knn

    # Tune the best parameters for the classifier using random or grid search
    def tune(self, method="random", n_neighbors=None, weights=None, n_iter=5, cv=5, n_job=-1):
        """
        Tunes the best parameters for the KNN classifier using random search or
        grid search methods.

        Args:
            method (str): The tuning method to use ('random' or 'grid'). Default is 'random'.
            n_neighbors (list, optional): Values for the number of neighbors to try.
                Defaults to [3, 5, 7, 9, 11].
            weights (list, optional): Weight functions used in prediction.
                Defaults to ['uniform', 'distance'].
            n_iter (int): Number of parameter settings sampled in random search. Default is 5.
            cv (int): Number of cross-validation folds. Default is 5.
            n_job (int): Number of jobs to run in parallel. Default is -1 (all processors).

        Returns:
            KNeighborsClassifier: The tuned KNN model (fitted search object).

        Raises:
            ValueError: If ``method`` is not a supported search method.
        """
        from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
        from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

        # Defaults live here rather than in the signature to avoid shared
        # mutable default arguments.
        if n_neighbors is None:
            n_neighbors = [3, 5, 7, 9, 11]
        if weights is None:
            weights = ['uniform', 'distance']
        paras = [{
            'n_neighbors': n_neighbors,
            'weights': weights
        }]
        method_key = method.lower()
        if method_key in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
            # Bug fix: cv and n_job were previously ignored for random search.
            searcher = RandomizedSearchCV(estimator=self.initial_knn,
                                          param_distributions=paras, n_iter=n_iter,
                                          cv=cv, scoring='accuracy', verbose=True,
                                          n_jobs=n_job)
        elif method_key in ('grid', 'gridsearch', 'gridsearchcv'):
            # Bug fix: n_jobs was previously hard-coded to -1 instead of n_job.
            searcher = GridSearchCV(estimator=self.initial_knn, param_grid=paras,
                                    cv=cv, scoring='accuracy', verbose=True,
                                    n_jobs=n_job)
        else:
            raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
        searcher.fit(self.X_train, self.y_train)
        self.tuned_knn = searcher
        # Validate the tuned model and store validation metrics.
        tuned_y_pred = searcher.predict(self.X_test)
        self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
        self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
        self.tuned_confusion_matrix_percent = (
            self.tuned_confusion_matrix.astype(float)
            / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
        )
        self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
        return self.tuned_knn

    # Classify image
    def classify(self, src, model=None):
        """
        Classifies an image using the trained or tuned KNN model.

        Args:
            src (rasterio.DatasetReader): A rasterio object representing the image to classify.
            model (KNeighborsClassifier, optional): Trained KNN model to classify the image.
                If None, uses the tuned KNN model if available, otherwise the naive KNN model.

        Returns:
            rasterio.DatasetReader: The classified image as a raster object.

        Raises:
            ValueError: If ``src`` is not a rasterio DatasetReader.
        """
        import rasterio
        from geonate.common import reshape_raster, array2raster

        # Choose the estimator: explicit argument > tuned model > initial model.
        if model is not None:
            knn_model = model
        else:
            knn_model = self.tuned_knn if self.tuned_knn is not None else self.initial_knn
        # Only rasterio datasets are accepted as input imagery.
        if not isinstance(src, rasterio.DatasetReader):
            raise ValueError('Source image is not supported')
        src_meta = src.meta
        nbands = src.count
        src_height = src.height
        src_width = src.width
        src_rast = src.read()
        # Reshape to (pixels, bands) so each pixel becomes one sample.
        src_img = reshape_raster(src_rast, mode='image')
        ds = src_img.reshape((-1, nbands))
        # Predict labels using the chosen model.
        pred_labels = knn_model.predict(ds)
        # Restore the image shape and wrap the labels as a single-band raster.
        pred_result = pred_labels.reshape(src_height, src_width)
        src_meta.update({'count': 1})
        classified = array2raster(pred_result, metadata=src_meta)
        return classified
__init__(self, X_train, y_train, X_test, y_test)
special
¶
Initializes the KNN class with the provided training and testing data.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
X_train |
ndarray or DataFrame |
The training features for model fitting. |
required |
y_train |
ndarray or Series |
The training labels for model fitting. |
required |
X_test |
ndarray or DataFrame |
The test features for model validation. |
required |
y_test |
ndarray or Series |
The test labels for model validation. |
required |
Source code in geonate/classify.py
def __init__(self, X_train, y_train, X_test, y_test):
    """
    Initializes the KNN class with the provided training and testing data.

    Args:
        X_train (ndarray or DataFrame): The training features for model fitting.
        y_train (ndarray or Series): The training labels for model fitting.
        X_test (ndarray or DataFrame): The test features for model validation.
        y_test (ndarray or Series): The test labels for model validation.
    """
    # Store the train/test split on the instance.
    self.X_train, self.y_train = X_train, y_train
    self.X_test, self.y_test = X_test, y_test
    # No models exist yet; model() below creates the initial one.
    self.initial_knn = None
    self.tuned_knn = None
    # Fit and validate the initial model right away.
    self.model()
classify(self, src, model=None)
¶
Classifies an image using the trained or tuned KNN model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
rasterio.DatasetReader |
A rasterio object representing the image to classify. |
required |
model |
KNeighborsClassifier |
Trained KNN model to classify the image. If None, uses the tuned KNN model if available, otherwise the naive KNN model. |
None |
Returns:
| Type | Description |
|---|---|
rasterio.DatasetReader |
The classified image as a raster object. |
Source code in geonate/classify.py
def classify(self, src, model=None):
    """
    Classifies an image using the trained or tuned KNN model.

    Args:
        src (rasterio.DatasetReader): A rasterio object representing the image to classify.
        model (KNeighborsClassifier, optional): Trained KNN model to classify the image.
            If None, uses the tuned KNN model if available, otherwise the naive KNN model.

    Returns:
        rasterio.DatasetReader: The classified image as a raster object.
    """
    import rasterio
    from geonate.common import reshape_raster, array2raster

    # Pick the estimator: caller-supplied model wins, then tuned, then initial.
    if model is None:
        estimator = self.tuned_knn if self.tuned_knn is not None else self.initial_knn
    else:
        estimator = model
    # Only rasterio datasets are accepted as input imagery.
    if not isinstance(src, rasterio.DatasetReader):
        raise ValueError('Source image is not supported')
    meta = src.meta
    band_count = src.count
    height, width = src.height, src.width
    raster = src.read()
    # Flatten the image so every pixel is one (bands,) feature vector.
    image = reshape_raster(raster, mode='image')
    samples = image.reshape((-1, band_count))
    # Predict a label for every pixel.
    labels = estimator.predict(samples)
    # Fold the flat predictions back into image shape and wrap as a raster.
    result = labels.reshape(height, width)
    meta.update({'count': 1})
    return array2raster(result, metadata=meta)
model(self, **kwargs)
¶
Trains and validates the initial KNN model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
**kwargs |
Additional keyword arguments for the KNeighborsClassifier model. |
{} |
Returns:
| Type | Description |
|---|---|
KNeighborsClassifier |
The trained KNN model. |
Source code in geonate/classify.py
def model(self, **kwargs):
    """
    Trains and validates the initial KNN model.

    Args:
        **kwargs: Additional keyword arguments for the KNeighborsClassifier model.

    Returns:
        KNeighborsClassifier: The trained KNN model.
    """
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    # Fit a fresh estimator on the training split.
    estimator = KNeighborsClassifier(**kwargs)
    estimator.fit(self.X_train, self.y_train)
    # Score on the held-out split and keep the metrics on the instance.
    predictions = estimator.predict(self.X_test)
    self.accuracy = accuracy_score(self.y_test, predictions)
    self.confusion_matrix = confusion_matrix(self.y_test, predictions)
    # Row-normalized confusion matrix, expressed as percentages.
    row_totals = self.confusion_matrix.sum(axis=1, keepdims=True)
    self.confusion_matrix_percent = self.confusion_matrix.astype(float) / row_totals * 100
    self.classification_report = classification_report(self.y_test, predictions)
    self.initial_knn = estimator
    return self.initial_knn
tune(self, method='random', n_neighbors=[3, 5, 7, 9, 11], weights=['uniform', 'distance'], n_iter=5, cv=5, n_job=-1)
¶
Tunes the best parameters for the KNN classifier using random search or grid search methods.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
method |
str |
The tuning method to use ('random' or 'grid'). Default is 'random'. |
'random' |
n_neighbors |
list |
List of values for the number of neighbors to use. Default is [3, 5, 7, 9, 11]. |
[3, 5, 7, 9, 11] |
weights |
list |
List of weight functions used in prediction. Default is ['uniform', 'distance']. |
['uniform', 'distance'] |
n_iter |
int |
Number of parameter settings that are sampled in random search. Default is 5. |
5 |
cv |
int |
Number of cross-validation folds. Default is 5. |
5 |
n_job |
int |
Number of jobs to run in parallel. Default is -1 (use all processors). |
-1 |
Returns:
| Type | Description |
|---|---|
KNeighborsClassifier |
The tuned KNN model. |
Source code in geonate/classify.py
def tune(self, method="random", n_neighbors=None, weights=None, n_iter=5, cv=5, n_job=-1):
    """
    Tunes the best parameters for the KNN classifier using random search or
    grid search methods.

    Args:
        method (str): The tuning method to use ('random' or 'grid'). Default is 'random'.
        n_neighbors (list, optional): Values for the number of neighbors to try.
            Defaults to [3, 5, 7, 9, 11].
        weights (list, optional): Weight functions used in prediction.
            Defaults to ['uniform', 'distance'].
        n_iter (int): Number of parameter settings sampled in random search. Default is 5.
        cv (int): Number of cross-validation folds. Default is 5.
        n_job (int): Number of jobs to run in parallel. Default is -1 (all processors).

    Returns:
        KNeighborsClassifier: The tuned KNN model (fitted search object).

    Raises:
        ValueError: If ``method`` is not a supported search method.
    """
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    # Defaults live here rather than in the signature to avoid shared
    # mutable default arguments.
    if n_neighbors is None:
        n_neighbors = [3, 5, 7, 9, 11]
    if weights is None:
        weights = ['uniform', 'distance']
    paras = [{
        'n_neighbors': n_neighbors,
        'weights': weights
    }]
    method_key = method.lower()
    if method_key in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
        # Bug fix: cv and n_job were previously ignored for random search.
        searcher = RandomizedSearchCV(estimator=self.initial_knn,
                                      param_distributions=paras, n_iter=n_iter,
                                      cv=cv, scoring='accuracy', verbose=True,
                                      n_jobs=n_job)
    elif method_key in ('grid', 'gridsearch', 'gridsearchcv'):
        # Bug fix: n_jobs was previously hard-coded to -1 instead of n_job.
        searcher = GridSearchCV(estimator=self.initial_knn, param_grid=paras,
                                cv=cv, scoring='accuracy', verbose=True,
                                n_jobs=n_job)
    else:
        raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
    searcher.fit(self.X_train, self.y_train)
    self.tuned_knn = searcher
    # Validate the tuned model and store validation metrics.
    tuned_y_pred = searcher.predict(self.X_test)
    self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix_percent = (
        self.tuned_confusion_matrix.astype(float)
        / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
    )
    self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
    return self.tuned_knn
RandomForest
¶
A class that encapsulates a Random Forest model for classification tasks, including model training, hyperparameter tuning using grid or random search, and classification of image data.
Attributes:
| Name | Type | Description |
|---|---|---|
X_train |
ndarray or DataFrame |
The training features for model fitting. |
y_train |
ndarray or Series |
The training labels for model fitting. |
X_test |
ndarray or DataFrame |
The test features for model validation. |
y_test |
ndarray or Series |
The test labels for model validation. |
initial_rf |
RandomForestClassifier |
The initial Random Forest model (untuned). |
tuned_rf |
RandomForestClassifier |
The Random Forest model after tuning using grid or random search. |
accuracy |
float |
Accuracy of the initial (naive) Random Forest model. |
confusion_matrix |
ndarray |
Confusion matrix of the initial (naive) Random Forest model. |
confusion_matrix_percent |
ndarray |
Percent-based confusion matrix for the initial (naive) model. |
tuned_accuracy |
float |
Accuracy of the tuned Random Forest model. |
tuned_confusion_matrix |
ndarray |
Confusion matrix of the tuned Random Forest model. |
tuned_confusion_matrix_percent |
ndarray |
Percent-based confusion matrix for the tuned model. |
Source code in geonate/classify.py
class RandomForest:
"""
A class that encapsulates a Random Forest model for classification tasks, including model training,
hyperparameter tuning using grid or random search, and classification of image data.
Attributes:
X_train (ndarray or DataFrame): The training features for model fitting.
y_train (ndarray or Series): The training labels for model fitting.
X_test (ndarray or DataFrame): The test features for model validation.
y_test (ndarray or Series): The test labels for model validation.
initial_rf (RandomForestClassifier, optional): The initial Random Forest model (untuned).
tuned_rf (RandomForestClassifier, optional): The Random Forest model after tuning using grid or random search.
accuracy (float, optional): Accuracy of the initial (naive) Random Forest model.
confusion_matrix (ndarray, optional): Confusion matrix of the initial (naive) Random Forest model.
confusion_matrix_percent (ndarray, optional): Percent-based confusion matrix for the initial (naive) model.
tuned_accuracy (float, optional): Accuracy of the tuned Random Forest model.
tuned_confusion_matrix (ndarray, optional): Confusion matrix of the tuned Random Forest model.
tuned_confusion_matrix_percent (ndarray, optional): Percent-based confusion matrix for the tuned model.
"""
def __init__(self, X_train, y_train, X_test, y_test):
    """
    Initializes the RandomForest class with the provided training and testing data.

    Args:
        X_train (ndarray or DataFrame): The training features for model fitting.
        y_train (ndarray or Series): The training labels for model fitting.
        X_test (ndarray or DataFrame): The test features for model validation.
        y_test (ndarray or Series): The test labels for model validation.
    """
    # Store the train/test split on the instance.
    self.X_train, self.y_train = X_train, y_train
    self.X_test, self.y_test = X_test, y_test
    # No models exist yet; model() below creates the initial one.
    self.initial_rf = None
    self.tuned_rf = None
    # Fit and validate the initial model right away.
    self.model()
# Initial model and validation
def model(self, n_estimators=100, **kwargs):
    """
    Trains a random forest classifier with the provided hyperparameters and validates it.

    Args:
        n_estimators (int, optional): The number of trees in the forest. Default is 100.
        **kwargs: Additional keyword arguments passed to the RandomForestClassifier.

    Returns:
        RandomForestClassifier: The trained (naive) random forest classifier.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    # Fit a fresh estimator on the training split.
    estimator = RandomForestClassifier(n_estimators=n_estimators, **kwargs)
    estimator.fit(self.X_train, self.y_train)
    # Score on the held-out split and keep the metrics on the instance.
    predictions = estimator.predict(self.X_test)
    self.accuracy = accuracy_score(self.y_test, predictions)
    self.confusion_matrix = confusion_matrix(self.y_test, predictions)
    # Row-normalized confusion matrix, expressed as percentages.
    row_totals = self.confusion_matrix.sum(axis=1, keepdims=True)
    self.confusion_matrix_percent = self.confusion_matrix.astype(float) / row_totals * 100
    self.classification_report = classification_report(self.y_test, predictions)
    self.initial_rf = estimator
    return self.initial_rf
# Tune the best parameters for classifier using random search or grid search methods
def tune(self, method="random", n_estimators=[100, 200, 300, 500, 1000], max_depth=[None, 10, 20, 30, 50], min_samples_split=[2, 5, 10, 20], min_samples_leaf=[1, 2, 3, 5], max_features=['sqrt'], n_iter=5, cv=5, n_job=-1):
    """
    Tunes the Random Forest model's hyperparameters using grid or random search.

    Args:
        method (str, optional): The method used for hyperparameter search. Can be 'random' or 'grid'. Default is 'random'.
        n_estimators (list, optional): List of values for the number of trees to search over. Default is [100, 200, 300, 500, 1000].
        max_depth (list, optional): List of values for the maximum depth of trees. Default is [None, 10, 20, 30, 50].
        min_samples_split (list, optional): List of values for the minimum number of samples required to split an internal node. Default is [2, 5, 10, 20].
        min_samples_leaf (list, optional): List of values for the minimum number of samples required to be at a leaf node. Default is [1, 2, 3, 5].
        max_features (list, optional): List of values for the number of features to consider when looking for the best split. Default is ['sqrt'].
        n_iter (int, optional): The number of parameter settings sampled by RandomizedSearchCV. Default is 5.
        cv (int, optional): Number of cross-validation folds. Default is 5.
        n_job (int, optional): The number of jobs to run in parallel. Default is -1 (use all processors).

    Returns:
        The fitted search object (RandomizedSearchCV or GridSearchCV), also stored as
        ``self.tuned_rf``; it supports ``predict`` and exposes ``best_estimator_`` / ``best_params_``.

    Raises:
        ValueError: If ``method`` is not a recognised search method.
    """
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
    # The list defaults are treated as read-only search spaces and never mutated
    paras = [{
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features
    }]
    search_method = method.lower()
    if search_method in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
        # BUGFIX: cv and n_job were previously ignored for the random search
        tuned_model = RandomizedSearchCV(estimator=self.initial_rf, param_distributions=paras, n_iter=n_iter, cv=cv, n_jobs=n_job, scoring='accuracy', verbose=True)
    elif search_method in ('grid', 'gridsearch', 'gridsearchcv'):
        # BUGFIX: n_jobs was hard-coded to -1; it now honors the n_job argument
        tuned_model = GridSearchCV(estimator=self.initial_rf, param_grid=paras, cv=cv, n_jobs=n_job, scoring='accuracy', verbose=True)
    else:
        raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
    tuned_model.fit(self.X_train, self.y_train)
    self.tuned_rf = tuned_model
    # Validate the tuned model and store validation metrics
    tuned_y_pred = tuned_model.predict(self.X_test)
    self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix_percent = self.tuned_confusion_matrix.astype(float) / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
    self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
    return self.tuned_rf
# Classify image
def classify(self, src, model=None):
    """
    Classify a raster image with the trained or tuned Random Forest model.

    Args:
        src (rasterio.DatasetReader): Raster image to classify.
        model (object, optional): Trained model to use. If None, prefers the
            tuned model when available, otherwise the initial model.

    Returns:
        The classified single-band raster produced by ``array2raster``.

    Raises:
        ValueError: If ``src`` is not a rasterio DatasetReader.
    """
    import rasterio
    from .common import reshape_raster, array2raster
    # Pick the model: explicit argument > tuned model > initial model
    if model is None:
        chosen_model = self.initial_rf if self.tuned_rf is None else self.tuned_rf
    else:
        chosen_model = model
    if not isinstance(src, rasterio.DatasetReader):
        raise ValueError('Source image is not supported')
    meta = src.meta
    band_count = src.count
    height = src.height
    width = src.width
    raster_data = src.read()
    # Flatten the image to (n_pixels, n_bands) for per-pixel prediction
    image = reshape_raster(raster_data, mode='image')
    pixels = image.reshape((-1, band_count))
    labels = chosen_model.predict(pixels)
    # Restore the raster geometry and wrap as a single-band raster
    label_grid = labels.reshape(height, width)
    meta.update({'count': 1})
    return array2raster(label_grid, metadata=meta)
__init__(self, X_train, y_train, X_test, y_test)
special
¶
Initializes the RandomForest class with the provided training and testing data.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
X_train |
ndarray or DataFrame |
The training features for model fitting. |
required |
y_train |
ndarray or Series |
The training labels for model fitting. |
required |
X_test |
ndarray or DataFrame |
The test features for model validation. |
required |
y_test |
ndarray or Series |
The test labels for model validation. |
required |
Source code in geonate/classify.py
def __init__(self, X_train, y_train, X_test, y_test):
"""
Initializes the RandomForest class with the provided training and testing data.
Args:
X_train (ndarray or DataFrame): The training features for model fitting.
y_train (ndarray or Series): The training labels for model fitting.
X_test (ndarray or DataFrame): The test features for model validation.
y_test (ndarray or Series): The test labels for model validation.
"""
self.X_train = X_train
self.y_train = y_train
self.X_test = X_test
self.y_test = y_test
# Model slots: populated by model() (naive fit) and tune() (search result)
self.initial_rf = None
self.tuned_rf = None
# Automatically run the initial model so validation metrics are available immediately
self.model()
classify(self, src, model=None)
¶
Classifies an image using the trained or tuned Random Forest model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
rasterio.DatasetReader |
A rasterio object representing the image to classify. |
required |
model |
object |
trained Random forest model to classify image. |
None |
Returns:
| Type | Description |
|---|---|
rasterio.DatasetReader |
The classified image as a raster object. |
Source code in geonate/classify.py
def classify(self, src, model=None):
    """
    Classify a raster image with the trained or tuned Random Forest model.

    Args:
        src (rasterio.DatasetReader): Raster image to classify.
        model (object, optional): Trained model to use. If None, prefers the
            tuned model when available, otherwise the initial model.

    Returns:
        The classified single-band raster produced by ``array2raster``.

    Raises:
        ValueError: If ``src`` is not a rasterio DatasetReader.
    """
    import rasterio
    from .common import reshape_raster, array2raster
    # Pick the model: explicit argument > tuned model > initial model
    if model is None:
        chosen_model = self.initial_rf if self.tuned_rf is None else self.tuned_rf
    else:
        chosen_model = model
    if not isinstance(src, rasterio.DatasetReader):
        raise ValueError('Source image is not supported')
    meta = src.meta
    band_count = src.count
    height = src.height
    width = src.width
    raster_data = src.read()
    # Flatten the image to (n_pixels, n_bands) for per-pixel prediction
    image = reshape_raster(raster_data, mode='image')
    pixels = image.reshape((-1, band_count))
    labels = chosen_model.predict(pixels)
    # Restore the raster geometry and wrap as a single-band raster
    label_grid = labels.reshape(height, width)
    meta.update({'count': 1})
    return array2raster(label_grid, metadata=meta)
model(self, n_estimators=100, **kwargs)
¶
Trains a random forest classifier with the provided hyperparameters and validates it.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
n_estimators |
int |
The number of trees in the forest. Default is 100. |
100 |
**kwargs |
Additional keyword arguments passed to the RandomForestClassifier. |
{} |
Returns:
| Type | Description |
|---|---|
RandomForestClassifier |
The trained (naive) random forest classifier. |
Source code in geonate/classify.py
def model(self, n_estimators=100, **kwargs):
    """
    Fit a naive random forest classifier and record its validation metrics.

    Args:
        n_estimators (int, optional): Number of trees in the forest. Default is 100.
        **kwargs: Extra keyword arguments forwarded to RandomForestClassifier.

    Returns:
        RandomForestClassifier: The fitted (untuned) classifier, also stored as ``self.initial_rf``.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    # Build and train the classifier on the training split
    classifier = RandomForestClassifier(n_estimators=n_estimators, **kwargs)
    classifier.fit(self.X_train, self.y_train)

    # Score the classifier on the held-out test split
    predictions = classifier.predict(self.X_test)
    self.accuracy = accuracy_score(self.y_test, predictions)
    self.confusion_matrix = confusion_matrix(self.y_test, predictions)
    # Row-normalised confusion matrix expressed as percentages
    row_totals = self.confusion_matrix.sum(axis=1, keepdims=True)
    self.confusion_matrix_percent = self.confusion_matrix.astype(float) / row_totals * 100
    self.classification_report = classification_report(self.y_test, predictions)
    self.initial_rf = classifier
    return self.initial_rf
tune(self, method='random', n_estimators=[100, 200, 300, 500, 1000], max_depth=[None, 10, 20, 30, 50], min_samples_split=[2, 5, 10, 20], min_samples_leaf=[1, 2, 3, 5], max_features=['sqrt'], n_iter=5, cv=5, n_job=-1)
¶
Tunes the Random Forest model's hyperparameters using grid or random search.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
method |
str |
The method used for hyperparameter search. Can be 'random' or 'grid'. Default is 'random'. |
'random' |
n_estimators |
list |
List of values for the number of trees to search over. Default is [100, 200, 300, 500, 1000]. |
[100, 200, 300, 500, 1000] |
max_depth |
list |
List of values for the maximum depth of trees. Default is [None, 10, 20, 30, 50]. |
[None, 10, 20, 30, 50] |
min_samples_split |
list |
List of values for the minimum number of samples required to split an internal node. Default is [2, 5, 10, 20]. |
[2, 5, 10, 20] |
min_samples_leaf |
list |
List of values for the minimum number of samples required to be at a leaf node. Default is [1, 2, 3, 5]. |
[1, 2, 3, 5] |
max_features |
list |
List of values for the number of features to consider when looking for the best split. Default is ['sqrt']. |
['sqrt'] |
n_iter |
int |
The number of parameter settings sampled by RandomizedSearchCV. Default is 5. |
5 |
cv |
int |
Number of cross-validation folds. Default is 5. |
5 |
n_job |
int |
The number of jobs to run in parallel. Default is -1 (use all processors). |
-1 |
Returns:
| Type | Description |
|---|---|
RandomForestClassifier |
The tuned Random Forest classifier. |
Source code in geonate/classify.py
def tune(self, method="random", n_estimators=[100, 200, 300, 500, 1000], max_depth=[None, 10, 20, 30, 50], min_samples_split=[2, 5, 10, 20], min_samples_leaf=[1, 2, 3, 5], max_features=['sqrt'], n_iter=5, cv=5, n_job=-1):
    """
    Tunes the Random Forest model's hyperparameters using grid or random search.

    Args:
        method (str, optional): The method used for hyperparameter search. Can be 'random' or 'grid'. Default is 'random'.
        n_estimators (list, optional): List of values for the number of trees to search over. Default is [100, 200, 300, 500, 1000].
        max_depth (list, optional): List of values for the maximum depth of trees. Default is [None, 10, 20, 30, 50].
        min_samples_split (list, optional): List of values for the minimum number of samples required to split an internal node. Default is [2, 5, 10, 20].
        min_samples_leaf (list, optional): List of values for the minimum number of samples required to be at a leaf node. Default is [1, 2, 3, 5].
        max_features (list, optional): List of values for the number of features to consider when looking for the best split. Default is ['sqrt'].
        n_iter (int, optional): The number of parameter settings sampled by RandomizedSearchCV. Default is 5.
        cv (int, optional): Number of cross-validation folds. Default is 5.
        n_job (int, optional): The number of jobs to run in parallel. Default is -1 (use all processors).

    Returns:
        The fitted search object (RandomizedSearchCV or GridSearchCV), also stored as
        ``self.tuned_rf``; it supports ``predict`` and exposes ``best_estimator_`` / ``best_params_``.

    Raises:
        ValueError: If ``method`` is not a recognised search method.
    """
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
    # The list defaults are treated as read-only search spaces and never mutated
    paras = [{
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'max_features': max_features
    }]
    search_method = method.lower()
    if search_method in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
        # BUGFIX: cv and n_job were previously ignored for the random search
        tuned_model = RandomizedSearchCV(estimator=self.initial_rf, param_distributions=paras, n_iter=n_iter, cv=cv, n_jobs=n_job, scoring='accuracy', verbose=True)
    elif search_method in ('grid', 'gridsearch', 'gridsearchcv'):
        # BUGFIX: n_jobs was hard-coded to -1; it now honors the n_job argument
        tuned_model = GridSearchCV(estimator=self.initial_rf, param_grid=paras, cv=cv, n_jobs=n_job, scoring='accuracy', verbose=True)
    else:
        raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
    tuned_model.fit(self.X_train, self.y_train)
    self.tuned_rf = tuned_model
    # Validate the tuned model and store validation metrics
    tuned_y_pred = tuned_model.predict(self.X_test)
    self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix_percent = self.tuned_confusion_matrix.astype(float) / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
    self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
    return self.tuned_rf
SVM
¶
A class that encapsulates a Support Vector Machine (SVM) model for classification tasks, including model training, hyperparameter tuning using grid or random search, and classification of image data.
Attributes:
| Name | Type | Description |
|---|---|---|
X_train |
ndarray or DataFrame |
The training features for model fitting. |
y_train |
ndarray or Series |
The training labels for model fitting. |
X_test |
ndarray or DataFrame |
The test features for model validation. |
y_test |
ndarray or Series |
The test labels for model validation. |
initial_svm |
SVC |
The initial SVM model (untuned). |
tuned_svm |
SVC |
The SVM model after tuning using grid or random search. |
accuracy |
float |
Accuracy of the initial (naive) SVM model. |
confusion_matrix |
ndarray |
Confusion matrix of the initial (naive) SVM model. |
confusion_matrix_percent |
ndarray |
Percent-based confusion matrix for the initial (naive) model. |
tuned_accuracy |
float |
Accuracy of the tuned SVM model. |
tuned_confusion_matrix |
ndarray |
Confusion matrix of the tuned SVM model. |
tuned_confusion_matrix_percent |
ndarray |
Percent-based confusion matrix for the tuned model. |
Source code in geonate/classify.py
class SVM:
    """
    A wrapper around a Support Vector Machine (SVM) classifier covering model training,
    hyperparameter tuning using grid or random search, and classification of image data.

    Attributes:
        X_train (ndarray or DataFrame): The training features for model fitting.
        y_train (ndarray or Series): The training labels for model fitting.
        X_test (ndarray or DataFrame): The test features for model validation.
        y_test (ndarray or Series): The test labels for model validation.
        initial_svm (SVC, optional): The initial SVM model (untuned).
        tuned_svm (optional): The fitted search object wrapping the tuned SVM
            (RandomizedSearchCV or GridSearchCV; it supports ``predict`` and
            exposes ``best_estimator_`` / ``best_params_``).
        accuracy (float, optional): Accuracy of the initial (naive) SVM model.
        confusion_matrix (ndarray, optional): Confusion matrix of the initial (naive) SVM model.
        confusion_matrix_percent (ndarray, optional): Percent-based (row-normalised) confusion matrix for the initial model.
        classification_report (str, optional): Classification report of the initial model.
        tuned_accuracy (float, optional): Accuracy of the tuned SVM model.
        tuned_confusion_matrix (ndarray, optional): Confusion matrix of the tuned SVM model.
        tuned_confusion_matrix_percent (ndarray, optional): Percent-based (row-normalised) confusion matrix for the tuned model.
        tuned_classification_report (str, optional): Classification report of the tuned model.
    """
    def __init__(self, X_train, y_train, X_test, y_test):
        """
        Initializes the SVM class with the provided training and testing data
        and immediately fits the initial model.

        Args:
            X_train (ndarray or DataFrame): The training features for model fitting.
            y_train (ndarray or Series): The training labels for model fitting.
            X_test (ndarray or DataFrame): The test features for model validation.
            y_test (ndarray or Series): The test labels for model validation.
        """
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        # Model slots: populated by model() (naive fit) and tune() (search result)
        self.initial_svm = None
        self.tuned_svm = None
        # Automatically run the initial model so validation metrics are available immediately
        self.model()

    # Initial model and validation
    def model(self, kernel='rbf', **kwargs):
        """
        Trains and validates the initial SVM model.

        Args:
            kernel (str): Specifies the kernel type to be used in the algorithm. Default is 'rbf'.
            **kwargs: Additional keyword arguments for the SVC model.

        Returns:
            SVC: The trained SVM model, also stored as ``self.initial_svm``.
        """
        from sklearn.svm import SVC
        from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
        # Initialize and fit the model on the training split
        svm = SVC(kernel=kernel, **kwargs)
        svm.fit(self.X_train, self.y_train)
        # Validate on the held-out test split and store the metrics
        y_pred = svm.predict(self.X_test)
        self.accuracy = accuracy_score(self.y_test, y_pred)
        self.confusion_matrix = confusion_matrix(self.y_test, y_pred)
        # Row-normalised confusion matrix expressed as percentages
        self.confusion_matrix_percent = self.confusion_matrix.astype(float) / self.confusion_matrix.sum(axis=1, keepdims=True) * 100
        self.classification_report = classification_report(self.y_test, y_pred)
        self.initial_svm = svm
        return self.initial_svm

    # Tune the best parameters for classifier using random search or grid search methods
    def tune(self, method="random", kernel=['rbf'], C=[1, 2, 4, 8, 10, 16, 32, 64, 100, 128, 1000], gamma=[1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e-0], n_iter=5, cv=5, n_job=-1):
        """
        Tunes the best parameters for the SVM classifier using random search or grid search methods.

        Args:
            method (str): The tuning method to use ('random' or 'grid'). Default is 'random'.
            kernel (list): List of kernel types to be used in the algorithm. Default is ['rbf'].
            C (list): List of regularization parameters. Default is [1, 2, 4, 8, 10, 16, 32, 64, 100, 128, 1000].
            gamma (list): List of kernel coefficient values. Default is [1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e-0].
            n_iter (int): Number of parameter settings sampled by random search. Default is 5.
            cv (int): Number of cross-validation folds. Default is 5.
            n_job (int): Number of jobs to run in parallel. Default is -1 (use all processors).

        Returns:
            The fitted search object (RandomizedSearchCV or GridSearchCV), also stored as
            ``self.tuned_svm``; it supports ``predict`` and exposes ``best_estimator_``.

        Raises:
            ValueError: If ``method`` is not a recognised search method.
        """
        from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
        from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
        # The list defaults are treated as read-only search spaces and never mutated
        paras = [{
            'kernel': kernel,
            'C': C,
            'gamma': gamma
        }]
        search_method = method.lower()
        if search_method in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
            # BUGFIX: cv and n_job were previously ignored for the random search
            tuned_model = RandomizedSearchCV(estimator=self.initial_svm, param_distributions=paras, n_iter=n_iter, cv=cv, n_jobs=n_job, scoring='accuracy', verbose=True)
        elif search_method in ('grid', 'gridsearch', 'gridsearchcv'):
            # BUGFIX: n_jobs was hard-coded to -1; it now honors the n_job argument
            tuned_model = GridSearchCV(estimator=self.initial_svm, param_grid=paras, cv=cv, n_jobs=n_job, scoring='accuracy', verbose=True)
        else:
            raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
        tuned_model.fit(self.X_train, self.y_train)
        self.tuned_svm = tuned_model
        # Validate the tuned model and store the metrics
        tuned_y_pred = tuned_model.predict(self.X_test)
        self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
        self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
        self.tuned_confusion_matrix_percent = self.tuned_confusion_matrix.astype(float) / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
        self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
        return self.tuned_svm

    # Classify image
    def classify(self, src, model=None):
        """
        Classifies an image using the trained or tuned SVM model.

        Args:
            src (rasterio.DatasetReader): A rasterio object representing the image to classify.
            model (SVC, optional): Trained SVM model to classify the image. If None, uses the
                tuned SVM model if available, otherwise the naive SVM model.

        Returns:
            The classified single-band raster produced by ``array2raster``.

        Raises:
            ValueError: If ``src`` is not a rasterio DatasetReader.
        """
        import rasterio
        from .common import reshape_raster, array2raster
        # Pick the model: explicit argument > tuned model > initial model
        if model is not None:
            SVM_model = model
        else:
            SVM_model = self.tuned_svm if self.tuned_svm is not None else self.initial_svm
        if not isinstance(src, rasterio.DatasetReader):
            raise ValueError('Source image is not supported')
        src_meta = src.meta
        nbands = src.count
        src_height = src.height
        src_width = src.width
        src_rast = src.read()
        # Flatten the image to (n_pixels, n_bands) for per-pixel prediction
        src_img = reshape_raster(src_rast, mode='image')
        ds = src_img.reshape((-1, nbands))
        pred_labels = SVM_model.predict(ds)
        # Restore the raster geometry and wrap as a single-band raster
        pred_result = pred_labels.reshape(src_height, src_width)
        src_meta.update({'count': 1})
        classified = array2raster(pred_result, metadata=src_meta)
        return classified
__init__(self, X_train, y_train, X_test, y_test)
special
¶
Initializes the SVM class with the provided training and testing data.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
X_train |
ndarray or DataFrame |
The training features for model fitting. |
required |
y_train |
ndarray or Series |
The training labels for model fitting. |
required |
X_test |
ndarray or DataFrame |
The test features for model validation. |
required |
y_test |
ndarray or Series |
The test labels for model validation. |
required |
Source code in geonate/classify.py
def __init__(self, X_train, y_train, X_test, y_test):
"""
Initializes the SVM class with the provided training and testing data.
Args:
X_train (ndarray or DataFrame): The training features for model fitting.
y_train (ndarray or Series): The training labels for model fitting.
X_test (ndarray or DataFrame): The test features for model validation.
y_test (ndarray or Series): The test labels for model validation.
"""
self.X_train = X_train
self.y_train = y_train
self.X_test = X_test
self.y_test = y_test
# Model slots: populated by model() (naive fit) and tune() (search result)
self.initial_svm = None
self.tuned_svm = None
# Automatically run the initial model so validation metrics are available immediately
self.model()
classify(self, src, model=None)
¶
Classifies an image using the trained or tuned SVM model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
rasterio.DatasetReader |
A rasterio object representing the image to classify. |
required |
model |
SVC |
Trained SVM model to classify the image. If None, uses the tuned SVM model if available, otherwise the naive SVM model. |
None |
Returns:
| Type | Description |
|---|---|
rasterio.DatasetReader |
The classified image as a raster object. |
Source code in geonate/classify.py
def classify(self, src, model=None):
    """
    Classify a raster image with the trained or tuned SVM model.

    Args:
        src (rasterio.DatasetReader): Raster image to classify.
        model (SVC, optional): Trained SVM model to use. If None, prefers the
            tuned model when available, otherwise the initial model.

    Returns:
        The classified single-band raster produced by ``array2raster``.

    Raises:
        ValueError: If ``src`` is not a rasterio DatasetReader.
    """
    import rasterio
    from .common import reshape_raster, array2raster
    # Pick the model: explicit argument > tuned model > initial model
    if model is None:
        chosen_model = self.initial_svm if self.tuned_svm is None else self.tuned_svm
    else:
        chosen_model = model
    if not isinstance(src, rasterio.DatasetReader):
        raise ValueError('Source image is not supported')
    meta = src.meta
    band_count = src.count
    height = src.height
    width = src.width
    raster_data = src.read()
    # Flatten the image to (n_pixels, n_bands) for per-pixel prediction
    image = reshape_raster(raster_data, mode='image')
    pixels = image.reshape((-1, band_count))
    labels = chosen_model.predict(pixels)
    # Restore the raster geometry and wrap as a single-band raster
    label_grid = labels.reshape(height, width)
    meta.update({'count': 1})
    return array2raster(label_grid, metadata=meta)
model(self, kernel='rbf', **kwargs)
¶
Trains and validates the initial SVM model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
kernel |
str |
Specifies the kernel type to be used in the algorithm. Default is 'rbf'. |
'rbf' |
**kwargs |
Additional keyword arguments for the SVC model. |
{} |
Returns:
| Type | Description |
|---|---|
SVC |
The trained SVM model. |
Source code in geonate/classify.py
def model(self, kernel='rbf', **kwargs):
    """
    Fit the initial SVM classifier and record its validation metrics.

    Args:
        kernel (str): Kernel type used by the algorithm. Default is 'rbf'.
        **kwargs: Extra keyword arguments forwarded to SVC.

    Returns:
        SVC: The fitted (untuned) model, also stored as ``self.initial_svm``.
    """
    from sklearn.svm import SVC
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    # Build and train the classifier on the training split
    estimator = SVC(kernel=kernel, **kwargs)
    estimator.fit(self.X_train, self.y_train)

    # Score the classifier on the held-out test split
    predicted = estimator.predict(self.X_test)
    self.accuracy = accuracy_score(self.y_test, predicted)
    self.confusion_matrix = confusion_matrix(self.y_test, predicted)
    # Row-normalised confusion matrix expressed as percentages
    totals = self.confusion_matrix.sum(axis=1, keepdims=True)
    self.confusion_matrix_percent = self.confusion_matrix.astype(float) / totals * 100
    self.classification_report = classification_report(self.y_test, predicted)
    self.initial_svm = estimator
    return self.initial_svm
tune(self, method='random', kernel=['rbf'], C=[1, 2, 4, 8, 10, 16, 32, 64, 100, 128, 1000], gamma=[1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1, 1.0], n_iter=5, cv=5, n_job=-1)
¶
Tunes the best parameters for the SVM classifier using random search or grid search methods.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
method |
str |
The tuning method to use ('random' or 'grid'). Default is 'random'. |
'random' |
kernel |
list |
List of kernel types to be used in the algorithm. Default is ['rbf']. |
['rbf'] |
C |
list |
List of regularization parameters. Default is [1, 2, 4, 8, 10, 16, 32, 64, 100, 128, 1000]. |
[1, 2, 4, 8, 10, 16, 32, 64, 100, 128, 1000] |
gamma |
list |
List of kernel coefficient values. Default is [1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e-0]. |
[1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1, 1.0] |
n_iter |
int |
Number of parameter settings that are sampled in random search. Default is 5. |
5 |
cv |
int |
Number of cross-validation folds. Default is 5. |
5 |
n_job |
int |
Number of jobs to run in parallel. Default is -1 (use all processors). |
-1 |
Returns:
| Type | Description |
|---|---|
SVC |
The tuned SVM model. |
Source code in geonate/classify.py
def tune(self, method="random", kernel=['rbf'], C=[1, 2, 4, 8, 10, 16, 32, 64, 100, 128, 1000], gamma=[1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e-0], n_iter=5, cv=5, n_job=-1):
    """
    Tunes the best parameters for the SVM classifier using random search or grid search methods.

    Args:
        method (str): The tuning method to use ('random' or 'grid'). Default is 'random'.
        kernel (list): List of kernel types to be used in the algorithm. Default is ['rbf'].
        C (list): List of regularization parameters. Default is [1, 2, 4, 8, 10, 16, 32, 64, 100, 128, 1000].
        gamma (list): List of kernel coefficient values. Default is [1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e-0].
        n_iter (int): Number of parameter settings sampled by random search. Default is 5.
        cv (int): Number of cross-validation folds. Default is 5.
        n_job (int): Number of jobs to run in parallel. Default is -1 (use all processors).

    Returns:
        The fitted search object (RandomizedSearchCV or GridSearchCV), also stored as
        ``self.tuned_svm``; it supports ``predict`` and exposes ``best_estimator_``.

    Raises:
        ValueError: If ``method`` is not a recognised search method.
    """
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
    # The list defaults are treated as read-only search spaces and never mutated
    paras = [{
        'kernel': kernel,
        'C': C,
        'gamma': gamma
    }]
    search_method = method.lower()
    if search_method in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
        # BUGFIX: cv and n_job were previously ignored for the random search
        tuned_model = RandomizedSearchCV(estimator=self.initial_svm, param_distributions=paras, n_iter=n_iter, cv=cv, n_jobs=n_job, scoring='accuracy', verbose=True)
    elif search_method in ('grid', 'gridsearch', 'gridsearchcv'):
        # BUGFIX: n_jobs was hard-coded to -1; it now honors the n_job argument
        tuned_model = GridSearchCV(estimator=self.initial_svm, param_grid=paras, cv=cv, n_jobs=n_job, scoring='accuracy', verbose=True)
    else:
        raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
    tuned_model.fit(self.X_train, self.y_train)
    self.tuned_svm = tuned_model
    # Validate the tuned model and store the metrics
    tuned_y_pred = tuned_model.predict(self.X_test)
    self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix_percent = self.tuned_confusion_matrix.astype(float) / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
    self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
    return self.tuned_svm
XGBoost
¶
A wrapper class for XGBoost classification, including model training, hyperparameter tuning, and classification of raster images.
Attributes:
| Name | Type | Description |
|---|---|---|
X_train |
array-like |
Training feature set. |
y_train |
array-like |
Training labels. |
X_test |
array-like |
Testing feature set. |
y_test |
array-like |
Testing labels. |
initial_xgb |
XGBClassifier or None |
The initial trained XGBoost model. |
tuned_xgb |
XGBClassifier or None |
The tuned XGBoost model (if tuning is performed). |
accuracy |
float |
Accuracy of the initial model on the test set. |
confusion_matrix |
ndarray |
Confusion matrix of the initial model. |
confusion_matrix_percent |
ndarray |
Normalized confusion matrix as percentages. |
classification_report |
str |
Classification report for the initial model. |
tuned_accuracy |
float |
Accuracy of the tuned model. |
tuned_confusion_matrix |
ndarray |
Confusion matrix of the tuned model. |
tuned_confusion_matrix_percent |
ndarray |
Normalized confusion matrix for the tuned model. |
tuned_classification_report |
str |
Classification report for the tuned model. |
Source code in geonate/classify.py
class XGBoost:
"""
A wrapper class for XGBoost classification, including model training, hyperparameter tuning, and classification of raster images.
Attributes:
X_train (array-like): Training feature set.
y_train (array-like): Training labels.
X_test (array-like): Testing feature set.
y_test (array-like): Testing labels.
initial_xgb (XGBClassifier or None): The initial trained XGBoost model.
tuned_xgb (XGBClassifier or None): The tuned XGBoost model (if tuning is performed).
accuracy (float): Accuracy of the initial model on the test set.
confusion_matrix (ndarray): Confusion matrix of the initial model.
confusion_matrix_percent (ndarray): Normalized confusion matrix as percentages.
classification_report (str): Classification report for the initial model.
tuned_accuracy (float): Accuracy of the tuned model.
tuned_confusion_matrix (ndarray): Confusion matrix of the tuned model.
tuned_confusion_matrix_percent (ndarray): Normalized confusion matrix for the tuned model.
tuned_classification_report (str): Classification report for the tuned model.
"""
def __init__(self, X_train, y_train, X_test, y_test):
"""
Initializes the XGBoost classifier with training and testing data, and automatically trains an initial model.
Args:
X_train (array-like): Training feature set.
y_train (array-like): Training labels.
X_test (array-like): Testing feature set.
y_test (array-like): Testing labels.
"""
self.X_train = X_train
self.y_train = y_train
self.X_test = X_test
self.y_test = y_test
# Model slots: populated by model() (naive fit) and tune() (search result)
self.initial_xgb = None
self.tuned_xgb = None
# Automatically run the initial model so validation metrics are available immediately
self.model()
# Initial model and validation
def model(self, **kwargs):
"""
Trains an initial XGBoost classifier using the provided training data and evaluates its performance on the test set.
Args:
**kwargs: Additional parameters to pass to XGBClassifier.
Returns:
XGBClassifier: The trained initial model.
"""
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Initialize model and fit the model
xgb = XGBClassifier(**kwargs)
xgb.fit(self.X_train, self.y_train)
# Validate the initial model and return validation metrics
y_pred = xgb.predict(self.X_test)
self.accuracy = accuracy_score(self.y_test, y_pred)
self.confusion_matrix = confusion_matrix(self.y_test, y_pred)
self.confusion_matrix_percent = self.confusion_matrix.astype(float) / self.confusion_matrix.sum(axis=1, keepdims=True) * 100
self.classification_report = classification_report(self.y_test, y_pred)
self.initial_xgb = xgb
return self.initial_xgb
# Tune the best parameters for classifier using random search or grid search methods
def tune(self, method="random", n_estimators=[100, 200, 300, 500, 1000], max_depth=[3, 5, 7, 9], learning_rate=[0.0001, 0.001, 0.01, 0.1], subsample=[0.5, 0.7, 1], n_iter=5, cv=5, n_job=-1):
"""
Tunes the hyperparameters of the XGBoost classifier using either RandomizedSearchCV or GridSearchCV.
Args:
method (str, optional): Search method, either "random" (default) or "grid".
n_estimators (list, optional): List of values for the number of trees.
max_depth (list, optional): List of values for the maximum tree depth.
learning_rate (list, optional): List of learning rates.
subsample (list, optional): List of subsampling ratios.
n_iter (int, optional): Number of iterations for random search (ignored for grid search).
cv (int, optional): Number of cross-validation folds.
n_job (int, optional): Number of parallel jobs (currently not used in the function).
Returns:
Best estimator from tuning process (XGBClassifier).
"""
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
paras = [{
'n_estimators': n_estimators,
'max_depth': max_depth,
'learning_rate': learning_rate,
'subsample': subsample
}]
if method.lower() == 'random' or method.lower() =='randomized' or method.lower() =='randomizedsearch' or method.lower() =='randomizedsearchcv':
random_searched = RandomizedSearchCV(estimator= self.initial_xgb, param_distributions=paras, n_iter= n_iter, scoring='accuracy', verbose=True)
random_searched.fit(self.X_train, self.y_train)
tuned_model = random_searched
self.tuned_xgb = tuned_model
elif method.lower() == 'grid' or method.lower() == 'gridsearch' or method.lower() == 'gridsearchcv':
grid_search = GridSearchCV(estimator= self.initial_xgb, param_grid= paras, cv=cv, scoring='accuracy', verbose=True, n_jobs=-1)
grid_search.fit(self.X_train, self.y_train)
tuned_model = grid_search
self.tuned_xgb = tuned_model
else:
raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')
# Validate the initial model and return validation metrics
tuned_y_pred = tuned_model.predict(self.X_test)
self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
self.tuned_confusion_matrix_percent = self.tuned_confusion_matrix.astype(float) / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
return self.tuned_xgb
# Classify image
def classify(self, src, model=None):
"""
Classifies an input raster image using the trained XGBoost model.
Args:
src (rasterio.DatasetReader): The source raster image to classify.
model (XGBClassifier, optional): The model to use for classification. If not provided, the tuned model is used (or the initial model if tuning was not performed).
Returns:
rasterio.io.MemoryFile: The classified raster image.
"""
import rasterio
from geonate.common import reshape_raster, array2raster
# Define the random forest model to use
if model is not None:
XGB_model = model
else:
XGB_model = self.tuned_xgb if self.tuned_xgb is not None else self.initial_xgb
# Define input parameters
if not isinstance(src, rasterio.DatasetReader):
raise ValueError('Source image is not supported')
else:
src_meta = src.meta
nbands = src.count
src_height = src.height
src_width = src.width
src_rast = src.read()
# Reshape and flatten data
src_img = reshape_raster(src_rast, mode='image')
ds = src_img.reshape((-1, nbands))
# Predict labels using the defined model
pred_labels = XGB_model.predict(ds)
# Reshape data and convert to raster format
pred_result = pred_labels.reshape(src_height, src_width)
src_meta.update({'count': 1})
classified = array2raster(pred_result, metadata=src_meta)
return classified
__init__(self, X_train, y_train, X_test, y_test)
special
¶
Initializes the XGBoost classifier with training and testing data, and automatically trains an initial model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
X_train |
array-like |
Training feature set. |
required |
y_train |
array-like |
Training labels. |
required |
X_test |
array-like |
Testing feature set. |
required |
y_test |
array-like |
Testing labels. |
required |
Source code in geonate/classify.py
def __init__(self, X_train, y_train, X_test, y_test):
    """
    Set up the classifier with train/test splits and immediately fit a baseline model.

    Args:
        X_train (array-like): Training feature set.
        y_train (array-like): Training labels.
        X_test (array-like): Testing feature set.
        y_test (array-like): Testing labels.
    """
    # Keep the data splits on the instance for later training/validation
    self.X_train, self.y_train = X_train, y_train
    self.X_test, self.y_test = X_test, y_test
    # No models have been fitted yet
    self.initial_xgb = None
    self.tuned_xgb = None
    # Train the baseline model right away
    self.model()
classify(self, src, model=None)
¶
Classifies an input raster image using the trained XGBoost model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
src |
rasterio.DatasetReader |
The source raster image to classify. |
required |
model |
XGBClassifier |
The model to use for classification. If not provided, the tuned model is used (or the initial model if tuning was not performed). |
None |
Returns:
| Type | Description |
|---|---|
rasterio.io.MemoryFile |
The classified raster image. |
Source code in geonate/classify.py
def classify(self, src, model=None):
    """
    Classifies an input raster image using the trained XGBoost model.

    Args:
        src (rasterio.DatasetReader): The source raster image to classify.
        model (XGBClassifier, optional): The model to use for classification. If not provided, the tuned model is used (or the initial model if tuning was not performed).

    Returns:
        rasterio.io.MemoryFile: The classified raster image.

    Raises:
        ValueError: If src is not a rasterio.DatasetReader.
    """
    import rasterio
    from geonate.common import reshape_raster, array2raster
    # Define the XGBoost model to use: explicit argument wins, then tuned, then initial
    if model is not None:
        XGB_model = model
    else:
        XGB_model = self.tuned_xgb if self.tuned_xgb is not None else self.initial_xgb
    # Validate the input and capture raster dimensions/metadata
    if not isinstance(src, rasterio.DatasetReader):
        raise ValueError('Source image is not supported')
    else:
        src_meta = src.meta
        nbands = src.count
        src_height = src.height
        src_width = src.width
        src_rast = src.read()
    # Reshape and flatten data to (pixels, bands) for per-pixel prediction
    src_img = reshape_raster(src_rast, mode='image')
    ds = src_img.reshape((-1, nbands))
    # Predict labels using the defined model
    pred_labels = XGB_model.predict(ds)
    # Reshape predictions back to the image grid and convert to raster format
    pred_result = pred_labels.reshape(src_height, src_width)
    src_meta.update({'count': 1})  # output is a single-band classified raster
    classified = array2raster(pred_result, metadata=src_meta)
    return classified
model(self, **kwargs)
¶
Trains an initial XGBoost classifier using the provided training data and evaluates its performance on the test set.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
**kwargs |
Additional parameters to pass to XGBClassifier. |
{} |
Returns:
| Type | Description |
|---|---|
XGBClassifier |
The trained initial model. |
Source code in geonate/classify.py
def model(self, **kwargs):
    """
    Fit a baseline XGBoost classifier on the training split and record its
    validation metrics against the test split.

    Args:
        **kwargs: Additional parameters forwarded to XGBClassifier.

    Returns:
        XGBClassifier: The trained initial model.
    """
    from xgboost import XGBClassifier
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    # Build and fit the classifier
    classifier = XGBClassifier(**kwargs)
    classifier.fit(self.X_train, self.y_train)

    # Score on the held-out test set and keep the metrics on the instance
    predictions = classifier.predict(self.X_test)
    self.accuracy = accuracy_score(self.y_test, predictions)
    cm = confusion_matrix(self.y_test, predictions)
    self.confusion_matrix = cm
    # Express each row (true class) as percentages
    row_totals = cm.sum(axis=1, keepdims=True)
    self.confusion_matrix_percent = cm.astype(float) / row_totals * 100
    self.classification_report = classification_report(self.y_test, predictions)

    self.initial_xgb = classifier
    return self.initial_xgb
tune(self, method='random', n_estimators=[100, 200, 300, 500, 1000], max_depth=[3, 5, 7, 9], learning_rate=[0.0001, 0.001, 0.01, 0.1], subsample=[0.5, 0.7, 1], n_iter=5, cv=5, n_job=-1)
¶
Tunes the hyperparameters of the XGBoost classifier using either RandomizedSearchCV or GridSearchCV.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
method |
str |
Search method, either "random" (default) or "grid". |
'random' |
n_estimators |
list |
List of values for the number of trees. |
[100, 200, 300, 500, 1000] |
max_depth |
list |
List of values for the maximum tree depth. |
[3, 5, 7, 9] |
learning_rate |
list |
List of learning rates. |
[0.0001, 0.001, 0.01, 0.1] |
subsample |
list |
List of subsampling ratios. |
[0.5, 0.7, 1] |
n_iter |
int |
Number of iterations for random search (ignored for grid search). |
5 |
cv |
int |
Number of cross-validation folds. |
5 |
n_job |
int |
Number of parallel jobs (currently not used in the function). |
-1 |
Returns:
| Type | Description |
|---|---|
Best estimator from tuning process (XGBClassifier). |
Source code in geonate/classify.py
def tune(self, method="random", n_estimators=[100, 200, 300, 500, 1000], max_depth=[3, 5, 7, 9], learning_rate=[0.0001, 0.001, 0.01, 0.1], subsample=[0.5, 0.7, 1], n_iter=5, cv=5, n_job=-1):
    """
    Tunes the hyperparameters of the XGBoost classifier using either
    RandomizedSearchCV or GridSearchCV.

    Args:
        method (str, optional): Search method, either "random" (default) or "grid".
        n_estimators (list, optional): List of values for the number of trees.
        max_depth (list, optional): List of values for the maximum tree depth.
        learning_rate (list, optional): List of learning rates.
        subsample (list, optional): List of subsampling ratios.
        n_iter (int, optional): Number of iterations for random search (ignored for grid search).
        cv (int, optional): Number of cross-validation folds.
        n_job (int, optional): Number of parallel jobs passed to the search.

    Returns:
        Best estimator from tuning process (XGBClassifier).
    """
    from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    paras = [{
        'n_estimators': n_estimators,
        'max_depth': max_depth,
        'learning_rate': learning_rate,
        'subsample': subsample
    }]

    # Normalize the method name once instead of repeated .lower() calls
    method_key = method.lower()
    if method_key in ('random', 'randomized', 'randomizedsearch', 'randomizedsearchcv'):
        # Fix: cv and n_job were previously not forwarded to RandomizedSearchCV
        tuned_model = RandomizedSearchCV(estimator=self.initial_xgb, param_distributions=paras, n_iter=n_iter, cv=cv, scoring='accuracy', verbose=True, n_jobs=n_job)
        tuned_model.fit(self.X_train, self.y_train)
        self.tuned_xgb = tuned_model
    elif method_key in ('grid', 'gridsearch', 'gridsearchcv'):
        # Fix: n_jobs was previously hardcoded to -1 instead of using n_job
        tuned_model = GridSearchCV(estimator=self.initial_xgb, param_grid=paras, cv=cv, scoring='accuracy', verbose=True, n_jobs=n_job)
        tuned_model.fit(self.X_train, self.y_train)
        self.tuned_xgb = tuned_model
    else:
        raise ValueError('Tune method is not supported, the current methods are "randomizedsearch" and "gridsearchcv"')

    # Validate the tuned model and store validation metrics
    tuned_y_pred = tuned_model.predict(self.X_test)
    self.tuned_accuracy = accuracy_score(self.y_test, tuned_y_pred)
    self.tuned_confusion_matrix = confusion_matrix(self.y_test, tuned_y_pred)
    # Normalize each row (true class) to percentages
    self.tuned_confusion_matrix_percent = self.tuned_confusion_matrix.astype(float) / self.tuned_confusion_matrix.sum(axis=1, keepdims=True) * 100
    self.tuned_classification_report = classification_report(self.y_test, tuned_y_pred)
    return self.tuned_xgb
kmeans(input, n_cluster=3, max_iter=500, algorithm='lloyd', **kwargs)
¶
Perform K-Means clustering for raster image. Kmeans is a fast and simple algorithm.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
input |
rasterio.DatasetReader or np.ndarray |
Multispectral input data. Can be a raster image or a numpy array. |
required |
n_cluster |
int |
Number of clusters to form. Default is 3. |
3 |
max_iter |
int |
Maximum number of iterations of the k-means algorithm for a single run. Default is 500. |
500 |
algorithm |
str |
K-means algorithm to use. The available algorithms include "lloyd" and "elkan". "elkan" variation can be more efficient on some datasets with well-defined clusters, by using the triangle inequality. However it’s more memory intensive due to the allocation of an extra array of shape. Default is 'lloyd'. |
'lloyd' |
**kwargs |
Additional keyword arguments to pass to the KMeans model. |
{} |
Returns:
| Type | Description |
|---|---|
np.ndarray or rasterio.DatasetReader |
K-Means clustering result in the same format as the input. |
Source code in geonate/classify.py
def kmeans(input, n_cluster=3, max_iter=500, algorithm='lloyd', **kwargs):
    """
    Perform K-Means clustering for raster image. Kmeans is a fast and simple algorithm.

    Args:
        input (rasterio.DatasetReader or np.ndarray): Multispectral input data. Can be a raster image or a numpy array.
        n_cluster (int): Number of clusters to form. Default is 3.
        max_iter (int): Maximum number of iterations of the k-means algorithm for a single run. Default is 500.
        algorithm (str): K-means algorithm to use. The available algorithms include "lloyd" and "elkan". "elkan" variation can be more efficient on some datasets with well-defined clusters, by using the triangle inequality. However it's more memory intensive due to the allocation of an extra array of shape. Default is 'lloyd'.
        **kwargs: Additional keyword arguments to pass to the KMeans model.

    Returns:
        np.ndarray or rasterio.DatasetReader: K-Means clustering result in the same format as the input.

    Raises:
        ValueError: If input is not a rasterio.DatasetReader or a multi-band ndarray.
    """
    import numpy as np
    import rasterio
    from sklearn.cluster import KMeans
    from .common import array2raster, reshape_raster

    # Identify datatype and define input data
    if isinstance(input, rasterio.DatasetReader):
        # Raster image: read bands and capture geospatial metadata
        arr = input.read()
        height, width = input.shape
        nbands = input.count
        meta = input.meta
    elif isinstance(input, np.ndarray):
        # Data array: must be (bands, height, width)
        if len(input.shape) < 3:
            raise ValueError('Input must be multispectral data (multi-band)')
        arr = input
        nbands, height, width = input.shape
    else:
        raise ValueError('Input is not supported')

    # Reshape from raster to image format, then flatten from 3D to (pixels, bands)
    # Fix: removed leftover debug print of the intermediate shape
    arr_reshape_img = reshape_raster(arr, mode='image')
    img_reshaped = arr_reshape_img.reshape((-1, nbands))

    # Define KMeans model and fit it to the flattened pixels
    kmean_model = KMeans(n_clusters=n_cluster, max_iter=max_iter, algorithm=algorithm, **kwargs)
    kmean_fit = kmean_model.fit(img_reshaped)

    # Extract labels and reshape back to the image grid
    labels = kmean_fit.labels_
    km_results = labels.reshape((height, width))

    # Return output in a format similar to the input
    if isinstance(input, np.ndarray):
        return km_results
    elif isinstance(input, rasterio.DatasetReader):
        meta.update({'count': 1})  # single-band label raster
        km_results_rast = array2raster(km_results, meta)
        return km_results_rast