API Reference

`add_north_arrow(ax, relative_position=(0.05, 0.05), arrow_length=0.05, text_offset=-0.02)`

Add a north arrow to the axis.

Source code in ShallowLearn/ImageHelper.py

def add_north_arrow(ax, relative_position=(0.05, 0.05), arrow_length=0.05, text_offset=-0.02):
    """Add a north arrow to the axis."""
    xlim, ylim = ax.get_xlim(), ax.get_ylim()

    x = xlim[0] + (xlim[1] - xlim[0]) * relative_position[0]
    y = ylim[0] + (ylim[1] - ylim[0]) * relative_position[1]

    ax.arrow(x, y, 0, arrow_length, head_width=0.02 * (xlim[1] - xlim[0]), head_length=0.03 * (ylim[1] - ylim[0]), fc='black', ec='black')
    ax.text(x, y + text_offset * (ylim[1] - ylim[0]), 'N', horizontalalignment='center', verticalalignment='center', fontsize=12, fontweight='bold', c = 'black')

`apply_mask(data, mask, fill_value=0)`

Applies a mask to the data array.

Parameters:

Name	Type	Description	Default
`data`	`ndarray`	The input data array.	required
`mask`	`ndarray`	The mask array.	required
`fill_value`	`float`	The value to use where the mask is False.	`0`

Returns:

Type	Description
	numpy.ndarray: The masked data array.

Source code in ShallowLearn/ImageHelper.py

def apply_mask(data, mask, fill_value=0):
    """
    Applies a mask to the data array.

    Args:
        data (numpy.ndarray): The input data array.
        mask (numpy.ndarray): The mask array.
        fill_value (float): The value to use where the mask is False.

    Returns:
        numpy.ndarray: The masked data array.
    """
    masked_data = np.where(mask, data, fill_value)
    return masked_data

`clip_array(arr)`

Clips an input multiband array to values between 0 and 10,000, removing any negative values.

Parameters: arr (np.ndarray): A NumPy array of any shape.

Returns: np.ndarray: A clipped NumPy array with the same shape as input, where all values are >= 0 and <= 10,000.

Source code in ShallowLearn/ImageHelper.py

def clip_array(arr):
    """
    Clips an input multiband array to values between 0 and 10,000, removing any negative values.

    Parameters:
    arr (np.ndarray): A NumPy array of any shape.

    Returns:
    np.ndarray: A clipped NumPy array with the same shape as input, where all values are >= 0 and <= 10,000.
    """
    # Use np.clip to clip values of the array to a min of 0 and a max of 10,000
    # np.clip automatically handles the removal of negative values by setting the minimum to 0
    clipped_arr = np.clip(arr, 0, 10000)

    return clipped_arr

`gen_mask(img, mask=9)`

Generates a mask for the input image based on the predicted mask.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`mask`	`int`	The mask value to consider. Default is 9.	`9`

Returns:

Type	Description
	numpy.ndarray: The mask array.

Source code in ShallowLearn/ImageHelper.py

def gen_mask(img, mask=9):
    """
    Generates a mask for the input image based on the predicted mask.

    Args:
        img (numpy.ndarray): The input image array.
        mask (int): The mask value to consider. Default is 9.

    Returns:
        numpy.ndarray: The mask array.

    """
    mask = predict_mask(img, mask)
    return mask

`generate_multichannel_mask(img, mask=None, mask_val=9)`

Generates a multichannel mask for the input image based on the predicted mask. Can also be used to generate a multichannel mask for a given mask.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`mask`	`int`	The mask value to consider. Default is 9.	`None`

Returns:

Type	Description
	numpy.ndarray: The multichannel mask array.

Source code in ShallowLearn/ImageHelper.py

def generate_multichannel_mask(img, mask=None, mask_val=9):
    """
    Generates a multichannel mask for the input image based on the predicted mask.
    Can also be used to generate a multichannel mask for a given mask.

    Args:
        img (numpy.ndarray): The input image array.
        mask (int): The mask value to consider. Default is 9.

    Returns:
        numpy.ndarray: The multichannel mask array.

    """
    if mask is None:
        mask = gen_mask(img, mask_val)

    reshaped_mask = np.repeat(mask[:, :, np.newaxis], img.shape[2], axis=2)
    final_mask = img * reshaped_mask
    rescaled_image = final_mask.copy()

    for i in range(final_mask.shape[2]):
        channel_min = final_mask[:, :, i].min()
        channel_max = final_mask[:, :, i].max()
        rescaled_image[:, :, i] = (final_mask[:, :, i] - channel_min) / (channel_max - channel_min) * 255
    return rescaled_image

`load_img(path, return_meta=False, clip=True)`

Loads an image from the specified path.

Parameters:

Name	Type	Description	Default
`path`	`str`	The path to the image file.	required

Returns:

Type	Description
	numpy.ndarray: The loaded image array.

Source code in ShallowLearn/ImageHelper.py

def load_img(path, return_meta=False, clip = True):
    """
    Loads an image from the specified path.

    Args:
        path (str): The path to the image file.

    Returns:
        numpy.ndarray: The loaded image array.

    """
    img = LoadData.LoadGeoTIFF(path).load()
    if "N0400" in path:
        img -= 1000     
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 0, 1)
    if clip:
        img = clip_array(img)

    if return_meta:
        return img, LoadData.LoadGeoTIFF(path).get_metadata(), LoadData.LoadGeoTIFF(path).get_bounds()
    return img

`median_without_zeros_or_nans(images)`

Computes the median of each band in each image, excluding zeros and NaN values.

images: numpy array, shape (i, x, y, z) The input 4D array of images.

medians: numpy array, shape (i, z) The median values for each band in each image, excluding zeros and NaNs.

Source code in ShallowLearn/ImageHelper.py

def median_without_zeros_or_nans(images):
    """
    Computes the median of each band in each image, excluding zeros and NaN values.

    Parameters:
    - images: numpy array, shape (i, x, y, z)
      The input 4D array of images.

    Returns:
    - medians: numpy array, shape (i, z)
      The median values for each band in each image, excluding zeros and NaNs.
    """
    num_images = images.shape[0]
    num_bands = images.shape[3]
    medians = np.zeros((num_images, num_bands))

    for i in range(num_images):
        for z in range(num_bands):
            band_data = images[i, :, :, z]
            # Mask zeros and NaNs
            masked_data = np.ma.masked_where((band_data == 0) | np.isnan(band_data), band_data)
            medians[i, z] = np.ma.median(masked_data)

    return medians

`plot_geotiff(image_data, bounds, title='Map with coordinates', ax=None)`

Plot GeoTIFF with UTM Coordinates and a scale bar.

Parameters: - image_data: 2D or 3D array containing the raster data - bounds: Tuple containing the bounding coordinates (left, right, bottom, top)

Source code in ShallowLearn/ImageHelper.py

def plot_geotiff(image_data, bounds, title="Map with coordinates", ax = None):
    """
    Plot GeoTIFF with UTM Coordinates and a scale bar.

    Parameters:
    - image_data: 2D or 3D array containing the raster data
    - bounds: Tuple containing the bounding coordinates (left, right, bottom, top)
    """
    from matplotlib_scalebar.scalebar import ScaleBar
    from matplotlib.ticker import ScalarFormatter

    if ax is None:
        fig, ax = plt.subplots(figsize=(10, 10))
    left, bottom, right, top = bounds

    # If the image data is RGB and contains NaN values, convert it to RGBA
    if image_data.ndim == 3 and image_data.shape[2] == 3 and np.isnan(image_data).any():
        # Create an alpha channel with the same shape as one of the RGB channels
        alpha = np.ones_like(image_data[..., 0])

        # Where any of the RGB channels is NaN, set the alpha to 0
        alpha[np.isnan(image_data).any(axis=-1)] = 0

        # Create an RGBA image by stacking the RGB channels and the alpha channel along the last dimension
        image_data = np.dstack((image_data, alpha))

    ax.imshow(image_data, extent=[left, right, bottom, top])  # Adjust colormap as needed for non-RGBA data

    # Use ScalarFormatter to force no scientific notation
    formatter = ScalarFormatter()
    formatter.set_scientific(False)
    ax.xaxis.set_major_formatter(formatter)
    ax.yaxis.set_major_formatter(formatter)

    # Add a scale bar
    scalebar = ScaleBar(2, location='lower right', scale_loc='bottom', box_alpha=0)  # 1 pixel = 1 meter (assuming your image_data is in meters)
    ax.add_artist(scalebar)

    add_north_arrow(ax)

    ax.set_title(title)
    ax.set_xlabel("Easting (m)")
    ax.set_ylabel("Northing (m)")

`plot_histogram(img, plot=True, bins=50, min_value=1, title=None)`

Plots histogram for a specific channel in the input image.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`channel`	`int`	The index of the channel to be plotted. Default is 0.	required
`plot`	`bool`	Flag to indicate whether to plot the histogram or not. Default is True.	`True`
`bins`	`int`	The number of bins for the histogram. Default is 50.	`50`
`min_value`	`int`	The minimum value to consider for the histogram. Values below this threshold will be removed. Default is 1.	`1`
`title`	`str`	Title of the plot. Default is None.	`None`

Returns:

Type	Description
	None

Source code in ShallowLearn/ImageHelper.py

def plot_histogram(img,  plot=True, bins=50, min_value=1, title=None):
    """
    Plots histogram for a specific channel in the input image.

    Args:
        img (numpy.ndarray): The input image array.
        channel (int): The index of the channel to be plotted. Default is 0.
        plot (bool): Flag to indicate whether to plot the histogram or not. Default is True.
        bins (int): The number of bins for the histogram. Default is 50.
        min_value (int): The minimum value to consider for the histogram. Values below this threshold will be removed. Default is 1.
        title (str): Title of the plot. Default is None.

    Returns:
        None
    """


    if plot:
        channel_data = img.flatten()
        channel_data = channel_data[channel_data >= min_value]

        # Plotting the histogram
        plt.hist(channel_data, bins=bins, range=(0, np.max(channel_data)))

        # Customize the plot
        plt.xlabel('Value')
        plt.ylabel('Frequency')
        plt.title(title if title else f'Histogram')
        plt.show()

`plot_histograms(img, plot=True, bins=50, min_value=1, channel_legend=None, title=None, return_fig=False)`

Plots histograms for each channel in the input image using line plots.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`plot`	`bool`	Flag to indicate whether to plot the histograms or not. Default is True.	`True`
`bins`	`int`	The number of bins for the histograms. Default is 50.	`50`
`min_value`	`int`	The minimum value to consider for the histograms. Values below this threshold will be removed. Default is 1.	`1`
`channel_legend`	`dict`	Dictionary for channel legend. Keys are channel indices and values are channel names. Default is None.	`None`

Returns:

Type	Description
	None

Source code in ShallowLearn/ImageHelper.py

def plot_histograms(img, plot=True, bins=50, min_value=1, channel_legend=None, title = None, return_fig = False):
    """
    Plots histograms for each channel in the input image using line plots.

    Args:
        img (numpy.ndarray): The input image array.
        plot (bool): Flag to indicate whether to plot the histograms or not. Default is True.
        bins (int): The number of bins for the histograms. Default is 50.
        min_value (int): The minimum value to consider for the histograms. Values below this threshold will be removed. Default is 1.
        channel_legend (dict): Dictionary for channel legend. Keys are channel indices and values are channel names. Default is None.

    Returns:
        None

    """
    if len(img.shape) == 2:
        plot_histogram(img, plot=plot, bins=bins, min_value=min_value, title=title)
        return

    num_channels = img.shape[2]

    if plot:
        x = np.linspace(0, np.max(img), bins)
        for i in range(num_channels):
            channel_data = img[:, :, i].flatten()
            channel_data = channel_data[channel_data >= min_value]
            histogram, _ = np.histogram(channel_data, bins=bins, range=(0, np.max(img)))

            # Use the dictionary for legend if provided, else use default labeling
            label = channel_legend[i] if channel_legend and i in channel_legend else f'Channel {i + 1}'

            # Plot the histogram using line plot
            plt.plot(x, histogram, label=label)

        # Customize the plot
        plt.xlabel('Value')
        plt.ylabel('Frequency')
        plt.legend()

        if title is None:
            plt.title('Histogram of Each Channel')
        else:
            plt.title(title)
        if return_fig:
            return plt.gcf()
        plt.show()

`plot_hsv(img, plot=False)`

Converts the input image to the HSV color space and optionally plots the H channel.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`plot`	`bool`	Flag to indicate whether to plot the H channel or not. Default is False.	`False`

Returns:

Type	Description
	numpy.ndarray or None: The hsv array if plot=False, otherwise None.

Source code in ShallowLearn/ImageHelper.py

def plot_hsv(img, plot=False):
    """
    Converts the input image to the HSV color space and optionally plots the H channel.

    Args:
        img (numpy.ndarray): The input image array.
        plot (bool): Flag to indicate whether to plot the H channel or not. Default is False.

    Returns:
        numpy.ndarray or None: The hsv array if plot=False, otherwise None.

    """
    hsv_img = rgb2hsv(plot_rgb(img))
    h_channel = hsv_img[:, :, 0]

    if plot == False:
        return hsv_img
    # Plot the H channel
    plt.imshow(h_channel, cmap='hsv')
    plt.axis('off')
    plt.title('H Channel')
    plt.show()
    return None

`plot_lab(img, plot=False)`

Converts the input image to the LAB color space and optionally plots the L channel.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`plot`	`bool`	Flag to indicate whether to plot the L channel or not. Default is False.	`False`

Returns:

Type	Description
	numpy.ndarray or None: The lab array if plot=False, otherwise None.

Source code in ShallowLearn/ImageHelper.py

def plot_lab(img, plot=False):
    """
    Converts the input image to the LAB color space and optionally plots the L channel.

    Args:
        img (numpy.ndarray): The input image array.
        plot (bool): Flag to indicate whether to plot the L channel or not. Default is False.

    Returns:
        numpy.ndarray or None: The lab array if plot=False, otherwise None.

    """
    lab_img = rgb2lab(plot_rgb(img, plot=False))
    l_channel = lab_img[:, :, 0]
    if plot == False:
        return lab_img
    # Plot the L channel
    plt.imshow(l_channel, cmap='gray')
    plt.axis('off')
    plt.title('L Channel')
    plt.show()
    return None

`plot_rgb(img, bands=None, plot=False)`

Plots the RGB image using the specified bands.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`bands`	`list`	The list of band codes to use for the RGB channels. Default is ['B04', 'B03', 'B02'].	`None`
`plot`	`bool`	Whether to display the image plot using matplotlib. Default is False.	`False`

Returns:

Type	Description
	numpy array with selected bands: The RGB image array if plot is set to False.

Source code in ShallowLearn/ImageHelper.py

def plot_rgb(img, bands=None, plot=False):
    """
    Plots the RGB image using the specified bands.

    Args:
        img (numpy.ndarray): The input image array.
        bands (list): The list of band codes to use for the RGB channels. Default is ['B04', 'B03', 'B02'].
        plot (bool): Whether to display the image plot using matplotlib. Default is False.

    Returns:
        numpy array with selected bands: The RGB image array if plot is set to False.

    """
    if bands is None:
        bands = ['B04', 'B03', 'B02']  # Default band codes

    band_numbers = [band_mapping[band]['index'] for band in bands]

    img_shape = img.shape
    r = np.uint8(minmax_scale(img[:, :, band_numbers[0]].flatten(), feature_range=(0, 255), axis=0, copy=True)).reshape(
        img_shape[0], img_shape[1])
    g = np.uint8(minmax_scale(img[:, :, band_numbers[1]].flatten(), feature_range=(0, 255), axis=0, copy=True)).reshape(
        img_shape[0], img_shape[1])
    b = np.uint8(minmax_scale(img[:, :, band_numbers[2]].flatten(), feature_range=(0, 255), axis=0, copy=True)).reshape(
        img_shape[0], img_shape[1])
    rgb = np.dstack((r, g, b))

    if plot:
        plt.imshow(rgb)
        plt.show()
        return None

    return rgb

`plot_with_legend(array, value_dict)`

Plots a 2D array with a legend using distinct colors for discrete class labels.

Parameters: array (2D numpy array): The array to be plotted. value_dict (dict): A dictionary mapping values in the array to labels.

Source code in ShallowLearn/ImageHelper.py

def plot_with_legend(array, value_dict):
    """
    Plots a 2D array with a legend using distinct colors for discrete class labels.

    Parameters:
    array (2D numpy array): The array to be plotted.
    value_dict (dict): A dictionary mapping values in the array to labels.
    """

    # Create a colormap with a distinct color for each class
    n_classes = len(value_dict)
    cmap = plt.cm.get_cmap('Set3', n_classes)  # 'tab10' or 'Set3'

    # Create the plot
    plt.imshow(array, cmap=cmap)

    # Create a color map index for each discrete value
    colors = [cmap(i) for i in range(n_classes)]

    # Create a legend
    patches = [mpatches.Patch(color=colors[i], label=label) for i, (value, label) in enumerate(value_dict.items())]

    # Add the legend to the plot
    plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    # Show the plot
    plt.show()

`plot_ycbcr(img, plot=False)`

Converts the input image to the YCbCr color space and optionally plots the Y channel.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`plot`	`bool`	Flag to indicate whether to plot the Y channel or not. Default is False.	`False`

Returns:

Type	Description
	numpy.ndarray or None: The ycbcr array if plot=False, otherwise None.

Source code in ShallowLearn/ImageHelper.py

def plot_ycbcr(img, plot=False):
    """
    Converts the input image to the YCbCr color space and optionally plots the Y channel.

    Args:
        img (numpy.ndarray): The input image array.
        plot (bool): Flag to indicate whether to plot the Y channel or not. Default is False.

    Returns:
        numpy.ndarray or None: The ycbcr array if plot=False, otherwise None.

    """
    ycbcr_img = rgb2ycbcr(plot_rgb(img))
    y_channel = ycbcr_img[:, :, 0]

    if plot == False:
        return ycbcr_img
    # Plot the Y channel
    plt.imshow(y_channel, cmap='gray')
    plt.axis('off')
    plt.title('Y Channel')
    plt.show()
    return None

`predict_mask(img, model=None, mask_val=None, estimator=None)`

Predicts the mask for the input image using a pre-trained model.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The input image array.	required
`model`	`str`	The path to the pre-trained model. Default is None and uses all 13 bands.	`None`
`mask_val`	`int`	The mask value to consider. Default is 9.	`None`

Returns:

Type	Description
	numpy.ndarray: The predicted mask for the image.

Source code in ShallowLearn/ImageHelper.py

def predict_mask(img, model = None,  mask_val=None, estimator = None):
    """
    Predicts the mask for the input image using a pre-trained model.

    Args:
        img (numpy.ndarray): The input image array.
        model (str): The path to the pre-trained model. Default is None and uses all 13 bands.
        mask_val (int): The mask value to consider. Default is 9.

    Returns:
        numpy.ndarray: The predicted mask for the image.

    """
    if model is None:
        loaded_pipeline = joblib.load('../Models/pipeline_pca2_k10.pkl')
    else:
        loaded_pipeline = joblib.load(model)

    if estimator is not None:
        loaded_pipeline = estimator

    img_shape = img.shape
    temp = img.reshape(img_shape[0] * img_shape[1], img_shape[2])
    if mask_val is None:
        pred = loaded_pipeline.predict(temp) 
    else:
        pred = loaded_pipeline.predict(temp) == mask_val
    return pred.reshape(img_shape[0], img_shape[1])

`remove_channel(img, channel)`

Removes a specified channel from a 3D image array.

Parameters:

Name	Type	Description	Default
`img`	`ndarray`	The 3D image array with shape (height, width, channels).	required
`channel`	`int`	The index of the channel to remove.	required

Returns:

Type	Description
	numpy.ndarray: The image array with the specified channel removed.

Raises:

Type	Description
`ValueError`	If the channel index is out of bounds.

Source code in ShallowLearn/ImageHelper.py

def remove_channel(img, channel):
    """
    Removes a specified channel from a 3D image array.

    Args:
        img (numpy.ndarray): The 3D image array with shape (height, width, channels).
        channel (int): The index of the channel to remove.

    Returns:
        numpy.ndarray: The image array with the specified channel removed.

    Raises:
        ValueError: If the channel index is out of bounds.
    """
    if channel < 0 or channel >= img.shape[2]:
        raise ValueError("Channel index is out of bounds.")

    return np.concatenate((img[:, :, :channel], img[:, :, channel+1:]), axis=2)

`select_channels(arr, indices)`

Selects specific channels based on the given indices from a 3D numpy array.

:param arr: A numpy array of shape (x, y, z). :param indices: A list or array-like containing the indices of channels to be selected. Its length must be 3 or it will raise a ValueError. :return: A numpy array of shape (x, y, 3).

Source code in ShallowLearn/ImageHelper.py

def select_channels(arr, indices):
    """
    Selects specific channels based on the given indices from a 3D numpy array.

    :param arr: A numpy array of shape (x, y, z).
    :param indices: A list or array-like containing the indices of channels to be selected.
                    Its length must be 3 or it will raise a ValueError.
    :return: A numpy array of shape (x, y, 3).
    """

    if len(indices) != 3:
        raise ValueError("The length of indices must be 3.")

    return arr[:, :, indices]

`add_nan_buffer(arr, dilation_size=3)`

Adds a buffer around NaN values in the given array using morphological dilation.

Parameters:

Name	Type	Description	Default
`arr`	`ndarray`	The input array.	required
`dilation_size`	`int`	Size of the dilation structure. This controls the thickness of the NaN buffer.	`3`

Returns:

Type	Description
	np.ndarray: The modified array with a buffer around NaN values.

Source code in ShallowLearn/CloudDetector.py

def add_nan_buffer(arr, dilation_size=3):
    """
    Adds a buffer around NaN values in the given array using morphological dilation.

    Parameters:
        arr (np.ndarray): The input array.
        dilation_size (int): Size of the dilation structure. This controls the thickness of the NaN buffer.

    Returns:
        np.ndarray: The modified array with a buffer around NaN values.
    """


    # Define the structure for dilation based on the given dilation size
    structure = np.ones((dilation_size, dilation_size))

    # Perform binary dilation on the NaN mask
    dilated_mask = binary_erosion(arr, structure=structure)

    return dilated_mask

`detect_clouds(datacube, threshold=0.0, window_size=8)`

Detects clouds in a time series of images by comparing each pixel to the mean of the surrounding window.

Parameters: - datacube: 4D numpy array (time, x, y, channels) - window_size: number of images in the sliding window

Returns: - cloud_mask: 4D boolean numpy array (time, x, y, channels) indicating cloud presence for each image

Source code in ShallowLearn/CloudDetector.py

def detect_clouds(datacube, threshold = 0.0, window_size=8):
    """
    Detects clouds in a time series of images by comparing each pixel to the mean of the surrounding window.

    Parameters:
    - datacube: 4D numpy array (time, x, y, channels)
    - window_size: number of images in the sliding window

    Returns:
    - cloud_mask: 4D boolean numpy array (time, x, y, channels) indicating cloud presence for each image
    """
    datacube = np.array(datacube)
    time, x, y, channels = datacube.shape
    half_window = window_size // 2

    # Initialize cloud mask with the same shape as datacube
    cloud_mask = np.zeros((time, x, y, channels), dtype=bool)

    # Pad the datacube to handle the boundaries
    padded_datacube = np.pad(datacube, ((half_window, half_window), (0, 0), (0, 0), (0, 0)), mode='reflect')

    for t in range(time):
        window = padded_datacube[t:t + window_size]
        window_mean = np.mean(window, axis=0)

        cloud_mask[t] = datacube[t] > (window_mean  + threshold)

    return cloud_mask

`color_histogram(image, bins=32)`

Computes the color histogram for each channel in the image.

Purpose: Color histograms represent the distribution of colors in an image and can be used for color-based image analysis.

image: numpy array, shape (height, width, num_channels) The input image array.
bins: int, optional Number of bins for the histogram (default is 32).

hist: numpy array, shape (num_channels, bins) The computed color histograms for each channel.

Source code in ShallowLearn/ComputerVisionFeatures.py

def color_histogram(image, bins=32):
    """
    Computes the color histogram for each channel in the image.

    Purpose:
    Color histograms represent the distribution of colors in an image and can be used for color-based image analysis.

    Parameters:
    - image: numpy array, shape (height, width, num_channels)
      The input image array.
    - bins: int, optional
      Number of bins for the histogram (default is 32).

    Returns:
    - hist: numpy array, shape (num_channels, bins)
      The computed color histograms for each channel.
    """
    num_channels = image.shape[2]
    hist = np.zeros((num_channels, bins))

    for channel in range(num_channels):
        hist[channel, :], _ = np.histogram(image[:, :, channel], bins=bins, range=[0, 256])

    return hist

`edge_density(image)`

Computes the edge density using the Canny Edge Detector.

Purpose: Edge density can be used to quantify the amount of texture or detail in an image, which can be useful in various image processing tasks.

image: numpy array, shape (height, width, num_channels) The input image array.

edge_density_map: numpy array, shape (height, width) The computed edge density.

Source code in ShallowLearn/ComputerVisionFeatures.py

def edge_density(image):
    """
    Computes the edge density using the Canny Edge Detector.

    Purpose:
    Edge density can be used to quantify the amount of texture or detail in an image, which can be useful in various image processing tasks.

    Parameters:
    - image: numpy array, shape (height, width, num_channels)
      The input image array.

    Returns:
    - edge_density_map: numpy array, shape (height, width)
      The computed edge density.
    """
    # Convert image to grayscale if it has multiple channels
    if image.shape[2] > 1:
        gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        gray_image = image

    # Apply Canny Edge Detector
    edges = cv2.Canny(gray_image, 50, 200)

    # Compute edge density
    edge_density_map = edges / 255.0
    return edge_density_map

`gabor_features(image, frequency=0.6)`

Applies Gabor filter to an image.

image: numpy array, shape (height, width, num_channels) The input image array.
frequency: float The frequency of the sinusoidal function.

gabor_response: numpy array, shape (height, width) The filtered image.

Source code in ShallowLearn/ComputerVisionFeatures.py

def gabor_features(image, frequency=0.6):
    """
    Applies Gabor filter to an image.

    Parameters:
    - image: numpy array, shape (height, width, num_channels)
      The input image array.
    - frequency: float
      The frequency of the sinusoidal function.

    Returns:
    - gabor_response: numpy array, shape (height, width)
      The filtered image.
    """
    if image.shape[2] > 1:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Applying Gabor filter
    gabor_response, _ = gabor(image, frequency=frequency)
    return gabor_response

`histogram_of_oriented_gradients(image, pixels_per_cell=(16, 16), cells_per_block=(4, 4), orientations=9)`

Computes the Histogram of Oriented Gradients (HOG) feature descriptor.

image: numpy array, shape (height, width, num_channels) The input image array.
pixels_per_cell: tuple of int Size (in pixels) of a cell.
cells_per_block: tuple of int Number of cells in each block.
orientations: int Number of orientation bins.

hog_features: numpy array The HOG feature descriptor for the image.

Source code in ShallowLearn/ComputerVisionFeatures.py

def histogram_of_oriented_gradients(image, pixels_per_cell=(16, 16), cells_per_block=(4, 4), orientations=9):
    """
    Computes the Histogram of Oriented Gradients (HOG) feature descriptor.

    Parameters:
    - image: numpy array, shape (height, width, num_channels)
      The input image array.
    - pixels_per_cell: tuple of int
      Size (in pixels) of a cell.
    - cells_per_block: tuple of int
      Number of cells in each block.
    - orientations: int
      Number of orientation bins.

    Returns:
    - hog_features: numpy array
      The HOG feature descriptor for the image.
    """
    # if image.shape[2] > 1:
    #     image = rgb2gray(image)

    fd, hog_features = hog(image, orientations=orientations,channel_axis=-1,  
                           pixels_per_cell=pixels_per_cell, cells_per_block=cells_per_block, block_norm='L1-sqrt', visualize = True)

    return hog_features

`sobel_edge_detection(image)`

Applies Sobel edge detection to an image.

image: numpy array, shape (height, width, num_channels) The input image array.

edge_image: numpy array, shape (height, width) The edge-detected image.

Source code in ShallowLearn/ComputerVisionFeatures.py

def sobel_edge_detection(image):
    """
    Applies Sobel edge detection to an image.

    Parameters:
    - image: numpy array, shape (height, width, num_channels)
      The input image array.

    Returns:
    - edge_image: numpy array, shape (height, width)
      The edge-detected image.
    """
    # Convert to grayscale if necessary
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if image.shape[2] > 1 else image

    # Apply Sobel edge detection
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5)
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5)
    edge_image = np.sqrt(sobelx**2 + sobely**2)

    return edge_image

`texture_features(image, P=8, R=1)`

Computes texture features using Local Binary Pattern (LBP).

Purpose: LBP is a powerful texture descriptor and can be used for texture classification.

image: numpy array, shape (height, width, num_channels) The input image array.
P: int, optional Number of circularly symmetric neighbor set points (default is 8).
R: int, optional Radius of circle (default is 1).

lbp_texture: numpy array, shape (height, width) The computed texture features using LBP.

Source code in ShallowLearn/ComputerVisionFeatures.py

def texture_features(image, P=8, R=1):
    """
    Computes texture features using Local Binary Pattern (LBP).

    Purpose:
    LBP is a powerful texture descriptor and can be used for texture classification.

    Parameters:
    - image: numpy array, shape (height, width, num_channels)
      The input image array.
    - P: int, optional
      Number of circularly symmetric neighbor set points (default is 8).
    - R: int, optional
      Radius of circle (default is 1).

    Returns:
    - lbp_texture: numpy array, shape (height, width)
      The computed texture features using LBP.
    """
    # Convert image to grayscale if it has multiple channels
    if image.shape[2] > 1:
        gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        gray_image = image

    # Apply Local Binary Pattern
    lbp_texture = local_binary_pattern(gray_image, P, R, method="uniform")
    return lbp_texture

`extract_point_spectra(array, x, y)`

Extracts the spectra of a single point (x, y) across all time slices.

Parameters: - array: numpy array of shape (t, x, y, z) - x: x-coordinate of the point - y: y-coordinate of the point

Returns: - spectra: numpy array of shape (t, z)

Source code in ShallowLearn/PointExtraction.py

def extract_point_spectra(array, x, y):
    """
    Extracts the spectra of a single point (x, y) across all time slices.

    Parameters:
    - array: numpy array of shape (t, x, y, z)
    - x: x-coordinate of the point
    - y: y-coordinate of the point

    Returns:
    - spectra: numpy array of shape (t, z)
    """

    if len(array.shape) != 4:
        raise ValueError("Input array should be of shape (t, x, y, z)")

    spectra = array[:, x, y, :]
    return spectra

`BCET(image, min_value=0, max_value=255, desired_mean=110)`

Applies a Bias Correction and Enhancement Technique (BCET) to an image.

This technique stretches the image data with a parabolic function, which transforms the pixel intensity values to a specified range (default: 0 to 255) with a specified mean (default: 110).

Parameters: - image: The input image as a numpy array. - min_value: The minimum pixel intensity value in the output image. - max_value: The maximum pixel intensity value in the output image. - desired_mean: The mean pixel intensity value for the output image.

Returns: - The transformed image as a numpy array with dtype 'int'.

Source code in ShallowLearn/Transform.py

def BCET(image, min_value=0, max_value=255, desired_mean=110):
    """
    Applies a Bias Correction and Enhancement Technique (BCET) to an image.

    This technique stretches the image data with a parabolic function, which transforms
    the pixel intensity values to a specified range (default: 0 to 255) with a specified mean (default: 110).

    Parameters:
    - image: The input image as a numpy array.
    - min_value: The minimum pixel intensity value in the output image.
    - max_value: The maximum pixel intensity value in the output image.
    - desired_mean: The mean pixel intensity value for the output image.

    Returns:
    - The transformed image as a numpy array with dtype 'int'.
    """
    input_min = np.min(image)
    input_max = np.max(image)
    input_mean = np.mean(image)
    input_mean_sq = np.mean(image ** 2)

    parabola_vertex = (
        (input_max ** 2 * (desired_mean - min_value) - input_mean_sq * (max_value - min_value) +
         input_min ** 2 * (max_value - desired_mean)) /
        (2 * (input_max * (desired_mean - min_value) - input_mean * (max_value - min_value) + input_min * (max_value - desired_mean)))
    )

    parabola_coefficient = (max_value - min_value) / ((input_max - input_min) * (input_max + input_min - 2 * parabola_vertex))
    parabola_constant = min_value - parabola_coefficient * (input_min - parabola_vertex) ** 2

    transformed_image = parabola_coefficient * (image - parabola_vertex) ** 2 + parabola_constant

    return transformed_image.astype(int)  # Ensure the output values are integers for plotting

`BCET_multi(image, min_value=0, max_value=255, desired_mean=110)`

Applies BCET to each channel of a multi-channel image separately.

Source code in ShallowLearn/Transform.py

def BCET_multi(image, min_value=0, max_value=255, desired_mean=110):
    """
    Applies BCET to each channel of a multi-channel image separately.
    """
    # Get the number of channels in the image
    num_channels = image.shape[2] if len(image.shape) == 3 else 1

    # Initialize an empty array for the output image
    output_image = np.zeros_like(image)

    # Apply BCET to each channel separately
    for channel in range(num_channels):
        output_image[:, :, channel] = BCET(image[:, :, channel], min_value, max_value, desired_mean)

    return output_image

`LCE_multi(image)`

Applies linear contrast enhancement to each channel of a multi-channel image separately.

Source code in ShallowLearn/Transform.py

def LCE_multi(image):
    """
    Applies linear contrast enhancement to each channel of a multi-channel image separately.
    """
    # Get the number of channels in the image
    num_channels = image.shape[2] if len(image.shape) == 3 else 1

    # Initialize an empty array for the output image
    output_image = np.zeros_like(image)

    # Apply linear contrast enhancement to each channel separately
    for channel in range(num_channels):
        output_image[:, :, channel] = linear_contrast_enhancement(image[:, :, channel])

    return output_image

`hsi_to_rgb(array)`

Convert an array of HSI (Hue, Saturation, Intensity) values to an array of RGB values.

Source code in ShallowLearn/Transform.py

def hsi_to_rgb(array):
    """
    Convert an array of HSI (Hue, Saturation, Intensity) values to an array of RGB values.
    """
    # Extract the HSI values from the array
    h = array[:, :, 0]
    s = array[:, :, 1]
    i = array[:, :, 2]

    # Check if the hue is outside the range [0, 360)
    h = np.where(h < 0, h + 360, h)
    h = np.where(h >= 360, h - 360, h)

    # Check if the saturation is outside the range [0, 1]
    s = np.clip(s, 0, 1)

    # Check if the intensity is outside the range [0, 1]
    i = np.clip(i, 0, 1)

    # Convert the hue to the range [0, 6)
    h = h / 60

    # Calculate the chroma
    c = (1 - np.abs(2*i - 1)) * s

    # Calculate the x value
    x = c * (1 - np.abs(h % 2 - 1))

    # Calculate the m value
    m = i - c/2

    # Calculate the RGB values
    r, g, b = np.zeros_like(h), np.zeros_like(h), np.zeros_like(h)
    idx = np.where((0 <= h) & (h < 1))
    r[idx], g[idx], b[idx] = c[idx], x[idx], 0
    idx = np.where((1 <= h) & (h < 2))
    r[idx], g[idx], b[idx] = x[idx], c[idx], 0
    idx = np.where((2 <= h) & (h < 3))
    r[idx], g[idx], b[idx] = 0, c[idx], x[idx]
    idx = np.where((3 <= h) & (h < 4))
    r[idx], g[idx], b[idx] = 0, x[idx], c[idx]
    idx = np.where((4 <= h) & (h < 5))
    r[idx], g[idx], b[idx] = x[idx], 0, c[idx]
    idx = np.where((5 <= h) & (h < 6))
    r[idx], g[idx], b[idx] = c[idx], 0, x[idx]

    # Add the m value to each RGB value
    r, g, b = r + m, g + m, b + m

    # Convert the RGB values to the range [0, 255]
    r, g, b = (r * 255).astype(np.uint8), (g * 255).astype(np.uint8), (b * 255).astype(np.uint8)

    # Stack the RGB values into a single array
    rgb_array = np.stack((r, g, b), axis=2)

    return rgb_array

`linear_contrast_enhancement(image, max_value=255)`

Applies a Linear Contrast Enhancement (LCE) to an image.

This technique linearly rescales the image pixel intensity values to the full range of possible values (0-255).

Parameters: - image: The input image as a numpy array.

Returns: - The rescaled image as a numpy array.

Raises: - ValueError: If all valid pixel values in the image are the same.

Source code in ShallowLearn/Transform.py

def linear_contrast_enhancement(image, max_value=255):
    """
    Applies a Linear Contrast Enhancement (LCE) to an image.

    This technique linearly rescales the image pixel intensity values to the full range of possible values (0-255).

    Parameters:
    - image: The input image as a numpy array.

    Returns:
    - The rescaled image as a numpy array.

    Raises:
    - ValueError: If all valid pixel values in the image are the same.
    """
    # Identify NaN values
    mask_nan = np.isnan(image)

    # Replace NaN values with 0
    image[mask_nan] = 0

    # Get the minimum value from non-zero elements of the image
    min_intensity = np.min(image[np.nonzero(image)]) + 0.001

    # Get the maximum value from the image
    max_intensity = np.max(image)

    # Check if maximum and minimum are the same
    if max_intensity == min_intensity:
        raise ValueError("Cannot apply linear contrast enhancement: all pixel values in the image are the same.")

    # Apply linear contrast enhancement and clip to keep values within the desired range
    enhanced_image = np.clip((image - min_intensity) * (max_value / (max_intensity - min_intensity)), 0, max_value)

    enhanced_image[mask_nan] = np.nan  # Replace NaN values with NaN

    return enhanced_image

`transform_hsv_stretch(array, max_value=100)`

Transform an Image into HSV space and stretch the contrast of the S channel.

Source code in ShallowLearn/Transform.py

def transform_hsv_stretch(array, max_value = 100):
    """
    Transform an Image into HSV space and stretch the contrast of the S channel.
    """

    hsv_array = plot_hsv(array)
    hsv_array[:, :, 1] = linear_contrast_enhancement(hsv_array[:, :, 1], max_value = max_value)
    rgb_array = hsv2rgb(hsv_array)
    return rgb_array

`transform_lab_stretch(array)`

Transform an Image into LAB space and stretch the contrast of the L channel.

Source code in ShallowLearn/Transform.py

def transform_lab_stretch(array):
    """
    Transform an Image into LAB space and stretch the contrast of the L channel.
    """

    lab_array = plot_lab(array)
    lab_array[:, :, 0] = linear_contrast_enhancement(lab_array[:, :, 0])
    rgb_array = lab2rgb(lab_array)
    return rgb_array

`transform_multiband_hsv(arr, bands=None, max_value=100, mask=None)`

Reindex the bands of a multiband image to match the order of the HSV color space. Input is a multiband array If bands is None, the default is [3,2,1] - where 3 is the Red band, 2 is the Green band, and 1 is the Blue band.

Source code in ShallowLearn/Transform.py

def transform_multiband_hsv(arr, bands = None, max_value = 100, mask = None):
    """
    Reindex the bands of a multiband image to match the order of the HSV color space.
    Input is a multiband array
    If bands is None, the default is [3,2,1] - where 3 is the Red band, 2 is the Green band, and 1 is the Blue band.
    """
    if bands is None:
        bands = [3,2,1]

    arr_copy = arr.copy()
    #convert arr_copy to float64
    arr_copy = arr_copy.astype(np.float64)
    rgb_arr = transform_hsv_stretch(arr_copy, max_value = max_value)

    arr_copy[:,:,bands[0]] = rgb_arr[:,:,0]
    arr_copy[:,:,bands[1]] = rgb_arr[:,:,1]
    arr_copy[:,:,bands[2]] = rgb_arr[:,:,2]
    return arr_copy

`transform_multiband_lab(arr, bands=None)`

Reindex the bands of a multiband image to match the order of the LAB color space. Input is a multiband array If bands is None, the default is [3,2,1] - where 3 is the Red band, 2 is the Green band, and 1 is the Blue band.

Source code in ShallowLearn/Transform.py

def transform_multiband_lab(arr, bands = None):
    """
    Reindex the bands of a multiband image to match the order of the LAB color space.
    Input is a multiband array
    If bands is None, the default is [3,2,1] - where 3 is the Red band, 2 is the Green band, and 1 is the Blue band.
    """
    if bands is None:
        bands = [3,2,1]

    arr_copy = arr.copy()
    #convert arr_copy to float64
    arr_copy = arr_copy.astype(np.float64)
    rgb_arr = transform_lab_stretch(arr_copy)

    arr_copy[:,:,bands[0]] = rgb_arr[:,:,0]
    arr_copy[:,:,bands[1]] = rgb_arr[:,:,1]
    arr_copy[:,:,bands[2]] = rgb_arr[:,:,2]

    return arr_copy

`histogram_matching(source_img, reference_img)`

Match the histogram of the source_img to the histogram of the reference_img.

Source code in ShallowLearn/RadiometricNormalisation.py

def histogram_matching(source_img, reference_img):
    """
    Match the histogram of the source_img to the histogram of the reference_img.
    """
    matched_img = np.empty_like(source_img)
    for band in range(source_img.shape[-1]):
        matched_img[..., band] = exposure.match_histograms(source_img[..., band], reference_img[..., band])
    return matched_img

`histogram_matching_with_plot(source_img, reference_img)`

Match the histogram of the source_img to the histogram of the reference_img. Returns the matched image.

Source code in ShallowLearn/RadiometricNormalisation.py

def histogram_matching_with_plot(source_img, reference_img):
    """
    Match the histogram of the source_img to the histogram of the reference_img.
    Returns the matched image.
    """
    matched_img = np.empty_like(source_img)
    num_bands = source_img.shape[-1]

    for band in range(num_bands):
        matched_img[..., band] = exposure.match_histograms(source_img[..., band], reference_img[..., band])

        # Plot the histograms
        plt.figure(figsize=(12, 4))

        plt.subplot(1, 3, 1)
        plt.hist(source_img[..., band].ravel(), bins=256, color='blue', alpha=0.7, label='Source Histogram')
        plt.hist(reference_img[..., band].ravel(), bins=256, color='green', alpha=0.7, label='Reference Histogram')
        plt.title('Original Histograms - Band {}'.format(band))
        plt.legend()

        plt.subplot(1, 3, 2)
        plt.hist(matched_img[..., band].ravel(), bins=256, color='red', alpha=0.7, label='Matched Histogram')
        plt.title('Matched Histogram - Band {}'.format(band))
        plt.legend()

        plt.subplot(1, 3, 3)
        plt.imshow(matched_img[..., band], cmap='gray')
        plt.title('Matched Image - Band {}'.format(band))

        plt.tight_layout()
        plt.show()

    return matched_img

`pca_based_normalization(source_img, reference_img)`

Radiometric normalization of a source image based on a reference image using PCA. The function takes in two numpy arrays (source_img and reference_img) and returns the normalized source image.

Source code in ShallowLearn/RadiometricNormalisation.py

def pca_based_normalization(source_img, reference_img):
    """
    Radiometric normalization of a source image based on a reference image using PCA.
    The function takes in two numpy arrays (source_img and reference_img) and returns the normalized source image.
    """
    # Ensure source and reference images have the same shape
    assert source_img.shape == reference_img.shape, "The source and reference images must have the same shape."

    # Create an empty array for the normalized source image with the same shape as the source_img
    normalized_img = np.empty_like(source_img)
    # source_img = histogram_matching(source_img, reference_img)
    # Assuming X number of bands, all bands are processed
    for band in range(source_img.shape[-1]):
        src_band = source_img[..., band]
        ref_band = reference_img[..., band]

        # Mask and remove zeros
        src_band_non_zero, ref_band_non_zero = remove_zeros_from_pair(src_band, ref_band)

        # Create a joint dataset
        joint_data = np.vstack((src_band_non_zero, ref_band_non_zero)).T

        # Apply PCA on the joint dataset
        pca = PCA(n_components=1,svd_solver = 'full')
        pca.fit(joint_data)

        # Extract first principal component (PC1)
        pc1 = pca.transform(joint_data)

        # Use PC1 to perform linear regression between source and reference
        slope, intercept, r_value, p_value, std_err = linregress(pc1[:, 0], joint_data[:, 1])
# 
        # print("Band: {}, Slope: {}, Intercept: {}".format(band, slope, intercept))

        # Apply the derived slope (gain) and intercept (offset) to normalize the source band
        normalized_data = linear_contrast_enhancement(src_band.ravel() * slope + intercept).astype(source_img.dtype)
        normalized_img[..., band] = normalized_data.reshape(src_band.shape)

    return normalized_img

`pca_based_normalization_with_points(source_img, reference_img)`

Radiometric normalization of a source image based on a reference image using PCA. Returns the normalized source image and an array marking the points used for PCA.

Source code in ShallowLearn/RadiometricNormalisation.py

def pca_based_normalization_with_points(source_img, reference_img):
    """
    Radiometric normalization of a source image based on a reference image using PCA.
    Returns the normalized source image and an array marking the points used for PCA.
    """
    assert source_img.shape == reference_img.shape, "The source and reference images must have the same shape."
    normalized_img = np.empty_like(source_img)

    # Create an array to mark points used for PCA
    points_used_for_pca = np.zeros(source_img.shape[:2], dtype=bool)

    for band in range(source_img.shape[-1]):
        src_band = source_img[..., band]
        ref_band = reference_img[..., band]

        # Mask and remove zeros
        src_band_non_zero, ref_band_non_zero = remove_zeros_from_pair(src_band, ref_band)

        # Mark the points used for PCA in the mask
        mask = (src_band > 0) & (ref_band > 0)
        if band == 4:

            points_used_for_pca |= mask  # Update mask to include current band's points

        # Create a joint dataset
        joint_data = np.vstack((src_band_non_zero, ref_band_non_zero)).T

        # Apply PCA on the joint dataset
        pca = PCA(n_components=1, svd_solver='full')
        pca.fit(joint_data)

        # Extract first principal component (PC1)
        pc1 = pca.transform(joint_data)

        # Use PC1 to perform linear regression between source and reference
        slope, intercept, r_value, p_value, std_err = linregress(pc1[:, 0], joint_data[:, 1])

        print("Band: {}, Slope: {}, Intercept: {}".format(band, slope, intercept))

        # Normalize the source band
        normalized_data = (src_band.ravel() * slope + intercept).astype(source_img.dtype)
        normalized_img[..., band] = normalized_data.reshape(src_band.shape)

    return normalized_img, points_used_for_pca

`pca_filter_and_normalize(source_img, reference_img, threshold=1.0)`

Radiometric normalization based on a reference image using PCA, but only for the pixels that are close to the major principal component.

source_img : numpy array Source image to be normalized.
reference_img : numpy array Reference image.
threshold : float (optional) Threshold for filtering pixels based on their position relative to the major principal component. Default is 1.0.

Returns: - numpy array : Normalized source image.

Source code in ShallowLearn/RadiometricNormalisation.py

def pca_filter_and_normalize(source_img, reference_img, threshold=1.0):
    """
    Radiometric normalization based on a reference image using PCA, 
    but only for the pixels that are close to the major principal component.

    Parameters:
    - source_img : numpy array
        Source image to be normalized.
    - reference_img : numpy array
        Reference image.
    - threshold : float (optional)
        Threshold for filtering pixels based on their position relative to the major principal component. Default is 1.0.

    Returns:
    - numpy array : Normalized source image.
    """

    # Ensure source and reference images have the same shape
    assert source_img.shape == reference_img.shape, "The source and reference images must have the same shape."

    # Create an empty array for the normalized source image with the same shape as the source_img
    normalized_img = np.empty_like(source_img)
    # source_img = histogram_matching(source_img, reference_img)
    # Process each band
    for band in range(source_img.shape[-1]):
        src_band = source_img[..., band].ravel()
        ref_band = reference_img[..., band].ravel()

        # Create a joint dataset
        joint_data = np.vstack((src_band, ref_band)).T

        # Apply PCA on the joint dataset
        pca = PCA(n_components=2)
        pca.fit(joint_data)

        # Transform the joint data to the PCA space
        transformed_data = pca.transform(joint_data)

        # Extract values in the direction of the major eigenvector
        major_values = transformed_data[:, 1]

        # Filter based on the threshold
        valid_indices = np.logical_and(major_values >= (-threshold), major_values <= threshold)

        # Perform normalization on the valid pixels
        slope, intercept, _, _, _ = linregress(src_band[valid_indices], ref_band[valid_indices])
        normalized_band = linear_contrast_enhancement(src_band * slope + intercept).astype(source_img.dtype).reshape(source_img[..., band].shape)
        print("Band: {}, Slope: {}, Intercept: {}".format(band, slope, intercept))
        # Assign the normalized band to the output image
        normalized_img[..., band] = normalized_band

    return normalized_img

`pca_filter_and_normalize_b8(source_img, reference_img, band_8, threshold=1.0)`

Radiometric normalization based on a reference image using PCA, but only for the pixels that are close to the major principal component.

source_img : numpy array Source image to be normalized.
reference_img : numpy array Reference image.
threshold : float (optional) Threshold for filtering pixels based on their position relative to the major principal component. Default is 1.0.

Returns: - numpy array : Normalized source image.

Source code in ShallowLearn/RadiometricNormalisation.py

def pca_filter_and_normalize_b8(source_img, reference_img,band_8, threshold=1.0):
    """
    Radiometric normalization based on a reference image using PCA, 
    but only for the pixels that are close to the major principal component.

    Parameters:
    - source_img : numpy array
        Source image to be normalized.
    - reference_img : numpy array
        Reference image.
    - threshold : float (optional)
        Threshold for filtering pixels based on their position relative to the major principal component. Default is 1.0.

    Returns:
    - numpy array : Normalized source image.
    """

    # Ensure source and reference images have the same shape
    assert source_img.shape == reference_img.shape, "The source and reference images must have the same shape."

    # Create an empty array for the normalized source image with the same shape as the source_img
    normalized_img = np.empty_like(source_img)
    # source_img = histogram_matching(source_img, reference_img)
    # Process each band
    src_b8 = source_img[..., band_8].ravel()
    ref_b8 = reference_img[..., band_8].ravel()

    for band in range(source_img.shape[-1]):
        src_band = source_img[..., [band]].ravel()
        ref_band = reference_img[..., [band]].ravel()
        src_band = src_band / (src_b8 + 1)
        ref_band = ref_band / (ref_b8 + 1)
        # Create a joint dataset
        joint_data = np.vstack((src_band, ref_band)).T
        joint_data = np.nan_to_num(joint_data)

        # Apply PCA on the joint dataset
        pca = PCA(n_components=2)
        pca.fit(joint_data)

        # Transform the joint data to the PCA space
        transformed_data = pca.transform(joint_data)

        # Extract values in the direction of the major eigenvector
        major_values = transformed_data[:, 1]

        # Filter based on the threshold
        valid_indices = np.logical_and(major_values >= (-threshold), major_values <= threshold)

        # Perform normalization on the valid pixels
        slope, intercept, _, _, _ = linregress(src_band[valid_indices], ref_band[valid_indices])
        normalized_band = linear_contrast_enhancement(src_band * slope + intercept).astype(source_img.dtype).reshape(source_img[..., band].shape)
        print("Band: {}, Slope: {}, Intercept: {}".format(band, slope, intercept))
        # Assign the normalized band to the output image
        normalized_img[..., band] = normalized_band

    return normalized_img

`remove_zeros_from_pair(source_band, reference_band)`

Masks zeros from source_band and reference_band. Returns arrays without zero values.

Source code in ShallowLearn/RadiometricNormalisation.py

def remove_zeros_from_pair(source_band, reference_band):
    """
    Masks zeros from source_band and reference_band.
    Returns arrays without zero values.
    """
    mask = (source_band != 0) & (reference_band != 0)
    return source_band[mask], reference_band[mask]

`QuickLookArea`

Bases: QuickLookModel

Source code in ShallowLearn/QuickLook.py

class QuickLookArea(QuickLookModel):

    def __init__(self, df, shapefile, 
                 band_mapping =  ['B02', 'B03', 'B04', 'B08'], resolution = "10m", stretch_type = trf.LCE_multi):
        self.df = df[df.Label == 0]
        self.stretch_type = stretch_type
        # self.df = df[(df.Label == 0) | (df.Label == -1)]
        self.files = self.df.FILE_PATH.to_list()
        self.PVI = False
        # print(self.files)
        # self.files = fp.extract_MTD_files(directory)
        self.shapefile = shapefile
        self.imagery = self.load_data(band_mapping, resolution)
        print("Data loading finished")
        components = 0.95
        self.pca_model = PCA(n_components = components)  
        self.df = self.filter_dataframe_by_file_path(df, self.files)
        self.transformed_data = self.train()
        self.labels = self.predict()
        self.df['Label'] = self.labels

    def train(self):
        transformed_imagery = np.array(self.imagery).reshape(len(self.imagery), -1) / 10_000
        transformed_data = self.pca_model.fit_transform(transformed_imagery)
        return transformed_data

    def load_data(self, band_mapping = ['B02', 'B03', 'B04', 'B08'], resolution = "10m"):
        imagery = []
        self.updated_files = []
        for file in tqdm(self.files, desc="Processing files"):
            # print(file)
            image = load_sen2(file)
            clipped = image.clip_raster_with_shape(self.shapefile, resolution,
                                                   selected_bands=band_mapping, use_mask=False)
            # bit hacky - fix in dataloader
            if "N0400" in file or "N0500" in file or "N0509" in file or "N0510" in file:
                clipped -= 1000
            try:
                clipped = np.swapaxes(clipped, 0, 2)
                if self.stretch_type is not None:
                    clipped = self.stretch_type(clipped)

                clipped = clip_image(clipped, clip_percent=2)
                # clipped = trf.LCE_multi(clipped)
                imagery.append(clipped)
                self.updated_files.append(file)
            except:
                print(f"{file} failed to transform")

        self.files = self.updated_files
        return imagery

    def generate_dataframe(self):
        return self.df

    def filter_dataframe_by_file_path(self, df, valid_file_paths):
        """
        Drops rows from a DataFrame based on values in the 'FILE_PATH' column 
        that are not present in the provided list of valid file paths.

        Parameters:
        - df: The DataFrame to filter.
        - valid_file_paths: A list of valid file paths. Rows with 'FILE_PATH' not in this list will be dropped.

        Returns:
        - A filtered DataFrame with only rows that have 'FILE_PATH' values in the valid_file_paths list.
        """
        filtered_df = df[df['FILE_PATH'].isin(valid_file_paths)]
        return filtered_df

    def predict(self, model = None, n_components = 4):
        if model is None:
            model = GaussianMixture(n_components=n_components, random_state=42)

            self.df['Label'] = model.fit_predict(self.transformed_data)
            return self.df['Label']

`filter_dataframe_by_file_path(df, valid_file_paths)`

Drops rows from a DataFrame based on values in the 'FILE_PATH' column that are not present in the provided list of valid file paths.

Parameters: - df: The DataFrame to filter. - valid_file_paths: A list of valid file paths. Rows with 'FILE_PATH' not in this list will be dropped.

Returns: - A filtered DataFrame with only rows that have 'FILE_PATH' values in the valid_file_paths list.

Source code in ShallowLearn/QuickLook.py

def filter_dataframe_by_file_path(self, df, valid_file_paths):
    """
    Drops rows from a DataFrame based on values in the 'FILE_PATH' column 
    that are not present in the provided list of valid file paths.

    Parameters:
    - df: The DataFrame to filter.
    - valid_file_paths: A list of valid file paths. Rows with 'FILE_PATH' not in this list will be dropped.

    Returns:
    - A filtered DataFrame with only rows that have 'FILE_PATH' values in the valid_file_paths list.
    """
    filtered_df = df[df['FILE_PATH'].isin(valid_file_paths)]
    return filtered_df

`QuickLookModel`

Abstract base class implementation do not use

Source code in ShallowLearn/QuickLook.py

class QuickLookModel():
    """Abstract base class implementation do not use"""
    def __init__(self, files, model = None):
        self.PVI = None
        self.imagery = []
        self.pca_model = PCA()


    def create_custom_pastel_cmap(self, labels):
        """
        Create a custom colormap using a pastel theme for the given labels.

        Parameters:
        - labels: array-like of unique labels

        Returns:
        - custom_cmap: ListedColormap with tab20c colors
        """
        tab20_colors = plt.cm.Set2(np.linspace(0, 1, len(labels)))
        custom_cmap = ListedColormap(tab20_colors)
        return custom_cmap

    def load_data(self):
        pass

    def train(self):
        transformed_imagery = np.array(self.imagery).reshape(len(self.imagery), -1)/255
        transformed_data = self.pca_model.fit_transform(transformed_imagery)
        return transformed_data

    def predict(self, transformed_data, model = None):
        if model is None:
            dbscan_model = DBSCAN(eps=30, min_samples=9)
            dbscan_model.fit(transformed_data)
            return dbscan_model.labels_ 

    def generate_dataframe(self):
        raise Exception("Not implemented in baseclass")


    def plot_cloud_coverage(self, df, zoom = 0.05):
        if self.PVI:
            custom_cmap = ListedColormap(['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])  # Four distinct colors
        else:
            label_list = (df.Label.unique())
            custom_cmap = self.create_custom_pastel_cmap(label_list)
        df['DATATAKE_1_DATATAKE_SENSING_START'] = pd.to_datetime(df['DATATAKE_1_DATATAKE_SENSING_START'], errors='coerce')

        # Plotting
        fig, ax = plt.subplots(figsize=(12, 6))

        # Formatting date on x-axis
        ax.xaxis.set_major_locator(mdates.YearLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter('%b'))

        # Adding labels and title
        ax.set_xlabel('Sensing Start Date', fontsize=14)
        ax.set_ylabel('Cloud Coverage Assessment', fontsize=14)
        ax.set_title('Cloud Coverage Assessment over Time', fontsize=16)

        # Adding a grid
        ax.grid(True, which='both', linestyle='--', linewidth=0.5)

        # Adding images on the plot with a border corresponding to the label color
        for x, y, img, label in zip(df['DATATAKE_1_DATATAKE_SENSING_START'], df['CLOUD_COVERAGE_ASSESSMENT'], self.imagery, df['Label']):
            if self.PVI == False:
                img = img[:,:,[2,1,0]]
            imagebox = OffsetImage(img, zoom=zoom, alpha=0.783)  # Adjust the zoom parameter and alpha for transparency
            ab = AnnotationBbox(imagebox, (mdates.date2num(x), y),
                                frameon=True,
                                bboxprops=dict(edgecolor=custom_cmap(label+1), linewidth=2))  # Offset label by 1 to handle -1
            ax.add_artist(ab)

        # Scatter plot with discrete colors on top of the images
        scatter = ax.scatter(df['DATATAKE_1_DATATAKE_SENSING_START'], df['CLOUD_COVERAGE_ASSESSMENT'], 
                             c=df['Label'], cmap=custom_cmap, edgecolor='black', linewidth=1, s=50)

        # Adding a color bar with discrete labels
        cbar = plt.colorbar(scatter, ax=ax, ticks=np.arange(-1, 3))
        cbar.set_label('Label', fontsize=12)
        if self.PVI:
            cbar.set_ticks([-1, 0, 1, 2])  # Ensure all four labels are present
            cbar.set_ticklabels(['Partially Cloudy', 'Clear Sky', 'Opaque Clouds', 'No Data'])  # Custom labels for classes

        # Improving the overall appearance
        plt.tight_layout()

        # Show plot
        plt.show()

    def plot_principal_components(self, df, plot_meshgrid=True, zoom = 0.05):
        if self.PVI:
            custom_cmap = ListedColormap(['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])  # Four distinct colors
            num = 1
        else:
            label_list = (df.Label.unique())
            custom_cmap = self.create_custom_pastel_cmap(label_list)
            num = 0
        background_cmap = plt.cm.viridis  # Warm and cool colormap

        # Perform PCA on the imagery data
        pca_result = self.pca_model.transform([img.flatten() for img in self.imagery])

        # Plotting
        fig, ax = plt.subplots(figsize=(15, 15))

        if plot_meshgrid:
            # Create a meshgrid for the background
            x_min, x_max = pca_result[:, 0].min() - 1, pca_result[:, 0].max() + 1
            y_min, y_max = pca_result[:, 1].min() - 1, pca_result[:, 1].max() + 1
            xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))

            # Flatten the meshgrid arrays and create a PCA input format
            grid_points = np.c_[xx.ravel(), yy.ravel()]

            # Create a dummy array with the correct number of components
            dummy_points = np.zeros((grid_points.shape[0], self.pca_model.n_components_))
            dummy_points[:, :2] = grid_points

            # Inverse transform to the original space
            inverse_transformed_points = self.pca_model.inverse_transform(dummy_points)

            # Predict or interpolate the cloud coverage for the grid points
            cloud_coverage_grid = griddata(pca_result[:, :2], df['CLOUD_COVERAGE_ASSESSMENT'], (xx, yy), method='linear')

            # Plot the background meshgrid
            c = ax.imshow(cloud_coverage_grid, extent=(x_min, x_max, y_min, y_max), origin='lower', cmap=background_cmap, alpha=0.5)

            # Adding a color bar for the background
            cbar_bg = plt.colorbar(c, ax=ax, fraction=0.01, pad = 0.1)
            cbar_bg.set_label('Cloud Coverage Assessment', fontsize=12)

        # Adding labels and title
        ax.set_xlabel('Principal Component 1', fontsize=14)
        ax.set_ylabel('Principal Component 2', fontsize=14)
        ax.set_title('Principal Component Analysis of Imagery with Cloud Coverage', fontsize=16)

        # Adding a grid
        ax.grid(True, which='both', linestyle='--', linewidth=0.5)

        # Adding images on the plot with a border corresponding to the label color
        for pc1, pc2, img, label in zip(pca_result[:, 0], pca_result[:, 1], self.imagery, df['Label']):
            if self.PVI == False:
                img = img[:,:,[2,1,0]]
            imagebox = OffsetImage(img, zoom=zoom, alpha=0.9)  # Adjust the zoom parameter and alpha for transparency
            ab = AnnotationBbox(imagebox, (pc1, pc2),
                                frameon=True,
                                bboxprops=dict(edgecolor=custom_cmap(label + num), linewidth=2), 
                                box_alignment=(0.5, 0.5))  # Align images to the center
            ax.add_artist(ab)

        # Scatter plot with discrete colors on top of the images
        scatter = ax.scatter(pca_result[:, 0], pca_result[:, 1], 
                            c=df['Label'], cmap=custom_cmap, edgecolor='black', linewidth=1, s=50)

        # Adding a color bar with discrete labels
        cbar = plt.colorbar(scatter, ax=ax, ticks=np.arange(-1, 3), fraction=0.01, pad=0.01)
        # cbar.set_label('Label', fontsize=12)
        if self.PVI:
            cbar.set_ticks([-1, 0, 1, 2])  # Ensure all four labels are present
            cbar.set_ticklabels(['Partially Cloudy', 'Clear Sky', 'Opaque Clouds', 'No Data'])  # Custom labels for classes
        else:
            cbar.set_ticks(df.Label.unique())
        # Ensure images at the border are either partially hidden or within the figure
        ax.set_xlim([pca_result[:, 0].min() - 1, pca_result[:, 0].max() + 1])
        ax.set_ylim([pca_result[:, 1].min() - 1, pca_result[:, 1].max() + 1])

        # Improving the overall appearance
        plt.tight_layout()

        # Show plot
        plt.show()

`create_custom_pastel_cmap(labels)`

Create a custom colormap using a pastel theme for the given labels.

Parameters: - labels: array-like of unique labels

Returns: - custom_cmap: ListedColormap with tab20c colors

Source code in ShallowLearn/QuickLook.py

def create_custom_pastel_cmap(self, labels):
    """
    Create a custom colormap using a pastel theme for the given labels.

    Parameters:
    - labels: array-like of unique labels

    Returns:
    - custom_cmap: ListedColormap with tab20c colors
    """
    tab20_colors = plt.cm.Set2(np.linspace(0, 1, len(labels)))
    custom_cmap = ListedColormap(tab20_colors)
    return custom_cmap

`plot_images_on_pca(transformed_data, original_images, zoom=0.1, figsize=(10, 10), title='Visualization of Transformed Imagery')`

Plots images on their respective PCA-transformed coordinates.

Parameters: - transformed_data: numpy array of shape (n_samples, 2) containing PCA-transformed coordinates. - original_images: numpy array of shape (n_samples, height, width, 3) containing the original images. - zoom: float, the zoom level of the images on the plot. - figsize: tuple, the size of the figure.

Source code in ShallowLearn/QuickLook.py

def plot_images_on_pca(transformed_data, original_images, zoom=0.1, figsize=(10, 10), title = "Visualization of Transformed Imagery"):
    """
    Plots images on their respective PCA-transformed coordinates.

    Parameters:
    - transformed_data: numpy array of shape (n_samples, 2) containing PCA-transformed coordinates.
    - original_images: numpy array of shape (n_samples, height, width, 3) containing the original images.
    - zoom: float, the zoom level of the images on the plot.
    - figsize: tuple, the size of the figure.
    """
    fig, ax = plt.subplots(figsize=figsize)
    for i in range(len(transformed_data)):
        original_dtype = original_images[i].dtype

        shape = original_images[i].shape
        # Adjust the call to `resize` to ensure it matches the original data type and range
        # Note: `anti_aliasing` is generally a good idea when downsampling images
        img = resize(original_images[i], 
                     output_shape=(int(shape[0] * zoom), int(shape[1] * zoom)), 
                     anti_aliasing=True, preserve_range=True).astype(original_dtype)
        # print(img.shape)
        # plt.imshow(img)
        # plt.show()
        # break
        imagebox = OffsetImage(np.array(img), zoom=zoom)
        ab = AnnotationBbox(imagebox, (transformed_data[i, 0], transformed_data[i, 1]), frameon=False)
        ax.add_artist(ab)

    ax.scatter(transformed_data[:, 0], transformed_data[:, 1], alpha=0.2)  # Plot transparent points to keep the scale
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.title(title)
    # plt.savefig("../Graphs/PVI_PCA2.svg")
    # plt.show()
    return ax

`clip_image(image, clip_percent=1)`

Apply a 2% clip on either side of the pixel value distribution for a single image.

Parameters: - image: numpy array representing the image - clip_percent: percentage to clip on each side of the pixel value distribution (default is 2%)

Returns: - clipped_image: numpy array with clipped pixel values

Source code in ShallowLearn/Util.py

def clip_image(image, clip_percent=1):
    """
    Apply a 2% clip on either side of the pixel value distribution for a single image.

    Parameters:
    - image: numpy array representing the image
    - clip_percent: percentage to clip on each side of the pixel value distribution (default is 2%)

    Returns:
    - clipped_image: numpy array with clipped pixel values
    """
    # Calculate the lower and upper clip values
    lower_clip = np.percentile(image, clip_percent)
    upper_clip = np.percentile(image, 100 - clip_percent)

    # Clip the image
    clipped_image = np.clip(image, lower_clip, upper_clip)

    # Normalize the clipped image to the range [0, 1]
    clipped_image = (clipped_image - lower_clip) / ((upper_clip - lower_clip) + 0.001)

    return clipped_image

`convert_data_types(df)`

Convert columns in a DataFrame from string to the most appropriate datatype (numeric, datetime, or string) by checking if the content fits known date formats, then checking for numeric values, and defaulting to string if neither fits. Also, consolidates columns with 'FLAG' in their name with corresponding columns without 'FLAG'.

:param df: pandas DataFrame with all columns as strings :return: DataFrame with converted datatypes

Source code in ShallowLearn/Util.py

def convert_data_types(df):
    """
    Convert columns in a DataFrame from string to the most appropriate datatype
    (numeric, datetime, or string) by checking if the content fits known date formats,
    then checking for numeric values, and defaulting to string if neither fits.
    Also, consolidates columns with 'FLAG' in their name with corresponding columns
    without 'FLAG'.

    :param df: pandas DataFrame with all columns as strings
    :return: DataFrame with converted datatypes
    """
    # Consolidate columns with similar names where one contains 'FLAG'
    flag_columns = [col for col in df.columns if 'FLAG' in col]
    for flag_col in flag_columns:
        base_col = flag_col.replace('_FLAG', '')
        if base_col in df.columns:
            # Combine data prioritizing non-FLAG data if not NaN
            df[base_col] = df[base_col].combine_first(df[flag_col])
            df.drop(flag_col, axis=1, inplace=True)  # Remove the FLAG column

    # Convert data types
    for column in df.columns:
        # First check for date by trying common date formats
        if pd.to_datetime(df[column], errors='coerce', format='%Y-%m-%d').notna().all():
            df[column] = pd.to_datetime(df[column], format='%Y-%m-%d')
            print(f"Column '{column}' converted to datetime.")
        elif pd.to_datetime(df[column], errors='coerce', format='%m/%d/%Y').notna().all():
            df[column] = pd.to_datetime(df[column], format='%m/%d/%Y')
            print(f"Column '{column}' converted to datetime.")
        else:
            # Attempt to convert to numeric if it's not a recognizable date
            try:
                df[column] = pd.to_numeric(df[column])
                print(f"Column '{column}' converted to numeric.")
            except ValueError:
                print(f"Column '{column}' retained as string.")  # Retain as string if it's neither datetime nor numeric

    return df

`plot_cloud_coverage_over_time(df)`

Plot cloud coverage assessment over time.

:param df: DataFrame containing the cloud coverage and date of data take.

Source code in ShallowLearn/Util.py

def plot_cloud_coverage_over_time(df):
    """
    Plot cloud coverage assessment over time.

    :param df: DataFrame containing the cloud coverage and date of data take.
    """
    # Convert the date column to datetime if not already done
    df['DATATAKE_1_DATATAKE_SENSING_START'] = pd.to_datetime(df['DATATAKE_1_DATATAKE_SENSING_START'])

    # Sort DataFrame by date for better plotting
    df = df.sort_values('DATATAKE_1_DATATAKE_SENSING_START')

    # Creating the plot
    plt.figure(figsize=(10, 6))
    sns.lineplot(x='DATATAKE_1_DATATAKE_SENSING_START', y='CLOUD_COVERAGE_ASSESSMENT', data=df)
    plt.title('Cloud Coverage Assessment Over Time')
    plt.xlabel('Date of Data Take')
    plt.ylabel('Cloud Coverage (%)')
    plt.xticks(rotation=45)  # Rotate date labels for better visibility
    plt.tight_layout()  # Adjust layout to make room for date labels
    plt.show()

`plot_cloud_coverage_over_time_with_baseline(df)`

Plot cloud coverage over time, color-coded by processing baseline.

:param df: DataFrame containing the cloud coverage, date of data take, and processing baseline.

Source code in ShallowLearn/Util.py

def plot_cloud_coverage_over_time_with_baseline(df):
    """
    Plot cloud coverage over time, color-coded by processing baseline.

    :param df: DataFrame containing the cloud coverage, date of data take, and processing baseline.
    """
    # Ensure 'DATATAKE_1_DATATAKE_SENSING_START' is in datetime format
    df['DATATAKE_1_DATATAKE_SENSING_START'] = pd.to_datetime(df['DATATAKE_1_DATATAKE_SENSING_START'])

    # Ensure 'PROCESSING_BASELINE' is numeric if not already
    df['PROCESSING_BASELINE'] = pd.to_numeric(df['PROCESSING_BASELINE'], errors='coerce')

    # Creating the plot
    plt.figure(figsize=(12, 8))
    sns.scatterplot(x='DATATAKE_1_DATATAKE_SENSING_START', y='CLOUD_COVERAGE_ASSESSMENT', 
                 hue='PROCESSING_BASELINE', palette='viridis', data=df)

    plt.title('Cloud Coverage Assessment Over Time by Processing Baseline')
    plt.xlabel('Date of Data Take')
    plt.ylabel('Cloud Coverage (%)')
    plt.legend(title='Processing Baseline', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.xticks(rotation=45)
    plt.tight_layout()  # Adjust layout to make room for legend and date labels
    plt.show()

`plot_images_on_scatter(transformed_data, imagery, image_fraction=0.11, title='Images on scatter')`

Plots images on a scatter plot at specified coordinates.

Parameters: - transformed_data: np.array, an array of coordinates (shape [n, 2]). - imagery: list, a list of images in ndarray format. - image_fraction: float, the fraction of the plot range to determine the size of the images.

Source code in ShallowLearn/Util.py

def plot_images_on_scatter(transformed_data, imagery, image_fraction=0.11, title = "Images on scatter"):
    """
    Plots images on a scatter plot at specified coordinates.

    Parameters:
    - transformed_data: np.array, an array of coordinates (shape [n, 2]).
    - imagery: list, a list of images in ndarray format.
    - image_fraction: float, the fraction of the plot range to determine the size of the images.
    """
    fig, ax = plt.subplots(figsize=(20, 10))
    ax.scatter(transformed_data[:, 0], transformed_data[:, 1])

    for i in range(len(imagery)):
        # Load and process the image
        if imagery[i].shape[-1] == 1:
            image_data = imagery[i][:,:,:]
        else:
            image_data = imagery[i][:,:,[0,1,2]]
        # Get the corresponding scatter point
        x, y = transformed_data[i, 0], transformed_data[i, 1]

        # Create an OffsetImage and AnnotationBbox
        imagebox = OffsetImage(image_data, zoom=image_fraction)
        ab = AnnotationBbox(imagebox, (x, y), frameon=False)

        # Add the AnnotationBbox to the plot
        ax.add_artist(ab)
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.title(title)
    plt.show()

`plot_images_with_info(all_images, band_order=[0, 1, 2])`

Plots 10 random images and their coordinates info on subplots from a larger list of images.

all_images: list of lists, where each inner list contains [image_data, x, y, z] image_data is an ndarray representing the image, x and y are coordinates, and z is the number of channels.

Source code in ShallowLearn/Util.py

def plot_images_with_info(all_images, band_order = [0,1,2]):
    """
    Plots 10 random images and their coordinates info on subplots from a larger list of images.

    Parameters:
    - all_images: list of lists, where each inner list contains [image_data, x, y, z]
      image_data is an ndarray representing the image, x and y are coordinates, and z is the number of channels.
    """
    # Check if there are at least 10 images
    if len(all_images) < 10:
        raise ValueError("Not enough images available to select 10 random ones.")

    # Select 10 random indices from the list
    random_indices = np.random.choice(len(all_images), size=10, replace=False)
    selected_images = [all_images[i] for i in random_indices]

    # Set the number of subplots
    fig, axes = plt.subplots(1, 10, figsize=(20, 4))  # Adjust figure size as needed

    for i, image in enumerate(selected_images):
        ax = axes[i] if len(selected_images) > 1 else axes  # Handle the case of a single subplot

        # Display the image
        if image.shape[-1] == 1:
            ax.imshow(image)
        else:
            ax.imshow(image[:,:,band_order])
        ax.axis('off')  # Turn off axis


    plt.tight_layout()
    plt.show()

`plot_quality_over_time(df)`

Plot various quality flags over time using a 4-axis subplot.

:param df: DataFrame containing the quality flags and date of data take.

Source code in ShallowLearn/Util.py

def plot_quality_over_time(df):
    """
    Plot various quality flags over time using a 4-axis subplot.

    :param df: DataFrame containing the quality flags and date of data take.
    """
    # Convert 'PRODUCT_START_TIME' to datetime if it's not already
    df['PRODUCT_START_TIME'] = pd.to_datetime(df['PRODUCT_START_TIME'])

    # Setting up the figure and axes
    fig, axs = plt.subplots(4, 1, figsize=(12, 18), sharex=True)

    # Plotting each quality flag
    quality_columns = ['GENERAL_QUALITY', 'GEOMETRIC_QUALITY', 
                       'RADIOMETRIC_QUALITY', 'SENSOR_QUALITY']
    titles = ['General Quality', 'Geometric Quality', 'Radiometric Quality', 'Sensor Quality']

    for i, quality in enumerate(quality_columns):
        sns.lineplot(x='PRODUCT_START_TIME', y=quality, data=df, ax=axs[i], marker='o')
        axs[i].set_title(titles[i])
        axs[i].set_ylabel('Quality Score')

    # Setting common labels
    plt.xlabel('Date of Data Take')
    plt.xticks(rotation=45)
    plt.tight_layout()  # Adjust layout to make room for label
    plt.show()

`plot_quality_over_time_side_by_side(df)`

Plot various quality flags over time using side-by-side subplots, enhanced for publication quality.

:param df: DataFrame containing the quality flags and date of data take.

Source code in ShallowLearn/Util.py

def plot_quality_over_time_side_by_side(df):
    """
    Plot various quality flags over time using side-by-side subplots, enhanced for publication quality.

    :param df: DataFrame containing the quality flags and date of data take.
    """
    # Ensure 'PRODUCT_START_TIME' is in datetime format
    df['PRODUCT_START_TIME'] = pd.to_datetime(df['PRODUCT_START_TIME'])

    # Setting the style of the plot
    sns.set(style="whitegrid")  # Using a white grid for a clean layout
    plt.rcParams['font.size'] = 12  # Base font size
    plt.rcParams['axes.labelsize'] = 14  # Axis label size
    plt.rcParams['axes.titlesize'] = 16  # Axis title size
    plt.rcParams['xtick.labelsize'] = 12  # X tick label size
    plt.rcParams['ytick.labelsize'] = 12  # Y tick label size
    plt.rcParams['legend.fontsize'] = 12  # Legend font size

    # Setting up the figure and axes
    fig, axs = plt.subplots(1, 4, figsize=(24, 8), dpi=300)  # 1 row, 4 columns, high resolution for publication

    # Plotting each quality flag
    quality_columns = ['GENERAL_QUALITY', 'GEOMETRIC_QUALITY', 
                       'RADIOMETRIC_QUALITY', 'SENSOR_QUALITY']
    titles = ['General Quality', 'Geometric Quality', 'Radiometric Quality', 'Sensor Quality']
    colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red']  # Color palette for differentiation

    for i, quality in enumerate(quality_columns):
        sns.lineplot(x='PRODUCT_START_TIME', y=quality, data=df, ax=axs[i], marker='o', color=colors[i])
        axs[i].set_title(titles[i])
        axs[i].set_xlabel('Date of Data Take')
        axs[i].set_ylabel('Quality Score')
        axs[i].xaxis.set_major_locator(mdates.YearLocator())  # Set major ticks to every year
        axs[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Only show the year in the tick label
        axs[i].tick_params(axis='x', rotation=45)  # Optionally rotate tick labels for better visibility

    # Enhancing the overall aesthetics
    plt.tight_layout(pad=3.0)  # Adjust layout to make room for label
    plt.show()