Image Segmentation Techniques in Python

Image Segmentation Tutorial: Watershed & Graph Cut

This tutorial demonstrates two powerful image segmentation techniques using Python and OpenCV. In the first part, you'll learn how to segment an image using the Watershed algorithm – a process that includes grayscale conversion, Gaussian blurring, thresholding, and distance transformation. In the second part, the Graph Cut algorithm is applied to segment an image by separating the foreground and background using a cost function based on edge weights.

This tutorial is based on a previously documented lab experiment.

Task 1: Watershed Algorithm Segmentation

The Watershed algorithm is used to separate overlapping objects in an image. In this example, we perform the following steps:

Load the image using OpenCV
Preprocess the image by converting it to grayscale, applying Gaussian blur, and thresholding
Compute the distance transform and segment the image using markers
Extract contours and overlay them on the original image

Step 1: Import Required Libraries

import cv2
import numpy as np
import logging
from matplotlib import pyplot as plt
from IPython.display import Image, display

Step 2: Define Helper Functions

The following functions handle image display, loading, preprocessing, segmentation, and contour extraction:

def imshow(img, ax=None):
    """Show an image either inline via IPython or on a matplotlib axes.

    Args:
        img: Image array. On the axes path it is converted with
            ``cv2.COLOR_BGR2RGB`` before plotting, so it is treated as BGR.
        ax: Optional matplotlib axes. When omitted, the image is JPEG-encoded
            and handed to ``IPython.display.display``.
    """
    if ax is not None:
        # matplotlib expects RGB channel order, so swap channels before plotting.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ax.imshow(rgb)
        ax.axis('off')
        return

    ok, jpeg_buffer = cv2.imencode(".jpg", img)
    if not ok:
        logging.error("Failed to encode image for display.")
        return
    display(Image(jpeg_buffer))

def load_image(image_path: str) -> np.ndarray:
    """Read an image from disk with ``cv2.imread`` and log the outcome.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The array returned by ``cv2.imread``, or ``None`` when reading failed
        (an error is logged in that case; no exception is raised).
    """
    loaded = cv2.imread(image_path)
    if loaded is not None:
        logging.info(f"Image loaded successfully from {image_path}")
        return loaded

    logging.error(f"Failed to load image from path: {image_path}")
    return None

def preprocess_image(img: np.ndarray) -> dict:
    """Produce the grayscale, blurred and binarized versions of an image.

    Args:
        img: Input image; converted with ``cv2.COLOR_BGR2GRAY``, so it is
            treated as a BGR image.

    Returns:
        A dict with keys ``'gray'``, ``'blur'`` and ``'binary'`` holding the
        grayscale image, its 3x3 Gaussian-blurred version, and the inverted
        Otsu-thresholded mask respectively.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)

    # Otsu picks the threshold automatically (the 0 passed below is ignored);
    # BINARY_INV flips the mask so darker pixels become 255.
    threshold_mode = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    binary_mask = cv2.threshold(smoothed, 0, 255, threshold_mode)[1]

    return {'gray': grayscale, 'blur': smoothed, 'binary': binary_mask}

def segment_image(
    bin_img: np.ndarray,
    fg_ratio: float = 0.5,
    dilate_iterations: int = 3,
) -> tuple:
    """Build the watershed marker image from a binary mask.

    Args:
        bin_img: Binary mask (0/255) such as the ``'binary'`` entry produced
            by ``preprocess_image``.
        fg_ratio: Fraction of the maximum distance-transform value above which
            a pixel is treated as sure foreground. Defaults to 0.5, the value
            that was previously hard-coded.
        dilate_iterations: Number of 3x3 dilation passes used to grow the mask
            into the "sure background" image. Defaults to 3, the value that
            was previously hard-coded.

    Returns:
        A tuple ``(markers, unknown, sure_bg, dist, sure_fg)``:
        ``markers`` is the labelled seed image (unknown pixels are 0, every
        connected-component label is shifted up by one), ``unknown`` is
        ``sure_bg - sure_fg``, ``sure_bg`` is the dilated mask, ``dist`` is
        the L2 distance transform, and ``sure_fg`` is the thresholded
        distance transform as ``uint8``.
    """
    kernel = np.ones((3, 3), np.uint8)
    sure_bg = cv2.dilate(bin_img, kernel, iterations=dilate_iterations)

    # Pixels far from the mask border are the most reliable object seeds.
    dist = cv2.distanceTransform(bin_img, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(dist, fg_ratio * dist.max(), 255, cv2.THRESH_BINARY)
    sure_fg = sure_fg.astype(np.uint8)

    # Whatever lies inside the dilated mask but is not a seed is undecided.
    unknown = cv2.subtract(sure_bg, sure_fg)

    _, markers = cv2.connectedComponents(sure_fg)
    # Shift labels so the component labelled 0 becomes 1, freeing 0 to mean
    # "unknown" for the watershed step.
    markers += 1
    markers[unknown == 255] = 0
    return markers, unknown, sure_bg, dist, sure_fg

def apply_watershed(img: np.ndarray, markers: np.ndarray) -> np.ndarray:
    """Run ``cv2.watershed`` on ``img`` seeded by ``markers``.

    Args:
        img: Image passed as the first argument to ``cv2.watershed``.
        markers: Seed/label image passed as the second argument.

    Returns:
        The marker array returned by ``cv2.watershed``.
    """
    return cv2.watershed(img, markers)

def extract_contours(img: np.ndarray, markers: np.ndarray) -> list:
    """Collect one external contour per segmented object label.

    Args:
        img: Unused; kept so existing callers do not need to change.
        markers: Label image after watershed. Labels greater than 1 are
            treated as objects.

    Returns:
        A list with the first external contour found for each object label
        (labels that yield no contour are skipped).
    """
    labels = np.unique(markers)
    # Select object labels by value rather than by position. With the marker
    # convention built by segment_image in this file (``markers += 1`` makes
    # the background 1, unknown pixels are 0) and negative boundary labels,
    # everything <= 1 is not an object. The previous ``labels[2:]`` slice
    # silently dropped a real object or included the background whenever
    # -1 or 1 happened to be absent from the label set.
    object_labels = labels[labels > 1]

    coins = []
    for label in object_labels:
        # Binary mask of just this label, in the uint8 form findContours needs.
        target = np.where(markers == label, 255, 0).astype(np.uint8)
        contours, _ = cv2.findContours(
            target, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        if contours:
            coins.append(contours[0])
    return coins

def draw_contours(img: np.ndarray, contours: list) -> np.ndarray:
    """Return a copy of ``img`` with every contour outlined.

    Args:
        img: Image to draw on; it is copied first, so the input is untouched.
        contours: Contours to draw (all of them are drawn, index ``-1``).

    Returns:
        The image returned by ``cv2.drawContours`` for the copied canvas.
    """
    canvas = img.copy()
    outline_color = (0, 23, 223)
    return cv2.drawContours(canvas, contours, -1, color=outline_color, thickness=2)

Step 3: Main Watershed Segmentation Function

def main_watershed(image_path: str):
    """Run the full watershed pipeline on one image and plot each stage.

    Loads the image, builds markers, shows a 2x2 figure of the intermediate
    masks, applies the watershed, then shows the final marker map and the
    image with contours drawn. Returns ``None``; if the image cannot be
    loaded an error is logged and the function returns early.

    Args:
        image_path: Path of the image file to segment.
    """
    img = load_image(image_path)
    if img is None:
        logging.error("Exiting: image could not be loaded.")
        return

    binary_mask = preprocess_image(img)['binary']
    markers, unknown, sure_bg, dist, sure_fg = segment_image(binary_mask)

    # Intermediate stages, laid out row by row in a 2x2 grid.
    stages = (
        (sure_bg, 'Sure Background'),
        (dist, 'Distance Transform'),
        (sure_fg, 'Sure Foreground'),
        (unknown, 'Unknown Region'),
    )
    fig, axes = plt.subplots(2, 2, figsize=(10, 10))
    for panel, (stage_img, stage_title) in zip(axes.flat, stages):
        panel.imshow(stage_img, cmap='gray')
        panel.set_title(stage_title)
        panel.axis('off')
    plt.tight_layout()
    plt.show()

    # Watershed itself, followed by contour extraction and overlay.
    markers = apply_watershed(img, markers)
    coins_img = draw_contours(img, extract_contours(img, markers))

    plt.figure(figsize=(6, 6))
    plt.imshow(markers, cmap="tab20b")
    plt.title("Markers after Watershed")
    plt.axis('off')
    plt.show()

    plt.figure(figsize=(6, 6))
    plt.imshow(cv2.cvtColor(coins_img, cv2.COLOR_BGR2RGB))
    plt.title("Final Image with Contours")
    plt.axis('off')
    plt.show()

# Script entry point: run the watershed pipeline on the sample image.
if __name__ == "__main__":
    # Path is relative to the current working directory.
    image_file = "bt1.jpg"  
    main_watershed(image_file)

Task 2: Graph Cut Segmentation

The Graph Cut algorithm segments an image into foreground and background by minimizing a cost function that sums the weights of the edges between segmented regions. In this example, the process involves:

Preprocessing the image (grayscale conversion, Gaussian blur, and thresholding)
Building a graph where each pixel is a node
Defining data and smoothness terms as edge weights
Running the maxflow algorithm to obtain a binary segmentation mask

Step 1: Import Additional Library

import maxflow

Step 2: Define the Graph Cut Segmentation Function

def graph_cut_segmentation(image_path, mu=100, sigma=20, smoothness=10):
    """Segment an image into two classes with a grid graph cut.

    The image is read in grayscale, blurred, and binarized with inverted
    Otsu thresholding. A grid graph is then built with one node per pixel,
    terminal edges from Gaussian likelihoods around ``mean +/- mu``, and
    4-neighbour edges weighted by intensity similarity.

    Args:
        image_path: Path of the image file to segment.
        mu: Offset added to / subtracted from the mean intensity to obtain
            the foreground / background model means.
        sigma: Standard deviation used in both the likelihoods and the
            neighbour weights. Must be non-zero.
        smoothness: Scale of the neighbour (smoothness) edge weights.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        The result of ``graph.get_grid_segments(nodeids)`` cast to ``uint8``,
        with the same shape as the image.

    Raises:
        ValueError: If ``sigma`` is zero or the image cannot be loaded.
    """
    # Checked first: a zero sigma would otherwise divide by zero below and
    # feed NaN capacities into the graph.
    if sigma == 0:
        raise ValueError("sigma must be non-zero.")

    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError("Error loading image. Check the file path.")
    blur = cv2.GaussianBlur(image, (3, 3), 0)
    _, binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    image = binary.astype(np.float32)

    # One graph node per pixel.
    graph = maxflow.GraphFloat()
    nodeids = graph.add_grid_nodes(image.shape)

    two_sigma_sq = 2 * sigma ** 2

    # Data term: Gaussian likelihoods, clipped so the log below stays finite.
    mean_intensity = np.mean(image)
    intensity_mean_fg = mean_intensity + mu
    intensity_mean_bg = mean_intensity - mu
    fg_likelihood = np.clip(
        np.exp(-((image - intensity_mean_fg) ** 2) / two_sigma_sq), 1e-10, 1
    )
    bg_likelihood = np.clip(
        np.exp(-((image - intensity_mean_bg) ** 2) / two_sigma_sq), 1e-10, 1
    )

    # Terminal edges for the whole grid in one call (replaces a per-pixel
    # Python loop): source capacity from the background cost, sink capacity
    # from the foreground cost.
    source_caps = (-np.log(bg_likelihood)).astype(np.float64)
    sink_caps = (-np.log(fg_likelihood)).astype(np.float64)
    graph.add_grid_tedges(nodeids, source_caps, sink_caps)

    # Smoothness term: weight of the edge from each pixel to its right / lower
    # neighbour. The last column / row has no such neighbour and stays 0.
    pixels = image.astype(np.float64)
    right_weights = np.zeros(pixels.shape)
    right_weights[:, :-1] = smoothness * np.exp(
        -(np.diff(pixels, axis=1) ** 2) / two_sigma_sq
    )
    down_weights = np.zeros(pixels.shape)
    down_weights[:-1, :] = smoothness * np.exp(
        -(np.diff(pixels, axis=0) ** 2) / two_sigma_sq
    )

    # Structure masks select the neighbour relative to the centre element;
    # symmetric=True adds the reverse edge with the same capacity.
    right_neighbor = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 0]])
    down_neighbor = np.array([[0, 0, 0], [0, 0, 0], [0, 1, 0]])
    graph.add_grid_edges(
        nodeids, weights=right_weights, structure=right_neighbor, symmetric=True
    )
    graph.add_grid_edges(
        nodeids, weights=down_weights, structure=down_neighbor, symmetric=True
    )

    # Solve the min-cut and read back the per-pixel segment.
    graph.maxflow()
    segmentation = graph.get_grid_segments(nodeids).astype(np.uint8)
    return segmentation

# Demo: run Graph Cut on the sample image and show it next to the original.
if __name__ == '__main__':
    image_path = "bt1.jpg"  
    segmented_image = graph_cut_segmentation(image_path)
    original_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Left panel: grayscale input; right panel: segmentation mask.
    panels = (
        (original_image, 'Original Image'),
        (segmented_image, 'Segmented Image (Graph Cut)'),
    )
    plt.figure(figsize=(12, 6))
    for position, (panel_img, panel_title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(panel_img, cmap='gray')
        plt.title(panel_title)
        plt.axis('off')

    plt.show()

Performance Comparison

The following code snippet measures and prints the execution times for both the Watershed and Graph Cut segmentation methods:

import time

def main_with_timing(image_path: str):
    """Time the Watershed and Graph Cut pipelines on one image and print both.

    Note that the Watershed figure covers everything ``main_watershed`` does,
    including its ``plt.show()`` calls, so it is not a pure algorithm timing.

    Args:
        image_path: Path of the image file passed to both pipelines.
    """
    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be distorted by system clock adjustments as time.time() can.
    start_time = time.perf_counter()
    main_watershed(image_path)
    watershed_time = time.perf_counter() - start_time

    # The segmentation result is not needed here; only the duration is.
    start_time = time.perf_counter()
    graph_cut_segmentation(image_path)
    graph_cut_time = time.perf_counter() - start_time

    print(f"Watershed algorithm execution time: {watershed_time:.4f} seconds")
    print(f"Graph Cut algorithm execution time: {graph_cut_time:.4f} seconds")

# Script entry point: time both segmentation methods on the sample image.
if __name__ == '__main__':
    # Absolute path under /content; adjust it to wherever the image is stored.
    image_file = "/content/bt1.jpg"  
    main_with_timing(image_file)

Conclusion

In this tutorial, you learned two advanced image segmentation techniques: the Watershed algorithm for separating overlapping objects and the Graph Cut algorithm for foreground-background segmentation. Both methods have their own strengths and are useful for different types of images. Experiment with the parameters and try these methods on various images to see which best suits your application!

Search This Blog

Think Tech Better