Image Segmentation Techniques in Python
This tutorial demonstrates two powerful image segmentation techniques using Python and OpenCV. In the first part, you'll learn how to segment an image using the Watershed algorithm – a process that includes grayscale conversion, Gaussian blurring, thresholding, and distance transformation. In the second part, the Graph Cut algorithm is applied to segment an image by separating the foreground and background using a cost function based on edge weights.
This tutorial is based on a previously documented lab experiment.
Task 1: Watershed Algorithm Segmentation
The Watershed algorithm is used to separate overlapping objects in an image. In this example, we perform the following steps:
- Load the image using OpenCV
- Preprocess the image by converting it to grayscale, applying Gaussian blur, and thresholding
- Compute the distance transform and segment the image using markers
- Extract contours and overlay them on the original image
Step 1: Import Required Libraries
import cv2
import numpy as np
import logging
from matplotlib import pyplot as plt
from IPython.display import Image, display
Step 2: Define Helper Functions
The following functions handle image display, loading, preprocessing, segmentation, and contour extraction:
def imshow(img, ax=None):
    """Display an OpenCV image inline or on a matplotlib axes.

    Args:
        img: Image array in OpenCV layout; colour images are expected in
            BGR channel order, 2-D arrays are treated as grayscale.
        ax: Optional matplotlib axes. When omitted, the image is encoded
            as JPEG and shown through IPython's ``display``.
    """
    if ax is None:
        ok, encoded = cv2.imencode(".jpg", img)
        if not ok:
            logging.error("Failed to encode image for display.")
            return
        # IPython's Image is documented to take raw image bytes, so hand it
        # the encoded buffer as bytes rather than the ndarray itself.
        display(Image(data=encoded.tobytes()))
        return

    if img.ndim == 2:
        # A single-channel image has no BGR order to swap; converting it
        # with COLOR_BGR2RGB would fail, so plot it directly in gray.
        ax.imshow(img, cmap="gray")
    else:
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    ax.axis("off")
def load_image(image_path: str) -> np.ndarray:
    """Read an image from disk with OpenCV and log the outcome.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        Whatever ``cv2.imread`` produced. That is ``None`` when the image
        could not be read, so callers must check before using the result.
    """
    img = cv2.imread(image_path)
    loaded = img is not None
    if loaded:
        logging.info(f"Image loaded successfully from {image_path}")
    else:
        logging.error(f"Failed to load image from path: {image_path}")
    return img
def preprocess_image(img: np.ndarray) -> dict:
    """Produce the grayscale, blurred and binarised versions of an image.

    Args:
        img: BGR image (it is converted with ``COLOR_BGR2GRAY``).

    Returns:
        Dict with keys ``'gray'``, ``'blur'`` and ``'binary'`` holding the
        result of each successive stage.
    """
    stages = {'gray': cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)}
    # A small 3x3 Gaussian smooths noise before the global threshold.
    stages['blur'] = cv2.GaussianBlur(stages['gray'], (3, 3), 0)
    # Otsu picks the threshold automatically; the result is inverted.
    otsu_inverted = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    stages['binary'] = cv2.threshold(stages['blur'], 0, 255, otsu_inverted)[1]
    return stages
def segment_image(bin_img: np.ndarray) -> tuple:
    """Build the watershed seed markers from a binary mask.

    Args:
        bin_img: Binary image (0 / 255) such as the ``'binary'`` output of
            ``preprocess_image``.

    Returns:
        Tuple ``(markers, unknown, sure_bg, dist, sure_fg)``:
        the seed label image, the undecided band between background and
        foreground, the dilated mask, the distance transform, and the
        thresholded foreground cores.
    """
    # Foreground cores: pixels at least half as far from the mask edge as
    # the farthest pixel is.
    dist = cv2.distanceTransform(bin_img, cv2.DIST_L2, 5)
    _, fg_float = cv2.threshold(dist, 0.5 * dist.max(), 255, cv2.THRESH_BINARY)
    sure_fg = fg_float.astype(np.uint8)

    # Dilating the mask gives the region outside of which is surely background.
    sure_bg = cv2.dilate(bin_img, np.ones((3, 3), np.uint8), iterations=3)
    unknown = cv2.subtract(sure_bg, sure_fg)

    # Shift component labels up by one so that 0 is free to mean
    # "undecided" for the pixels in the unknown band.
    _, labels = cv2.connectedComponents(sure_fg)
    markers = labels + 1
    markers[unknown == 255] = 0
    return markers, unknown, sure_bg, dist, sure_fg
def apply_watershed(img: np.ndarray, markers: np.ndarray) -> np.ndarray:
    """Run OpenCV's watershed on ``img`` seeded with ``markers``.

    Args:
        img: Image passed as the first argument to ``cv2.watershed``.
        markers: Seed label image passed as the second argument.

    Returns:
        The label image returned by ``cv2.watershed``.
    """
    return cv2.watershed(img, markers)
def extract_contours(img: np.ndarray, markers: np.ndarray) -> list:
    """Collect one outer contour per segmented object in a label image.

    Args:
        img: Unused; kept so existing callers keep working.
        markers: Label image after watershed. Labels greater than 1 are
            treated as objects; -1, 0 and 1 are skipped.

    Returns:
        List with the first external contour found for each object label.
    """
    labels = np.unique(markers)
    # Select object labels by value instead of by position: slicing off the
    # first two unique values silently drops a real object whenever the
    # boundary label (-1) or the background label (1) is absent.
    object_labels = labels[labels > 1]

    coins = []
    for label in object_labels:
        target = np.where(markers == label, 255, 0).astype(np.uint8)
        contours, _ = cv2.findContours(
            target, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        if contours:
            coins.append(contours[0])
    return coins
def draw_contours(img: np.ndarray, contours: list) -> np.ndarray:
    """Return a copy of ``img`` with every contour outlined.

    Args:
        img: Image to draw on; it is copied, not modified.
        contours: Contours to outline (all of them are drawn).

    Returns:
        The annotated copy.
    """
    canvas = img.copy()
    outline_colour = (0, 23, 223)
    return cv2.drawContours(canvas, contours, -1, color=outline_colour, thickness=2)
Step 3: Main Watershed Segmentation Function
def main_watershed(image_path: str):
    """Run the whole watershed pipeline on one image and plot each stage.

    Args:
        image_path: Path of the image to segment.

    Returns:
        None. Logs an error and returns early if the image cannot be loaded.
    """
    img = load_image(image_path)
    if img is None:
        logging.error("Exiting: image could not be loaded.")
        return

    # Seed markers are derived from the thresholded image only.
    binary = preprocess_image(img)['binary']
    markers, unknown, sure_bg, dist, sure_fg = segment_image(binary)

    # Intermediate results, laid out row by row on a 2x2 grid.
    panels = (
        (sure_bg, 'Sure Background'),
        (dist, 'Distance Transform'),
        (sure_fg, 'Sure Foreground'),
        (unknown, 'Unknown Region'),
    )
    fig, axes = plt.subplots(2, 2, figsize=(10, 10))
    for ax, (panel, title) in zip(axes.flat, panels):
        ax.imshow(panel, cmap='gray')
        ax.set_title(title)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

    # Flood from the seeds, then outline every resulting region.
    markers = apply_watershed(img, markers)
    coins_img = draw_contours(img, extract_contours(img, markers))

    plt.figure(figsize=(6, 6))
    plt.imshow(markers, cmap="tab20b")
    plt.title("Markers after Watershed")
    plt.axis('off')
    plt.show()

    plt.figure(figsize=(6, 6))
    plt.imshow(cv2.cvtColor(coins_img, cv2.COLOR_BGR2RGB))
    plt.title("Final Image with Contours")
    plt.axis('off')
    plt.show()
# Script entry point: run the watershed demo on the sample image.
if __name__ == "__main__":
    image_file = "bt1.jpg"
    main_watershed(image_path=image_file)
Task 2: Graph Cut Segmentation
The Graph Cut algorithm segments an image into foreground and background by minimizing a cost function that sums the weights of the edges between segmented regions. In this example, the process involves:
- Preprocessing the image (grayscale conversion, Gaussian blur, and thresholding)
- Building a graph where each pixel is a node
- Defining data and smoothness terms as edge weights
- Running the maxflow algorithm to obtain a binary segmentation mask
Step 1: Import Additional Library
import maxflow
Step 2: Define the Graph Cut Segmentation Function
def graph_cut_segmentation(image_path, mu=100, sigma=20):
    """Segment an image into two regions with a max-flow / min-cut graph.

    The image is loaded in grayscale, blurred and Otsu-thresholded; every
    pixel of the thresholded image becomes a graph node. Terminal edges
    carry the data term and edges between 4-connected neighbours carry the
    smoothness term.

    Args:
        image_path: Path of the image to segment.
        mu: Offset added to / subtracted from the mean intensity to obtain
            the foreground / background model means.
        sigma: Spread of the Gaussian used for both the likelihoods and the
            neighbour weights. Must be non-zero.

    Returns:
        ``uint8`` array with the image's shape holding the segment of each
        pixel as reported by ``get_grid_segments`` (0 or 1).

    Raises:
        ValueError: If ``sigma`` is zero or the image cannot be loaded.
    """
    if sigma == 0:
        # Every weight divides by 2 * sigma**2; fail loudly instead of
        # letting the vectorised maths produce NaNs.
        raise ValueError("sigma must be non-zero.")

    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError("Error loading image. Check the file path.")

    blur = cv2.GaussianBlur(image, (3, 3), 0)
    _, image = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    image = image.astype(np.float32)

    # One node per pixel.
    graph = maxflow.GraphFloat()
    nodeids = graph.add_grid_nodes(image.shape)

    two_sigma_sq = 2 * sigma ** 2

    # Data term: Gaussian likelihoods around the two model means, clipped
    # away from zero so the logarithm below stays finite.
    intensity_mean_fg = np.mean(image) + mu
    intensity_mean_bg = np.mean(image) - mu
    fg_likelihood = np.exp(-((image - intensity_mean_fg) ** 2) / two_sigma_sq)
    bg_likelihood = np.exp(-((image - intensity_mean_bg) ** 2) / two_sigma_sq)
    fg_likelihood = np.clip(fg_likelihood, 1e-10, 1)
    bg_likelihood = np.clip(bg_likelihood, 1e-10, 1)

    # All terminal edges in one call: source capacity from the background
    # likelihood, sink capacity from the foreground likelihood.
    graph.add_grid_tedges(nodeids, -np.log(bg_likelihood), -np.log(fg_likelihood))

    # Smoothness term: weight between each pixel and its right / lower
    # neighbour. The last column / row keeps weight 0; those entries would
    # point outside the grid and are not turned into edges.
    pixels = image.astype(np.float64)
    right_weights = np.zeros(pixels.shape)
    right_weights[:, :-1] = 10 * np.exp(
        -((pixels[:, :-1] - pixels[:, 1:]) ** 2) / two_sigma_sq
    )
    down_weights = np.zeros(pixels.shape)
    down_weights[:-1, :] = 10 * np.exp(
        -((pixels[:-1, :] - pixels[1:, :]) ** 2) / two_sigma_sq
    )

    # The structure array marks which neighbour of the centre node to
    # connect; symmetric=True adds the reverse edge with equal capacity.
    to_right = np.array([[0, 0, 0],
                         [0, 0, 1],
                         [0, 0, 0]])
    to_below = np.array([[0, 0, 0],
                         [0, 0, 0],
                         [0, 1, 0]])
    graph.add_grid_edges(nodeids, weights=right_weights, structure=to_right, symmetric=True)
    graph.add_grid_edges(nodeids, weights=down_weights, structure=to_below, symmetric=True)

    # Solve and read back the segment of every pixel.
    graph.maxflow()
    return graph.get_grid_segments(nodeids).astype(np.uint8)
# Demo: segment the sample image with Graph Cut and show it beside the input.
if __name__ == '__main__':
    image_path = "bt1.jpg"
    segmented_image = graph_cut_segmentation(image_path)
    original_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    panels = (
        (original_image, 'Original Image'),
        (segmented_image, 'Segmented Image (Graph Cut)'),
    )
    plt.figure(figsize=(12, 6))
    for position, (panel, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(panel, cmap='gray')
        plt.title(title)
        plt.axis('off')
    plt.show()
Performance Comparison
The following code snippet measures and prints the execution times for both the Watershed and Graph Cut segmentation methods:
import time
def main_with_timing(image_path: str):
    """Time the watershed and Graph Cut pipelines on one image and print both.

    Args:
        image_path: Path of the image handed to both pipelines.

    Returns:
        None. The two durations are printed in seconds.
    """
    # perf_counter is the clock intended for measuring short intervals;
    # time.time() follows the wall clock and can jump if it is adjusted.
    # Note: main_watershed also draws and shows its figures, so its timing
    # includes plotting, whereas the Graph Cut call only computes the mask.
    start = time.perf_counter()
    main_watershed(image_path)
    watershed_time = time.perf_counter() - start

    start = time.perf_counter()
    graph_cut_segmentation(image_path)
    graph_cut_time = time.perf_counter() - start

    print(f"Watershed algorithm execution time: {watershed_time:.4f} seconds")
    print(f"Graph Cut algorithm execution time: {graph_cut_time:.4f} seconds")
# Script entry point: time both segmentation methods on the sample image.
if __name__ == '__main__':
    image_file = "/content/bt1.jpg"
    main_with_timing(image_path=image_file)
Conclusion
In this tutorial, you learned two advanced image segmentation techniques: the Watershed algorithm for separating overlapping objects and the Graph Cut algorithm for foreground-background segmentation. Both methods have their own strengths and are useful for different types of images. Experiment with the parameters and try these methods on various images to see which best suits your application!






Comments
Post a Comment