Texture Classification with GLCM and LBP Features in Python

Texture Classification with GLCM & LBP Features

In this tutorial, we demonstrate how to perform texture classification using a combination of GLCM (Gray-Level Co-occurrence Matrix) and LBP (Local Binary Pattern) features. We’ll download a dataset, extract features from images, and then train a Random Forest classifier to predict texture categories.

This tutorial is based on a previously documented texture-classification experiment.

Step 1: Download and Extract the Dataset

We use the third-party requests library and Python's built-in tarfile module to download and extract the Describable Textures Dataset (DTD). The dataset is distributed as a gzip-compressed tar archive (.tar.gz).

import requests
import tarfile
import os

# Download the dataset if not already downloaded
url = "https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz"
filename = "dtd-r1.0.1.tar.gz"

if not os.path.exists(filename):
    print("Downloading dataset...")
    # Stream into a temporary name first, so an interrupted download is never
    # mistaken for a complete archive by the existence check on the next run.
    partial_filename = filename + ".part"
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail loudly on an HTTP error instead of saving an error page as the archive.
        response.raise_for_status()
        with open(partial_filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    os.replace(partial_filename, filename)
    print("Download complete.")
else:
    print("Dataset already downloaded.")

# Extract the dataset if not already extracted
if not os.path.exists("dtd"):
    print("Extracting dataset...")
    with tarfile.open(filename, "r:gz") as tar:
        # Use the "data" extraction filter where this Python version provides it,
        # so archive members cannot be written outside the destination directory.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(filter="data")
        else:
            tar.extractall()
    print("Extraction complete.")
else:
    print("Dataset already extracted.")

Step 2: Import Required Libraries and Define Helper Functions

Next, we import libraries for image processing, feature extraction, machine learning, and visualization. We also define the dataset location, the texture categories to classify, and helper functions to load images and display sample images from the dataset. The GLCM and LBP feature extractors themselves are defined in Step 3.

import os
import cv2
import numpy as np
import glob
import joblib
import random
import matplotlib.pyplot as plt
import seaborn as sns
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from collections import Counter

# Dataset path and selected categories.
# Step 1 extracts the archive into a "dtd" folder in the current working
# directory, so the image folder is addressed relative to that same directory
# instead of through a hard-coded absolute path.
DATASET_PATH = os.path.join("dtd", "images")
# The position of a category in this list is used as its integer class label.
CATEGORIES = ["banded", "blotchy", "braided", "bubbly", "bumpy"]

def load_image(image_path, size=(256, 256)):
    """
    Loads an image file as a resized grayscale array with values divided by 255.

    Args:
        image_path: Path of the image file to read.
        size: Target size passed to cv2.resize.

    Returns:
        The resized image divided by 255.0, or None (after printing an error
        message) when the file could not be read.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is not None:
        return cv2.resize(gray, size) / 255.0
    # An unreadable file shows up as a None result, not as an exception.
    print(f"Error: Could not read image {image_path}")
    return None

def display_sample_images():
    """
    Displays random sample images from the dataset with their category labels.

    Up to two images are drawn at random from every category in CATEGORIES and
    shown in a two-row grid with one column pair-slot per category. Grid cells
    without an image (too few files, or an unreadable file) are left blank.
    """
    per_category = 2

    # Keep every path paired with its own category so the title stays correct
    # even when a category contributes fewer than `per_category` images.
    samples = []
    for category in CATEGORIES:
        pattern = os.path.join(DATASET_PATH, category, "*.jpg")
        images = glob.glob(pattern)
        chosen = random.sample(images, min(per_category, len(images)))
        samples.extend((path, category) for path in chosen)

    # Size the grid from the category list instead of assuming five categories.
    columns = max(1, len(CATEGORIES))
    _, axes = plt.subplots(per_category, columns, figsize=(12, 6), squeeze=False)
    axes = axes.ravel()

    # Hide the frame and ticks everywhere first so unused cells stay clean.
    for ax in axes:
        ax.axis("off")

    for ax, (image_path, category) in zip(axes, samples):
        img = load_image(image_path)
        if img is not None:
            ax.imshow(img, cmap="gray")
            ax.set_title(f"Category: {category}")
    plt.tight_layout()
    plt.show()

Step 3: Feature Extraction Using GLCM and LBP

We extract texture features using two methods:

GLCM Features: Contrast, Correlation, Energy, and Homogeneity.
LBP Features: Local binary pattern histogram.

def extract_glcm_features(image):
    """
    Computes GLCM features: Contrast, Correlation, Energy, Homogeneity.

    Args:
        image: 2-D grayscale image scaled so that multiplying by 255 gives
            8-bit gray levels (as produced by load_image).

    Returns:
        A list of four values in the order contrast, correlation, energy,
        homogeneity, taken from a co-occurrence matrix built with distance 1
        and angle 0.
    """
    # Round to the nearest gray level instead of truncating: a float round
    # trip can land just below the intended integer, and a plain uint8 cast
    # would then drop it by one level. Clipping prevents uint8 wrap-around.
    image = np.clip(np.rint(image * 255), 0, 255).astype(np.uint8)
    glcm = graycomatrix(image, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    # graycoprops returns one value per (distance, angle); only one pair is used.
    features = [graycoprops(glcm, prop)[0, 0] for prop in ('contrast', 'correlation', 'energy', 'homogeneity')]
    return features

def extract_lbp_features(image, P=8, R=1):
    """
    Computes LBP histogram features.

    Args:
        image: 2-D grayscale image scaled so that multiplying by 255 gives
            8-bit gray levels (as produced by load_image).
        P: Number of neighbour points passed to local_binary_pattern.
        R: Radius passed to local_binary_pattern.

    Returns:
        A list of P + 2 floats: the histogram of "uniform" LBP codes,
        normalized so the entries sum to (approximately) 1.
    """
    # Round to the nearest gray level instead of truncating: a float round
    # trip can land just below the intended integer, and a plain uint8 cast
    # would then drop it by one level. Clipping prevents uint8 wrap-around.
    image_int = np.clip(np.rint(image * 255), 0, 255).astype(np.uint8)
    lbp = local_binary_pattern(image_int, P, R, method="uniform")
    # Explicit edges 0, 1, ..., P + 2 give P + 2 unit-width bins; a separate
    # `range` argument would be redundant once the edges are spelled out.
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, P + 3))
    hist = hist.astype("float")
    # The small epsilon guards against division by zero for an empty histogram.
    hist /= (hist.sum() + 1e-6)
    return hist.tolist()

def extract_features(image_path):
    """
    Builds the combined feature vector for a single image file.

    The image is read with load_image; its GLCM features come first in the
    returned list, followed by its LBP histogram. Returns None when the image
    could not be loaded.
    """
    image = load_image(image_path)
    if image is not None:
        glcm_part = extract_glcm_features(image)
        lbp_part = extract_lbp_features(image)
        return glcm_part + lbp_part
    return None

Step 4: Load Dataset, Train Classifier, and Evaluate

We load the dataset, extract features from each image, and train a Random Forest classifier. Finally, we evaluate the model's performance and visualize the results.

def load_dataset():
    """
    Loads the dataset, extracts features, and returns the feature matrix and labels.

    Every "*.jpg" file under DATASET_PATH/<category> is processed for each
    category in CATEGORIES; the label of an image is the index of its category
    in that list. Images for which extract_features returns None are skipped.

    Returns:
        A tuple (X, y) of NumPy arrays holding one feature vector and one
        integer label per successfully processed image.
    """
    data, labels = [], []
    for label, category in enumerate(CATEGORIES):
        category_path = os.path.join(DATASET_PATH, category)
        # glob returns files in arbitrary order; sorting keeps the sample order
        # (and therefore any seeded train/test split) reproducible across runs.
        image_paths = sorted(glob.glob(os.path.join(category_path, "*.jpg")))
        if not image_paths:
            # Surface a wrong path or missing category instead of silently
            # producing a dataset that lacks this class.
            print(f"Warning: no .jpg images found in {category_path}")
        for image_path in image_paths:
            features = extract_features(image_path)
            if features is not None:
                data.append(features)
                labels.append(label)
    return np.array(data), np.array(labels)

def train_classifier(X_train, y_train):
    """
    Fits a Random Forest on the training data and returns the fitted model.

    The forest uses 100 trees with a maximum depth of 10 and a fixed random
    seed of 42.
    """
    forest = RandomForestClassifier(
        random_state=42,
        max_depth=10,
        n_estimators=100,
    )
    # fit() hands back the estimator itself, so it can be returned directly.
    return forest.fit(X_train, y_train)

def evaluate_model(y_test, y_pred):
    """
    Evaluates the model using accuracy, classification report, and confusion matrix.

    Args:
        y_test: True integer labels (indices into CATEGORIES).
        y_pred: Predicted integer labels (indices into CATEGORIES).

    Prints the accuracy and a per-category classification report, then shows
    the confusion matrix as a heatmap.
    """
    # Pin the label set to every known category so the report rows and the
    # matrix axes always line up with CATEGORIES, even when a class happens to
    # be absent from both y_test and y_pred.
    label_ids = list(range(len(CATEGORIES)))

    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    report = classification_report(y_test, y_pred, labels=label_ids, target_names=CATEGORIES)
    print("\nClassification Report:\n", report)
    cm = confusion_matrix(y_test, y_pred, labels=label_ids)
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="coolwarm", xticklabels=CATEGORIES, yticklabels=CATEGORIES)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.show()

def visualize_data_distribution(y):
    """
    Visualizes the class distribution in the dataset.

    Args:
        y: Iterable of integer labels (indices into CATEGORIES).

    Shows a bar chart with one bar per category, including categories that
    have no samples.
    """
    counter = Counter(y)
    # Look up every category by its index so each bar is tied to its own name;
    # relabelling ticks by position would mislabel bars when a class is missing.
    counts = [counter.get(label, 0) for label in range(len(CATEGORIES))]
    plt.figure(figsize=(7, 5))
    sns.barplot(x=CATEGORIES, y=counts, palette="muted")
    plt.xlabel("Categories")
    plt.ylabel("Count")
    plt.title("Dataset Class Distribution")
    plt.show()

# Main Execution
def main():
    """
    Runs the full pipeline: load data, visualize it, train, and evaluate.

    Stops early with a message when no images could be loaded.
    """
    print("Loading dataset...")
    X, y = load_dataset()
    if len(y) == 0:
        # Without this guard the failure would surface later as an obscure
        # plotting or splitting error.
        print(f"No images could be loaded from {DATASET_PATH}; nothing to train on.")
        return
    display_sample_images()
    visualize_data_distribution(y)
    # Stratify so the train and test sets keep the same class proportions;
    # this needs at least two samples per class.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    classifier = train_classifier(X_train, y_train)
    y_pred = classifier.predict(X_test)
    evaluate_model(y_test, y_pred)

if __name__ == '__main__':
    main()

Conclusion

In this tutorial, you learned how to download and extract a texture dataset, extract GLCM and LBP features from images, and train a Random Forest classifier for texture classification. You also saw how to evaluate your model's performance using accuracy metrics and a confusion matrix. Experiment with different feature extraction methods and classifier settings to further improve performance!

Search This Blog

Think Tech Better