trying example from keras wiki
This commit is contained in:
parent 1063b60a98
commit d231ec9e57
main.py | 557
@@ -1,158 +1,429 @@
from os import environ, makedirs, path, walk, listdir
from shutil import move
from dotenv import load_dotenv
from base64 import b64decode
import re
import requests
import cv2
import os

# Select the Keras backend before keras is imported for the first time;
# setting KERAS_BACKEND after the import would have no effect.
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras

"""
|
||||
Title: OCR model for reading Captchas
|
||||
Author: [A_K_Nain](https://twitter.com/A_K_Nain)
|
||||
Date created: 2020/06/14
|
||||
Last modified: 2024/03/13
|
||||
Description: How to implement an OCR model using CNNs, RNNs and CTC loss.
|
||||
Accelerator: GPU
|
||||
Converted to Keras 3 by: [Sitam Meur](https://github.com/sitamgithub-MSIT)
|
||||
"""
|
||||
|
||||
"""
|
||||
## Introduction
|
||||
|
||||
This example demonstrates a simple OCR model built with the Functional API. Apart from
|
||||
combining CNN and RNN, it also illustrates how you can instantiate a new layer
|
||||
and use it as an "Endpoint layer" for implementing CTC loss. For a detailed
|
||||
guide to layer subclassing, please check out
|
||||
[this page](https://keras.io/guides/making_new_layers_and_models_via_subclassing/)
|
||||
in the developer guides.
|
||||
"""
|
||||
|
||||
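
"""
As a minimal sketch (not part of the original example) of the "Endpoint layer"
pattern mentioned above: a layer that takes targets and predictions, registers
a loss via `add_loss()`, and passes the predictions through unchanged. The
`CTCLayer` defined later in this file follows the same pattern with CTC loss.
"""


class ExampleEndpointLayer(keras.layers.Layer):
    """Illustrative endpoint layer; the loss used here is an arbitrary stand-in."""

    def call(self, y_true, y_pred):
        # Register a training loss on the layer; any differentiable loss works.
        self.add_loss(keras.ops.mean(keras.ops.square(y_true - y_pred)))
        # At inference time the layer simply forwards the predictions.
        return y_pred
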
"""
|
||||
## Setup
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
os.environ["KERAS_BACKEND"] = "tensorflow"
|
||||
|
||||
import numpy as np
|
||||
from keras.callbacks import EarlyStopping, ModelCheckpoint
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
load_dotenv()
|
||||
from pathlib import Path
|
||||
|
||||
# Constants
|
||||
IMAGE_HEIGHT = 70
|
||||
IMAGE_WIDTH = 200
|
||||
DOWNLOAD_PATH = environ.get("DOWNLOAD_PATH")
|
||||
TESTING_PATH = environ.get("TESTING_PATH")
|
||||
TRAINING_PATH = environ.get("TRAINING_PATH")
|
||||
PERCENT_OF_TESTING = int(environ.get("PERCENT_OF_TESTING"))
|
||||
import tensorflow as tf
|
||||
import keras
|
||||
from keras import ops
|
||||
from keras import layers
|
||||
|
||||
def prepare_dirs():
    """Create necessary directories for downloading and storing images."""
    makedirs(DOWNLOAD_PATH, exist_ok=True)
    makedirs(TESTING_PATH, exist_ok=True)
    makedirs(TRAINING_PATH, exist_ok=True)


def fetch_captcha(id):
    """Fetch a captcha image by its ID and save it to the download path."""
    try:
        response = requests.get(f"{environ.get('CAPTCHA_AGGREGATOR_API')}/captcha/{id}")
        response.raise_for_status()
        captcha = response.json()["captcha"]
        captcha_file_path = path.join(DOWNLOAD_PATH, f"{captcha['hash']}_{captcha['solution']}.jpeg")
        with open(captcha_file_path, 'wb') as captcha_file:
            captcha_file.write(b64decode(captcha['image']))
    except requests.RequestException as e:
        print(f"Error fetching captcha {id}: {e}")


def search_saved_captcha(hash, path):
    """Check if a captcha with the given hash exists in the specified path."""
    regex = re.compile(f"{hash}_\\w{{6}}\\.jpeg")
    for _, _, files in walk(path):
        for file in files:
            if regex.match(file):
                return True
    return False
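
# Illustration of the filename convention matched above ("<hash>_<solution>.jpeg"
# with a six-character solution); the hash and solution here are made up:
assert re.compile("d41d8cd9_\\w{6}\\.jpeg").match("d41d8cd9_a1b2c3.jpeg")

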
def search_and_download_new(captchas):
    """Search for new captchas and download them if they don't already exist."""
    for captcha in captchas:
        id = captcha["id"]
        hash = captcha["hash"]
        if not (search_saved_captcha(hash, TRAINING_PATH) or
                search_saved_captcha(hash, TESTING_PATH) or
                search_saved_captcha(hash, DOWNLOAD_PATH)):
            fetch_captcha(id)


def sort_datasets():
    """Sort downloaded captchas into training and testing datasets."""
    amount_of_new_data = len([file for file in listdir(DOWNLOAD_PATH) if path.isfile(path.join(DOWNLOAD_PATH, file))])
    amount_to_send_to_test = round(amount_of_new_data * (PERCENT_OF_TESTING / 100))

    files = listdir(DOWNLOAD_PATH)
    for index, file in enumerate(files):
        if index < amount_to_send_to_test:
            move(path.join(DOWNLOAD_PATH, file), TESTING_PATH)
        else:
            move(path.join(DOWNLOAD_PATH, file), TRAINING_PATH)
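
# Worked example of the split arithmetic above, with hypothetical numbers: 40
# newly downloaded files at PERCENT_OF_TESTING = 20 put round(40 * 20 / 100) = 8
# files into the testing set and the remaining 32 into training.
assert round(40 * (20 / 100)) == 8

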
def download_dataset():
    """Download the dataset of captchas and sort them into training and testing sets."""
    prepare_dirs()
    try:
        response = requests.get(f"{environ.get('CAPTCHA_AGGREGATOR_API')}/captcha/all")
        response.raise_for_status()
        captchas = response.json()["captchas"]
        search_and_download_new(captchas)
        sort_datasets()
    except requests.RequestException as e:
        print(f"Error downloading dataset: {e}")


def load_dataset(dataset_path):
    """Load images and their corresponding solutions from the specified dataset path."""
    images = []
    solutions = []
    for filename in listdir(dataset_path):
        img = cv2.imread(path.join(dataset_path, filename))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = img / 255.0
        # Add the channel axis expected by Conv2D: (height, width) -> (height, width, 1).
        img = np.expand_dims(img, axis=-1)
        images.append(img)
        solution = path.splitext(filename)[0].split('_')[1]
        solutions.append(solution)

    unique_solutions = sorted(set(solutions))
    solution_to_label = {solution: i for i, solution in enumerate(unique_solutions)}
    labels = [solution_to_label[solution] for solution in solutions]

    return np.array(images), np.array(labels), unique_solutions


def load_training_dataset():
    """Load the training dataset."""
    return load_dataset(TRAINING_PATH)


def load_testing_dataset():
    """Load the testing dataset."""
    return load_dataset(TESTING_PATH)


"""
|
||||
## Load the data: [Captcha Images](https://www.kaggle.com/fournierp/captcha-version-2-images)
|
||||
Let's download the data.
|
||||
"""
|
||||
|
||||
|
||||
def train_nn():
|
||||
"""Train the neural network on the training dataset."""
|
||||
training_images, training_labels, unique_solutions = load_training_dataset()
|
||||
testing_images, testing_labels = (None, None)
|
||||
|
||||
if PERCENT_OF_TESTING > 0:
|
||||
testing_images, testing_labels, _ = load_testing_dataset()
|
||||
"""shell
|
||||
curl -LO https://github.com/AakashKumarNain/CaptchaCracker/raw/master/captcha_images_v2.zip
|
||||
unzip -qq captcha_images_v2.zip
|
||||
"""
|
||||
|
||||
    model = keras.Sequential([
        keras.layers.Conv2D(128, (3, 3), activation='relu', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 1)),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(256, (3, 3), activation='relu'),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(256, (3, 3), activation='relu'),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),  # Dropout for regularization
        keras.layers.Dense(len(unique_solutions), activation='softmax')  # Output layer
    ])

    model.summary()
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

"""
|
||||
The dataset contains 1040 captcha files as `jpeg` images. The label for each sample is a string,
|
||||
the name of the file (minus the file extension).
|
||||
We will map each character in the string to an integer for training the model. Similary,
|
||||
we will need to map the predictions of the model back to strings. For this purpose
|
||||
we will maintain two dictionaries, mapping characters to integers, and integers to characters,
|
||||
respectively.
|
||||
"""
|
||||
|
||||
    callbacks = [
        EarlyStopping(monitor='accuracy', patience=3),
        ModelCheckpoint('best_model.keras', save_best_only=True)
    ]

    EPOCHS = 100
    BATCH_SIZE = 8

    if PERCENT_OF_TESTING > 0:
        model.fit(np.array(training_images), np.array(training_labels),
                  epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=callbacks,
                  validation_data=(np.array(testing_images), np.array(testing_labels)),
                  )
    else:
        model.fit(np.array(training_images), np.array(training_labels),
                  epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=callbacks
                  )

    keras.saving.save_model(model, 'captcha_solver.keras')


# Path to the data directory
data_dir = Path("./datasets/training")
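
# A sketch of reusing the classifier saved by train_nn() (an assumption: a
# previous run already wrote 'captcha_solver.keras'; skipped otherwise):
if path.isfile('captcha_solver.keras'):
    restored_classifier = keras.saving.load_model('captcha_solver.keras')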

# Get list of all the images
images = sorted(list(map(str, list(data_dir.glob("*.jpeg")))))
labels = [img.split(os.path.sep)[-1].split(".jpeg")[0].split("_")[1].upper() for img in images]
characters = set(char for label in labels for char in label)
characters = sorted(list(characters))

print("Number of images found: ", len(images))
print("Number of labels found: ", len(labels))
print("Number of unique characters: ", len(characters))
print("Characters present: ", characters)

# Batch size for training and validation
batch_size = 16

# Desired image dimensions
img_width = 200
img_height = 70

# Factor by which the image is going to be downsampled
# by the convolutional blocks. We will be using two
# convolution blocks and each block will have
# a pooling layer which downsamples the features by a factor of 2.
# Hence the total downsampling factor would be 4.
downsample_factor = 4

# Maximum length of any captcha in the dataset
max_length = max([len(label) for label in labels])
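
# Sanity check of the downsampling arithmetic described above: two 2x2 poolings
# shrink the 200-pixel width to 200 // 4 = 50 RNN time steps and the 70-pixel
# height to 70 // 4 = 17 feature rows.
assert img_width // downsample_factor == 50
assert img_height // downsample_factor == 17
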
"""
|
||||
## Preprocessing
|
||||
"""
|
||||
|
||||
|
||||
# Mapping characters to integers
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)

# Mapping integers back to original characters
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
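
# Round-trip sanity check for the two lookup tables (assumes at least one label
# was found above): characters -> ids -> characters should reproduce the label.
_ids = char_to_num(tf.strings.unicode_split(labels[0], input_encoding="UTF-8"))
assert tf.strings.reduce_join(num_to_char(_ids)).numpy().decode("utf-8") == labels[0]

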
def split_data(images, labels, train_size=0.9, shuffle=True):
    # 1. Get the total size of the dataset
    size = len(images)
    # 2. Make an indices array and shuffle it, if required
    indices = ops.arange(size)
    if shuffle:
        indices = keras.random.shuffle(indices)
    # 3. Get the size of training samples
    train_samples = int(size * train_size)
    # 4. Split data into training and validation sets
    x_train, y_train = images[indices[:train_samples]], labels[indices[:train_samples]]
    x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]]
    return x_train, x_valid, y_train, y_valid


# Splitting data into training and validation sets
x_train, x_valid, y_train, y_valid = split_data(np.array(images), np.array(labels))


def encode_single_sample(img_path, label):
    # 1. Read image
    img = tf.io.read_file(img_path)
    # 2. Decode and convert to grayscale
    img = tf.io.decode_jpeg(img, channels=1)
    # 3. Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # 4. Resize to the desired size
    img = ops.image.resize(img, [img_height, img_width])
    # 5. Transpose the image because we want the time
    # dimension to correspond to the width of the image.
    img = ops.transpose(img, axes=[1, 0, 2])
    # 6. Map the characters in label to numbers
    label = char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
    # 7. Return a dict as our model is expecting two inputs
    return {"image": img, "label": label}


"""
|
||||
## Create `Dataset` objects
|
||||
"""
|
||||
|
||||
|
||||
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
|
||||
train_dataset = (
|
||||
train_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
|
||||
.batch(batch_size)
|
||||
.prefetch(buffer_size=tf.data.AUTOTUNE)
|
||||
)
|
||||
|
||||
validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
|
||||
validation_dataset = (
|
||||
validation_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
|
||||
.batch(batch_size)
|
||||
.prefetch(buffer_size=tf.data.AUTOTUNE)
|
||||
)
|
||||
|
||||
"""
|
||||
## Visualize the data
|
||||
"""
|
||||
|
||||
|
||||
_, ax = plt.subplots(4, 4, figsize=(10, 5))
|
||||
for batch in train_dataset.take(1):
|
||||
images = batch["image"]
|
||||
labels = batch["label"]
|
||||
for i in range(16):
|
||||
img = (images[i] * 255).numpy().astype("uint8")
|
||||
label = tf.strings.reduce_join(num_to_char(labels[i])).numpy().decode("utf-8")
|
||||
ax[i // 4, i % 4].imshow(img[:, :, 0].T, cmap="gray")
|
||||
ax[i // 4, i % 4].set_title(label)
|
||||
ax[i // 4, i % 4].axis("off")
|
||||
plt.show()
|
||||
|
||||
"""
|
||||
## Model
|
||||
"""
|
||||
|
||||
|
||||
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    label_length = ops.cast(ops.squeeze(label_length, axis=-1), dtype="int32")
    input_length = ops.cast(ops.squeeze(input_length, axis=-1), dtype="int32")
    sparse_labels = ops.cast(
        ctc_label_dense_to_sparse(y_true, label_length), dtype="int32"
    )

    y_pred = ops.log(ops.transpose(y_pred, axes=[1, 0, 2]) + keras.backend.epsilon())

    return ops.expand_dims(
        tf.compat.v1.nn.ctc_loss(
            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
        ),
        1,
    )


def ctc_label_dense_to_sparse(labels, label_lengths):
    label_shape = ops.shape(labels)
    num_batches_tns = ops.stack([label_shape[0]])
    max_num_labels_tns = ops.stack([label_shape[1]])

    def range_less_than(old_input, current_input):
        return ops.expand_dims(ops.arange(ops.shape(old_input)[1]), 0) < tf.fill(
            max_num_labels_tns, current_input
        )

    init = ops.cast(tf.fill([1, label_shape[1]], 0), dtype="bool")
    dense_mask = tf.compat.v1.scan(
        range_less_than, label_lengths, initializer=init, parallel_iterations=1
    )
    dense_mask = dense_mask[:, 0, :]

    label_array = ops.reshape(
        ops.tile(ops.arange(0, label_shape[1]), num_batches_tns), label_shape
    )
    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)

    batch_array = ops.transpose(
        ops.reshape(
            ops.tile(ops.arange(0, label_shape[0]), max_num_labels_tns),
            tf.reverse(label_shape, [0]),
        )
    )
    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)
    indices = ops.transpose(
        ops.reshape(ops.concatenate([batch_ind, label_ind], axis=0), [2, -1])
    )

    vals_sparse = tf.compat.v1.gather_nd(labels, indices)

    return tf.SparseTensor(
        ops.cast(indices, dtype="int64"),
        vals_sparse,
        ops.cast(label_shape, dtype="int64"),
    )


class CTCLayer(layers.Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        batch_len = ops.cast(ops.shape(y_true)[0], dtype="int64")
        input_length = ops.cast(ops.shape(y_pred)[1], dtype="int64")
        label_length = ops.cast(ops.shape(y_true)[1], dtype="int64")

        input_length = input_length * ops.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * ops.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions
        return y_pred


def build_model():
    # Inputs to the model
    input_img = layers.Input(
        shape=(img_width, img_height, 1), name="image", dtype="float32"
    )
    labels = layers.Input(name="label", shape=(None,), dtype="float32")

    # First conv block
    x = layers.Conv2D(
        32,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv1",
    )(input_img)
    x = layers.MaxPooling2D((2, 2), name="pool1")(x)

    # Second conv block
    x = layers.Conv2D(
        64,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv2",
    )(x)
    x = layers.MaxPooling2D((2, 2), name="pool2")(x)

    # We have used two max-pooling layers with pool size and strides of 2.
    # Hence, downsampled feature maps are 4x smaller. The number of
    # filters in the last layer is 64. Reshape accordingly before
    # passing the output to the RNN part of the model.
    new_shape = ((img_width // 4), (img_height // 4) * 64)
    x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
    x = layers.Dense(64, activation="relu", name="dense1")(x)
    x = layers.Dropout(0.2)(x)

    # RNNs
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)

    # Output layer
    x = layers.Dense(
        len(char_to_num.get_vocabulary()) + 1, activation="softmax", name="dense2"
    )(x)

    # Add CTC layer for calculating CTC loss at each step
    output = CTCLayer(name="ctc_loss")(labels, x)

    # Define the model
    model = keras.models.Model(
        inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
    )
    # Optimizer
    opt = keras.optimizers.Adam()
    # Compile the model and return
    model.compile(optimizer=opt)
    return model


# Get the model
model = build_model()
model.summary()

"""
|
||||
## Training
|
||||
"""
|
||||
|
||||
|
||||
# TODO restore epoch count.
|
||||
epochs = 100
|
||||
early_stopping_patience = 10
|
||||
# Add early stopping
|
||||
early_stopping = keras.callbacks.EarlyStopping(
|
||||
monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
|
||||
)
|
||||
|
||||
# Train the model
|
||||
history = model.fit(
|
||||
train_dataset,
|
||||
validation_data=validation_dataset,
|
||||
epochs=epochs,
|
||||
callbacks=[early_stopping],
|
||||
)
|
||||
|
||||
|
||||
"""
|
||||
## Inference
|
||||
|
||||
You can use the trained model hosted on [Hugging Face Hub](https://huggingface.co/keras-io/ocr-for-captcha)
|
||||
and try the demo on [Hugging Face Spaces](https://huggingface.co/spaces/keras-io/ocr-for-captcha).
|
||||
"""
|
||||
|
||||
|
||||
def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
    input_shape = ops.shape(y_pred)
    num_samples, num_steps = input_shape[0], input_shape[1]
    y_pred = ops.log(ops.transpose(y_pred, axes=[1, 0, 2]) + keras.backend.epsilon())
    input_length = ops.cast(input_length, dtype="int32")

    if greedy:
        (decoded, log_prob) = tf.nn.ctc_greedy_decoder(
            inputs=y_pred, sequence_length=input_length
        )
    else:
        (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder(
            inputs=y_pred,
            sequence_length=input_length,
            beam_width=beam_width,
            top_paths=top_paths,
        )
    decoded_dense = []
    for st in decoded:
        st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps))
        decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1))
    return (decoded_dense, log_prob)


if __name__ == "__main__":
|
||||
download_dataset()
|
||||
train_nn()
|
||||
|
||||
# Get the prediction model by extracting layers till the output layer
prediction_model = keras.models.Model(
    model.input[0], model.get_layer(name="dense2").output
)
prediction_model.summary()


# A utility function to decode the output of the network
def decode_batch_predictions(pred):
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search
    results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_length
    ]
    # Iterate over the results and get back the text
    output_text = []
    for res in results:
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text
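
# decode_batch_predictions uses greedy search; the comment above notes that beam
# search can help on harder tasks. A sketch of that variant (beam_width and the
# helper name are illustrative, not from the original example):
def decode_batch_predictions_beam(pred, beam_width=100):
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    results = ctc_decode(
        pred, input_length=input_len, greedy=False, beam_width=beam_width
    )[0][0][:, :max_length]
    return [
        tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        for res in results
    ]

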
# Let's check results on some validation samples
for batch in validation_dataset.take(1):
    batch_images = batch["image"]
    batch_labels = batch["label"]

    preds = prediction_model.predict(batch_images)
    pred_texts = decode_batch_predictions(preds)

    orig_texts = []
    for label in batch_labels:
        label = tf.strings.reduce_join(num_to_char(label)).numpy().decode("utf-8")
        orig_texts.append(label)

    _, ax = plt.subplots(4, 4, figsize=(15, 5))
    for i in range(len(pred_texts)):
        img = (batch_images[i, :, :, 0] * 255).numpy().astype(np.uint8)
        img = img.T
        title = f"Prediction: {pred_texts[i]}"
        ax[i // 4, i % 4].imshow(img, cmap="gray")
        ax[i // 4, i % 4].set_title(title)
        ax[i // 4, i % 4].axis("off")
plt.show()

@@ -0,0 +1,158 @@
from os import environ, makedirs, path, walk, listdir
from shutil import move
from dotenv import load_dotenv
from base64 import b64decode
import re
import requests
import cv2
import keras
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint

load_dotenv()

# Constants
IMAGE_HEIGHT = 70
IMAGE_WIDTH = 200
DOWNLOAD_PATH = environ.get("DOWNLOAD_PATH")
TESTING_PATH = environ.get("TESTING_PATH")
TRAINING_PATH = environ.get("TRAINING_PATH")
PERCENT_OF_TESTING = int(environ.get("PERCENT_OF_TESTING"))


def prepare_dirs():
    """Create necessary directories for downloading and storing images."""
    makedirs(DOWNLOAD_PATH, exist_ok=True)
    makedirs(TESTING_PATH, exist_ok=True)
    makedirs(TRAINING_PATH, exist_ok=True)


def fetch_captcha(id):
    """Fetch a captcha image by its ID and save it to the download path."""
    try:
        response = requests.get(f"{environ.get('CAPTCHA_AGGREGATOR_API')}/captcha/{id}")
        response.raise_for_status()
        captcha = response.json()["captcha"]
        captcha_file_path = path.join(DOWNLOAD_PATH, f"{captcha['hash']}_{captcha['solution']}.jpeg")
        with open(captcha_file_path, 'wb') as captcha_file:
            captcha_file.write(b64decode(captcha['image']))
    except requests.RequestException as e:
        print(f"Error fetching captcha {id}: {e}")


def search_saved_captcha(hash, path):
    """Check if a captcha with the given hash exists in the specified path."""
    regex = re.compile(f"{hash}_\\w{{6}}\\.jpeg")
    for _, _, files in walk(path):
        for file in files:
            if regex.match(file):
                return True
    return False


def search_and_download_new(captchas):
    """Search for new captchas and download them if they don't already exist."""
    for captcha in captchas:
        id = captcha["id"]
        hash = captcha["hash"]
        if not (search_saved_captcha(hash, TRAINING_PATH) or
                search_saved_captcha(hash, TESTING_PATH) or
                search_saved_captcha(hash, DOWNLOAD_PATH)):
            fetch_captcha(id)


def sort_datasets():
    """Sort downloaded captchas into training and testing datasets."""
    amount_of_new_data = len([file for file in listdir(DOWNLOAD_PATH) if path.isfile(path.join(DOWNLOAD_PATH, file))])
    amount_to_send_to_test = round(amount_of_new_data * (PERCENT_OF_TESTING / 100))

    files = listdir(DOWNLOAD_PATH)
    for index, file in enumerate(files):
        if index < amount_to_send_to_test:
            move(path.join(DOWNLOAD_PATH, file), TESTING_PATH)
        else:
            move(path.join(DOWNLOAD_PATH, file), TRAINING_PATH)


def download_dataset():
    """Download the dataset of captchas and sort them into training and testing sets."""
    prepare_dirs()
    try:
        response = requests.get(f"{environ.get('CAPTCHA_AGGREGATOR_API')}/captcha/all")
        response.raise_for_status()
        captchas = response.json()["captchas"]
        search_and_download_new(captchas)
        sort_datasets()
    except requests.RequestException as e:
        print(f"Error downloading dataset: {e}")


def load_dataset(dataset_path):
    """Load images and their corresponding solutions from the specified dataset path."""
    images = []
    solutions = []
    for filename in listdir(dataset_path):
        img = cv2.imread(path.join(dataset_path, filename))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = img / 255.0
        images.append(img)
        solution = path.splitext(filename)[0].split('_')[1]
        solutions.append(solution)

    unique_solutions = sorted(set(solutions))
    solution_to_label = {solution: i for i, solution in enumerate(unique_solutions)}
    labels = [solution_to_label[solution] for solution in solutions]

    return np.array(images), np.array(labels), unique_solutions


def load_training_dataset():
    """Load the training dataset."""
    return load_dataset(TRAINING_PATH)


def load_testing_dataset():
    """Load the testing dataset."""
    return load_dataset(TESTING_PATH)


def train_nn():
    """Train the neural network on the training dataset."""
    training_images, training_labels, unique_solutions = load_training_dataset()
    testing_images, testing_labels = (None, None)

    if PERCENT_OF_TESTING > 0:
        testing_images, testing_labels, _ = load_testing_dataset()

    model = keras.Sequential([
        keras.layers.Conv2D(128, (3, 3), activation='relu', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 1)),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(256, (3, 3), activation='relu'),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(256, (3, 3), activation='relu'),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),  # Dropout for regularization
        keras.layers.Dense(len(unique_solutions), activation='softmax')  # Output layer
    ])

    model.summary()
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    callbacks = [
        EarlyStopping(monitor='accuracy', patience=3),
        ModelCheckpoint('best_model.keras', save_best_only=True)
    ]

    EPOCHS = 100
    BATCH_SIZE = 8

    if PERCENT_OF_TESTING > 0:
        model.fit(np.array(training_images), np.array(training_labels),
                  epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=callbacks,
                  validation_data=(np.array(testing_images), np.array(testing_labels)),
                  )
    else:
        model.fit(np.array(training_images), np.array(training_labels),
                  epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=callbacks
                  )

    keras.saving.save_model(model, 'captcha_solver.keras')


if __name__ == "__main__":
    download_dataset()
    train_nn()