trying example from keras wiki
This commit is contained in:
parent 1063b60a98
commit d231ec9e57

main.py (549)
@@ -1,158 +1,429 @@
"""
Title: OCR model for reading Captchas
Author: [A_K_Nain](https://twitter.com/A_K_Nain)
Date created: 2020/06/14
Last modified: 2024/03/13
Description: How to implement an OCR model using CNNs, RNNs and CTC loss.
Accelerator: GPU
Converted to Keras 3 by: [Sitam Meur](https://github.com/sitamgithub-MSIT)
"""

"""
## Introduction

This example demonstrates a simple OCR model built with the Functional API. Apart from
combining CNN and RNN, it also illustrates how you can instantiate a new layer
and use it as an "Endpoint layer" for implementing CTC loss. For a detailed
guide to layer subclassing, please check out
[this page](https://keras.io/guides/making_new_layers_and_models_via_subclassing/)
in the developer guides.
"""

"""
## Setup
"""

import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

import tensorflow as tf
import keras
from keras import ops
from keras import layers

"""
## Load the data: [Captcha Images](https://www.kaggle.com/fournierp/captcha-version-2-images)

Let's download the data.
"""
"""shell
|
||||||
"""Check if a captcha with the given hash exists in the specified path."""
|
curl -LO https://github.com/AakashKumarNain/CaptchaCracker/raw/master/captcha_images_v2.zip
|
||||||
regex = re.compile(f"{hash}_\\w{{6}}\\.jpeg")
|
unzip -qq captcha_images_v2.zip
|
||||||
for _, _, files in walk(path):
|
"""
|
||||||
for file in files:
|
|
||||||
if regex.match(file):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def search_and_download_new(captchas):
|
|
||||||
"""Search for new captchas and download them if they don't already exist."""
|
|
||||||
for captcha in captchas:
|
|
||||||
id = captcha["id"]
|
|
||||||
hash = captcha["hash"]
|
|
||||||
if not (search_saved_captcha(hash, TRAINING_PATH) or
|
|
||||||
search_saved_captcha(hash, TESTING_PATH) or
|
|
||||||
search_saved_captcha(hash, DOWNLOAD_PATH)):
|
|
||||||
fetch_captcha(id)
|
|
||||||
|
|
||||||
def sort_datasets():
|
"""
|
||||||
"""Sort downloaded captchas into training and testing datasets."""
|
The dataset contains 1040 captcha files as `jpeg` images. The label for each sample is a string,
|
||||||
amount_of_new_data = len([file for file in listdir(DOWNLOAD_PATH) if path.isfile(path.join(DOWNLOAD_PATH, file))])
|
the name of the file (minus the file extension).
|
||||||
amount_to_send_to_test = round(amount_of_new_data * (PERCENT_OF_TESTING / 100))
|
We will map each character in the string to an integer for training the model. Similary,
|
||||||
|
we will need to map the predictions of the model back to strings. For this purpose
|
||||||
|
we will maintain two dictionaries, mapping characters to integers, and integers to characters,
|
||||||
|
respectively.
|
||||||
|
"""
|
||||||
|
|
||||||
files = listdir(DOWNLOAD_PATH)
|
|
||||||
for index, file in enumerate(files):
|
# Path to the data directory
|
||||||
if index < amount_to_send_to_test:
|
data_dir = Path("./datasets/training")
|
||||||
move(path.join(DOWNLOAD_PATH, file), TESTING_PATH)
|
|
||||||
|
# Get list of all the images
|
||||||
|
images = sorted(list(map(str, list(data_dir.glob("*.jpeg")))))
|
||||||
|
labels = [img.split(os.path.sep)[-1].split(".jpeg")[0].split("_")[1].upper() for img in images]
|
||||||
|
characters = set(char for label in labels for char in label)
|
||||||
|
characters = sorted(list(characters))
|
||||||
|
|
||||||
|
print("Number of images found: ", len(images))
|
||||||
|
print("Number of labels found: ", len(labels))
|
||||||
|
print("Number of unique characters: ", len(characters))
|
||||||
|
print("Characters present: ", characters)
|
||||||
|
|
||||||
|
# Batch size for training and validation
|
||||||
|
batch_size = 16
|
||||||
|
|
||||||
|
# Desired image dimensions
|
||||||
|
img_width = 200
|
||||||
|
img_height = 70
|
||||||
|
|
||||||
|
# Factor by which the image is going to be downsampled
|
||||||
|
# by the convolutional blocks. We will be using two
|
||||||
|
# convolution blocks and each block will have
|
||||||
|
# a pooling layer which downsample the features by a factor of 2.
|
||||||
|
# Hence total downsampling factor would be 4.
|
||||||
|
downsample_factor = 4
|
||||||
|
|
||||||
|
# Maximum length of any captcha in the dataset
|
||||||
|
# print([len(label) for label in labels])
|
||||||
|
max_length = max([len(label) for label in labels])
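
"""
A quick sanity check on the arithmetic above (illustrative; not from the original
example): with `img_width = 200` and a total downsampling factor of 4, the RNN
will see 200 / 4 = 50 time steps per image, and CTC requires at least as many
time steps as there are characters in the longest label.
"""

# Illustrative check only: 50 time steps vs. the longest label length.
assert img_width // downsample_factor >= max_length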

"""
## Preprocessing
"""

# Mapping characters to integers
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)

# Mapping integers back to original characters
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
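
"""
As an illustrative sanity check (an addition, not from the original example), a
label can be round-tripped through the two lookup layers: encode to integer ids,
then decode back to the same string. The sample label below is hypothetical and
only needs to use characters that occur in `characters`.
"""

sample_label = "".join(characters[:4])  # hypothetical label built from known characters
ids = char_to_num(tf.strings.unicode_split(sample_label, input_encoding="UTF-8"))
roundtrip = tf.strings.reduce_join(num_to_char(ids)).numpy().decode("utf-8")
assert roundtrip == sample_label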


def split_data(images, labels, train_size=0.9, shuffle=True):
    # 1. Get the total size of the dataset
    size = len(images)
    # 2. Make an indices array and shuffle it, if required
    indices = ops.arange(size)
    if shuffle:
        indices = keras.random.shuffle(indices)
    # 3. Get the size of training samples
    train_samples = int(size * train_size)
    # 4. Split data into training and validation sets
    x_train, y_train = images[indices[:train_samples]], labels[indices[:train_samples]]
    x_valid, y_valid = images[indices[train_samples:]], labels[indices[train_samples:]]
    return x_train, x_valid, y_train, y_valid


# Splitting data into training and validation sets
x_train, x_valid, y_train, y_valid = split_data(np.array(images), np.array(labels))


def encode_single_sample(img_path, label):
    # 1. Read image
    img = tf.io.read_file(img_path)
    # 2. Decode and convert to grayscale
    img = tf.io.decode_jpeg(img, channels=1)
    # 3. Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # 4. Resize to the desired size
    img = ops.image.resize(img, [img_height, img_width])
    # 5. Transpose the image because we want the time
    # dimension to correspond to the width of the image.
    img = ops.transpose(img, axes=[1, 0, 2])
    # 6. Map the characters in label to numbers
    label = char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
    # 7. Return a dict as our model is expecting two inputs
    return {"image": img, "label": label}


"""
## Create `Dataset` objects
"""

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = (
    train_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
validation_dataset = (
    validation_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

"""
## Visualize the data
"""

_, ax = plt.subplots(4, 4, figsize=(10, 5))
for batch in train_dataset.take(1):
    images = batch["image"]
    labels = batch["label"]
    for i in range(16):
        img = (images[i] * 255).numpy().astype("uint8")
        label = tf.strings.reduce_join(num_to_char(labels[i])).numpy().decode("utf-8")
        ax[i // 4, i % 4].imshow(img[:, :, 0].T, cmap="gray")
        ax[i // 4, i % 4].set_title(label)
        ax[i // 4, i % 4].axis("off")
plt.show()
"""
|
||||||
|
## Model
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
|
||||||
|
label_length = ops.cast(ops.squeeze(label_length, axis=-1), dtype="int32")
|
||||||
|
input_length = ops.cast(ops.squeeze(input_length, axis=-1), dtype="int32")
|
||||||
|
sparse_labels = ops.cast(
|
||||||
|
ctc_label_dense_to_sparse(y_true, label_length), dtype="int32"
|
||||||
|
)
|
||||||
|
|
||||||
|
y_pred = ops.log(ops.transpose(y_pred, axes=[1, 0, 2]) + keras.backend.epsilon())
|
||||||
|
|
||||||
|
return ops.expand_dims(
|
||||||
|
tf.compat.v1.nn.ctc_loss(
|
||||||
|
inputs=y_pred, labels=sparse_labels, sequence_length=input_length
|
||||||
|
),
|
||||||
|
1,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def ctc_label_dense_to_sparse(labels, label_lengths):
|
||||||
|
label_shape = ops.shape(labels)
|
||||||
|
num_batches_tns = ops.stack([label_shape[0]])
|
||||||
|
max_num_labels_tns = ops.stack([label_shape[1]])
|
||||||
|
|
||||||
|
def range_less_than(old_input, current_input):
|
||||||
|
return ops.expand_dims(ops.arange(ops.shape(old_input)[1]), 0) < tf.fill(
|
||||||
|
max_num_labels_tns, current_input
|
||||||
|
)
|
||||||
|
|
||||||
|
init = ops.cast(tf.fill([1, label_shape[1]], 0), dtype="bool")
|
||||||
|
dense_mask = tf.compat.v1.scan(
|
||||||
|
range_less_than, label_lengths, initializer=init, parallel_iterations=1
|
||||||
|
)
|
||||||
|
dense_mask = dense_mask[:, 0, :]
|
||||||
|
|
||||||
|
label_array = ops.reshape(
|
||||||
|
ops.tile(ops.arange(0, label_shape[1]), num_batches_tns), label_shape
|
||||||
|
)
|
||||||
|
label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)
|
||||||
|
|
||||||
|
batch_array = ops.transpose(
|
||||||
|
ops.reshape(
|
||||||
|
ops.tile(ops.arange(0, label_shape[0]), max_num_labels_tns),
|
||||||
|
tf.reverse(label_shape, [0]),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)
|
||||||
|
indices = ops.transpose(
|
||||||
|
ops.reshape(ops.concatenate([batch_ind, label_ind], axis=0), [2, -1])
|
||||||
|
)
|
||||||
|
|
||||||
|
vals_sparse = tf.compat.v1.gather_nd(labels, indices)
|
||||||
|
|
||||||
|
return tf.SparseTensor(
|
||||||
|
ops.cast(indices, dtype="int64"),
|
||||||
|
vals_sparse,
|
||||||
|
ops.cast(label_shape, dtype="int64"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CTCLayer(layers.Layer):
|
||||||
|
def __init__(self, name=None):
|
||||||
|
super().__init__(name=name)
|
||||||
|
self.loss_fn = ctc_batch_cost
|
||||||
|
|
||||||
|
def call(self, y_true, y_pred):
|
||||||
|
# Compute the training-time loss value and add it
|
||||||
|
# to the layer using `self.add_loss()`.
|
||||||
|
batch_len = ops.cast(ops.shape(y_true)[0], dtype="int64")
|
||||||
|
input_length = ops.cast(ops.shape(y_pred)[1], dtype="int64")
|
||||||
|
label_length = ops.cast(ops.shape(y_true)[1], dtype="int64")
|
||||||
|
|
||||||
|
input_length = input_length * ops.ones(shape=(batch_len, 1), dtype="int64")
|
||||||
|
label_length = label_length * ops.ones(shape=(batch_len, 1), dtype="int64")
|
||||||
|
|
||||||
|
loss = self.loss_fn(y_true, y_pred, input_length, label_length)
|
||||||
|
self.add_loss(loss)
|
||||||
|
|
||||||
|
# At test time, just return the computed predictions
|
||||||
|
return y_pred
|
||||||
|
|
||||||
|
|
||||||
|

def build_model():
    # Inputs to the model
    input_img = layers.Input(
        shape=(img_width, img_height, 1), name="image", dtype="float32"
    )
    labels = layers.Input(name="label", shape=(None,), dtype="float32")

    # First conv block
    x = layers.Conv2D(
        32,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv1",
    )(input_img)
    x = layers.MaxPooling2D((2, 2), name="pool1")(x)

    # Second conv block
    x = layers.Conv2D(
        64,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv2",
    )(x)
    x = layers.MaxPooling2D((2, 2), name="pool2")(x)

    # We have used two max pool with pool size and strides 2.
    # Hence, downsampled feature maps are 4x smaller. The number of
    # filters in the last layer is 64. Reshape accordingly before
    # passing the output to the RNN part of the model
    new_shape = ((img_width // 4), (img_height // 4) * 64)
    x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
    x = layers.Dense(64, activation="relu", name="dense1")(x)
    x = layers.Dropout(0.2)(x)

    # RNNs
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)

    # Output layer
    x = layers.Dense(
        len(char_to_num.get_vocabulary()) + 1, activation="softmax", name="dense2"
    )(x)

    # Add CTC layer for calculating CTC loss at each step
    output = CTCLayer(name="ctc_loss")(labels, x)

    # Define the model
    model = keras.models.Model(
        inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
    )
    # Optimizer
    opt = keras.optimizers.Adam()
    # Compile the model and return
    model.compile(optimizer=opt)
    return model


# Get the model
model = build_model()
model.summary()

"""
## Training
"""

# TODO restore epoch count.
epochs = 100
early_stopping_patience = 10
# Add early stopping
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
)
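
"""
Optionally (an addition, not in the original example), a `ModelCheckpoint`
callback could be added alongside early stopping to also persist the best
weights to disk; the filename here is arbitrary.
"""

# checkpoint = keras.callbacks.ModelCheckpoint("ocr_best.keras", save_best_only=True)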

# Train the model
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
    callbacks=[early_stopping],
)

"""
## Inference

You can use the trained model hosted on [Hugging Face Hub](https://huggingface.co/keras-io/ocr-for-captcha)
and try the demo on [Hugging Face Spaces](https://huggingface.co/spaces/keras-io/ocr-for-captcha).
"""

def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
    input_shape = ops.shape(y_pred)
    num_samples, num_steps = input_shape[0], input_shape[1]
    y_pred = ops.log(ops.transpose(y_pred, axes=[1, 0, 2]) + keras.backend.epsilon())
    input_length = ops.cast(input_length, dtype="int32")

    if greedy:
        (decoded, log_prob) = tf.nn.ctc_greedy_decoder(
            inputs=y_pred, sequence_length=input_length
        )
    else:
        (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder(
            inputs=y_pred,
            sequence_length=input_length,
            beam_width=beam_width,
            top_paths=top_paths,
        )
    decoded_dense = []
    for st in decoded:
        st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps))
        decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1))
    return (decoded_dense, log_prob)


# Get the prediction model by extracting layers till the output layer
prediction_model = keras.models.Model(
    model.input[0], model.get_layer(name="dense2").output
)
prediction_model.summary()


# A utility function to decode the output of the network
def decode_batch_predictions(pred):
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search
    results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_length
    ]
    # Iterate over the results and get back the text
    output_text = []
    for res in results:
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text


# Let's check results on some validation samples
for batch in validation_dataset.take(1):
    batch_images = batch["image"]
    batch_labels = batch["label"]

    preds = prediction_model.predict(batch_images)
    pred_texts = decode_batch_predictions(preds)

    orig_texts = []
    for label in batch_labels:
        label = tf.strings.reduce_join(num_to_char(label)).numpy().decode("utf-8")
        orig_texts.append(label)

    _, ax = plt.subplots(4, 4, figsize=(15, 5))
    for i in range(len(pred_texts)):
        img = (batch_images[i, :, :, 0] * 255).numpy().astype(np.uint8)
        img = img.T
        title = f"Prediction: {pred_texts[i]}"
        ax[i // 4, i % 4].imshow(img, cmap="gray")
        ax[i // 4, i % 4].set_title(title)
        ax[i // 4, i % 4].axis("off")
plt.show()
@@ -0,0 +1,158 @@
from os import environ, makedirs, path, walk, listdir
from shutil import move
from dotenv import load_dotenv
from base64 import b64decode
import re
import requests
import cv2
import keras
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint

load_dotenv()

# Constants
IMAGE_HEIGHT = 70
IMAGE_WIDTH = 200
DOWNLOAD_PATH = environ.get("DOWNLOAD_PATH")
TESTING_PATH = environ.get("TESTING_PATH")
TRAINING_PATH = environ.get("TRAINING_PATH")
PERCENT_OF_TESTING = int(environ.get("PERCENT_OF_TESTING"))


def prepare_dirs():
    """Create necessary directories for downloading and storing images."""
    makedirs(DOWNLOAD_PATH, exist_ok=True)
    makedirs(TESTING_PATH, exist_ok=True)
    makedirs(TRAINING_PATH, exist_ok=True)


def fetch_captcha(id):
    """Fetch a captcha image by its ID and save it to the download path."""
    try:
        response = requests.get(f"{environ.get('CAPTCHA_AGGREGATOR_API')}/captcha/{id}")
        response.raise_for_status()
        captcha = response.json()["captcha"]
        captcha_file_path = path.join(DOWNLOAD_PATH, f"{captcha['hash']}_{captcha['solution']}.jpeg")
        with open(captcha_file_path, 'wb') as captcha_file:
            captcha_file.write(b64decode(captcha['image']))
    except requests.RequestException as e:
        print(f"Error fetching captcha {id}: {e}")


def search_saved_captcha(hash, path):
    """Check if a captcha with the given hash exists in the specified path."""
    regex = re.compile(f"{hash}_\\w{{6}}\\.jpeg")
    for _, _, files in walk(path):
        for file in files:
            if regex.match(file):
                return True
    return False


def search_and_download_new(captchas):
    """Search for new captchas and download them if they don't already exist."""
    for captcha in captchas:
        id = captcha["id"]
        hash = captcha["hash"]
        if not (search_saved_captcha(hash, TRAINING_PATH) or
                search_saved_captcha(hash, TESTING_PATH) or
                search_saved_captcha(hash, DOWNLOAD_PATH)):
            fetch_captcha(id)


def sort_datasets():
    """Sort downloaded captchas into training and testing datasets."""
    amount_of_new_data = len([file for file in listdir(DOWNLOAD_PATH) if path.isfile(path.join(DOWNLOAD_PATH, file))])
    amount_to_send_to_test = round(amount_of_new_data * (PERCENT_OF_TESTING / 100))
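    # Worked example (illustrative): with 40 newly downloaded files and
    # PERCENT_OF_TESTING = 20, round(40 * 0.2) = 8 files go to the testing
    # set and the remaining 32 go to training.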

    files = listdir(DOWNLOAD_PATH)
    for index, file in enumerate(files):
        if index < amount_to_send_to_test:
            move(path.join(DOWNLOAD_PATH, file), TESTING_PATH)
        else:
            move(path.join(DOWNLOAD_PATH, file), TRAINING_PATH)


def download_dataset():
    """Download the dataset of captchas and sort them into training and testing sets."""
    prepare_dirs()
    try:
        response = requests.get(f"{environ.get('CAPTCHA_AGGREGATOR_API')}/captcha/all")
        response.raise_for_status()
        captchas = response.json()["captchas"]
        search_and_download_new(captchas)
        sort_datasets()
    except requests.RequestException as e:
        print(f"Error downloading dataset: {e}")


def load_dataset(dataset_path):
    """Load images and their corresponding solutions from the specified dataset path."""
    images = []
    solutions = []
    for filename in listdir(dataset_path):
        img = cv2.imread(path.join(dataset_path, filename))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = img / 255.0
        # Add a channel axis so the stacked array matches the (H, W, 1)
        # input shape expected by the Conv2D stack in train_nn().
        images.append(img[:, :, None])
        solution = path.splitext(filename)[0].split('_')[1]
        solutions.append(solution)

    unique_solutions = sorted(set(solutions))
    solution_to_label = {solution: i for i, solution in enumerate(unique_solutions)}
    labels = [solution_to_label[solution] for solution in solutions]

    return np.array(images), np.array(labels), unique_solutions


def load_training_dataset():
    """Load the training dataset."""
    return load_dataset(TRAINING_PATH)


def load_testing_dataset():
    """Load the testing dataset."""
    return load_dataset(TESTING_PATH)


def train_nn():
    """Train the neural network on the training dataset."""
    training_images, training_labels, unique_solutions = load_training_dataset()
    testing_images, testing_labels = (None, None)

    if PERCENT_OF_TESTING > 0:
        testing_images, testing_labels, _ = load_testing_dataset()

    model = keras.Sequential([
        keras.layers.Conv2D(128, (3, 3), activation='relu', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 1)),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(256, (3, 3), activation='relu'),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(256, (3, 3), activation='relu'),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),  # Dropout for regularization
        keras.layers.Dense(len(unique_solutions), activation='softmax')  # Output layer
    ])

    model.summary()
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    callbacks = [
        EarlyStopping(monitor='accuracy', patience=3),
        ModelCheckpoint('best_model.keras', save_best_only=True)
    ]

    EPOCHS = 100
    BATCH_SIZE = 8

    if PERCENT_OF_TESTING > 0:
        model.fit(np.array(training_images), np.array(training_labels),
                  epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=callbacks,
                  validation_data=(np.array(testing_images), np.array(testing_labels)),
                  )
    else:
        model.fit(np.array(training_images), np.array(training_labels),
                  epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=callbacks
                  )

    keras.saving.save_model(model, 'captcha_solver.keras')


if __name__ == "__main__":
    download_dataset()
    train_nn()