Loading project/src/classify_with_cnn.py +58 −4 Original line number Diff line number Diff line Loading @@ -12,12 +12,20 @@ import seaborn as sns from sklearn.model_selection import train_test_split import joblib # Function to rescale images def rescale(image): return image / 255.0 # Function to load images and labels for CNN def load_images_and_labels_cnn(base_path, size=(50, 50)): """ Load images and their labels for CNN processing from a given base path. Images are resized to a specified size. Parameters: base_path (str): The path of the directory containing image folders. size (tuple): The desired size for resizing the images (width, height). Returns: tuple: A tuple containing arrays of images and labels. """ images = [] labels = [] for folder in os.listdir(base_path): Loading @@ -35,6 +43,18 @@ def load_images_and_labels_cnn(base_path, size=(50, 50)): # Function to create the CNN model def get_model(lr=0.001, dropout_rate=0.3, activation='relu'): """ Create and compile a Convolutional Neural Network (CNN) model. Parameters: lr (float): Learning rate for the optimizer. Default is 0.001. dropout_rate (float): Dropout rate for regularization. Default is 0.3. activation (str): Activation function for the layers. Default is 'relu'. Returns: tensorflow.keras.models.Sequential: The compiled CNN model. """ input_shape = (50, 50, 3) outputs_number = 30 model = models.Sequential([ Loading @@ -56,6 +76,16 @@ def get_model(lr=0.001, dropout_rate=0.3, activation='relu'): # Learning rate schedule function def lr_schedule(epoch): """ Learning rate schedule function to adjust the learning rate based on the epoch. Parameters: epoch (int): The current epoch number during training. Returns: float: The adjusted learning rate. """ lr = 0.001 if epoch > 50: lr *= 0.5e-3 Loading @@ -69,6 +99,18 @@ def lr_schedule(epoch): # Function to create stratified subsets of data def stratified_subset(X, y, subset_ratio): """ Create a stratified subset of data based on the given subset ratio. Parameters: X (numpy.ndarray): The input features. y (numpy.ndarray): The target labels. subset_ratio (float): The ratio of the subset size to the original data size. Returns: tuple: A tuple containing the subset of features and labels. """ unique_classes = np.unique(y) X_subset = [] y_subset = [] Loading @@ -85,6 +127,18 @@ def stratified_subset(X, y, subset_ratio): # Function to train the model def train_model(X_train, y_train, X_dev, y_dev, subset_description, label_encoder): """ Train the CNN model with the given training and validation data. Parameters: X_train (numpy.ndarray): Training data features. y_train (numpy.ndarray): Training data labels. X_dev (numpy.ndarray): Validation data features. y_dev (numpy.ndarray): Validation data labels. subset_description (str): Description of the training data subset. label_encoder (sklearn.preprocessing.LabelEncoder): The label encoder used for transforming class labels. """ model = get_model() history = model.fit( X_train, y_train, Loading project/src/evaluate_cnn.py +99 −29 Original line number Diff line number Diff line Loading @@ -10,15 +10,43 @@ from tensorflow.keras.utils import to_categorical from sklearn.preprocessing import LabelEncoder def create_directory(directory_name): """ Create a directory if it doesn't already exist. Parameters: directory_name (str): The name or path of the directory to be created. """ if not os.path.exists(directory_name): os.makedirs(directory_name) def save_plot(figure, directory, file_name): """ Save a matplotlib plot to a specified directory with a given file name. Parameters: figure (matplotlib.figure.Figure): The matplotlib figure object to be saved. directory (str): The directory where the figure will be saved. file_name (str): The name of the file in which the figure will be saved. """ file_path = os.path.join(directory, file_name) figure.savefig(file_path) plt.close(figure) def load_images_and_labels_cnn(base_path, size=(50, 50)): """ Load images and their labels for CNN processing from a given base path. Images are resized to a specified size. Parameters: base_path (str): The path of the directory containing image folders. size (tuple): The desired size for resizing the images (width, height). Returns: tuple: A tuple containing arrays of images, labels, and file paths. """ images = [] labels = [] file_paths = [] # Store file paths Loading @@ -37,6 +65,19 @@ def load_images_and_labels_cnn(base_path, size=(50, 50)): return np.array(images), np.array(labels), file_paths def plot_metrics(y_true, y_pred, metric_name, title, directory, file_name, is_accuracy=False): """ Plot and save a metric evaluation bar chart. Parameters: y_true (array): True labels. y_pred (array): Predicted labels by the model. metric_name (function): The metric function to evaluate (e.g., accuracy_score). title (str): Title of the plot. directory (str): Directory to save the plot. file_name (str): Name of the file to save the plot. is_accuracy (bool): Indicator whether the metric is accuracy or not. Default is False. """ if is_accuracy: metric_value = metric_name(y_true, y_pred) else: Loading @@ -51,6 +92,20 @@ def plot_metrics(y_true, y_pred, metric_name, title, directory, file_name, is_ac save_plot(figure, directory, f'{file_name}.png') def display_misclassified_images(original_paths, X, y_true, y_pred, label_encoder, num_images=7, directory='', model_name=''): """ Display and save a specified number of misclassified images. Parameters: original_paths (list): List of paths to the original images. X (array): The array of image data. y_true (array): True labels. y_pred (array): Predicted labels. label_encoder (LabelEncoder): Encoder used to transform labels. num_images (int): Number of misclassified images to display. Default is 7. directory (str): Directory to save the images. model_name (str): Name of the model used for prefixing saved images. """ misclassified_indices = np.where(y_true != y_pred)[0] if len(misclassified_indices) < num_images: num_images = len(misclassified_indices) Loading @@ -70,6 +125,15 @@ def display_misclassified_images(original_paths, X, y_true, y_pred, label_encode def evaluate_model_with_metrics(model_path, datasets, label_encoder): """ Evaluate a model with various metrics and save the results. Parameters: model_path (str): Path to the trained model. datasets (dict): A dictionary containing datasets for evaluation. label_encoder (LabelEncoder): Encoder used to transform labels. """ model = tf.keras.models.load_model(model_path) model_name = os.path.basename(model_path).split('.')[0] results_directory = f'../figures/cnn/{model_name}' Loading Loading @@ -115,7 +179,16 @@ def evaluate_model_with_metrics(model_path, datasets, label_encoder): display_misclassified_images(paths, X, y_true, y_pred, label_encoder, directory=results_directory, model_name=split_name) def main(subset): """ Main function to evaluate a CNN model on a specific dataset subset. Parameters: subset (str): Subset of the training data used for the model (e.g., "10_percent"). """ # Construct the model file path model_path = f'../trained_classifiers/fruit_classifier_{subset}.keras' # Load and preprocess data (as in the training script) X_train, y_train, train_paths = load_images_and_labels_cnn('../data/train') Loading @@ -134,15 +207,12 @@ datasets = { 'Test': (X_test, y_test_encoded, test_paths) } evaluate_model_with_metrics(model_path, datasets, label_encoder) # Evaluate the full model # evaluate_model_with_metrics('fruit_classifier_full.keras', datasets, label_encoder) if __name__ == "__main__": parser = argparse.ArgumentParser(description='Evaluate a CNN model on a specific dataset subset.') parser.add_argument('subset', type=str, help='Subset of the training data used for the model (e.g., "10_percent").') args = parser.parse_args() # For models trained on subsets evaluate_model_with_metrics('../trained_classifiers/fruit_classifier_10_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_50_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_05_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_30_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_70_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_90_percent.keras', datasets, label_encoder) main(args.subset) Loading
project/src/classify_with_cnn.py +58 −4 Original line number Diff line number Diff line Loading @@ -12,12 +12,20 @@ import seaborn as sns from sklearn.model_selection import train_test_split import joblib # Function to rescale images def rescale(image): return image / 255.0 # Function to load images and labels for CNN def load_images_and_labels_cnn(base_path, size=(50, 50)): """ Load images and their labels for CNN processing from a given base path. Images are resized to a specified size. Parameters: base_path (str): The path of the directory containing image folders. size (tuple): The desired size for resizing the images (width, height). Returns: tuple: A tuple containing arrays of images and labels. """ images = [] labels = [] for folder in os.listdir(base_path): Loading @@ -35,6 +43,18 @@ def load_images_and_labels_cnn(base_path, size=(50, 50)): # Function to create the CNN model def get_model(lr=0.001, dropout_rate=0.3, activation='relu'): """ Create and compile a Convolutional Neural Network (CNN) model. Parameters: lr (float): Learning rate for the optimizer. Default is 0.001. dropout_rate (float): Dropout rate for regularization. Default is 0.3. activation (str): Activation function for the layers. Default is 'relu'. Returns: tensorflow.keras.models.Sequential: The compiled CNN model. """ input_shape = (50, 50, 3) outputs_number = 30 model = models.Sequential([ Loading @@ -56,6 +76,16 @@ def get_model(lr=0.001, dropout_rate=0.3, activation='relu'): # Learning rate schedule function def lr_schedule(epoch): """ Learning rate schedule function to adjust the learning rate based on the epoch. Parameters: epoch (int): The current epoch number during training. Returns: float: The adjusted learning rate. """ lr = 0.001 if epoch > 50: lr *= 0.5e-3 Loading @@ -69,6 +99,18 @@ def lr_schedule(epoch): # Function to create stratified subsets of data def stratified_subset(X, y, subset_ratio): """ Create a stratified subset of data based on the given subset ratio. Parameters: X (numpy.ndarray): The input features. y (numpy.ndarray): The target labels. subset_ratio (float): The ratio of the subset size to the original data size. Returns: tuple: A tuple containing the subset of features and labels. """ unique_classes = np.unique(y) X_subset = [] y_subset = [] Loading @@ -85,6 +127,18 @@ def stratified_subset(X, y, subset_ratio): # Function to train the model def train_model(X_train, y_train, X_dev, y_dev, subset_description, label_encoder): """ Train the CNN model with the given training and validation data. Parameters: X_train (numpy.ndarray): Training data features. y_train (numpy.ndarray): Training data labels. X_dev (numpy.ndarray): Validation data features. y_dev (numpy.ndarray): Validation data labels. subset_description (str): Description of the training data subset. label_encoder (sklearn.preprocessing.LabelEncoder): The label encoder used for transforming class labels. """ model = get_model() history = model.fit( X_train, y_train, Loading
project/src/evaluate_cnn.py +99 −29 Original line number Diff line number Diff line Loading @@ -10,15 +10,43 @@ from tensorflow.keras.utils import to_categorical from sklearn.preprocessing import LabelEncoder def create_directory(directory_name): """ Create a directory if it doesn't already exist. Parameters: directory_name (str): The name or path of the directory to be created. """ if not os.path.exists(directory_name): os.makedirs(directory_name) def save_plot(figure, directory, file_name): """ Save a matplotlib plot to a specified directory with a given file name. Parameters: figure (matplotlib.figure.Figure): The matplotlib figure object to be saved. directory (str): The directory where the figure will be saved. file_name (str): The name of the file in which the figure will be saved. """ file_path = os.path.join(directory, file_name) figure.savefig(file_path) plt.close(figure) def load_images_and_labels_cnn(base_path, size=(50, 50)): """ Load images and their labels for CNN processing from a given base path. Images are resized to a specified size. Parameters: base_path (str): The path of the directory containing image folders. size (tuple): The desired size for resizing the images (width, height). Returns: tuple: A tuple containing arrays of images, labels, and file paths. """ images = [] labels = [] file_paths = [] # Store file paths Loading @@ -37,6 +65,19 @@ def load_images_and_labels_cnn(base_path, size=(50, 50)): return np.array(images), np.array(labels), file_paths def plot_metrics(y_true, y_pred, metric_name, title, directory, file_name, is_accuracy=False): """ Plot and save a metric evaluation bar chart. Parameters: y_true (array): True labels. y_pred (array): Predicted labels by the model. metric_name (function): The metric function to evaluate (e.g., accuracy_score). title (str): Title of the plot. directory (str): Directory to save the plot. file_name (str): Name of the file to save the plot. is_accuracy (bool): Indicator whether the metric is accuracy or not. Default is False. """ if is_accuracy: metric_value = metric_name(y_true, y_pred) else: Loading @@ -51,6 +92,20 @@ def plot_metrics(y_true, y_pred, metric_name, title, directory, file_name, is_ac save_plot(figure, directory, f'{file_name}.png') def display_misclassified_images(original_paths, X, y_true, y_pred, label_encoder, num_images=7, directory='', model_name=''): """ Display and save a specified number of misclassified images. Parameters: original_paths (list): List of paths to the original images. X (array): The array of image data. y_true (array): True labels. y_pred (array): Predicted labels. label_encoder (LabelEncoder): Encoder used to transform labels. num_images (int): Number of misclassified images to display. Default is 7. directory (str): Directory to save the images. model_name (str): Name of the model used for prefixing saved images. """ misclassified_indices = np.where(y_true != y_pred)[0] if len(misclassified_indices) < num_images: num_images = len(misclassified_indices) Loading @@ -70,6 +125,15 @@ def display_misclassified_images(original_paths, X, y_true, y_pred, label_encode def evaluate_model_with_metrics(model_path, datasets, label_encoder): """ Evaluate a model with various metrics and save the results. Parameters: model_path (str): Path to the trained model. datasets (dict): A dictionary containing datasets for evaluation. label_encoder (LabelEncoder): Encoder used to transform labels. """ model = tf.keras.models.load_model(model_path) model_name = os.path.basename(model_path).split('.')[0] results_directory = f'../figures/cnn/{model_name}' Loading Loading @@ -115,7 +179,16 @@ def evaluate_model_with_metrics(model_path, datasets, label_encoder): display_misclassified_images(paths, X, y_true, y_pred, label_encoder, directory=results_directory, model_name=split_name) def main(subset): """ Main function to evaluate a CNN model on a specific dataset subset. Parameters: subset (str): Subset of the training data used for the model (e.g., "10_percent"). """ # Construct the model file path model_path = f'../trained_classifiers/fruit_classifier_{subset}.keras' # Load and preprocess data (as in the training script) X_train, y_train, train_paths = load_images_and_labels_cnn('../data/train') Loading @@ -134,15 +207,12 @@ datasets = { 'Test': (X_test, y_test_encoded, test_paths) } evaluate_model_with_metrics(model_path, datasets, label_encoder) # Evaluate the full model # evaluate_model_with_metrics('fruit_classifier_full.keras', datasets, label_encoder) if __name__ == "__main__": parser = argparse.ArgumentParser(description='Evaluate a CNN model on a specific dataset subset.') parser.add_argument('subset', type=str, help='Subset of the training data used for the model (e.g., "10_percent").') args = parser.parse_args() # For models trained on subsets evaluate_model_with_metrics('../trained_classifiers/fruit_classifier_10_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_50_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_05_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_30_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_70_percent.keras', datasets, label_encoder) # evaluate_model_with_metrics('fruit_classifier_90_percent.keras', datasets, label_encoder) main(args.subset)