# dl_helper.py

### We create a bunch of helpful functions throughout the course.
### Storing them here so they're easily accessible.

import tensorflow as tf

# Function to load and resize our image to be used with our model
def load_and_prep_image(filepath, image_size=224):
  """
  Load an image from disk and shape it into a one-image batch for prediction.

  Args:
    filepath: Path to the image file.
    image_size: Side length in pixels; the image is resized to
      (image_size, image_size) while it is loaded.

  Prerequisites:
    tensorflow imported as tf

  Returns:
    The image array with one extra leading axis added
    (the result of `tf.expand_dims(..., axis=0)`). Pixel values are not
    rescaled by this function.
  """
  keras_image = tf.keras.preprocessing.image
  target_shape = (image_size, image_size)

  # Read the file at the requested square size and turn it into an array
  pixels = keras_image.img_to_array(
      keras_image.load_img(filepath, target_size=target_shape))

  # Prepend an axis so the single image is presented as a batch
  return tf.expand_dims(pixels, axis=0)

# # Create a function to import an image and resize it to be able to be used with our model db
# def load_and_prep_image(filename, img_shape=224, scale=True):
#   """
#   Reads in an image from filename, turns it into a tensor and reshapes into
#   (224, 224, 3).

#   Parameters
#   ----------
#   filename (str): string filename of target image
#   img_shape (int): size to resize target image to, default 224
#   scale (bool): whether to scale pixel values to range(0, 1), default True
#   """
#   # Read in the image
#   img = tf.io.read_file(filename)
#   # Decode it into a tensor
#   img = tf.image.decode_jpeg(img)
#   # Resize the image
#   img = tf.image.resize(img, [img_shape, img_shape])
#   if scale:
#     # Rescale the image (get all values between 0 and 1)
#     return img/255.
#   else:
#     return img

# Note: The following confusion matrix code is a remix of Scikit-Learn's 
# plot_confusion_matrix function - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.plot_confusion_matrix.html
import itertools
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

# Our function needs a different name to sklearn's plot_confusion_matrix
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False): 
  """Makes a labelled confusion matrix comparing predictions and ground truth labels.

  If classes is passed, confusion matrix will be labelled, if not, integer class values
  will be used.

  Args:
    y_true: Array of truth labels (must be same shape as y_pred).
    y_pred: Array of predicted labels (must be same shape as y_true).
    classes: Sequence of class labels (list, tuple or NumPy array, e.g. string form).
      If `None` or empty, integer labels are used.
    figsize: Size of output figure (default=(10, 10)).
    text_size: Size of output figure text (default=15).
    norm: if True, each cell also shows its share of the true-label row as a
      percentage (default=False).
    savefig: if True, save the figure as "confusion_matrix.png" in the current
      working directory (default=False).

  Returns:
    None. The confusion matrix is drawn on a newly created matplotlib figure.

  Example usage:
    make_confusion_matrix(y_true=test_labels, # ground truth test labels
                          y_pred=y_preds, # predicted labels
                          classes=class_names, # array of class label names
                          figsize=(15, 15),
                          text_size=10)
  """
  # Create the confusion matrix
  cm = confusion_matrix(y_true, y_pred)
  n_classes = cm.shape[0] # find the number of classes we're dealing with

  # Row-normalise only when requested. Rows that sum to zero (a class that never
  # occurs in y_true) are left at 0 instead of dividing by zero and producing NaN.
  cm_norm = None
  if norm:
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    cm_norm = np.divide(cm.astype("float"), row_totals,
                        out=np.zeros(cm.shape, dtype="float"),
                        where=row_totals != 0)

  # Plot the figure and make it pretty
  fig, ax = plt.subplots(figsize=figsize)
  cax = ax.matshow(cm, cmap=plt.cm.Blues) # cell colour encodes the raw count
  fig.colorbar(cax)

  # Use the supplied class names when there are any. An explicit None/length check
  # is required because `if classes:` raises ValueError for a NumPy array.
  if classes is not None and len(classes) > 0:
    labels = classes
  else:
    labels = np.arange(n_classes)

  # Label the axes
  ax.set(title="Confusion Matrix",
         xlabel="Predicted label",
         ylabel="True label",
         xticks=np.arange(n_classes), # create enough axis slots for each class
         yticks=np.arange(n_classes),
         xticklabels=labels, # axes will be labelled with class names (if they exist) or ints
         yticklabels=labels)

  # Make x-axis labels appear on bottom
  ax.xaxis.set_label_position("bottom")
  ax.xaxis.tick_bottom()

  # Rotate tick labels for readability & apply the requested font size
  plt.xticks(rotation=70, fontsize=text_size)
  plt.yticks(rotation=90, fontsize=text_size)

  # Cells above the midpoint between the smallest and largest count get white text
  threshold = (cm.max() + cm.min()) / 2.

  # Plot the text on each cell
  for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    if norm:
      cell_text = f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)"
    else:
      cell_text = f"{cm[i, j]}"
    plt.text(j, i, cell_text,
             horizontalalignment="center",
             color="white" if cm[i, j] > threshold else "black",
             size=text_size)

  # Save the figure to the current working directory
  if savefig:
    fig.savefig("confusion_matrix.png")
    
# Imports for the function
import numpy as np
import matplotlib.pyplot as plt

def pred_and_plot_single(model, classes, image_size, target_dir, target_class=None, custom_images=None):
  """
  Pick an image, predict its class with `model` and plot it with the prediction as title.

  Args:
    model: model used for predicting the image (its `predict` method is called)
    classes: list of class names, indexed by the model's predicted class index
    image_size: target image size passed to `load_and_prep_image`
    target_dir: target directory from where the image is picked
    target_class: class to be used for picking the image for prediction,
      None to let `get_file_path` choose
    custom_images: truthy for custom images read directly from target_dir, default: None
  PreRequisites:
    Follows the standard file structure for image classification train/classes, test/classes
    get_file_path function
    load_and_prep_image function
  Libraries:
    numpy
    matplotlib
  Return:
    None. The image is drawn on the current matplotlib axes and the title is printed.
    Title colour: green when the prediction equals target_class, blue for custom
    images, red otherwise.
  """

  # get the filepath
  file_path, target_class = get_file_path(target_dir=target_dir,
                                          classes=classes,
                                          target_class=target_class,
                                          custom_images=custom_images)

  # Prep the image
  img = load_and_prep_image(filepath=file_path,
                            image_size=image_size)

  # get prediction probabilities
  preds = model.predict(img)

  # Checking the type of classification
  if preds.size == 1:
    # Single output unit: argmax would always be 0, so threshold the value instead.
    # NOTE(review): assumes the output is a sigmoid probability of classes[1] — confirm.
    positive_prob = float(np.ravel(preds)[0])
    class_index = int(positive_prob > 0.5)
    pred_possiblity = positive_prob if class_index == 1 else 1 - positive_prob
  else:
    # One score per class: take the highest-scoring class
    class_index = int(np.argmax(preds))
    pred_possiblity = np.max(preds)
  pred_class = classes[class_index]

  if target_class == pred_class:
    title_color = "g"
  elif custom_images:
    title_color = "b"
  else:
    title_color = "r"

  # Plotting the image
  plt.imshow(plt.imread(file_path))
  if not custom_images:
    # Format the raw percentage; rounding first would always print ".00"
    title = f"Pred: {pred_class} {pred_possiblity*100:.2f}%, True: {target_class}"
  else:
    title = f"Pred: {pred_class}, {pred_possiblity*100:.2f}, custom image"
  print(title)
  plt.title(title, c=title_color)
  plt.axis(False)
  
# Imports for the function
import numpy as np
import matplotlib.pyplot as plt

def pred_and_plot_multiple(num_rows, num_cols, model, classes, image_size, target_dir, target_class=None, custom_images=None):
  """
  Predict on and plot a grid of images.

  A new figure is created and `pred_and_plot_single` is called once per grid
  cell, so num_rows * num_cols images are shown in total.

  Args:
    num_rows: number of rows in the plot
    num_cols: number of columns in the plot
    model: model used for predicting the class of each image
    classes: class names in the problem
    image_size: image size of the input passed to model (image_size, image_size)
    target_dir: target directory from where images are picked
    target_class: class to be used for picking the images, None for custom images without labels
    custom_images: For custom images with labels, default: None

  Prerequisite Functions:
    pred_and_plot_single
    get_file_path
    load_and_prep_image

  Libraries:
    matplotlib
    numpy

  Returns:
    None. The images are plotted with their true and predicted labels.
  """
  plt.figure(figsize=(2*2*num_cols, 2*num_rows))

  # Every cell is drawn with the same arguments; only the subplot slot changes
  single_kwargs = dict(model=model,
                       classes=classes,
                       image_size=image_size,
                       target_dir=target_dir,
                       target_class=target_class,
                       custom_images=custom_images)

  # Subplot positions are 1-based
  for position in range(1, num_rows * num_cols + 1):
    plt.subplot(num_rows, num_cols, position)
    pred_and_plot_single(**single_kwargs)

  
# # Make a function to predict on images and plot them (works with multi-class) db
# def pred_and_plot(model, filename, class_names):
#   """
#   Imports an image located at filename, makes a prediction on it with
#   a trained model and plots the image with the predicted class as the title.
#   """
#   # Import the target image and preprocess it
#   img = load_and_prep_image(filename)

#   # Make a prediction
#   pred = model.predict(tf.expand_dims(img, axis=0))

#   # Get the predicted class
#   if len(pred[0]) > 1: # check for multi-class
#     pred_class = class_names[pred.argmax()] # if more than one output, take the max
#   else:
#     pred_class = class_names[int(tf.round(pred)[0][0])] # if only one output, round

#   # Plot the image and predicted class
#   plt.imshow(img)
#   plt.title(f"Prediction: {pred_class}")
#   plt.axis(False);
  
import datetime

def create_tensorboard_callback(dir_name, experiment_name):
  """
  Creates a TensorBoard callback instance to store log files.

  Log files are written under:
    "dir_name/experiment_name/current_datetime/"
  where current_datetime is formatted as YYYYmmdd-HHMMSS.

  Args:
    dir_name: target directory to store TensorBoard log files
    experiment_name: name of experiment directory (e.g. efficientnet_model_1)

  Returns:
    The `tf.keras.callbacks.TensorBoard` callback configured with that log directory.
  """
  timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
  log_dir = "/".join([dir_name, experiment_name, timestamp])

  callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
  print(f"Saving TensorBoard log files to: {log_dir}")
  return callback

# Plot the validation and training data separately
import matplotlib.pyplot as plt

def plot_loss_curves(history):
  """
  Plots training/validation loss and accuracy curves as two separate plots.

  The loss curves are drawn on the current figure; the accuracy curves are
  drawn on a new figure.

  Args:
    history: TensorFlow model History object (see: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History).
      Its `history` dict must contain 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
  """
  # Pull out every series up front so a missing key fails before anything is drawn
  recorded = history.history
  train_loss, valid_loss = recorded['loss'], recorded['val_loss']
  train_acc, valid_acc = recorded['accuracy'], recorded['val_accuracy']

  # One x position per recorded epoch
  epochs = range(len(train_loss))

  def _draw_pair(train_series, train_label, valid_series, valid_label, title):
    # Draw one training/validation pair on the current axes
    plt.plot(epochs, train_series, label=train_label)
    plt.plot(epochs, valid_series, label=valid_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.legend()

  # Loss goes on the current figure
  _draw_pair(train_loss, 'training_loss', valid_loss, 'val_loss', 'Loss')

  # Accuracy gets its own figure
  plt.figure()
  _draw_pair(train_acc, 'training_accuracy', valid_acc, 'val_accuracy', 'Accuracy')

def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Plots the metrics of two consecutive TensorFlow History objects as one run.

    Accuracy and loss (training and validation) from both histories are joined
    end to end and drawn in two stacked subplots, with a vertical line marking
    where the second training phase starts.

    Args:
      original_history: History object from original model (before new_history)
      new_history: History object from continued model training (after original_history)
      initial_epochs: Number of epochs in original_history (the marker line is drawn
        at x = initial_epochs - 1)
    """
    # Measurements from the first training phase
    first = original_history.history
    first_acc, first_loss = first["accuracy"], first["loss"]
    first_val_acc, first_val_loss = first["val_accuracy"], first["val_loss"]

    # Append the measurements from the continued training
    second = new_history.history
    total_acc = first_acc + second["accuracy"]
    total_loss = first_loss + second["loss"]
    total_val_acc = first_val_acc + second["val_accuracy"]
    total_val_loss = first_val_loss + second["val_loss"]

    # x position of the marker line (epochs are plotted from index 0)
    marker_x = [initial_epochs-1, initial_epochs-1]

    # (training series, label, validation series, label, legend location, title)
    panels = [
        (total_acc, 'Training Accuracy', total_val_acc, 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (total_loss, 'Training Loss', total_val_loss, 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    ]

    plt.figure(figsize=(8, 8))
    for slot, (train, train_label, valid, valid_label, legend_loc, title) in enumerate(panels, start=1):
        plt.subplot(2, 1, slot)
        plt.plot(train, label=train_label)
        plt.plot(valid, label=valid_label)
        # The y-limits are read after the curves are drawn so the line spans the panel
        plt.plot(marker_x, plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc=legend_loc)
        plt.title(title)

    # The x-axis label is applied to the last (bottom) subplot
    plt.xlabel('epoch')
    plt.show()
  
# Create function to unzip a zipfile into current working directory 
# (since we're going to be downloading and unzipping a few files)
import zipfile

def unzip_data(filename):
  """
  Unzips filename into the current working directory.

  Args:
    filename (str): a filepath to a target zip folder to be unzipped.

  Returns:
    None.
  """
  # The context manager closes the archive even if extraction raises
  with zipfile.ZipFile(filename, "r") as zip_ref:
    zip_ref.extractall()

# Walk through an image classification directory and find out how many files (images)
# are in each subdirectory.
import os

def walk_through_dir(dir_path):
  """
  Prints a summary line for dir_path and every directory beneath it.

  Args:
    dir_path (str): target directory

  Returns:
    None. For each directory visited by `os.walk`, one line is printed with:
      number of subdirectories it contains
      number of files (reported as images) it contains
      its path
  """
  for current_dir, subdirs, files in os.walk(dir_path):
    n_dirs, n_files = len(subdirs), len(files)
    print(f"There are {n_dirs} directories and {n_files} images in '{current_dir}'.")
    
# Function to evaluate: accuracy, precision, recall, f1-score
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def calculate_results(y_true, y_pred):
  """
  Calculates accuracy, precision, recall and f1 score of a classification model.

  Precision, recall and f1 are computed with scikit-learn's "weighted" average.

  Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array

  Returns a dictionary with the keys "accuracy", "precision", "recall" and "f1".
  """
  accuracy = accuracy_score(y_true, y_pred)

  # The fourth value returned (support) is not needed here
  precision, recall, f1, _support = precision_recall_fscore_support(
      y_true, y_pred, average="weighted")

  return {"accuracy": accuracy,
          "precision": precision,
          "recall": recall,
          "f1": f1}

# Function to get random file path

# Imports
import os, random


def get_file_path(target_dir, classes, target_class=None, custom_images=None):
  """
  Chooses a random file from a directory that follows the standard layout
  target_dir/<class name>/<files>, or directly from target_dir for custom images.

  Args:
    target_dir: target directory from where the image is picked; works with or
      without a trailing path separator
    classes: list of class names; one is chosen at random when target_class is
      None and custom_images is not set
    target_class: class sub-directory to pick the image from, default: None
    custom_images: when truthy, files are picked directly from target_dir and
      no class sub-directory is used, default: None

  PreRequisites:
    Follows the standard file structure for image classification train/classes, test/classes

  Returns:
    filepath: path of the randomly chosen file
    target_class: the class that was used (None for custom images when no
      target_class was passed)

  Raises:
    IndexError: if the chosen directory (or `classes`, when a class must be
      drawn) is empty, from `random.choice`.
  """
  if custom_images:
    # Custom images sit directly in target_dir
    image_dir = target_dir
  else:
    if target_class is None:
      # Choose a random class
      target_class = random.choice(classes)
    # os.path.join inserts the separator only when target_dir lacks one, so both
    # "data/test" and "data/test/" resolve to the class sub-directory
    image_dir = os.path.join(target_dir, target_class)

  # Select a random file from the chosen directory
  target_image = random.choice(os.listdir(image_dir))

  return os.path.join(image_dir, target_image), target_class

def save_model(model, model_name, target_dir):
  """
  Saves the model under target_dir.

  Args:
    model: object exposing a `save(path)` method
    model_name: name used as the last path component
    target_dir: directory part of the save path

  The model's `save` method is called with "target_dir/model_name".
  """
  destination = "/".join([target_dir, model_name])
  model.save(destination)

import matplotlib.pyplot as plt
import os, random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Function to view a random image in computer vision problems
def view_random_image(target_dir, classes):
  """
  Shows one randomly chosen image from a randomly chosen class.

  PreRequisite:
    Standard Computer vision directory structure (target_dir/<class name>/<images>)

  Args:
    target_dir: Directory from where images are picked
    classes: list of classes

  The image is drawn on the current matplotlib axes with its class and array
  shape as the title. Nothing is returned.
  """
  # Pick the class first, then a file inside that class directory
  chosen_class = random.choice(classes)
  class_dir = "/".join([target_dir, chosen_class])
  chosen_file = random.choice(os.listdir(class_dir))

  pixels = img_to_array(load_img("/".join([class_dir, chosen_file])))

  # Values are divided by 255 before being handed to imshow
  plt.imshow(pixels/255.)
  plt.axis(False)
  plt.title(f"Class: {chosen_class}, Shape: {pixels.shape}")

import matplotlib.pyplot as plt
import os, random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Function to view multiple images
def view_many_random_images(num_rows, num_cols, target_dir, classes):
  """
  Shows a grid of random images by calling `view_random_image` once per cell.

  PreRequisite:
    Standard Computer vision directory structure

  Args:
    num_rows: number of rows
    num_cols: number of columns
    target_dir: Directory from where images are picked
    classes: list of classes

  num_rows * num_cols random images will be plotted on a new figure.
  """
  plt.figure(figsize=(2*2*num_cols, 2*num_rows))

  # Subplot positions are 1-based
  for position in range(1, num_rows * num_cols + 1):
    plt.subplot(num_rows, num_cols, position)
    view_random_image(target_dir=target_dir, classes=classes)