Object Detection Using Already Trained Models

In this Jupyter notebook, we will try two different models: an SSD MobileNet trained on the COCO dataset and a Faster R-CNN Inception ResNet v2 trained on the Open Images dataset.

Object Detection using ssd_mobilenet_v2_coco_2018_03_29 and faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28

We need several utilities from the tensorflow/models repo, which can be found at https://github.com/tensorflow/models. Clone (or download) the repo and add the paths of research and research/object_detection to the Python path:

TF_MODELS_DIR = '/Users/sparrow/Learning/machine-learning/tensorflow-models-zoo'
sys.path.append(os.path.join(TF_MODELS_DIR, 'research'))
sys.path.append(os.path.join(TF_MODELS_DIR, 'research', 'object_detection'))

The pre-trained models can be downloaded from the TensorFlow detection model zoo in the same repo. Download a model and extract it into a folder called models in the project root directory. The extracted folder contains the frozen graph, named frozen_inference_graph.pb.
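If the tarball is not already on disk, it can be fetched and extracted with the urllib and tarfile modules imported below. A minimal sketch, assuming the model zoo's standard download URL and a models folder in the project root:

import tarfile
import six.moves.urllib as urllib

MODEL_NAME = 'ssd_mobilenet_v2_coco_2018_03_29'
MODEL_FILE = MODEL_NAME + '.tar.gz'
# Standard base URL used by the TensorFlow detection model zoo (assumption:
# the tarball for your model follows the same naming scheme).
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
with tarfile.open(MODEL_FILE) as tar:
    # Extracts to models/<MODEL_NAME>/frozen_inference_graph.pb
    tar.extractall(path='models')

The same snippet works for the Faster R-CNN model used in section 2 by swapping MODEL_NAME.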

1. Object Detection using ssd_mobilenet_v2_coco_2018_03_29

Import all necessary modules

import warnings
warnings.simplefilter('ignore')

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from PIL import Image
import argparse
import cv2

import matplotlib.pyplot as plt

%matplotlib inline

Define all paths

ROOT_DIR = '/Users/sparrow/Learning/machine-learning/the-eye/object-detection'
DATA_DIR = os.path.join(ROOT_DIR, 'data')
MODELS_DIR = os.path.join(ROOT_DIR, 'models', 'ssd_mobilenet_v2_coco_2018_03_29')
DATASET_DIR = '/srv/downloads/moshfiqur-ml-datasets/homenum-revelio'
TEST_IMAGES_DIR = os.path.join(ROOT_DIR, 'images')
CROPPED_IMAGES_DIR = os.path.join(ROOT_DIR, 'images', 'cropped')

Define all the variables

# Path to the label map that maps class ids to the label strings shown on each box.
LABELS_PATH = os.path.join(DATA_DIR, 'mscoco_label_map.pbtxt')
NUM_CLASSES = 90
MODEL_PATH = os.path.join(MODELS_DIR, 'frozen_inference_graph.pb')

Import the modules from TensorFlow models

TF_MODELS_DIR = '/Users/sparrow/Learning/machine-learning/tensorflow-models-zoo'
sys.path.append(os.path.join(TF_MODELS_DIR, 'research'))
sys.path.append(os.path.join(TF_MODELS_DIR, 'research', 'object_detection'))

# from helper import run_inference_for_single_image, load_image_into_numpy_array
from object_detection.utils import ops as utils_ops
from utils import label_map_util
from utils import visualization_utils as vis_util

Initialize the label map, categories and category indexes

label_map = label_map_util.load_labelmap(LABELS_PATH)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
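category_index is a plain dict keyed by class id, each value holding the id and display name. For the COCO label map, for instance:

# Class id 1 in the COCO label map is 'person'.
print(category_index[1])  # {'id': 1, 'name': 'person'}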

Load the trained model graph

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(MODEL_PATH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
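The tensor names used in the next cell ('image_tensor:0', 'detection_boxes:0', and so on) are the standard input/output names baked into the zoo's frozen graphs. If a graph uses different names, they can be listed directly from the imported graph (a quick sketch):

# List the operation names in the imported graph; the input placeholder
# is near the top and the detection outputs near the bottom.
with detection_graph.as_default():
    op_names = [op.name for op in detection_graph.get_operations()]
print(op_names[0])
print(op_names[-5:])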

Object detection process

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each detected object.
        # Score is shown on the result image, together with the class label.
        scores = detection_graph.get_tensor_by_name('detection_scores:0')
        classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        test_images = os.listdir(TEST_IMAGES_DIR)
        for file_name in test_images:
            if not file_name.endswith('.jpg'):
                continue

            image_path = os.path.join(TEST_IMAGES_DIR, file_name)

            # The array-based representation of the image will be used later to
            # prepare the result image with boxes and labels on it. OpenCV reads
            # images in BGR order while the model was trained on RGB, so convert
            # right after loading.
            image_np = cv2.imread(image_path)
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)

            height, width, num_channels = image_np.shape

            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)

            # Actual detection.
            (result_boxes, result_scores, result_classes, result_num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})

            result_boxes = np.squeeze(result_boxes)
            result_classes = np.squeeze(result_classes)
            result_scores = np.squeeze(result_scores)

            for index, score in enumerate(result_scores):
                if score < 0.1:
                    continue

                label = category_index[int(result_classes[index])]['name']

                # Boxes come back as normalized [ymin, xmin, ymax, xmax];
                # scale them to pixel coordinates.
                ymin, xmin, ymax, xmax = result_boxes[index]
                x_min = int(xmin * width)
                x_max = int(xmax * width)
                y_min = int(ymin * height)
                y_max = int(ymax * height)

                plt.title('Label: {}, Score: {:.2f}, Top-Left: ({}, {}), Bottom-Right: ({}, {})'.format(
                    label, score, x_min, y_min, x_max, y_max))

                # Draw on a copy so the original image stays clean for the
                # next detection.
                image_np_tmp = image_np.copy()
                cv2.rectangle(image_np_tmp, (x_min, y_min), (x_max, y_max), (255, 0, 0), 3)

                plt.imshow(image_np_tmp)
                plt.show()

(Output: seven detection plots, each showing the test image with one bounding box and a title giving the label, score, and box corners.)
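CROPPED_IMAGES_DIR is defined above but not used in this run. Saving each detection as its own image is a small extension of the loop; here is a sketch with a hypothetical save_crop helper that reuses the pixel coordinates computed above:

import os
import cv2

def save_crop(image_rgb, box_pixels, out_dir, name):
    # box_pixels is (x_min, y_min, x_max, y_max) in pixel coordinates.
    x_min, y_min, x_max, y_max = box_pixels
    os.makedirs(out_dir, exist_ok=True)
    crop = image_rgb[y_min:y_max, x_min:x_max]
    # OpenCV writes BGR, so convert back before saving.
    cv2.imwrite(os.path.join(out_dir, name),
                cv2.cvtColor(crop, cv2.COLOR_RGB2BGR))

Inside the detection loop above, one could then call:

# save_crop(image_np, (x_min, y_min, x_max, y_max), CROPPED_IMAGES_DIR,
#           '{}_{}_{}'.format(label, index, file_name))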

2. Object Detection using faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28

Import all necessary modules

import warnings
warnings.simplefilter('ignore')

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from PIL import Image
import argparse
import cv2

import matplotlib.pyplot as plt

Declare the necessary directories

ROOT_DIR = '/Users/sparrow/Learning/machine-learning/the-eye/object-detection'
DATA_DIR = os.path.join(ROOT_DIR, 'data')
MODELS_DIR = os.path.join(ROOT_DIR, 'models', 'faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28')
DATASET_DIR = '/srv/downloads/moshfiqur-ml-datasets/homenum-revelio'
TEST_IMAGES_DIR = os.path.join(ROOT_DIR, 'images')
CROPPED_IMAGES_DIR = os.path.join(ROOT_DIR, 'images', 'cropped')

Declare the necessary variables

# Path to the label map that maps class ids to the label strings shown on each box.
LABELS_PATH = os.path.join(DATA_DIR, 'oid_bbox_trainable_label_map.pbtxt')
NUM_CLASSES = 200000
MODEL_PATH = os.path.join(MODELS_DIR, 'frozen_inference_graph.pb')

Import the modules from TensorFlow models

TF_MODELS_DIR = '/Users/sparrow/Learning/machine-learning/tensorflow-models-zoo'
sys.path.append(os.path.join(TF_MODELS_DIR, 'research'))
sys.path.append(os.path.join(TF_MODELS_DIR, 'research', 'object_detection'))
# from helper import run_inference_for_single_image, load_image_into_numpy_array
from object_detection.utils import ops as utils_ops
from utils import label_map_util
from utils import visualization_utils as vis_util

Initialize the label map, categories and category indexes

label_map = label_map_util.load_labelmap(LABELS_PATH)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

Load the trained model graph

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(MODEL_PATH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

Object detection

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each detected object.
        # Score is shown on the result image, together with the class label.
        scores = detection_graph.get_tensor_by_name('detection_scores:0')
        classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        test_images = os.listdir(TEST_IMAGES_DIR)
        for file_name in test_images:
            if not file_name.endswith('.jpg'):
                continue

            image_path = os.path.join(TEST_IMAGES_DIR, file_name)

            # The array-based representation of the image will be used later to
            # prepare the result image with boxes and labels on it. OpenCV reads
            # images in BGR order while the model was trained on RGB, so convert
            # right after loading.
            image_np = cv2.imread(image_path)
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)

            height, width, num_channels = image_np.shape

            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)

            # Actual detection.
            (result_boxes, result_scores, result_classes, result_num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})

            result_boxes = np.squeeze(result_boxes)
            result_classes = np.squeeze(result_classes)
            result_scores = np.squeeze(result_scores)

            for index, score in enumerate(result_scores):
                if score < 0.1:
                    continue

                label = category_index[int(result_classes[index])]['name']

                # Boxes come back as normalized [ymin, xmin, ymax, xmax];
                # scale them to pixel coordinates.
                ymin, xmin, ymax, xmax = result_boxes[index]
                x_min = int(xmin * width)
                x_max = int(xmax * width)
                y_min = int(ymin * height)
                y_max = int(ymax * height)

                plt.title('Label: {}, Score: {:.2f}, Top-Left: ({}, {}), Bottom-Right: ({}, {})'.format(
                    label, score, x_min, y_min, x_max, y_max))

                # Draw on a copy so the original image stays clean for the
                # next detection.
                image_np_tmp = image_np.copy()
                cv2.rectangle(image_np_tmp, (x_min, y_min), (x_max, y_max), (255, 0, 0), 3)

                plt.imshow(image_np_tmp)
                plt.show()

(Output: twenty-one detection plots in the same format as in section 1.)
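As an aside, the visualization_utils module imported as vis_util can replace the manual rectangle drawing in both sections: its visualize_boxes_and_labels_on_image_array call draws all boxes, labels and scores in one pass. A sketch using the result arrays from the cell above:

# Draw every detection above the module's default score threshold (0.5)
# onto a copy of the RGB image.
image_with_boxes = image_np.copy()
vis_util.visualize_boxes_and_labels_on_image_array(
    image_with_boxes,
    result_boxes,
    result_classes.astype(np.int32),
    result_scores,
    category_index,
    use_normalized_coordinates=True,
    line_thickness=4)
plt.imshow(image_with_boxes)
plt.show()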
