内容简介:TensorFlow提供了用于检测图片或视频中所包含物体的API,详情可参考以下链接物体检测和图片分类不同
TensorFlow提供了用于检测图片或视频中所包含物体的API,详情可参考以下链接
物体检测和图片分类不同
- 图片分类是将图片分为某一类别,即从多个可能的分类中选择一个,即使可以按照概率输出最可能的多个分类,但理论上的正确答案只有一个
- 物体检测是检测图片中所出现的全部物体并且用矩形(Anchor Box)进行标注,物体的类别可以包括多种,例如人、车、动物、路标等,即正确答案可以是多个
通过多个例子,了解TensorFlow物体检测API的使用方法
这里使用预训练好的 ssd_mobilenet_v1_coco 模型(Single Shot MultiBox Detector),更多可用的物体检测模型可以参考这里
举个例子
加载库
# -*- coding: utf-8 -*- import numpy as np import tensorflow as tf import matplotlib.pyplot as plt from PIL import Image from utils import label_map_util from utils import visualization_utils as vis_util 复制代码
定义一些常量
PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb' PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt' NUM_CLASSES = 90 复制代码
加载预训练好的模型
detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: od_graph_def.ParseFromString(fid.read()) tf.import_graph_def(od_graph_def, name='') 复制代码
加载分类标签数据
label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) 复制代码
一个将图片转为数组的辅助函数,以及测试图片路径
def load_image_into_numpy_array(image): (im_width, im_height) = image.size return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8) TEST_IMAGE_PATHS = ['test_images/image1.jpg', 'test_images/image2.jpg'] 复制代码
使用模型进行物体检测
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
for image_path in TEST_IMAGE_PATHS:
image = Image.open(image_path)
image_np = load_image_into_numpy_array(image)
image_np_expanded = np.expand_dims(image_np, axis=0)
(boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
vis_util.visualize_boxes_and_labels_on_image_array(image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, line_thickness=8)
plt.figure(figsize=[12, 8])
plt.imshow(image_np)
plt.show()
复制代码
检测结果如下,第一张图片检测出了两只狗狗
第二张图片检测出了一些人和风筝
摄像头检测
安装 OpenCV ,用于实现和计算机视觉相关的功能,版本为 3.3.0.10
pip install opencv-python opencv-contrib-python -i https://pypi.tuna.tsinghua.edu.cn/simple 复制代码
查看是否安装成功,没有报错即可
import cv2 tracker = cv2.TrackerMedianFlow_create() 复制代码
在以上代码的基础上进行修改
cv2
完整代码如下
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from utils import label_map_util
from utils import visualization_utils as vis_util
import cv2
cap = cv2.VideoCapture(0)
PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'
PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt'
NUM_CLASSES = 90
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
od_graph_def.ParseFromString(fid.read())
tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
while True:
ret, image_np = cap.read()
image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
image_np_expanded = np.expand_dims(image_np, axis=0)
(boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
vis_util.visualize_boxes_and_labels_on_image_array(image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, line_thickness=8)
cv2.imshow('object detection', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
if cv2.waitKey(25) & 0xFF == ord('q'):
cap.release()
cv2.destroyAllWindows()
break
复制代码
视频检测
使用 cv2 读取视频并获取每一帧图片,然后将检测后的每一帧写入新的视频文件
生成的视频文件只有图像、没有声音,关于音频的处理以及视频和音频的合成,后面再进一步探索
完整代码如下
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from utils import label_map_util
from utils import visualization_utils as vis_util
import cv2
cap = cv2.VideoCapture('绝地逃亡.mov')
ret, image_np = cap.read()
out = cv2.VideoWriter('output.mov', -1, cap.get(cv2.CAP_PROP_FPS), (image_np.shape[1], image_np.shape[0]))
PATH_TO_CKPT = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'
PATH_TO_LABELS = 'ssd_mobilenet_v1_coco_2017_11_17/mscoco_label_map.pbtxt'
NUM_CLASSES = 90
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
od_graph_def.ParseFromString(fid.read())
tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
while cap.isOpened():
ret, image_np = cap.read()
if len((np.array(image_np)).shape) == 0:
break
image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
image_np_expanded = np.expand_dims(image_np, axis=0)
(boxes, scores, classes, num) = sess.run(
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
vis_util.visualize_boxes_and_labels_on_image_array(image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, line_thickness=8)
out.write(cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
cap.release()
out.release()
cv2.destroyAllWindows()
复制代码
播放处理好的视频,可以看到很多地方都有相应的检测结果
以上就是本文的全部内容,希望本文的内容对大家的学习或者工作能带来一定的帮助,也希望大家多多支持 码农网
猜你喜欢:- ICLR 2019 | 骑驴找马:利用深度强化学习模型定位新物体
- 深度学习在 iOS 上的实践 —— 通过 YOLO 在 iOS 上实现实时物体检测
- 旷视发布通用物体检测数据集 Objects365,开启 CVPR 物体检测挑战赛
- c++ 投掷活动物体
- WebGL之物体选择
- 简易版物体识别
本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们。
C++语言的设计和演化
[美] Bjarne Stroustrup / 裘宗燕 / 机械工业出版社 / 2002-1 / 48.00元
这本书是C++的设计者关于C++语言的最主要著作之一。作者综合性地论述了C++的历史和发展,C++中各种重要机制的本质意义和设计背景,这些机制的基本用途和使用方法,讨论了C++所适合的应用领域及其未来的发展前景。一起来看看 《C++语言的设计和演化》 这本书的介绍吧!