Keras on tensorflow2 で SSD

Tensorflow2上のKerasでSSDの実行を試したときのメモ

※ 2021.05.21 kerasをtensorflow.kerasに変更

概要

Kerasを使ってSSDを実行してみようと思ったら、Tensorflow1.x上の情報ばかり出てきて、Tensorflow2で実行するのに手間取ったのでメモ。
(最終的にopenVINOに持っていきたいなぁ、と思って試し始めたんだけど、openVINOにサクッと変換できるのはcaffeだった😅)
ということで、やったけど 続きはないかも。実行遅いし…😩💨

手順

【実験】学習済のSSDを使ってPaPeRo i に複数種類の物体を数えさせる を参考に、KerasをTensorflow2で動かしてみる。

Pythonは3.8を使用した(念のため明記しておく)。

Anaconda使ってないし、PaPeRo i じゃないので、そのまんまじゃないけど、
とりあえずSSDを動かしてみよう。

python仮想環境の準備

まずは、python仮想環境の準備をする。
pyenv環境前提。

# ベースディレクトリとpython仮想環境の準備
mkdir -p /work/keras2/
cd /work/keras2/
pyenv virtualenv 3.8.8 keras2
pyenv local keras2 
pip install --upgrade pip setuptools

# pythonモジュールのインストール
pip install opencv-python
pip install tensorflow
# kerasはtensorflow内のもの(tensorflow.keras)を直接参照するようにしたので不要
# pip install keras
pip install matplotlib
pip install imageio

インストールされているpythonモジュールは以下の通り

absl-py==0.12.0
astunparse==1.6.3
cachetools==4.2.2
certifi==2020.12.5
chardet==4.0.0
cycler==0.10.0
flatbuffers==1.12
gast==0.4.0
google-auth==1.30.0
google-auth-oauthlib==0.4.4
google-pasta==0.2.0
grpcio==1.34.1
h5py==3.1.0
idna==2.10
imageio==2.9.0
keras-nightly==2.5.0.dev2021032900
Keras-Preprocessing==1.1.2
kiwisolver==1.3.1
Markdown==3.3.4
matplotlib==3.4.2
numpy==1.19.5
oauthlib==3.1.0
opencv-python==4.5.2.52
opt-einsum==3.3.0
Pillow==8.2.0
protobuf==3.17.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyparsing==2.4.7
python-dateutil==2.8.1
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7.2
six==1.15.0
tensorboard==2.5.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tensorflow==2.5.0
tensorflow-estimator==2.5.0
termcolor==1.1.0
typing-extensions==3.7.4.3
urllib3==1.26.4
Werkzeug==2.0.1
wrapt==1.12.1

[!NOTE] Keras修正以前の状態は以下

absl-py==0.12.0
astunparse==1.6.3
cachetools==4.2.1
certifi==2020.12.5
chardet==4.0.0
cycler==0.10.0
flatbuffers==1.12
gast==0.3.3
google-auth==1.28.0
google-auth-oauthlib==0.4.3
google-pasta==0.2.0
grpcio==1.32.0
h5py==2.10.0
idna==2.10
imageio==2.9.0
Keras==2.4.3
Keras-Preprocessing==1.1.2
kiwisolver==1.3.1
Markdown==3.3.4
matplotlib==3.3.4
numpy==1.19.5
oauthlib==3.1.0
opencv-python==4.5.1.48
opt-einsum==3.3.0
Pillow==8.1.2
protobuf==3.15.6
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyparsing==2.4.7
python-dateutil==2.8.1
PyYAML==5.4.1
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7.2
scipy==1.6.1
six==1.15.0
tensorboard==2.4.1
tensorboard-plugin-wit==1.8.0
tensorflow==2.4.1
tensorflow-estimator==2.4.0
termcolor==1.1.0
typing-extensions==3.7.4.3
urllib3==1.26.4
Werkzeug==1.0.1
wrapt==1.12.1

patchファイルの準備

参照先には色々手順が書いてあるが、めんどくさいので パッチファイル用意した(足りない修正もあるし)。
以下の内容を、それぞれ ssd_keras.patch と ssd_keras_2.patch のファイル名で保存しておく。

ssd_keras.patch

参照先の情報によるpatch

ssd_keras.patch

diff --git a/ssd.py b/ssd.py
index c0049d8..656cd83 100644
--- a/ssd.py
+++ b/ssd.py
@@ -2,14 +2,15 @@
 
 import keras.backend as K
 from keras.layers import Activation
-from keras.layers import AtrousConvolution2D
+#from keras.layers import AtrousConvolution2D
 from keras.layers import Convolution2D
 from keras.layers import Dense
 from keras.layers import Flatten
 from keras.layers import GlobalAveragePooling2D
 from keras.layers import Input
 from keras.layers import MaxPooling2D
-from keras.layers import merge
+#from keras.layers import merge
+from keras.layers.merge import concatenate
 from keras.layers import Reshape
 from keras.layers import ZeroPadding2D
 from keras.models import Model
@@ -34,109 +35,115 @@ def SSD300(input_shape, num_classes=21):
     input_tensor = input_tensor = Input(shape=input_shape)
     img_size = (input_shape[1], input_shape[0])
     net['input'] = input_tensor
-    net['conv1_1'] = Convolution2D(64, 3, 3,
+    net['conv1_1'] = Convolution2D(64, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv1_1')(net['input'])
-    net['conv1_2'] = Convolution2D(64, 3, 3,
+    
+    net['conv1_2'] = Convolution2D(64, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv1_2')(net['conv1_1'])
-    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
+
+    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                 name='pool1')(net['conv1_2'])
     # Block 2
-    net['conv2_1'] = Convolution2D(128, 3, 3,
+    net['conv2_1'] = Convolution2D(128, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv2_1')(net['pool1'])
-    net['conv2_2'] = Convolution2D(128, 3, 3,
+
+    net['conv2_2'] = Convolution2D(128, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv2_2')(net['conv2_1'])
-    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
+    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                 name='pool2')(net['conv2_2'])
+
     # Block 3
-    net['conv3_1'] = Convolution2D(256, 3, 3,
+    net['conv3_1'] = Convolution2D(256, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv3_1')(net['pool2'])
-    net['conv3_2'] = Convolution2D(256, 3, 3,
+    net['conv3_2'] = Convolution2D(256, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv3_2')(net['conv3_1'])
-    net['conv3_3'] = Convolution2D(256, 3, 3,
+    net['conv3_3'] = Convolution2D(256, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv3_3')(net['conv3_2'])
-    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
+    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                 name='pool3')(net['conv3_3'])
     # Block 4
-    net['conv4_1'] = Convolution2D(512, 3, 3,
+    net['conv4_1'] = Convolution2D(512, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv4_1')(net['pool3'])
-    net['conv4_2'] = Convolution2D(512, 3, 3,
+    net['conv4_2'] = Convolution2D(512, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv4_2')(net['conv4_1'])
-    net['conv4_3'] = Convolution2D(512, 3, 3,
+    net['conv4_3'] = Convolution2D(512, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv4_3')(net['conv4_2'])
-    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
+    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                 name='pool4')(net['conv4_3'])
-    # Block 5
-    net['conv5_1'] = Convolution2D(512, 3, 3,
+# Block 5
+    net['conv5_1'] = Convolution2D(512, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv5_1')(net['pool4'])
-    net['conv5_2'] = Convolution2D(512, 3, 3,
+    net['conv5_2'] = Convolution2D(512, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv5_2')(net['conv5_1'])
-    net['conv5_3'] = Convolution2D(512, 3, 3,
+    net['conv5_3'] = Convolution2D(512, (3, 3),
                                    activation='relu',
-                                   border_mode='same',
+                                   padding='same',
                                    name='conv5_3')(net['conv5_2'])
-    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same',
+    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), padding='same',
                                 name='pool5')(net['conv5_3'])
     # FC6
-    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
-                                     activation='relu', border_mode='same',
+    net['fc6'] = Convolution2D(1024, (3, 3), dilation_rate=(6, 6),
+                                     activation='relu', padding='same',
                                      name='fc6')(net['pool5'])
     # x = Dropout(0.5, name='drop6')(x)
     # FC7
-    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu',
-                               border_mode='same', name='fc7')(net['fc6'])
+    net['fc7'] = Convolution2D(1024, (1, 1), activation='relu',
+                               padding='same', name='fc7')(net['fc6'])
     # x = Dropout(0.5, name='drop7')(x)
     # Block 6
-    net['conv6_1'] = Convolution2D(256, 1, 1, activation='relu',
-                                   border_mode='same',
+    net['conv6_1'] = Convolution2D(256, (1, 1), activation='relu',
+                                   padding='same',
                                    name='conv6_1')(net['fc7'])
-    net['conv6_2'] = Convolution2D(512, 3, 3, subsample=(2, 2),
-                                   activation='relu', border_mode='same',
+
+
+    net['conv6_2'] = Convolution2D(512, (3, 3), strides=(2, 2),
+                                   activation='relu', padding='same',
                                    name='conv6_2')(net['conv6_1'])
-    # Block 7
-    net['conv7_1'] = Convolution2D(128, 1, 1, activation='relu',
-                                   border_mode='same',
+        # Block 7
+    net['conv7_1'] = Convolution2D(128, (1, 1), activation='relu',
+                                   padding='same',
                                    name='conv7_1')(net['conv6_2'])
     net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])
-    net['conv7_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
-                                   activation='relu', border_mode='valid',
+    net['conv7_2'] = Convolution2D(256, (3, 3), strides=(2, 2),
+                                   activation='relu', padding='valid',
                                    name='conv7_2')(net['conv7_2'])
     # Block 8
-    net['conv8_1'] = Convolution2D(128, 1, 1, activation='relu',
-                                   border_mode='same',
+    net['conv8_1'] = Convolution2D(128, (1, 1), activation='relu',
+                                   padding='same',
                                    name='conv8_1')(net['conv7_2'])
-    net['conv8_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
-                                   activation='relu', border_mode='same',
+    net['conv8_2'] = Convolution2D(256, (3, 3), strides=(2, 2),
+                                   activation='relu', padding='same',
                                    name='conv8_2')(net['conv8_1'])
     # Last Pool
     net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])
     # Prediction from conv4_3
     net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
     num_priors = 3
-    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
+    x = Convolution2D(num_priors * 4, (3, 3), padding='same',
                       name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
     net['conv4_3_norm_mbox_loc'] = x
     flatten = Flatten(name='conv4_3_norm_mbox_loc_flat')
@@ -144,7 +151,7 @@ def SSD300(input_shape, num_classes=21):
     name = 'conv4_3_norm_mbox_conf'
     if num_classes != 21:
         name += '_{}'.format(num_classes)
-    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
+    x = Convolution2D(num_priors * num_classes, (3, 3), padding='same',
                       name=name)(net['conv4_3_norm'])
     net['conv4_3_norm_mbox_conf'] = x
     flatten = Flatten(name='conv4_3_norm_mbox_conf_flat')
@@ -155,16 +162,16 @@ def SSD300(input_shape, num_classes=21):
     net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])
     # Prediction from fc7
     num_priors = 6
-    net['fc7_mbox_loc'] = Convolution2D(num_priors * 4, 3, 3,
-                                        border_mode='same',
+    net['fc7_mbox_loc'] = Convolution2D(num_priors * 4, (3, 3),
+                                        padding='same',
                                         name='fc7_mbox_loc')(net['fc7'])
     flatten = Flatten(name='fc7_mbox_loc_flat')
     net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc'])
     name = 'fc7_mbox_conf'
     if num_classes != 21:
         name += '_{}'.format(num_classes)
-    net['fc7_mbox_conf'] = Convolution2D(num_priors * num_classes, 3, 3,
-                                         border_mode='same',
+    net['fc7_mbox_conf'] = Convolution2D(num_priors * num_classes, (3, 3),
+                                         padding='same',
                                          name=name)(net['fc7'])
     flatten = Flatten(name='fc7_mbox_conf_flat')
     net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf'])
@@ -174,7 +181,7 @@ def SSD300(input_shape, num_classes=21):
     net['fc7_mbox_priorbox'] = priorbox(net['fc7'])
     # Prediction from conv6_2
     num_priors = 6
-    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
+    x = Convolution2D(num_priors * 4, (3, 3), padding='same',
                       name='conv6_2_mbox_loc')(net['conv6_2'])
     net['conv6_2_mbox_loc'] = x
     flatten = Flatten(name='conv6_2_mbox_loc_flat')
@@ -182,7 +189,7 @@ def SSD300(input_shape, num_classes=21):
     name = 'conv6_2_mbox_conf'
     if num_classes != 21:
         name += '_{}'.format(num_classes)
-    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
+    x = Convolution2D(num_priors * num_classes, (3, 3), padding='same',
                       name=name)(net['conv6_2'])
     net['conv6_2_mbox_conf'] = x
     flatten = Flatten(name='conv6_2_mbox_conf_flat')
@@ -193,7 +200,7 @@ def SSD300(input_shape, num_classes=21):
     net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])
     # Prediction from conv7_2
     num_priors = 6
-    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
+    x = Convolution2D(num_priors * 4, (3, 3), padding='same',
                       name='conv7_2_mbox_loc')(net['conv7_2'])
     net['conv7_2_mbox_loc'] = x
     flatten = Flatten(name='conv7_2_mbox_loc_flat')
@@ -201,7 +208,7 @@ def SSD300(input_shape, num_classes=21):
     name = 'conv7_2_mbox_conf'
     if num_classes != 21:
         name += '_{}'.format(num_classes)
-    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
+    x = Convolution2D(num_priors * num_classes, (3, 3), padding='same',
                       name=name)(net['conv7_2'])
     net['conv7_2_mbox_conf'] = x
     flatten = Flatten(name='conv7_2_mbox_conf_flat')
@@ -212,7 +219,7 @@ def SSD300(input_shape, num_classes=21):
     net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])
     # Prediction from conv8_2
     num_priors = 6
-    x = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
+    x = Convolution2D(num_priors * 4, (3, 3), padding='same',
                       name='conv8_2_mbox_loc')(net['conv8_2'])
     net['conv8_2_mbox_loc'] = x
     flatten = Flatten(name='conv8_2_mbox_loc_flat')
@@ -220,7 +227,7 @@ def SSD300(input_shape, num_classes=21):
     name = 'conv8_2_mbox_conf'
     if num_classes != 21:
         name += '_{}'.format(num_classes)
-    x = Convolution2D(num_priors * num_classes, 3, 3, border_mode='same',
+    x = Convolution2D(num_priors * num_classes, (3, 3), padding='same',
                       name=name)(net['conv8_2'])
     net['conv8_2_mbox_conf'] = x
     flatten = Flatten(name='conv8_2_mbox_conf_flat')
@@ -241,7 +248,7 @@ def SSD300(input_shape, num_classes=21):
     priorbox = PriorBox(img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
                         variances=[0.1, 0.1, 0.2, 0.2],
                         name='pool6_mbox_priorbox')
-    if K.image_dim_ordering() == 'tf':
+    if K.image_data_format() == 'channels_last':
         target_shape = (1, 1, 256)
     else:
         target_shape = (256, 1, 1)
@@ -249,42 +256,47 @@ def SSD300(input_shape, num_classes=21):
                                     name='pool6_reshaped')(net['pool6'])
     net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])
     # Gather all predictions
-    net['mbox_loc'] = merge([net['conv4_3_norm_mbox_loc_flat'],
+    net['mbox_loc'] = concatenate([net['conv4_3_norm_mbox_loc_flat'],
                              net['fc7_mbox_loc_flat'],
                              net['conv6_2_mbox_loc_flat'],
                              net['conv7_2_mbox_loc_flat'],
                              net['conv8_2_mbox_loc_flat'],
                              net['pool6_mbox_loc_flat']],
-                            mode='concat', concat_axis=1, name='mbox_loc')
-    net['mbox_conf'] = merge([net['conv4_3_norm_mbox_conf_flat'],
+                            axis=1,
+                                  name='mbox_loc')
+    net['mbox_conf'] = concatenate([net['conv4_3_norm_mbox_conf_flat'],
                               net['fc7_mbox_conf_flat'],
                               net['conv6_2_mbox_conf_flat'],
                               net['conv7_2_mbox_conf_flat'],
                               net['conv8_2_mbox_conf_flat'],
                               net['pool6_mbox_conf_flat']],
-                             mode='concat', concat_axis=1, name='mbox_conf')
-    net['mbox_priorbox'] = merge([net['conv4_3_norm_mbox_priorbox'],
+                              axis=1,
+                              name='mbox_conf')
+    net['mbox_priorbox'] = concatenate([net['conv4_3_norm_mbox_priorbox'],
                                   net['fc7_mbox_priorbox'],
                                   net['conv6_2_mbox_priorbox'],
                                   net['conv7_2_mbox_priorbox'],
                                   net['conv8_2_mbox_priorbox'],
                                   net['pool6_mbox_priorbox']],
-                                 mode='concat', concat_axis=1,
+                                 axis=1,
                                  name='mbox_priorbox')
     if hasattr(net['mbox_loc'], '_keras_shape'):
         num_boxes = net['mbox_loc']._keras_shape[-1] // 4
     elif hasattr(net['mbox_loc'], 'int_shape'):
-        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
+        num_boxes = net['mbox_loc'].int_shape()[-1] // 4
+    elif hasattr(net['mbox_loc'], 'get_shape'):
+        num_boxes = net['mbox_loc'].get_shape()[-1] // 4
     net['mbox_loc'] = Reshape((num_boxes, 4),
                               name='mbox_loc_final')(net['mbox_loc'])
     net['mbox_conf'] = Reshape((num_boxes, num_classes),
                                name='mbox_conf_logits')(net['mbox_conf'])
     net['mbox_conf'] = Activation('softmax',
                                   name='mbox_conf_final')(net['mbox_conf'])
-    net['predictions'] = merge([net['mbox_loc'],
+    net['predictions'] = concatenate([net['mbox_loc'],
                                net['mbox_conf'],
                                net['mbox_priorbox']],
-                               mode='concat', concat_axis=2,
+                               axis=2,
+                               #axis = 0,
                                name='predictions')
     model = Model(net['input'], net['predictions'])
     return model
diff --git a/ssd_layers.py b/ssd_layers.py
index 5e10478..41ee510 100644
--- a/ssd_layers.py
+++ b/ssd_layers.py
@@ -29,7 +29,8 @@ class Normalize(Layer):
         Add possibility to have one scale for all features.
     """
     def __init__(self, scale, **kwargs):
-        if K.image_dim_ordering() == 'tf':
+        if K.image_data_format() == 'channels_last':
+
             self.axis = 3
         else:
             self.axis = 1
@@ -41,7 +42,7 @@ class Normalize(Layer):
         shape = (input_shape[self.axis],)
         init_gamma = self.scale * np.ones(shape)
         self.gamma = K.variable(init_gamma, name='{}_gamma'.format(self.name))
-        self.trainable_weights = [self.gamma]
+        self._trainable_weights = [self.gamma]
 
     def call(self, x, mask=None):
         output = K.l2_normalize(x, self.axis)
@@ -81,7 +82,7 @@ class PriorBox(Layer):
     """
     def __init__(self, img_size, min_size, max_size=None, aspect_ratios=None,
                  flip=True, variances=[0.1], clip=True, **kwargs):
-        if K.image_dim_ordering() == 'tf':
+        if K.image_data_format() == 'channels_last':
             self.waxis = 2
             self.haxis = 1
         else:
@@ -108,7 +109,7 @@ class PriorBox(Layer):
         self.clip = True
         super(PriorBox, self).__init__(**kwargs)
 
-    def get_output_shape_for(self, input_shape):
+    def compute_output_shape(self, input_shape):
         num_priors_ = len(self.aspect_ratios)
         layer_width = input_shape[self.waxis]
         layer_height = input_shape[self.haxis]
diff --git a/ssd_utils.py b/ssd_utils.py
index 0a9ffb1..4fc7a49 100644
--- a/ssd_utils.py
+++ b/ssd_utils.py
@@ -1,7 +1,8 @@
 """Some utils for SSD."""
 
 import numpy as np
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 
 
 class BBoxUtility(object):
diff --git a/testing_utils/videotest.py b/testing_utils/videotest.py
index 0cbae3c..7b2ff4f 100644
--- a/testing_utils/videotest.py
+++ b/testing_utils/videotest.py
@@ -3,13 +3,14 @@
 import cv2
 import keras
 from keras.applications.imagenet_utils import preprocess_input
-from keras.backend.tensorflow_backend import set_session
+# from keras.backend.tensorflow_backend import set_session
 from keras.models import Model
 from keras.preprocessing import image 
 import pickle
 import numpy as np
 from random import shuffle
-from scipy.misc import imread, imresize
+# from scipy.misc import imread, imresize
+from imageio import imread
 from timeit import default_timer as timer
 
 import sys
@@ -84,8 +85,8 @@ class VideoTest(object):
             "trying to open a webcam, make sure you video_path is an integer!"))
         
         # Compute aspect ratio of video     
-        vidw = vid.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)
-        vidh = vid.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)
+        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
+        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
         vidar = vidw/vidh
         
         # Skip frames until reaching start_frame
diff --git a/testing_utils/videotest_example.py b/testing_utils/videotest_example.py
index fb4d873..27ec148 100644
--- a/testing_utils/videotest_example.py
+++ b/testing_utils/videotest_example.py
@@ -1,3 +1,7 @@
+import os
+# GPU不使用を設定
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
 import keras
 import pickle
 from videotest import VideoTest
@@ -9,16 +13,38 @@ from ssd import SSD300 as SSD
 input_shape = (300,300,3)
 
 # Change this if you run with other classes than VOC
-class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"];
+class_names = [ 
+                "background", 
+                "aeroplane", 
+                "bicycle", 
+                "bird", 
+                "boat", 
+                "bottle", 
+                "bus", 
+                "car", 
+                "cat", 
+                "chair", 
+                "cow", 
+                "diningtable", 
+                "dog", 
+                "horse", 
+                "motorbike", 
+                "person", 
+                "pottedplant", 
+                "sheep", 
+                "sofa", 
+                "train", 
+                "tvmonitor"
+            ];
 NUM_CLASSES = len(class_names)
 
 model = SSD(input_shape, num_classes=NUM_CLASSES)
 
 # Change this path if you want to use your own trained weights
-model.load_weights('../weights_SSD300.hdf5') 
+model.load_weights('../../weights_SSD300.hdf5') 
         
 vid_test = VideoTest(class_names, model, input_shape)
 
 # To test on webcam 0, remove the parameter (or change it to another number
 # to test on that webcam)
-vid_test.run('path/to/your/video.mkv')
+vid_test.run('../../testvideo3.mp4')

ssd_keras_2.patch

keras修正対応

ssd_keras_2.patch

diff -ur '--exclude=.git' '--exclude=__pycache__' ssd_keras/ssd.py ssd_keras.tmp/ssd.py
--- ssd_keras/ssd.py	2021-05-21 07:13:41.880000000 +0900
+++ ssd_keras.tmp/ssd.py	2021-05-21 07:06:44.970000000 +0900
@@ -1,19 +1,17 @@
 """Keras implementation of SSD."""
 
-import keras.backend as K
-from keras.layers import Activation
-#from keras.layers import AtrousConvolution2D
-from keras.layers import Convolution2D
-from keras.layers import Dense
-from keras.layers import Flatten
-from keras.layers import GlobalAveragePooling2D
-from keras.layers import Input
-from keras.layers import MaxPooling2D
-#from keras.layers import merge
-from keras.layers.merge import concatenate
-from keras.layers import Reshape
-from keras.layers import ZeroPadding2D
-from keras.models import Model
+import tensorflow.keras.backend as K
+from tensorflow.keras.layers import Activation
+from tensorflow.keras.layers import Convolution2D
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.layers import Flatten
+from tensorflow.keras.layers import GlobalAveragePooling2D
+from tensorflow.keras.layers import Input
+from tensorflow.keras.layers import MaxPooling2D
+from tensorflow.keras.layers import concatenate
+from tensorflow.keras.layers import Reshape
+from tensorflow.keras.layers import ZeroPadding2D
+from tensorflow.keras.models import Model
 
 from ssd_layers import Normalize
 from ssd_layers import PriorBox
diff -ur '--exclude=.git' '--exclude=__pycache__' ssd_keras/ssd_layers.py ssd_keras.tmp/ssd_layers.py
--- ssd_keras/ssd_layers.py	2021-05-21 07:13:41.880000000 +0900
+++ ssd_keras.tmp/ssd_layers.py	2021-05-21 06:31:11.780000000 +0900
@@ -1,8 +1,8 @@
 """Some special pupropse layers for SSD."""
 
-import keras.backend as K
-from keras.engine.topology import InputSpec
-from keras.engine.topology import Layer
+import tensorflow.keras.backend as K
+from tensorflow.python.keras.engine.input_spec import InputSpec
+from tensorflow.python.keras.engine.base_layer import Layer
 import numpy as np
 import tensorflow as tf
 
diff -ur '--exclude=.git' '--exclude=__pycache__' ssd_keras/testing_utils/videotest.py ssd_keras.tmp/testing_utils/videotest.py
--- ssd_keras/testing_utils/videotest.py	2021-05-21 07:13:41.880000000 +0900
+++ ssd_keras.tmp/testing_utils/videotest.py	2021-05-21 07:08:24.680000000 +0900
@@ -1,15 +1,13 @@
 """ A class for testing a SSD model on a video file or webcam """
 
 import cv2
-import keras
-from keras.applications.imagenet_utils import preprocess_input
-# from keras.backend.tensorflow_backend import set_session
-from keras.models import Model
-from keras.preprocessing import image 
+import tensorflow.keras as keras
+from tensorflow.keras.applications.imagenet_utils import preprocess_input
+from tensorflow.keras.models import Model
+from tensorflow.keras.preprocessing import image 
 import pickle
 import numpy as np
 from random import shuffle
-# from scipy.misc import imread, imresize
 from imageio import imread
 from timeit import default_timer as timer
 
@@ -179,6 +177,7 @@
             cv2.putText(to_draw, fps, (3,10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)
             
             cv2.imshow("SSD result", to_draw)
-            cv2.waitKey(10)
-            
-        
+            key = cv2.waitKey(1)
+            if key == 27:
+                # ESCキー
+                break
diff -ur '--exclude=.git' '--exclude=__pycache__' ssd_keras/testing_utils/videotest_example.py ssd_keras.tmp/testing_utils/videotest_example.py
--- ssd_keras/testing_utils/videotest_example.py	2021-05-21 07:13:41.880000000 +0900
+++ ssd_keras.tmp/testing_utils/videotest_example.py	2021-05-21 07:04:24.320000000 +0900
@@ -2,7 +2,7 @@
 # GPU不使用を設定
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 
-import keras
+import tensorflow.keras as keras
 import pickle
 from videotest import VideoTest
 
@@ -47,4 +47,4 @@
 
 # To test on webcam 0, remove the parameter (or change it to another number
 # to test on that webcam)
-vid_test.run('../../testvideo3.mp4')
+vid_test.run('../../images/testvideo3.mp4')

学習済み重みファイルのダウンロード

参照先の記載にしたがって、学習済み重みファイルをダウンロードする。

wgetでは取得できなかったので、ブラウザで↓ここ から weights_SSD300.hdf5 をダウンロード。
https://mega.nz/folder/7RowVLCL#q3cEVRK9jyOSB9el3SssIA
で、ZIPファイルをカレントディレクトリに解凍しておく。

ソースのダウンロード&パッチをあてる

githubからソースをダウンロードする。
このままではエラーになるので、先ほど作成したパッチファイル ssd_keras.patch と ssd_keras_2.patch の順でパッチをあてる。

git clone https://github.com/rykov8/ssd_keras.git
cd ssd_keras
patch -p1 < ../ssd_keras.patch 
# Keras修正対応パッチ
patch -p1 < ../ssd_keras_2.patch 

実行

実行する。

cd testing_utils/
python videotest_example.py

おー。
しかし遅い….
Kerasは遅いみたい。。。orz…

こんなんもある

「Object Detection API」で物体検出の自前データを学習する方法(TensorFlow 2.x版)