20类物体检测¶

本节例程的位置在 百度云盘资料\野火K210 AI视觉相机\1-教程文档_例程源码\例程\10-KPU\voc20_object_detect\voc20_object_detect.py

介绍¶

实现PASCAL-VOC数据集的20类目标检测。20个物体类别为：aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor。下图为实机演示。

例程¶

import sensor, image, time, lcd
from maix import KPU
import gc

# VOC 20-class object detection demo: grab camera frames, run them through a
# YOLOv2 kmodel on the KPU, draw the detections and the FPS, show on the LCD.

lcd.init()
sensor.reset(dual_buff=True)        # Reset and initialize the sensor. It will
                                    # run automatically, call sensor.run(0) to stop
sensor.set_pixformat(sensor.RGB565) # Set pixel format to RGB565 (or GRAYSCALE)
sensor.set_framesize(sensor.QVGA)   # Set frame size to QVGA (320x240)
#sensor.set_vflip(1)
sensor.skip_frames(time = 1000)     # Wait for the settings to take effect.
clock = time.clock()                # Create a clock object to track the FPS.

# Scratch image fed to the KPU. Its 320x256 size equals the net_w/net_h given
# to init_yolo2 below, while camera frames are QVGA (320x240).
od_img = image.Image(size=(320,256))

# Class labels; a detection's class index (element 4) selects the label.
obj_name = ("aeroplane","bicycle", "bird","boat","bottle","bus","car","cat","chair","cow","diningtable", "dog","horse", "motorbike","person","pottedplant", "sheep","sofa", "train", "tvmonitor")
# Ten anchor values used with anchor_num=5 -- presumably five (w, h) pairs; TODO confirm.
anchor = (1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071)
kpu = KPU()
print("ready load model")
#kpu.load_kmodel(0x300000, 1536936)
kpu.load_kmodel("/sd/KPU/voc20_object_detect/voc20_detect.kmodel")
kpu.init_yolo2(anchor, anchor_num=5, img_w=320, img_h=240, net_w=320 , net_h=256 ,layer_w=10 ,layer_h=8, threshold=0.5, nms_value=0.2, classes=20)

frame_count = 0
try:
    while True:
        frame_count += 1
        print("cnt :", frame_count)
        clock.tick()                    # Update the FPS clock.
        img = sensor.snapshot()
        # Copy the frame into the network-sized image, then hand it to the KPU.
        od_img.draw_image(img, 0,0)
        od_img.pix_to_ai()
        kpu.run_with_output(od_img)
        dect = kpu.regionlayer_yolo2()
        fps = clock.fps()
        if dect:
            print("dect:",dect)
            for det in dect:
                # Elements 0..3 are passed straight to draw_rectangle
                # (presumably x, y, w, h -- TODO confirm); element 4 indexes obj_name.
                img.draw_rectangle(det[0],det[1],det[2],det[3], color=(0, 255, 0))
                img.draw_string(det[0],det[1], obj_name[det[4]], color=(0, 255, 0), scale=1.5)

        img.draw_string(0, 0, "%2.1ffps" %(fps), color=(0, 60, 128), scale=1.0)
        lcd.display(img)
        gc.collect()
finally:
    # The loop above never exits normally, so the original trailing
    # kpu.deinit() was unreachable. Running it in `finally` deinitializes the
    # KPU when the loop is left through an exception (e.g. a KeyboardInterrupt).
    kpu.deinit()

例程解析¶

import sensor, image, time, lcd
from maix import KPU
import gc

这些库提供了控制摄像头（sensor）、图像处理（image）、时间管理（time）、LCD显示（lcd）、KPU神经网络推理（maix.KPU）和内存管理（gc）等功能。

# Bring up the LCD and the camera before any frame is captured.
lcd.init()
sensor.reset(dual_buff=True)                      # Reset and initialize the sensor (dual_buff
                                    # enabled). It will run automatically,
                                    # call sensor.run(0) to stop.
sensor.set_pixformat(sensor.RGB565) # Set pixel format to RGB565 (or GRAYSCALE)
sensor.set_framesize(sensor.QVGA)   # Set frame size to QVGA (320x240)
# Optional vertical flip, left disabled in this example:
#sensor.set_vflip(1)
sensor.skip_frames(time = 1000)     # Wait for the settings to take effect.

初始化LCD显示，重置摄像头并开启双缓冲，设置图像格式为RGB565，设置图像大小为QVGA（320x240像素），跳过一些帧以确保设置生效。

clock = time.clock()                # Clock object for FPS tracking; the main loop calls tick() and fps() on it.

定义一个时钟对象clock来跟踪帧率（FPS）。

# Scratch image handed to the KPU; its 320x256 size equals the net_w/net_h
# values passed to kpu.init_yolo2 in this example.
od_img = image.Image(size=(320,256))

# Class labels; the main loop uses element 4 of each detection as an index into this tuple.
obj_name = ("aeroplane","bicycle", "bird","boat","bottle","bus","car","cat","chair","cow","diningtable", "dog","horse", "motorbike","person","pottedplant", "sheep","sofa", "train", "tvmonitor")
# Ten anchor values, passed to init_yolo2 with anchor_num=5 -- presumably five (w, h) pairs; TODO confirm.
anchor = (1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071)

创建一个用于目标检测的图像对象od_img，并定义20个类别名称的元组obj_name和YOLOv2使用的锚点anchor；KPU（神经网络处理器）对象在下一段代码中实例化。

kpu = KPU()                         # Create the KPU object used for inference below.
print("ready load model")
# Alternative load call kept from the original example (an address and a size --
# presumably a flash offset and the model length; TODO confirm):
#kpu.load_kmodel(0x300000, 1536936)
kpu.load_kmodel("/sd/KPU/voc20_object_detect/voc20_detect.kmodel")  # Load the .kmodel file from the /sd path.
# Configure YOLOv2 decoding: anchors, camera image size (320x240), network
# input size (320x256), output layer size (10x8), threshold, NMS value, class count.
kpu.init_yolo2(anchor, anchor_num=5, img_w=320, img_h=240, net_w=320 , net_h=256 ,layer_w=10 ,layer_h=8, threshold=0.5, nms_value=0.2, classes=20)

实例化KPU（神经网络处理器）对象，从SD卡加载预训练的kmodel格式模型用于目标检测，然后初始化YOLOv2检测参数：锚点、图像尺寸、网络输入尺寸、输出层尺寸、置信度阈值、非极大值抑制阈值和类别数量。

# Main loop: capture a frame, run detection on the KPU, draw the results and
# the FPS on the frame, then show it on the LCD. The loop has no exit condition.
i = 0
while True:
    i += 1
    print("cnt :", i)               # Running count of loop iterations.
    clock.tick()                    # Update the FPS clock.
    img = sensor.snapshot()
    a = od_img.draw_image(img, 0,0) # Copy the frame into od_img at (0, 0); `a` is never read.
    od_img.pix_to_ai()              # Called before handing od_img to the KPU.
    kpu.run_with_output(od_img)
    dect = kpu.regionlayer_yolo2()  # Detections for this frame; iterated below.
    fps = clock.fps()
    if len(dect) > 0:
        print("dect:",dect)
        for l in dect :
            # l[0]..l[3] go straight to draw_rectangle (presumably x, y, w, h --
            # TODO confirm); l[4] is used as the index into obj_name.
            a = img.draw_rectangle(l[0],l[1],l[2],l[3], color=(0, 255, 0))
            a = img.draw_string(l[0],l[1], obj_name[l[4]], color=(0, 255, 0), scale=1.5)

    a = img.draw_string(0, 0, "%2.1ffps" %(fps), color=(0, 60, 128), scale=1.0)
    lcd.display(img)
    gc.collect()                    # Run the garbage collector once per frame.

捕获一帧图像，将其绘制到od_img上，然后转换为神经网络可以处理的格式，运行神经网络，并获取检测结果。
绘制检测结果和FPS：
如果检测到对象，则在图像上绘制边框和对象名称，并显示当前的FPS（每秒帧数）。最后，显示图像并运行垃圾回收以释放内存。

# Deinitialize the KPU. NOTE(review): in the full listing this line follows an
# unconditional `while True:` loop, so it is not reached as written.
kpu.deinit()

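程序结束时，去初始化KPU以释放资源。注意：上面的 while True 是无限循环，不会正常退出，因此这行代码在例程中实际不会被执行；如需保证释放资源，可将循环放入 try/finally 结构，并在 finally 中调用 kpu.deinit()。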