1.实验要求: 现有不同种类的鸟,每一类有5张图,要求是对每一幅图中的鸟进行准确定位,即用bounding box将图中的鸟框出来。
2.实验方案: 首先尝试采用图像处理技术实现
import os

import cv2
import numpy as np

# Lower-case file extensions that main() treats as input images.
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _preprocess(image):
    """Return a contrast-enhanced, blurred grayscale version of a BGR image.

    The input array is not modified.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # CLAHE boosts local contrast before thresholding.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(gray)
    # A light blur suppresses noise that CLAHE tends to amplify.
    return cv2.GaussianBlur(enhanced, (5, 5), 0)


def preprocess_image(image_path):
    """Load the image at *image_path* and return its preprocessed form.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A single-channel image: grayscale, CLAHE-enhanced, Gaussian-blurred.

    Raises:
        ValueError: If OpenCV cannot read the file (cv2.imread returned None).
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")
    return _preprocess(image)


def detect_edges(image):
    """Return a Canny edge map of an adaptively thresholded grayscale image.

    Args:
        image: Single-channel 8-bit image, e.g. from preprocess_image().
    """
    thresh = cv2.adaptiveThreshold(
        image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    return cv2.Canny(thresh, 50, 150)


def find_bounding_box(image, edges, min_area=500, aspect_range=(0.5, 2)):
    """Draw a green rectangle around every plausible contour found in *edges*.

    Args:
        image: BGR image to draw on; it is modified in place.
        edges: Binary edge map from which external contours are extracted.
        min_area: A contour is kept only if its area is strictly greater.
        aspect_range: (low, high) exclusive bounds on bounding-box
            width / height.

    Returns:
        The same *image* object, with rectangles drawn on it.
    """
    low, high = aspect_range
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = cv2.contourArea(contour)
        aspect_ratio = w / float(h)
        if area > min_area and low < aspect_ratio < high:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return image


def save_image(image, output_path):
    """Write *image* to *output_path*, printing a warning if the write fails."""
    if not cv2.imwrite(output_path, image):
        print(f"Warning: failed to write {output_path}")


def main(input_dir, output_dir):
    """Process every image in *input_dir* and save annotated copies.

    Files are selected by extension (case-insensitive) and handled in sorted
    order. Unreadable files are reported and skipped instead of aborting
    the whole run.

    Args:
        input_dir: Directory containing the source images.
        output_dir: Directory for the annotated images; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    image_names = sorted(
        name for name in os.listdir(input_dir)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    )
    for name in image_names:
        image_path = os.path.join(input_dir, name)
        # Read once: the same array feeds preprocessing and receives the boxes.
        original_image = cv2.imread(image_path)
        if original_image is None:
            print(f"Skipping unreadable image: {image_path}")
            continue
        edges = detect_edges(_preprocess(original_image))
        result_image = find_bounding_box(original_image, edges)
        save_image(result_image, os.path.join(output_dir, name))


if __name__ == "__main__":
    input_dir = "images"
    output_dir = "output"
    main(input_dir, output_dir)
# Install the Python packages used by the scripts in this write-up.
pip install opencv-python numpy torch torchvision
# Fetch the YOLOv5 repository and install its Python requirements.
git clone https://github.com/ultralytics/yolov5
cd yolov5
pip install -r requirements.txt
import os

import cv2
import matplotlib.pyplot as plt
import torch
# Load the pretrained YOLOv5-small model through PyTorch Hub.
# trust_repo=True loads the third-party repository without asking for
# interactive confirmation.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, trust_repo=True)
def load_images_from_folder(folder):
    """Read every image OpenCV can decode from *folder*.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        A list of (filename, image) tuples in sorted filename order. Entries
        that cv2.imread cannot read are skipped.
    """
    images = []
    # os.listdir order is arbitrary; sorting makes runs reproducible.
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            images.append((filename, img))
    return images
def detect_birds(image):
    """Run the module-level YOLOv5 ``model`` on one image.

    Args:
        image: Image to analyse. A 3-channel HWC array is assumed to be in
            OpenCV's BGR order (as returned by cv2.imread) and is converted
            to RGB first; any other input is passed to the model unchanged.

    Returns:
        The detection results object returned by the model.
    """
    # The YOLOv5 hub wrapper expects RGB for array input; feeding BGR swaps
    # the red and blue channels and degrades detection quality.
    if getattr(image, "ndim", None) == 3 and image.shape[2] == 3:
        image = image[..., ::-1]
    return model(image)
def is_valid_detection(label, confidence, threshold=0.2):
    """Tell whether a detection should be kept.

    Args:
        label: Integer class id predicted by the detector.
        confidence: Detection confidence score.
        threshold: Minimum confidence (inclusive) required to keep it.

    Returns:
        True if *label* is an accepted class id and *confidence* is at least
        *threshold*; otherwise False. Always a plain bool.
    """
    # NOTE(review): presumably COCO ids (14 = bird; 18, 19, 0 = sheep, cow,
    # person), kept to catch birds that get misclassified - confirm.
    valid_labels = (14, 18, 19, 0)
    if label not in valid_labels:
        return False
    # bool() keeps the return type stable when confidence is a tensor scalar.
    return bool(confidence >= threshold)
def draw_bounding_boxes(image, results, confidence_threshold=0.2):
    """Draw green rectangles on *image* for every accepted detection.

    Args:
        image: BGR image to draw on; it is modified in place.
        results: Detection results exposing ``xyxyn[0]``, whose rows are read
            as normalized x1, y1, x2, y2, then confidence, then class id.
        confidence_threshold: Minimum confidence passed on to
            is_valid_detection().

    Returns:
        The same *image* object, with rectangles drawn on it.
    """
    detections = results.xyxyn[0]
    labels, cords = detections[:, -1], detections[:, :-1]
    # Coordinates are normalized, so scale them by the image width and height.
    x_shape, y_shape = image.shape[1], image.shape[0]
    for raw_label, row in zip(labels, cords):
        label = int(raw_label)
        # A plain float prints cleanly and compares as an ordinary number.
        confidence = float(row[4])
        print(f"Label: {label} , Confidence: {confidence} ")
        if is_valid_detection(label, confidence, confidence_threshold):
            x1, y1 = int(row[0] * x_shape), int(row[1] * y_shape)
            x2, y2 = int(row[2] * x_shape), int(row[3] * y_shape)
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    return image
def main():
    """Detect birds in every image under ``images`` and save annotated copies.

    Each annotated image is written to ``output`` under its original filename
    and then displayed with matplotlib.
    """
    input_folder = 'images'
    output_folder = 'output'
    # Make sure the destination exists before any image is written.
    os.makedirs(output_folder, exist_ok=True)

    images = load_images_from_folder(input_folder)

    for filename, image in images:
        results = detect_birds(image)
        image_with_boxes = draw_bounding_boxes(image, results)

        # Write directly with OpenCV: this script defines no helper that
        # takes (image, filename, folder).
        output_path = os.path.join(output_folder, filename)
        if not cv2.imwrite(output_path, image_with_boxes):
            print(f"Warning: failed to write {output_path}")

        # matplotlib expects RGB, whereas OpenCV images are BGR.
        plt.imshow(cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB))
        plt.title(filename)
        plt.show()


if __name__ == "__main__":
    main()
YOLO(You Only Look Once)是一种实时对象检测系统,能够在单次前向传递中完成对象定位和分类任务。
YOLOv5 的主要模块:
- 数据加载模块:负责加载和预处理数据,包括图像和标签文件。
- 模型定义模块:定义YOLOv5的模型结构,包括各层的定义和连接方式。
- 训练模块:负责模型的训练过程,包括前向传播、损失计算、反向传播和参数更新。
- 推理模块:负责使用训练好的模型进行预测,并输出检测结果。
- 后处理模块:负责对模型的输出进行处理,包括非极大值抑制(NMS)等,以得到最终的检测结果。
3.实验心得 在本次实验中,我通过图像处理技术和深度学习模型相结合的方法,成功实现了对鸟类图像的准确定位。实验初期尝试采用了传统的图像处理技术,包括灰度转换、CLAHE增强对比度、高斯模糊、Canny边缘检测等方法来实现目标检测,但在复杂背景下识别效果不佳。为了提高检测的准确性和鲁棒性,引入了预训练的YOLOv5模型,显著提升了鸟类定位的效果。最终实验结果表明,YOLOv5模型能够在多种复杂背景下准确定位鸟类,为实时对象检测提供了可靠的解决方案。