在做深度学习的目标检测项目时,经常需要对图像进行 resize,以统一图片的宽高。但如果直接调用 resize 函数,图片的宽高比会被改变,图像就会变形,因此需要做等比缩放:先按同一比例缩放图像,再把它粘贴到目标尺寸的纯色背景上。对于目标检测任务,图像缩放的同时,标注框(bounding box)的坐标也必须按相同的比例和偏移量一起变换。
下面直接给出完整代码:
import glob
import xml.etree.ElementTree as ET
import numpy as np
import os
import PIL.Image as Image
import PIL.ImageDraw as Draw
import tqdm
# Directory whose entries are listed and parsed as XML annotations.
xml_path = r"./dataset/car-identify/car-main/dataset/dataset/annotation/"
# Directory the source images are opened from (joined with the XML <filename>).
img_path = r"./dataset/car-identify/car-main/dataset/dataset/images/"
# Text file that receives one label line per processed image.
txt_path = r"./dataset/car-identify/car-main/dataset/dataset/Imagesets/img_label.txt"
# Directory the resized images are saved to.
img_dst_path = r"./dataset/car-identify/car-main/dataset/dataset/anno_img/"

# Class name -> integer id; ids are assigned 1..5 in the order listed here.
label = {
    name: class_id
    for class_id, name in enumerate(("truck", "bus", "SUV", "taxi", "car"), start=1)
}
def read_XML(xml_dir=None, label_map=None):
    """Parse XML annotation files into flat per-image records.

    Each file is expected to provide ``size/width``, ``size/height``,
    ``filename`` and zero or more ``object`` elements carrying ``name`` and
    ``bndbox/{xmin,ymin,xmax,ymax}``.

    Args:
        xml_dir: Directory containing the ``.xml`` files. Defaults to the
            module-level ``xml_path``.
        label_map: Mapping from class name to integer id. Defaults to the
            module-level ``label``.

    Returns:
        A list with one entry per successfully parsed file, shaped as
        ``[file_name, cat, xmin, ymin, xmax, ymax, cat, xmin, ...]``.
        Files with a non-positive width or height are skipped. Files that
        cannot be read or parsed, or that name an unknown class, are
        reported on stdout and skipped without affecting other files.
    """
    if xml_dir is None:
        xml_dir = xml_path
    if label_map is None:
        label_map = label

    lines = []
    try:
        # Sorted so the output order does not depend on the filesystem.
        file_list = sorted(os.listdir(xml_dir))
    except OSError as e:
        print(e)
        return lines

    for _xml in file_list:
        if not _xml.lower().endswith(".xml"):
            continue
        xml_file = os.path.join(xml_dir, _xml)
        # Errors are handled per file so one bad annotation cannot drop
        # every file that follows it.
        try:
            # Passing the path lets the parser honour the encoding declared
            # in the XML prolog instead of the locale's text encoding.
            tree = ET.parse(xml_file)
            height = int(tree.findtext('./size/height'))
            width = int(tree.findtext('./size/width'))
            if height <= 0 or width <= 0:
                continue
            file_name = tree.findtext('./filename')
            if not file_name:
                raise ValueError("missing <filename>")
            line = [file_name]
            # Collect the class id and corner coordinates of every object.
            for obj in tree.iter('object'):
                cat = label_map[str(obj.findtext('name'))]
                xmin = int(float(obj.findtext('bndbox/xmin')))
                ymin = int(float(obj.findtext('bndbox/ymin')))
                xmax = int(float(obj.findtext('bndbox/xmax')))
                ymax = int(float(obj.findtext('bndbox/ymax')))
                line.extend([cat, xmin, ymin, xmax, ymax])
        except (OSError, ET.ParseError, KeyError, TypeError, ValueError) as e:
            print(f"{xml_file}: {e!r}")
            continue
        lines.append(line)
    return lines
"""
Resize each image and rescale its bounding boxes to match.
"""
def _letterbox_labels(flat_boxes, ratio, dx, dy):
    """Convert flat corner boxes into ``"cat cx cy w h"`` label strings.

    Args:
        flat_boxes: ``[cat, xmin, ymin, xmax, ymax, cat, ...]`` in source-image
            pixels. A trailing group with fewer than five values is ignored.
        ratio: Scale factor applied to the source image.
        dx: Horizontal paste offset of the scaled image on the canvas.
        dy: Vertical paste offset of the scaled image on the canvas.

    Returns:
        One string per box, holding the class id followed by the integer
        centre x, centre y, width and height in canvas pixels.
    """
    offset = np.array([dx, dy, dx, dy], dtype=np.float32)
    fields = []
    for start in range(0, len(flat_boxes) - 4, 5):
        cat = flat_boxes[start]
        box = np.array(flat_boxes[start + 1:start + 5], dtype=np.float32) * ratio
        box += offset
        w = int(box[2] - box[0])
        h = int(box[3] - box[1])
        cx = int(box[0] + w / 2)
        # The centre y is measured from ymin (box[1]), not from xmax.
        cy = int(box[1] + h / 2)
        fields.append(f"{cat} {cx} {cy} {w} {h}")
    return fields


def img_resize(xml_data, size):
    """Letterbox every annotated image to ``size`` and write matching labels.

    Each image is scaled uniformly so that it fits inside ``size``, centred on
    a black RGB canvas, and saved under ``img_dst_path`` with its original
    file name. One line per image is written to ``txt_path``::

        file_name cat cx cy w h cat cx cy w h ...

    Images that cannot be opened are reported on stdout and skipped.

    Args:
        xml_data: Records shaped as
            ``[file_name, cat, xmin, ymin, xmax, ymax, ...]``.
        size: Target ``(width, height)`` in pixels.
    """
    target_w, target_h = size

    # Create the output locations up front so a fresh checkout works.
    os.makedirs(img_dst_path, exist_ok=True)
    txt_dir = os.path.dirname(txt_path)
    if txt_dir:
        os.makedirs(txt_dir, exist_ok=True)

    with open(txt_path, "w") as f:
        for line in tqdm.tqdm(xml_data):
            file_name = line[0]
            try:
                with Image.open(os.path.join(img_path, file_name)) as img:
                    iw, ih = img.size
                    if iw <= 0 or ih <= 0:
                        continue
                    # The smaller of the two ratios keeps the whole image
                    # inside the canvas, including for non-square targets.
                    ratio = min(target_w / iw, target_h / ih)
                    new_w = min(target_w, max(1, round(iw * ratio)))
                    new_h = min(target_h, max(1, round(ih * ratio)))
                    resized = img.resize((new_w, new_h))
            except OSError as e:
                print(f"{file_name}: {e!r}")
                continue

            # Centre the scaled image; the same offsets shift the boxes.
            dx = (target_w - new_w) // 2
            dy = (target_h - new_h) // 2
            bg_img = Image.new('RGB', size, (0, 0, 0))
            bg_img.paste(resized, (dx, dy))
            bg_img.save(os.path.join(img_dst_path, file_name))

            labels = _letterbox_labels(line[1:], ratio, dx, dy)
            # Space-join so consecutive boxes never run into each other.
            f.write(" ".join([file_name, *labels]) + "\n")
if __name__ == "__main__":
    # Parse all annotations first, then letterbox images and boxes together.
    target_size = (416, 416)
    xml_data = read_XML()
    img_resize(xml_data, target_size)
效果如图所示: