当前位置: 首页>前端>正文

如何将图片和锚框一起resize——python代码实现

在做深度学习的目标检测项目时,经常需要对图像进行resize,以统一图片的宽高。但如果直接调用resize函数,图片会被拉伸变形,因此需要做等比缩放,再把缩放后的图片居中贴到目标尺寸的黑色背景上。对于目标检测任务,图像缩放的同时还必须把锚框(标注框)按同样的比例缩放,并加上填充产生的偏移量。

那么这里直接就给出代码了:

import glob
import xml.etree.ElementTree as ET
import numpy as np
import os 
import PIL.Image as Image
import PIL.ImageDraw as Draw
import tqdm

# Directory listed by read_XML(); every entry in it is parsed as an annotation file.
xml_path = r"./dataset/car-identify/car-main/dataset/dataset/annotation/"
# Directory the source images are opened from (joined with the XML <filename>).
img_path = r"./dataset/car-identify/car-main/dataset/dataset/images/"
# Text file that img_resize() overwrites with one label line per image.
txt_path = r"./dataset/car-identify/car-main/dataset/dataset/Imagesets/img_label.txt"
# Directory the resized images are saved into, under their original file names.
img_dst_path = r"./dataset/car-identify/car-main/dataset/dataset/anno_img/"

# Class name (as found in <object><name>) -> integer category id, numbered from 1.
label = dict(zip(("truck", "bus", "SUV", "taxi", "car"), range(1, 6)))

def read_XML():
    """Parse every annotation file under ``xml_path`` into a flat record.

    Returns:
        A list with one entry per usable annotation file. Each entry is
        ``[file_name, cat, xmin, ymin, xmax, ymax, cat, xmin, ...]``: the
        ``<filename>`` text followed by five integers per ``<object>``
        (category id looked up in ``label``, then the truncated box corners).

    Files whose ``<size>`` has a non-positive width or height are skipped
    silently. A file that cannot be read or parsed, lacks a required tag, or
    names a class missing from ``label`` is reported on stdout and skipped;
    the remaining files are still processed. If the directory itself cannot
    be listed, the error is printed and an empty list is returned.
    """
    lines = []
    try:
        # Sorted so the output order does not depend on the filesystem.
        file_list = sorted(os.listdir(xml_path))
    except OSError as e:
        print(e)
        return lines

    for _xml in file_list:
        xml_file = os.path.join(xml_path, _xml)
        try:
            # Passing the path lets the parser honour the XML encoding
            # declaration instead of the locale's default text encoding.
            tree = ET.parse(xml_file)
            height = int(tree.findtext('./size/height'))
            width = int(tree.findtext('./size/width'))
            if height <= 0 or width <= 0:
                continue

            line = [tree.findtext('./filename')]

            # Append category id and integer box corners for every object.
            for obj in tree.iter('object'):
                cat = label[str(obj.findtext('name'))]
                xmin = int(float(obj.findtext('bndbox/xmin')))
                ymin = int(float(obj.findtext('bndbox/ymin')))
                xmax = int(float(obj.findtext('bndbox/xmax')))
                ymax = int(float(obj.findtext('bndbox/ymax')))
                line.extend([cat, xmin, ymin, xmax, ymax])
        except (OSError, ET.ParseError, KeyError, TypeError, ValueError) as e:
            # One bad file must not discard the rest of the dataset.
            print(f"skipping {xml_file}: {e!r}")
            continue
        lines.append(line)
    return lines

def img_resize(xml_data, size):
    """Letterbox each annotated image to ``size`` and rescale its boxes.

    Every image is shrunk or enlarged with its aspect ratio preserved so
    that it fits inside ``size``, pasted centred on a black canvas of exactly
    ``size``, and saved under the same file name in ``img_dst_path``. The
    boxes are divided by the same scale factor and shifted by the padding
    offset, then written to ``txt_path`` in centre/extent form.

    Args:
        xml_data: Records as produced by ``read_XML``: a file name followed
            by groups of ``cat, xmin, ymin, xmax, ymax``.
        size: Target ``(width, height)`` in pixels. It does not have to be
            square.

    Output:
        ``txt_path`` is overwritten with one line per image:
        ``file_name cat cx cy w h [cat cx cy w h ...]``, space separated,
        all box values truncated to integers.
    """
    target_w, target_h = size

    # The context manager guarantees the label file is flushed and closed,
    # even if an image fails to load part-way through.
    with open(txt_path, "w") as f:
        for line in tqdm.tqdm(xml_data):
            file_name = line[0]
            bg_img = Image.new('RGB', size, (0, 0, 0))

            with Image.open(os.path.join(img_path, file_name)) as img:
                iw, ih = img.size
                # The larger of the two ratios is the factor that makes the
                # image fit inside the target on both axes.
                scale = max(iw / target_w, ih / target_h)
                new_size = (max(1, int(iw / scale)), max(1, int(ih / scale)))
                # Padding that centres the resized image; on the axis that
                # limited the scale this evaluates to 0.
                dx = int((target_w - iw / scale) / 2)
                dy = int((target_h - ih / scale) / 2)
                bg_img.paste(img.resize(new_size), (dx, dy))

            # Records are laid out as [name, cat, x1, y1, x2, y2, cat, ...].
            entries = []
            for i in range(1, len(line), 5):
                cat = line[i]
                box = np.array(line[i + 1:i + 5], dtype=np.float32) / scale
                box[[0, 2]] += dx
                box[[1, 3]] += dy

                w = int(box[2] - box[0])
                h = int(box[3] - box[1])
                cx = int(box[0] + w / 2)
                # The centre's y is measured from ymin (box[1]), not xmax.
                cy = int(box[1] + h / 2)
                entries.append(f"{cat} {cx} {cy} {w} {h}")

            bg_img.save(os.path.join(img_dst_path, file_name))
            # Boxes are space separated so multi-object lines stay parseable.
            f.write(f"{file_name} " + " ".join(entries) + "\n")

if __name__ == "__main__":
    # Script entry point: parse all annotations, then letterbox every image
    # (and its boxes) onto a 416x416 canvas.
    img_resize(read_XML(), (416, 416))

效果如图所示:


如何将图片和锚框一起resize——python代码实现,第1张
如何将图片和锚框一起resize——python代码实现,第2张

https://www.xamrdz.com/web/2yd1886240.html

相关文章: