Training a PyTorch Model to Recognize Digit + Letter Image Captchas
Abstract: We use the deep learning framework PyTorch to train a model that recognizes 4-6 character image captchas mixing digits and letters. The captcha images can be generated with the third-party captcha library, or you can collect captchas from a target website and train against those specifically.
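Before diving in, here is a minimal sketch of what the captcha library produces; the text "AB12" and the output filename are arbitrary examples, not part of the original post:

# quick look at the third-party captcha library (sketch; text and filename are arbitrary)
from captcha.image import ImageCaptcha

img = ImageCaptcha(width=160, height=60)  # same image size used throughout this post
img.write("AB12", "sample-AB12.png")      # writes a 4-character captcha image to disk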
1. Building the Training Dataset
We put the parameters used to generate the images in a setting.py file so they are easy to change later:

# setting.py
SEED = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"  # character pool
CODE_TYPE = "1004"  # 1004: 4 digits/letters, 1005: 5 digits/letters, 1006: 6 digits/letters
CHAR_NUMBER = 4     # number of characters per captcha, change as needed
IMG_WIDTH = 160     # image width
IMG_HEIGHT = 60     # image height
BATCH_SIZE = 60     # number of samples per training batch
The code that generates the captcha images is written as follows:

# generate.py
from captcha.image import ImageCaptcha
import concurrent.futures
from pathlib import Path
import shutil
import random

from setting import IMG_WIDTH, IMG_HEIGHT, SEED, CHAR_NUMBER, CODE_TYPE


def generate_captcha(num, output_dir, thread_name=0):
    """
    Generate a given number of captcha images.
    :param num: number of images to generate
    :param output_dir: directory in which to save the captcha images
    :param thread_name: thread name
    :return: None
    """
    # If the directory already exists, delete it and recreate it
    if Path(output_dir).exists():
        shutil.rmtree(output_dir)
    Path(output_dir).mkdir()

    for i in range(num):
        img = ImageCaptcha(width=IMG_WIDTH, height=IMG_HEIGHT)
        chars = "".join([random.choice(SEED) for _ in range(CHAR_NUMBER)])
        save_path = f"{output_dir}/{i + 1}-{chars}.png"
        img.write(chars, save_path)
        print(f"Thread {thread_name}: generated captcha {i + 1}")
    print(f"Thread {thread_name}: captcha generation finished")


def main():
    with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
        executor.submit(generate_captcha, 50000, f"./train_{CODE_TYPE}", 0)
        executor.submit(generate_captcha, 1000, f"./test_{CODE_TYPE}", 1)


if __name__ == '__main__':
    main()
This generates 50,000 captcha images for the training set (saved in the train_1004 folder) and 1,000 images for the test set (saved in test_1004). The thread pool is created with up to 30 workers (adjust as needed), although only the two generation tasks above are actually submitted.
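Optionally, a quick sanity check (a sketch, not part of the original post) can confirm that the expected number of files landed in the training folder and that the filenames follow the "<index>-<chars>.png" pattern the loader relies on:

# check_dataset.py (sketch): verify file count and filename format of the generated set
import os
import re

from setting import CODE_TYPE, CHAR_NUMBER, SEED

pattern = re.compile(rf"\d+-[{SEED}]{{{CHAR_NUMBER}}}\.png")
files = os.listdir(f"./train_{CODE_TYPE}")
print(len(files))                                      # expect 50000
print(all(pattern.fullmatch(name) for name in files))  # expect True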
2. Loading a Custom Dataset with DataLoader
We define a custom Dataset class that loads the images from the train_1004 folder and applies some preprocessing. The code is written as follows:

# loader.py
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch
import os

from setting import CODE_TYPE, BATCH_SIZE, SEED, CHAR_NUMBER


class ImageDataSet(Dataset):
    def __init__(self, dir_path):
        super(ImageDataSet, self).__init__()
        self.img_path_list = [f"{dir_path}/{filename}" for filename in os.listdir(dir_path)]
        self.trans = transforms.Compose([
            transforms.ToTensor(),
            transforms.Grayscale()  # every image is converted to grayscale here
        ])

    def __getitem__(self, idx):
        image = self.trans(Image.open(self.img_path_list[idx]))
        label = self.img_path_list[idx].split("-")[-1].replace(".png", "")
        label = one_hot_encode(label)
        return image, label

    def __len__(self):
        return len(self.img_path_list)


# torch.zeros() creates a 4x36 tensor filled with zeros. We then loop over the characters
# of the label, look up each character's index in SEED, and set the corresponding position
# in the tensor to 1. Finally we return a one-dimensional tensor of length 4 * 36 = 144.
def one_hot_encode(label):
    """Convert a label string to a one-hot tensor."""
    cols = len(SEED)
    rows = CHAR_NUMBER
    result = torch.zeros((rows, cols), dtype=torch.float32)  # float32 matches the model's output dtype
    for i, char in enumerate(label):
        j = SEED.index(char)
        result[i, j] = 1.0
    return result.view(1, -1)[0]


# Reshape the model's one-dimensional prediction into a 4x36 tensor, then use torch.argmax()
# to find the index of the maximum value in each row (i.e. the 1). With that index we can
# look up the corresponding character in SEED.
def one_hot_decode(pred_result):
    """Convert a one-hot (or score) tensor back to a string."""
    pred_result = pred_result.view(-1, len(SEED))
    index_list = torch.argmax(pred_result, dim=1)
    text = "".join([SEED[i] for i in index_list])
    return text


def get_loader(path):
    """Build a DataLoader for the given directory."""
    dataset = ImageDataSet(path)
    dataloader = DataLoader(dataset, BATCH_SIZE, shuffle=True)
    return dataloader


if __name__ == '__main__':
    train_dataloader = get_loader(f"./train_{CODE_TYPE}")
    test_dataloader = get_loader(f"./test_{CODE_TYPE}")
    for X, y in train_dataloader:
        print(X.shape)  # torch.Size([60, 1, 60, 160])
        print(y.shape)  # torch.Size([60, 144])
        break
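A quick way to convince yourself that the two helpers are inverses of each other is a round-trip check; this is a sketch, and the label "3A7Z" is an arbitrary example:

# round-trip check (sketch): encoding a label and decoding it again should return the same string
from loader import one_hot_encode, one_hot_decode

label = "3A7Z"
assert one_hot_decode(one_hot_encode(label)) == label
print("one-hot round trip OK")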
3. Training the Model
We build a CNN model and train it. The loss function is MultiLabelSoftMarginLoss, the optimizer is Adam, and we train for 30 epochs (change as needed). The code is written as follows:

# train.py
import torch
from torch import nn

from loader import get_loader
from setting import CODE_TYPE, CHAR_NUMBER, SEED

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"


class NeuralNetWork(nn.Module):
    def __init__(self):
        super(NeuralNetWork, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.layer5 = nn.Sequential(
            nn.Flatten(),
            # after four 2x2 max-pools the 1x60x160 input becomes 512x3x10, i.e. 15360 features
            nn.Linear(in_features=15360, out_features=4096),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=CHAR_NUMBER * len(SEED))
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x


def train(dataloader, model, loss_fn, optimizer):
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f}")


def main():
    model = NeuralNetWork().to(device)
    loss_fn = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    train_dataloader = get_loader(f"./train_{CODE_TYPE}")
    epoch = 30
    for t in range(epoch):
        print(f"Epoch {t + 1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer)
        print("\n")
    torch.save(model.state_dict(), f"./model_{CODE_TYPE}.pth")
    print("Training finished, model saved")


if __name__ == '__main__':
    main()
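The training loop above only prints the loss. If you want an accuracy number during or right after training, instead of waiting for the per-file check in the next section, batch evaluation over a DataLoader can be sketched as below; evaluate() and its usage are assumptions, not part of the original post:

# evaluate.py (sketch): compute accuracy over a DataLoader by decoding predictions and labels
import torch

from loader import get_loader, one_hot_decode
from setting import CODE_TYPE


def evaluate(model, dataloader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device)).cpu()
            # decode every sample in the batch and compare with its ground-truth label
            for p, t in zip(pred, y):
                if one_hot_decode(p) == one_hot_decode(t):
                    correct += 1
                total += 1
    return correct / total


# usage example: accuracy = evaluate(model, get_loader(f"./test_{CODE_TYPE}"), device)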
4. Recognizing Captchas
The last step is to check the model's accuracy. The code is written as follows:

# main.py
import os

import torch
from PIL import Image
from torchvision import transforms

from train import NeuralNetWork
from loader import one_hot_decode
from setting import CODE_TYPE

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"


def predict(model, file_path):
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Grayscale()
    ])
    with torch.no_grad():
        # shape: (batch=1, channel=1, height=60, width=160); move the input to the same device as the model
        X = trans(Image.open(file_path)).reshape(1, 1, 60, 160).to(device)
        pred = model(X)
        text = one_hot_decode(pred)
        return text


def main():
    model = NeuralNetWork().to(device)
    model.load_state_dict(torch.load(f"./model_{CODE_TYPE}.pth", map_location=torch.device("cpu")))
    model.eval()

    correct = 0
    test_dir = f"./test_{CODE_TYPE}"
    total = len(os.listdir(test_dir))
    for filename in os.listdir(test_dir):
        file_path = f"{test_dir}/{filename}"
        real_captcha = file_path.split("-")[-1].replace(".png", "")
        pred_captcha = predict(model, file_path)

        if pred_captcha == real_captcha:
            correct += 1
            print(f"{file_path}: predicted {pred_captcha}, correct")
        else:
            print(f"{file_path}: predicted {pred_captcha}, wrong")

    accuracy = f"{correct / total * 100:.2f}%"
    print(f"Accuracy: {accuracy}")


if __name__ == '__main__':
    main()
In testing, the accuracy on 4-character captchas is around 92.80%.
5. Summary
Here we only converted the images to grayscale and did no further image preprocessing. To push the accuracy higher, you can enlarge the training set, train for more epochs, or tune the structure of the network layers.
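As one example of the "further image preprocessing" mentioned above, a simple fixed-threshold binarization could be tried in the transform pipeline. This is a sketch under the assumption that the same transform is applied both in loader.py and in predict(); the 0.5 threshold is an arbitrary example value:

# possible extra preprocessing step (sketch): binarize the grayscale image to suppress light background noise
from torchvision import transforms

trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Grayscale(),
    transforms.Lambda(lambda x: (x > 0.5).float())  # pixels above the threshold become 1, others 0
])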
This post is just a hands-on exercise, so readers with the same need can get started quickly. There is much more in PyTorch worth learning; if you are interested, dig further into model inference and the source code. If you want the code files, leave a comment below.
Source: https://www.cnblogs.com/gggod/p/18136268