def image_pre_process(self):
    """Reduce the captcha to two colours and erase isolated white specks.

    Pass 1 rewrites every pixel of ``self.image``: a pixel whose value is
    43 becomes ``WHITE``, anything else becomes ``BLACK``.
    Pass 2 visits every non-border pixel and, when it is white while all
    eight surrounding pixels are black, paints it black as well.
    Finally ``self.image`` is replaced by its mode ``'1'`` conversion.
    """
    img = self.image
    last_col = img.width - 1
    last_row = img.height - 1

    # Pass 1: strip stray colours.
    # NOTE(review): 43 is presumably the pixel value of the glyph colour
    # in the source image -- confirm against the captcha format.
    for col in range(img.width):
        for row in range(img.height):
            colour = WHITE if img.getpixel((col, row)) == 43 else BLACK
            img.putpixel((col, row), colour)

    # Pass 2: single-pixel noise removal using the 8-neighbourhood.
    # The 3x3 window also contains the centre pixel, but the centre must be
    # white to qualify, so a tally of 8 means "every neighbour is black".
    window = [(dx, dy) for dx in range(-1, 2) for dy in range(-1, 2)]
    for col in range(img.width):
        for row in range(img.height):
            if col in (0, last_col) or row in (0, last_row):
                # Border pixels are never examined, hence never changed.
                continue
            black_around = sum(
                1
                for dx, dy in window
                if img.getpixel((col + dx, row + dy)) == BLACK
            )
            if img.getpixel((col, row)) == WHITE and black_around == 8:
                img.putpixel((col, row), BLACK)

    self.image = img.convert('1')
def spilt2chars():
    """Split every labelled captcha into characters and save each one.

    Recreates ``captcha_chars/`` with one sub-directory per character of
    ``a-z`` and ``0-9``, then walks ``captchas/``.  The first four characters
    of each file name are used as the label; the pieces returned by
    ``ZhengfangCaptcha.handle_split_image()`` are paired with those label
    characters and written to ``captcha_chars/<char>/<md5>.png``.

    Raises:
        OSError: if the output directories cannot be created, ``captchas/``
            cannot be listed, or a character image cannot be written (for
            example when a label character has no sub-directory).
    """
    # Best-effort wipe of earlier output; a missing directory is not an error.
    shutil.rmtree('captcha_chars', ignore_errors=True)
    os.mkdir("captcha_chars")
    for value in "abcdefghijklmnopqrstuvwxyz1234567890":
        os.mkdir('captcha_chars/{}'.format(value))

    for file_name in os.listdir('captchas'):
        source_path = 'captchas/{}'.format(file_name)
        # Skip OS metadata files and sub-directories: they are not captchas
        # and would make Image.open fail.
        if file_name in ('.DS_Store', 'Thumbs.db') or os.path.isdir(source_path):
            continue

        values = file_name[:4]
        # The context manager closes the underlying file once all pieces of
        # this captcha have been written.
        with Image.open(source_path) as im:
            # The captcha object works on the denoised, binarised image.
            captcha = ZhengfangCaptcha(im)
            pieces = zip(captcha.handle_split_image(), values)
            for index, (im_part, value) in enumerate(pieces):
                # Hash the time together with the source name and position so
                # two identical characters saved within one clock tick cannot
                # overwrite each other.
                m = hashlib.md5()
                m.update(
                    "{}{}{}{}".format(time.time(), file_name, index, value)
                    .encode('utf8')
                )
                im_part.save(
                    "captcha_chars/{}/{}.png".format(value, m.hexdigest())
                )
# 保存模型数据
if __name__ == "__main__":
    # Uncomment to rebuild captcha_chars/ from the labelled captchas first.
    # spilt2chars()
    letters = list('qwertyuiopasdfghjklzcxvbnm1234567890')

    # Turn every character image into a feature vector and collect the
    # results as a list of single-entry {label: vector} dicts.
    imageset = []
    for letter in letters:
        try:
            image_names = os.listdir('captcha_chars/{}/'.format(letter))
        except FileNotFoundError:
            # No samples were collected for this character.
            continue

        for img in image_names:
            if img in ("Thumbs.db", ".DS_Store"):
                continue  # OS metadata, not an image
            # Close each image file as soon as its vector has been built so
            # a large data set does not exhaust file descriptors.
            with Image.open("captcha_chars/{}/{}".format(letter, img)) as char_image:
                # NOTE(review): the helper visible in this file is spelled
                # `build_vector`; confirm that `ZhengfangCaptcha.buildvector`
                # exists under this name.
                vector = ZhengfangCaptcha.buildvector(char_image)
            imageset.append({letter: vector})

    with open('image_data.json', 'w') as f:
        json.dump(imageset, f)
def distance_hanmming(vector1, vector2):
    """Return the Hamming distance between two vectors.

    The distance is the number of positions at which the paired elements
    differ.  It is meant for vectors that contain only 0 and 1.

    :param vector1: first sequence of values
    :param vector2: second sequence of values
    :return: int, count of differing positions; when the lengths differ,
        only the first ``min(len(vector1), len(vector2))`` positions are
        compared because the pairing uses ``zip``
    """
    return sum(1 for value1, value2 in zip(vector1, vector2) if value1 != value2)
def build_vector(image, binary=True):
    """Flatten an image into a one-dimensional feature vector.

    :param image: pillow Image object with mode 1 or mode L
    :param binary: when true, emit 1 for a pixel equal to 255 and 0 for any
        other value; when false, emit the raw pixel values
    :return: list of int, in the order produced by ``image.getdata()``
    """
    pixels = image.getdata()
    if binary:
        return [1 if pixel == 255 else 0 for pixel in pixels]
    return list(pixels)
def rotate_img(image):
    """Straighten a character by minimising its projection on the x axis.

    Each whole-degree angle from -45 to 44 is tried on an expanded rotation
    of the image, and the number of columns containing at least one
    ``WHITE`` pixel is counted.  The first angle giving the smallest count
    (and fewer than 1000 columns) wins; if none qualifies the angle stays 0.

    :param image: PIL.Image object
    :return: the input rotated by the chosen angle, without canvas expansion
    """
    best_angle = 0
    narrowest = 1000
    for candidate in range(-45, 45):
        rotated = image.rotate(candidate, expand=True)
        # A column counts once, as soon as its first white pixel is seen.
        occupied_columns = sum(
            1
            for col in range(rotated.width)
            if any(
                rotated.getpixel((col, row)) == WHITE
                for row in range(rotated.height)
            )
        )
        # Strict comparison keeps the earliest angle on ties.
        if occupied_columns < narrowest:
            narrowest, best_angle = occupied_columns, candidate
    return image.rotate(best_angle, expand=False)