1234567891011121314151617181920212223242526272829 |
"""Run Tesseract OCR on a single image and print the recognized text."""

import logging
import sys

import pytesseract
from PIL import Image

# Configure logging.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# If Tesseract-OCR is not on PATH, its executable location must be set explicitly.
pytesseract.pytesseract.tesseract_cmd = r'G:\资料\tools\tesseract-ocr-master\Tesseract-OCR\tesseract.exe'

try:
    # Open the image inside the try block so that a missing or unreadable file
    # is logged like any other OCR failure instead of aborting the script, and
    # use a context manager so the file handle is always released.
    with Image.open('G:\\1111.jpg') as image:
        # lang='chi_sim' selects the Simplified Chinese language data.
        text = pytesseract.image_to_string(image, lang='chi_sim')
    logging.info("OCR识别成功")
except Exception as e:
    # Top-level boundary of the script: report the failure and fall back to
    # empty text so the print step below still runs.
    logging.error("OCR识别失败: %s", e)
    text = ""

# Drop anything that cannot be represented in UTF-8 (e.g. lone surrogates).
cleaned_text = text.encode('utf-8', errors='ignore').decode('utf-8')

# Print the recognized text.
try:
    print(cleaned_text)
except UnicodeEncodeError as e:
    # print() raises UnicodeEncodeError (not UnicodeDecodeError) when the
    # console code page cannot represent a character. Retry using the
    # console's own encoding, replacing unsupported characters.
    logging.warning("UnicodeEncodeError: %s", e)
    logging.warning("尝试使用 'replace' 处理编码错误")
    console_encoding = sys.stdout.encoding or 'utf-8'
    print(cleaned_text.encode(console_encoding, errors='replace').decode(console_encoding))
|