최초 작성일:23년 12월 3일
최종 작성일:
목표: 도면에서 특정 위치의 이미지 추출하기
# Shell command (run in a terminal, not in Python): pip install pdf2image
import io
import pytesseract
from pdf2image import convert_from_path
def extract_text_from_pdf(pdf_path='/Users/hongminji/Desktop/pdf/sample.pdf', dpi=500):
    """Rasterise a PDF and return the text recognised on its pages.

    The original definition put a string literal where the parameter name
    belongs (a SyntaxError) and discarded the rendered pages. The path is
    now a real parameter whose default is the path that was hard-coded.

    Args:
        pdf_path: Location of the PDF file to read.
        dpi: Rendering resolution handed to ``convert_from_path``; 500 is
            the value the original call used.

    Returns:
        The OCR output of every page, joined with newlines, in page order.
        An empty string if the PDF yields no pages.
    """
    pages = convert_from_path(pdf_path, dpi)
    # Each rendered page is passed to Tesseract; results keep page order.
    return "\n".join(pytesseract.image_to_string(page) for page in pages)
# Shell command (run in a terminal, not in Python): pip install opencv-python
import numpy as np
from pytesseract import Output
import pytesseract
from PIL import Image
import cv2
# Drawing image that every OCR pass below reads.
filepath = '/Users/hongminji/Desktop/pdf/sample/drawing1.tiff'

# Pass 1: OCR on the image exactly as loaded.
img1 = np.array(Image.open(filepath))
text = pytesseract.image_to_string(img1)
print(text)

# Pass 2: stretch intensities to 0-255, binarise at 100, then OCR again.
norm_img = np.zeros((img1.shape[0], img1.shape[1]))
img = cv2.normalize(img1, norm_img, 0, 255, cv2.NORM_MINMAX)
img = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY)[1]
# NOTE(review): a 1x1 Gaussian kernel should leave the pixels effectively
# unchanged; the call is kept so the pipeline matches the original.
img = cv2.GaussianBlur(img, (1, 1), 0)
text = pytesseract.image_to_string(img)
print(text)

# Pass 3: per-word OCR data, used to annotate the drawing.
image = cv2.imread(filepath)
if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"OpenCV could not read image: {filepath}")
results = pytesseract.image_to_data(image, output_type=Output.DICT)

# Walk the parallel columns of the result dict together rather than by index.
for x, y, w, h, text, conf in zip(
    results["left"],
    results["top"],
    results["width"],
    results["height"],
    results["text"],
    results["conf"],
):
    # NOTE(review): confidence may arrive as an int or as a float-formatted
    # string depending on the Tesseract/pytesseract versions; going through
    # float() first avoids a ValueError from int() on values like "96.5".
    conf = int(float(conf))
    if conf > 70:
        # Drop non-ASCII characters before drawing the label.
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(image, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 200), 2)

# Show the annotated drawing once, after all boxes have been drawn.
cv2.imshow('custome window name', image)
cv2.waitKey(0)
# Close the preview window once a key has been pressed.
cv2.destroyAllWindows()
'데이터 > Python' 카테고리의 다른 글
[실기] 파이썬 (0) | 2024.09.18 |
---|---|
[Challenge01.]OCR 처리한 결과 값을 화면에 Display하기 (1) | 2024.01.02 |