| 1 |
force_ocr = False # Flag indicating whether OCR should be forced
|
| 2 |
pdf_page_count = 0 # Page count of the PDF; starts at 0 and is reassigned to len(doc) once the document is opened
|
| 3 |
|
| 4 |
# 3-1. Attempt PDF text extraction (PyMuPDF)
|
| 5 |
try:
|
| 6 |
print("Attempting text extraction from PDF...")
|
| 7 |
with fitz.open(stream=contents, filetype="pdf") as doc:
|
| 8 |
pdf_page_count = len(doc)
|
| 9 |
if doc.needs_pass:
|
| 10 |
print("PDF is password protected. Text extraction skipped, proceeding to OCR.") |