언어/python
Python으로 PDF 파일 읽기
파아랑새
2019. 12. 8. 22:14
from pdflib import Document
import os
import base64
class PDFObj():
    """Print the metadata and page text of PDF files in a fixed directory.

    The directory to scan is held in ``_targetPath``; ``dirSearch`` does the
    actual work.
    """

    def __init__(self):
        # Directory scanned by dirSearch; a relative path is resolved against
        # the current working directory at the time dirSearch is called.
        self._targetPath = "./pdf_dir"

    def dirSearch(self):
        """Scan ``_targetPath`` and print the contents of each PDF found.

        For every entry whose extension is ``.pdf`` (compared
        case-insensitively) a ``Document`` is opened, its ``metadata`` is
        printed, and then for each page: a separator line, the Base64
        encoding of the page text (UTF-8), and the page text itself.  The
        page text is the page's ``lines`` joined with single spaces and
        stripped of surrounding whitespace.

        The working directory is left untouched, so the method can be
        called repeatedly even when ``_targetPath`` is relative.

        Raises:
            OSError: if ``_targetPath`` cannot be listed (propagated from
                ``os.listdir``).
            SystemExit: with status 1 as soon as the page at index 3 of a
                document has been printed.  This keeps the original
                script's behaviour of stopping after four pages.
        """
        # sorted() gives a deterministic processing order; os.listdir makes
        # no ordering promise.
        for entry in sorted(os.listdir(self._targetPath)):
            if os.path.splitext(entry)[1].lower() != ".pdf":
                continue

            # Build the full path instead of chdir-ing into the directory.
            # NOTE(review): assumes Document accepts a path with a directory
            # component, not only a bare file name -- confirm against pdflib.
            doc = Document(os.path.join(self._targetPath, entry))
            print(doc.metadata)

            for c, p in enumerate(doc):
                # The separator prints the page object itself, not its index.
                print("{} ========================".format(p))
                strData = " ".join(p.lines).strip()
                encodedStr = base64.b64encode(
                    strData.encode("utf-8")
                ).decode("utf-8")
                print(encodedStr)
                print(strData)
                if c == 3:
                    # Same exit status as the original exit(1), without
                    # depending on the site-provided `exit` helper.
                    raise SystemExit(1)
if __name__ == "__main__":
    # Script entry point: scan the default directory and print what is found.
    o = PDFObj()
    o.dirSearch()
테스트 환경
=> Ubuntu 18.04
=> Python 3.6 인터프리터