from pdflib import Document
import os
import base64
from ela_dir.Ela import Ela
class PDFObj():
    """Scan a directory for PDF files and print their metadata and page text.

    Every matching file is opened with ``Document`` (imported from ``pdflib``
    at the top of this file) and its contents are written to standard output.
    """

    def __init__(self, targetPath="./pdf_dir"):
        """Remember which directory ``dirSearch`` should scan.

        Args:
            targetPath: Directory to look in. Defaults to ``./pdf_dir``, the
                location that used to be hard-coded.
        """
        self._targetPath = targetPath

    def dirSearch(self):
        """Print the metadata and per-page text of each PDF in the target directory.

        Only direct entries of the directory are examined (no recursion). An
        entry is treated as a PDF when its extension is ``.pdf``, compared
        case-insensitively. For each one, ``doc.metadata`` is printed first,
        followed by one dict per page of the form
        ``{"page_": <1-based page number>, "data_": <page text>}``.

        The process working directory is left untouched, so the method can be
        called repeatedly even when the target path is relative.

        Raises:
            OSError: If the target directory cannot be listed (for example it
                does not exist), as raised by ``os.listdir``.
        """
        for f in os.listdir(self._targetPath):
            fext = os.path.splitext(f)[1]
            if fext.lower() != ".pdf":
                continue
            # Build the full path instead of chdir-ing into the directory:
            # changing the cwd would leak out of this method and break any
            # later use of relative paths, including a second call to this one.
            doc = Document(os.path.join(self._targetPath, f))
            print(doc.metadata)
            for c, p in enumerate(doc, start=1):
                # A page's text is its lines joined by single spaces, with
                # surrounding whitespace trimmed.
                strData = " ".join(p.lines).strip()
                e = {"page_": c, "data_": strData}
                print(e)
if __name__ == "__main__":
    # Run as a script: scan the default PDF directory and print what is found.
    PDFObj().dirSearch()