|
@@ -34,8 +34,6 @@ nltk = importer.new("nltk") |
|
|
converter = importer.new("pdfminer.converter") |
|
|
converter = importer.new("pdfminer.converter") |
|
|
pdfinterp = importer.new("pdfminer.pdfinterp") |
|
|
pdfinterp = importer.new("pdfminer.pdfinterp") |
|
|
pdfpage = importer.new("pdfminer.pdfpage") |
|
|
pdfpage = importer.new("pdfminer.pdfpage") |
|
|
pdftypes = importer.new("pdfminer.pdftypes") |
|
|
|
|
|
psparser = importer.new("pdfminer.psparser") |
|
|
|
|
|
|
|
|
|
|
|
__all__ = ["ArticleTextParser", "get_parser"] |
|
|
__all__ = ["ArticleTextParser", "get_parser"] |
|
|
|
|
|
|
|
@@ -294,7 +292,7 @@ class _PDFParser(_BaseTextParser): |
|
|
pages = pdfpage.PDFPage.get_pages(StringIO(self.text)) |
|
|
pages = pdfpage.PDFPage.get_pages(StringIO(self.text)) |
|
|
for page in pages: |
|
|
for page in pages: |
|
|
interp.process_page(page) |
|
|
interp.process_page(page) |
|
|
except (pdftypes.PDFException, psparser.PSException, AssertionError): |
|
|
|
|
|
|
|
|
except Exception: # pylint: disable=broad-except |
|
|
return output.getvalue().decode("utf8") |
|
|
return output.getvalue().decode("utf8") |
|
|
finally: |
|
|
finally: |
|
|
conv.close() |
|
|
conv.close() |
|
|