Browse Source

copyvios: Catch PDF parser exceptions more aggressively.

tags/v0.3
Ben Kurtovic 5 years ago
parent
commit
42a224f365
1 changed file with 1 addition and 3 deletions
  1. +1
    -3
      earwigbot/wiki/copyvios/parsers.py

+ 1
- 3
earwigbot/wiki/copyvios/parsers.py View File

@@ -34,8 +34,6 @@ nltk = importer.new("nltk")
 converter = importer.new("pdfminer.converter")
 pdfinterp = importer.new("pdfminer.pdfinterp")
 pdfpage = importer.new("pdfminer.pdfpage")
-pdftypes = importer.new("pdfminer.pdftypes")
-psparser = importer.new("pdfminer.psparser")
 
 
 __all__ = ["ArticleTextParser", "get_parser"]


@@ -294,7 +292,7 @@ class _PDFParser(_BaseTextParser):
             pages = pdfpage.PDFPage.get_pages(StringIO(self.text))
             for page in pages:
                 interp.process_page(page)
-        except (pdftypes.PDFException, psparser.PSException, AssertionError):
+        except Exception:  # pylint: disable=broad-except
             return output.getvalue().decode("utf8")
         finally:
             conv.close()


Loading…
Cancel
Save