https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/6aed2e5320b5d412f0f81a36dd493ae4bd7c3ff4 https://bugs.gentoo.org/889250 From 6aed2e5320b5d412f0f81a36dd493ae4bd7c3ff4 Mon Sep 17 00:00:00 2001 From: Sam James Date: Mon, 9 Jan 2023 04:05:09 +0000 Subject: [PATCH] Support PyPDF version 3. pypdf upstream has renamed the package from PyPDF2 to pypdf. This patch supporst the new pypdf version while keeping compatibility for older PyPDF2, adjusting to API changes. Signed-off-by: Chris Lamb --- a/diffoscope.egg-info/requires.txt +++ b/diffoscope.egg-info/requires.txt @@ -11,7 +11,7 @@ binwalk defusedxml guestfs jsondiff -pypdf2 +pypdf python-debian pyxattr rpm-python --- a/diffoscope/comparators/pdf.py +++ b/diffoscope/comparators/pdf.py @@ -34,17 +34,23 @@ from .utils.command import Command logger = logging.getLogger(__name__) try: - import PyPDF2 + try: + import pypdf + except ImportError: + import PyPDF2 try: - # PyPDF 2.x - from PyPDF2.errors import PdfReadError + from pypdf.errors import PdfReadError except ImportError: - # PyPDF 1.x - from PyPDF2.utils import PdfReadError + try: + # PyPDF 2.x + from PyPDF2.errors import PdfReadError + except ImportError: + # PyPDF 1.x + from PyPDF2.utils import PdfReadError except ImportError: # noqa - python_module_missing("PyPDF2") + python_module_missing("pypdf") PyPDF2 = None @@ -68,10 +74,10 @@ class PdfFile(File): xs = [] if PyPDF2 is None: - pkg = get_package_provider("pypdf2") + pkg = get_package_provider("pypdf") infix = f" from the '{pkg}' package " if pkg else " " self.add_comment( - f"Installing the 'PyPDF2' Python module{infix}may produce a better output." + f"Installing the 'pypdf' Python module{infix}may produce a better output." ) else: difference = Difference.from_text( @@ -119,7 +125,7 @@ class PdfFile(File): return "\n".join(xs) except PdfReadError as e: - msg = f"Could not extract PyPDF2 metadata from {os.path.basename(file.name)}: {e}" + msg = f"Could not extract pypdf metadata from {os.path.basename(file.name)}: {e}" self.add_comment(msg) logger.error(msg) return "" @@ -142,7 +148,7 @@ class PdfFile(File): return "\n".join(xs) except PdfReadError as e: - msg = f"Could not extract PyPDF2 annotations from {os.path.basename(file.name)}: {e}" + msg = f"Could not extract pypdf annotations from {os.path.basename(file.name)}: {e}" file.add_comment(msg) logger.error(msg) return "" --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ setup( "guestfs", "jsondiff", "python-debian", - "pypdf2", + "pypdf", "pyxattr", "rpm-python", "tlsh", --- a/tests/comparators/test_pdf.py +++ b/tests/comparators/test_pdf.py @@ -70,7 +70,7 @@ def differences_metadata(pdf1, pdf1a): @skip_unless_tools_exist("pdftotext") -@skip_unless_module_exists("PyPDF2") +@skip_unless_module_exists("pypdf") def test_metadata(differences_metadata): assert_diff(differences_metadata[0], "pdf_metadata_expected_diff") @@ -81,7 +81,7 @@ def differences_annotations(pdf3, pdf4): @skip_unless_tools_exist("pdftotext") -@skip_unless_module_exists("PyPDF2") +@skip_unless_module_exists("pypdf") def test_annotations(differences_annotations): with open("tests/data/pdf_annotations_expected_diff", "w") as f: f.write(differences_annotations[0].unified_diff) --- a/diffoscope/comparators/pdf.py +++ b/diffoscope/comparators/pdf.py @@ -38,6 +38,7 @@ try: import pypdf except ImportError: import PyPDF2 + pypdf = PyPDF2 try: from pypdf.errors import PdfReadError @@ -51,7 +52,7 @@ try: except ImportError: # noqa python_module_missing("pypdf") - PyPDF2 = None + pypdf = None class Pdftotext(Command): @@ -73,7 +74,7 @@ class PdfFile(File): def compare_details(self, other, source=None): xs = [] - if PyPDF2 is None: + if pypdf is None: pkg = get_package_provider("pypdf") infix = f" from the '{pkg}' package " if pkg else " " self.add_comment( @@ -113,7 +114,7 @@ class PdfFile(File): def dump_pypdf2_metadata(self, file): try: - pdf = PyPDF2.PdfFileReader(file.path) + pdf = pypdf.PdfReader(file.path) document_info = pdf.getDocumentInfo() if document_info is None: @@ -133,10 +133,10 @@ class PdfFile(File): def dump_pypdf2_annotations(self, file): try: - pdf = PyPDF2.PdfFileReader(file.path) + pdf = pypdf.PdfReader(file.path) xs = [] - for x in range(pdf.getNumPages()): + for x in range(len(pdf.pages)): page = pdf.getPage(x) try: --- a/diffoscope/comparators/pdf.py +++ b/diffoscope/comparators/pdf.py @@ -115,7 +115,7 @@ class PdfFile(File): def dump_pypdf2_metadata(self, file): try: pdf = pypdf.PdfReader(file.path) - document_info = pdf.getDocumentInfo() + document_info = pdf.metadata if document_info is None: return "" --- a/diffoscope/comparators/pdf.py +++ b/diffoscope/comparators/pdf.py @@ -137,7 +137,7 @@ class PdfFile(File): xs = [] for x in range(len(pdf.pages)): - page = pdf.getPage(x) + page = pdf.pages[x] try: for annot in page["/Annots"]: