From 7f326243b6b28460c6991b73383b987d71dbe042 Mon Sep 17 00:00:00 2001 From: Benoit Rosa Date: Mon, 3 Jul 2023 16:37:47 +0200 Subject: [PATCH 1/2] First working version of PDF output using annotations --- pdf_diff/command_line.py | 95 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 2 deletions(-) diff --git a/pdf_diff/command_line.py b/pdf_diff/command_line.py index d761fd0..f372ee9 100644 --- a/pdf_diff/command_line.py +++ b/pdf_diff/command_line.py @@ -8,6 +8,8 @@ import json, subprocess, io, os from lxml import etree from PIL import Image, ImageDraw, ImageOps +from pdf_annotate import PdfAnnotator, Location, Appearance + def compute_changes(pdf_fn_1, pdf_fn_2, top_margin=0, bottom_margin=100): # Serialize the text in the two PDFs. @@ -184,6 +186,85 @@ def mark_difference(hunk_length, offset, boxes, changes): # there's no reason to hold onto it. It can't be marked as changed twice. changes.append(boxes.pop(0)) + +# Turns the JSON objects of PDF changes into annotations into the source PDFs +def render_changes_PDF(changes, styles, annotators): + + changes = simplify_changes(changes) + if len(changes) == 0: + raise Exception("There are no text differences.") + + + # def draw_red_boxes(changes, pages, styles): + # # Draw red boxes around changes. 
+ + # for change in changes: + # if change == "*": continue # not handled yet + + # # 'box', 'strike', 'underline' + # style = styles[change["pdf"]["index"]] + + # # the Image of the page + # im = pages[change["pdf"]["index"]][change["page"]] + + # # draw it + # draw = ImageDraw.Draw(im) + + # if style == "box": + # draw.rectangle(( + # change["x"], change["y"], + # (change["x"]+change["width"]), (change["y"]+change["height"]), + # ), outline="red") + # elif style == "strike": + # draw.line(( + # change["x"], change["y"]+change["height"]/2, + # change["x"]+change["width"], change["y"]+change["height"]/2 + # ), fill="red") + # elif style == "underline": + # draw.line(( + # change["x"], change["y"]+change["height"], + # change["x"]+change["width"], change["y"]+change["height"] + # ), fill="red") + + + for change in changes: + if change == "*": continue + + style = styles[change["pdf"]["index"]] + + if style == "box": + annotators[change["pdf"]["index"]].add_annotation( + 'square', + Location(x1=change["x"], + y1 = change["page"]["height"] - change["y"], + x2 = change["width"] + change["x"], + y2 = change["page"]["height"] - (change["height"] + change["y"]), + page = change["page"]["number"]-1), + Appearance(stroke_color=(1, 0, 0), stroke_width=1)) + + elif style == "strike": + annotators[change["pdf"]["index"]].add_annotation( + 'square', + Location(x1=change["x"], + y1 = change["page"]["height"] - (change["y"] +change["height"]/2), + x2 = change["width"] + change["x"], + y2 = change["page"]["height"] - (change["height"]/2 + change["y"]), + page = change["page"]["number"]-1), + Appearance(stroke_color=(1, 0, 0), stroke_width=1)) + + elif style == "underline": + annotators[change["pdf"]["index"]].add_annotation( + 'square', + Location(x1=change["x"], + y1 = change["page"]["height"] - (change["y"] +change["height"]), + x2 = change["width"] + change["x"], + y2 = change["page"]["height"] - (change["height"] + change["y"]), + page = change["page"]["number"]-1), + 
Appearance(stroke_color=(1, 0, 0), stroke_width=1)) + + + + # Turns a JSON object of PDF changes into a PIL image object. def render_changes(changes, styles,width): # Merge sequential boxes to avoid sequential disjoint rectangles. @@ -465,6 +546,8 @@ def main(): help='bottom margin (ignored area) begin in percent of page height (default 100.0)') parser.add_argument('-r', '--result-width', default=900, type=int, help='width of the result image (width of image in px)') + parser.add_argument('-p', '--pdfoutput', action='store_true', default=False, + help='ouput the changes as annotations directly in the input PDFs (default:False)') args = parser.parse_args() def invalid_usage(msg): @@ -495,8 +578,16 @@ def invalid_usage(msg): invalid_usage('Insufficient number of files to compare; please supply exactly 2.') changes = compute_changes(args.files[0], args.files[1], top_margin=float(args.top_margin), bottom_margin=float(args.bottom_margin)) - img = render_changes(changes, style, args.result_width) - img.save(sys.stdout.buffer, args.format.upper()) + + + if args.pdfoutput: + annotators = [PdfAnnotator(args.files[0]),PdfAnnotator(args.files[1])] + out = render_changes_PDF(changes, style, annotators) + annotators[0].write('output1.pdf') + annotators[1].write('output2.pdf') + else: + img = render_changes(changes, style, args.result_width) + img.save(sys.stdout.buffer, args.format.upper()) if __name__ == "__main__": From ecb5b138a81b8c50793534123de167a70f31c041 Mon Sep 17 00:00:00 2001 From: Benoit Rosa Date: Mon, 3 Jul 2023 16:38:58 +0200 Subject: [PATCH 2/2] Fixed a copy-pasting issue (removed leftover commented-out draw_red_boxes code) --- pdf_diff/command_line.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/pdf_diff/command_line.py b/pdf_diff/command_line.py index f372ee9..c020867 100644 --- a/pdf_diff/command_line.py +++ b/pdf_diff/command_line.py @@ -194,39 +194,6 @@ def render_changes_PDF(changes, styles, annotators): if len(changes) == 0: raise Exception("There are no text 
differences.") - - # def draw_red_boxes(changes, pages, styles): - # # Draw red boxes around changes. - - # for change in changes: - # if change == "*": continue # not handled yet - - # # 'box', 'strike', 'underline' - # style = styles[change["pdf"]["index"]] - - # # the Image of the page - # im = pages[change["pdf"]["index"]][change["page"]] - - # # draw it - # draw = ImageDraw.Draw(im) - - # if style == "box": - # draw.rectangle(( - # change["x"], change["y"], - # (change["x"]+change["width"]), (change["y"]+change["height"]), - # ), outline="red") - # elif style == "strike": - # draw.line(( - # change["x"], change["y"]+change["height"]/2, - # change["x"]+change["width"], change["y"]+change["height"]/2 - # ), fill="red") - # elif style == "underline": - # draw.line(( - # change["x"], change["y"]+change["height"], - # change["x"]+change["width"], change["y"]+change["height"] - # ), fill="red") - - for change in changes: if change == "*": continue