# Copies/page_splitter.py
#
# 408 lines
# 15 KiB
# Python

import fitz # PyMuPDF
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageTk, ImageDraw
import sys
import os
import glob
import shutil
from pypdf import PdfReader, PdfWriter
# --- Constants ---
# Number of PDF points in one centimetre (1 inch = 2.54 cm, 72 points = 1 inch).
# PDFPreviewer moves the split line in steps of CM_TO_POINTS / 2, i.e. half a
# centimetre per arrow-key press.
CM_TO_POINTS = (1 / 2.54) * 72
def list_pdf_files(directory):
    """Return the ``*.pdf`` files directly inside *directory*, in descending order.

    The list is sorted in reverse so that callers which consume it with
    ``list.pop()`` (taking from the end) process the files in ascending
    name order.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A list of paths (``directory`` joined with each file name). Empty when
        the directory does not exist or contains no matching file.
    """
    # Escape the directory part: characters such as "[" or "*" in a folder
    # name would otherwise be interpreted as glob wildcards and silently
    # match nothing.
    pattern = os.path.join(glob.escape(directory), "*.pdf")
    return sorted(glob.glob(pattern), reverse=True)
class PDFPreviewer:
    """Interactive tool that splits scanned PDF pages along a vertical line.

    Each page of the input PDF is shown on a canvas with a red vertical line.
    The user positions the line, optionally rotates the page by 180 degrees,
    and chooses which halves to keep. When the last page is confirmed the
    halves are written out, reordered (double sheets are put back in reading
    order) and merged into one output PDF. When the input path is a
    directory, every PDF in it is processed in turn.
    """

    def setup_next_file(self):
        """Prepare the state for the next input file and open it.

        Increments the running file number, pops the next path from
        ``self.inputs`` and resets all per-file state.

        Returns:
            bool: True when a document was opened and is ready for preview.
            False when there is no input left or the file could not be
            opened (an error dialog has then already been shown). The root
            window is never destroyed here; that is the caller's decision.
        """
        self.num += 1
        if not self.inputs:
            return False
        self.pdf_path = self.inputs.pop()
        self.file_rotation = 0
        self.base_name = os.path.splitext(os.path.basename(self.pdf_path))[0]
        # Temporary working directories, created relative to the current
        # working directory and removed again by remove_dirs().
        self.split_dir = f"{self.base_name}_split"
        self.reorder_dir = f"{self.base_name}_reorder"
        if self.output_dir is None:
            # NOTE(review): the single-file output name has no ".pdf"
            # extension; kept as-is because the name is user-visible.
            self.final_file = f"{self.base_name}_final"
        else:
            self.final_file = f"{self.output_dir}/Copie{self.num:02}.pdf"
        self.current_page_index = 0
        self.page_settings = []
        self.processing = False  # Flag to prevent multiple finish calls
        try:
            self.doc = fitz.open(self.pdf_path)
        except Exception as e:  # GUI boundary: report any open failure
            messagebox.showerror("Error", f"Failed to open PDF file: {e}")
            return False
        self.master.title(f"PDF Splitter - {os.path.basename(self.pdf_path)}")
        return True

    def __init__(self, master, path):
        """Initialize the application.

        Args:
            master (tk.Tk): The root Tkinter window.
            path (str): Path to a single PDF file, or to a directory whose
                PDF files are all processed. For a directory the results go
                to a sibling directory named ``<path>_out``.

        If the path does not exist, contains no PDF, or the first file
        cannot be opened, an error dialog is shown and ``master`` is
        destroyed; the instance is then only partially initialized.
        """
        if not os.path.exists(path):
            messagebox.showerror("Error", f"File not found: {path}")
            master.destroy()
            return
        if os.path.isdir(path):
            self.inputs = list_pdf_files(path)
            # Drop trailing separators so that "Copies/" yields "Copies_out"
            # rather than "Copies/_out" inside the input directory.
            trimmed = path.rstrip(os.sep + (os.altsep or ""))
            self.output_dir = f"{trimmed or path}_out"
        else:
            self.inputs = [path]
            self.output_dir = None
        self.master = master
        self.num = 0
        self.global_rotation = 0  # Rotation applied to every file
        self._resize_job = None  # For debouncing resize events
        self.current_zoom = 1.0

        if not self.inputs:
            messagebox.showerror("Error", f"No PDF files found in: {path}")
            master.destroy()
            return
        if not self.setup_next_file():
            # setup_next_file() has already reported the problem.
            master.destroy()
            return
        self._initialize_current_page_settings()

        # --- UI Setup ---
        # Set a reasonable initial size for the window
        self.master.geometry("800x1000")
        instructions = (
            "← / → : Move line 0.5cm left/right\n"
            "'c': Rotate page 180°, 'C' : rotate all pages, ',' : rotate all files\n"
            "t s r n: keep left, next page, keep none, keep right\n"
            "z: send this page to the end, 'R':restart file\n"
        )
        self.info_label = tk.Label(master, text=instructions, justify=tk.LEFT)
        self.info_label.pack(pady=5, side=tk.TOP)
        self.page_label = tk.Label(master, text="", font=("Helvetica", 12))
        self.page_label.pack(pady=5, side=tk.TOP)
        # Canvas for PDF page preview
        self.canvas = tk.Canvas(master, bg="gray")
        self.canvas.pack(fill="both", expand=True)

        # --- Bindings ---
        self.master.bind("<Left>", self.move_line_left)
        self.master.bind("<Right>", self.move_line_right)
        self.master.bind("<Return>", self.confirm_and_next_page)
        self.master.bind("c", self.rotate_page)
        self.master.bind("C", self.rotate_all_pages)
        self.master.bind(",", self.rotate_all_files)
        self.master.bind("t", self.keep_left)
        self.master.bind("n", self.keep_right)
        self.master.bind("s", self.confirm_and_next_page)
        self.master.bind("r", self.discard_page)
        self.master.bind("z", self.send_page_end)
        self.master.bind("R", self.restart_current_file)
        # The first <Configure> event of the canvas also triggers the
        # initial rendering of the page (through on_resize).
        self.canvas.bind("<Configure>", self.on_resize)

    def on_resize(self, event):
        """Redraw the page after the canvas has been resized.

        Uses a debounce: every new event cancels the pending redraw, so the
        page is rendered only once, 250 ms after the last resize event.
        """
        if self._resize_job:
            self.master.after_cancel(self._resize_job)
        self._resize_job = self.master.after(250, self.load_page)

    def _initialize_current_page_settings(self):
        """Reset the split line to the page centre and the page rotation to 0."""
        if self.current_page_index < len(self.doc):
            page = self.doc.load_page(self.current_page_index)
            self.current_line_x = page.rect.width / 2
            self.current_rotation = 0

    def load_page(self):
        """Render the current page with its split line, scaled to fit the canvas."""
        if self.current_page_index >= len(self.doc):
            # No page left to display: run the processing once.
            if not self.processing:
                self.processing = True
                self.finish_and_process()
            return
        page = self.doc.load_page(self.current_page_index)
        self.page_label.config(
            text=f"Page {self.current_page_index + 1} of {len(self.doc)}"
        )

        # --- Calculate Scaling ---
        canvas_width = self.canvas.winfo_width()
        canvas_height = self.canvas.winfo_height()
        # Don't try to render if the canvas has no size yet.
        if canvas_width <= 1 or canvas_height <= 1:
            return
        page_rect = page.rect
        zoom_x = canvas_width / page_rect.width
        zoom_y = canvas_height / page_rect.height
        # Use 98% of the smallest zoom factor to leave a small margin
        self.current_zoom = min(zoom_x, zoom_y) * 0.98

        # --- Render Page ---
        mat = fitz.Matrix(self.current_zoom, self.current_zoom)
        pix = page.get_pixmap(matrix=mat, alpha=False)
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        # --- Rotate, then draw the line ---
        # The rotation is applied BEFORE the line is drawn, so the line
        # position is always measured from the left edge of the image as
        # displayed. split_pdf() mirrors its clip rectangles accordingly
        # when the page is rotated.
        total_rotation = (
            self.current_rotation + self.file_rotation + self.global_rotation
        )
        if total_rotation % 360 != 0:
            img = img.rotate(180, expand=True)
        draw = ImageDraw.Draw(img)
        # The line position is scaled by the same zoom factor
        line_x_scaled = self.current_line_x * self.current_zoom
        draw.line(
            [(line_x_scaled, 0), (line_x_scaled, pix.height)], fill="red", width=3
        )

        # --- Display on Canvas ---
        # Keep a reference on self, otherwise Tk drops the image.
        self.photo_img = ImageTk.PhotoImage(img)
        self.canvas.delete("all")
        # Center the image on the canvas
        self.canvas.create_image(
            canvas_width / 2, canvas_height / 2, anchor="center",
            image=self.photo_img,
        )

    def restart_current_file(self, event=None):
        """Discard all choices made for the current file and start it over."""
        # Close the modified in-memory document
        if hasattr(self, 'doc'):
            self.doc.close()
        # Re-open the file from disk to reset changes (like moved pages)
        try:
            self.doc = fitz.open(self.pdf_path)
        except Exception as e:  # GUI boundary: report any open failure
            messagebox.showerror("Error", f"Failed to reopen PDF file: {e}")
            self.master.destroy()
            return
        # Reset state variables for the current file
        self.file_rotation = 0
        self.current_page_index = 0
        self.page_settings = []
        self.processing = False
        # Reload UI
        self._initialize_current_page_settings()
        self.load_page()

    def move_line_left(self, event=None):
        """Move the split line half a centimetre to the left (not below 0)."""
        self.current_line_x = max(0, self.current_line_x - CM_TO_POINTS / 2)
        self.load_page()

    def move_line_right(self, event=None):
        """Move the split line half a centimetre to the right (not past the page)."""
        page = self.doc.load_page(self.current_page_index)
        self.current_line_x = min(
            page.rect.width, self.current_line_x + CM_TO_POINTS / 2
        )
        self.load_page()

    def rotate_page(self, event=None):
        """Toggle the rotation of the current page between 0 and 180 degrees."""
        self.current_rotation = 180 if self.current_rotation == 0 else 0
        self.load_page()

    def rotate_all_pages(self, event=None):
        """Toggle a 180 degree rotation applied to every page of the current file."""
        self.file_rotation = 180 if self.file_rotation == 0 else 0
        self.load_page()

    def rotate_all_files(self, event=None):
        """Toggle a 180 degree rotation applied to every page of every file."""
        self.global_rotation = 180 if self.global_rotation == 0 else 0
        self.load_page()

    def keep_left(self, event=None):
        """Confirm the current page, keeping only its left half."""
        self.confirm_and_next_page(keep="left")

    def keep_right(self, event=None):
        """Confirm the current page, keeping only its right half."""
        self.confirm_and_next_page(keep="right")

    def discard_page(self, event=None):
        """Confirm the current page, keeping neither half."""
        self.confirm_and_next_page(keep="none")

    def send_page_end(self, event=None):
        """Move the current page to the end of the document and show the next one."""
        # Do nothing if we are already at or past the last page
        if self.current_page_index >= len(self.doc) - 1:
            return
        # -1 as the destination puts the page after the last page. Pages
        # before the current index do not move, so the settings already
        # recorded still line up with their pages.
        self.doc.move_page(self.current_page_index, -1)
        # Initialize settings for the page that shifted into the current slot
        self._initialize_current_page_settings()
        # Reload the canvas to show the new page
        self.load_page()

    def confirm_and_next_page(self, event=None, keep="both"):
        """Record the settings of the current page and advance.

        Args:
            event: Tk event when called from a key binding (unused).
            keep: Which halves to keep: "both", "left", "right" or "none".

        After the last page the file is processed and the next input file
        is opened; the window is closed when no further file can be opened.
        """
        self.page_settings.append({
            "line_x": self.current_line_x,
            "rotation": self.current_rotation,
            "keep": keep,
        })
        self.current_page_index += 1
        if self.current_page_index < len(self.doc):
            self._initialize_current_page_settings()
            self.load_page()
            return
        self.processing = True
        self.finish_and_process()
        if self.setup_next_file():
            self._initialize_current_page_settings()
            self.load_page()
        else:
            # setup_next_file() no longer destroys the window itself, so
            # this is the only destroy() call on this path.
            self.master.destroy()

    def finish_and_process(self):
        """Split, reorder and merge the current file, then delete the temp dirs."""
        self.split_pdf()
        self.reorder_pdfs()
        self.concate_files()
        self.remove_dirs()

    def split_filename_left(self, i):
        """Return the path of the saved left half of page *i* (0-based)."""
        return os.path.join(self.split_dir, f"{self.base_name}_{i+1}l.pdf")

    def split_filename_right(self, i):
        """Return the path of the saved right half of page *i* (0-based)."""
        return os.path.join(self.split_dir, f"{self.base_name}_{i+1}r.pdf")

    def reorder_filename(self, i):
        """Return the path of the reordered part at output position *i* (0-based)."""
        return os.path.join(self.reorder_dir, f"{self.base_name}_{i+1}.pdf")

    def clean_up_dir(self, dir, make=True):
        """Delete every ``*.pdf`` file directly inside *dir*.

        Args:
            dir: Directory to empty.
            make: When true, create the directory first if it is missing.

        Files that cannot be deleted are reported on stdout and skipped.
        """
        if make:
            os.makedirs(dir, exist_ok=True)
        # Escape the directory so wildcard characters in its name are literal.
        for pdf in glob.glob(os.path.join(glob.escape(dir), "*.pdf")):
            try:
                os.remove(pdf)
            except OSError as e:
                print(f"Error deleting {pdf}: {e}")

    def remove_dirs(self):
        """Remove the temporary split and reorder directories."""
        shutil.rmtree(self.split_dir)
        shutil.rmtree(self.reorder_dir)

    def _save_clip(self, page_index, clip, rotation, output_path):
        """Write the *clip* area of source page *page_index* as a one-page PDF."""
        part = fitz.open()
        try:
            target = part.new_page(width=clip.width, height=clip.height)
            target.show_pdf_page(target.rect, self.doc, page_index, clip=clip)
            target.set_rotation(rotation)
            part.save(output_path)
        finally:
            part.close()

    def split_pdf(self):
        """Split each confirmed page according to its saved settings.

        Writes the kept halves into ``self.split_dir`` and closes the source
        document afterwards. Halves that are not kept are not generated.
        """
        print("Starting PDF processing...")
        self.clean_up_dir(self.split_dir)
        for i, settings in enumerate(self.page_settings):
            page = self.doc.load_page(i)
            line_x = settings['line_x']
            keep = settings['keep']
            rotation = (page.rotation + settings['rotation'] +
                        self.file_rotation + self.global_rotation) % 360
            width = page.rect.width
            height = page.rect.height
            if rotation == 0:
                rect_left = fitz.Rect(0, 0, line_x, height)
                rect_right = fitz.Rect(line_x, 0, width, height)
            else:
                # The line was placed on the rotated preview, so "left" and
                # "right" are mirrored in source page coordinates.
                # NOTE(review): every non-zero total rotation takes this
                # branch, including a source page whose own rotation is 90
                # or 270 degrees - confirm this is intended.
                rect_left = fitz.Rect(width - line_x, 0, width, height)
                rect_right = fitz.Rect(0, 0, width - line_x, height)
            if keep in ("both", "left"):
                self._save_clip(i, rect_left, rotation, self.split_filename_left(i))
            if keep in ("both", "right"):
                self._save_clip(i, rect_right, rotation, self.split_filename_right(i))
        self.doc.close()
        print(f"\nProcessing complete. Files are in '{self.split_dir}' directory.")

    def _reading_order(self):
        """Return the kept halves as ``(page_index, side)`` in output order.

        A page whose right half is kept, followed by a page that is not
        "right only", is treated together with that next page as the two
        sides of a double sheet ("copie double"): right of the first page,
        left then right of the second page, and finally left of the first
        page. Any other page contributes its kept halves left, then right.
        """
        ps = self.page_settings
        order = []
        i = 0
        while i < len(ps):
            keep = ps[i]['keep']
            is_double = (
                keep in ("both", "right")
                and i + 1 < len(ps)
                and ps[i + 1]['keep'] != "right"
            )
            if is_double:
                next_keep = ps[i + 1]['keep']
                order.append((i, "right"))
                if next_keep in ("both", "left"):
                    order.append((i + 1, "left"))
                # Only "both" has a saved right half here ("right" is
                # excluded above); a discarded page ("none") has no file.
                if next_keep == "both":
                    order.append((i + 1, "right"))
                if keep == "both":
                    order.append((i, "left"))
                i += 2
            else:
                if keep in ("left", "both"):
                    order.append((i, "left"))
                if keep in ("right", "both"):
                    order.append((i, "right"))
                i += 1
        return order

    def reorder_pdfs(self):
        """Copy the split halves into ``self.reorder_dir`` in reading order."""
        self.clean_up_dir(self.reorder_dir)
        for position, (page_index, side) in enumerate(self._reading_order()):
            if side == "left":
                source = self.split_filename_left(page_index)
            else:
                source = self.split_filename_right(page_index)
            shutil.copy2(source, self.reorder_filename(position))

    def _sorted_reorder_files(self):
        """Return the PDFs of ``self.reorder_dir`` ordered by numeric suffix.

        A plain string sort would place ``name_10.pdf`` before
        ``name_2.pdf`` and scramble the output once there are ten or more
        parts. Files without a numeric suffix are placed last, by name.
        """
        def numeric_key(path):
            stem = os.path.splitext(os.path.basename(path))[0]
            suffix = stem.rpartition("_")[2]
            if suffix.isdecimal():
                return (0, int(suffix), path)
            return (1, 0, path)

        pattern = os.path.join(glob.escape(self.reorder_dir), "*.pdf")
        return sorted(glob.glob(pattern), key=numeric_key)

    def concate_files(self):
        """Merge the reordered parts into ``self.final_file``."""
        writer = PdfWriter()
        for pdf in self._sorted_reorder_files():
            reader = PdfReader(pdf)
            for page in reader.pages:
                writer.add_page(page)
        if self.output_dir is not None:
            os.makedirs(os.path.dirname(self.final_file), exist_ok=True)
        with open(self.final_file, "wb") as f:
            writer.write(f)
        print(f"Created merged PDF: {self.final_file}")
def main(argv=None):
    """Run the splitter from the command line.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``. Exactly one argument is expected: the path of a
            PDF file, or of a directory containing PDF files.

    Exits with status 1 and a usage message on stderr when the argument
    count is wrong.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 2:
        prog = os.path.basename(args[0]) if args else "script_name.py"
        print(f"Usage: python {prog} <path_to_pdf_file_or_directory>",
              file=sys.stderr)
        sys.exit(1)
    root = tk.Tk()
    # Keep a reference to the application for the lifetime of the main loop.
    app = PDFPreviewer(root, args[1])
    root.mainloop()


if __name__ == "__main__":
    main()