# Copies/page_splitter.py
#
# 569 lines
# 22 KiB
# Python

import fitz # PyMuPDF
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageTk, ImageDraw
import sys
import os
import re
import glob
import shutil
import subprocess
from pypdf import PdfReader, PdfWriter
# --- Constants ---
# Number of PDF points in one centimetre.
# 1 inch = 2.54 cm and 1 inch = 72 points, so 1 cm = 72 / 2.54 points (about 28.35).
CM_TO_POINTS = (1 / 2.54) * 72
def list_pdf_files(directory):
    """Return the PDF files of *directory* to process, in reverse sorted order.

    Files whose *name* contains ``"enonce"`` are skipped. Only the file name
    is tested, so a parent directory that happens to contain "enonce" in its
    own name does not exclude every file.

    The list is in descending order because the caller consumes it with
    ``list.pop()``, which therefore yields the files in ascending order.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        list[str]: Matching paths, each prefixed with *directory*.
    """
    candidates = sorted(glob.glob(os.path.join(directory, "*.pdf")), reverse=True)
    return [
        pdf_path
        for pdf_path in candidates
        if "enonce" not in os.path.basename(pdf_path)
    ]
class PDFPreviewer:
    """Keyboard-driven Tkinter previewer that splits PDF pages on a vertical line.

    Each page of the current document is rendered on a canvas with a red
    vertical line. The user moves the line, optionally rotates the page, and
    chooses which side(s) of the line to keep. Once every page of a file has
    been confirmed, the file is split, the halves are reordered (consecutive
    splittable pages are treated as the outer and inner side of a folded
    double sheet) and merged again. The original file is moved to a
    ``Copies Originales`` directory next to it and the result is written to a
    ``Copies`` directory next to it.

    Intermediate files (``<name>_split``, ``<name>_reorder`` and
    ``<name>_temp.pdf``) are created relative to the current working
    directory.
    """

    # Directory names used next to each processed file.
    BACKUP_DIR_NAME = "Copies Originales"
    OUTPUT_DIR_NAME = "Copies"

    # ``keep`` values that mean "this page is cut along the line"
    # (everything except "as_is").
    _SPLITTABLE = ("both", "right", "left", "none")

    def setup_next_file(self):
        """Pop the next input PDF and reset all per-file state.

        On failure to open the file an error dialog is shown. The window is
        NOT destroyed here: every caller decides what to do with a ``False``
        result, which avoids destroying the window twice.

        Returns:
            bool: True when a document was opened; False when no input is
            left or the file could not be opened.
        """
        self.num += 1
        if not self.inputs:
            return False

        self.pdf_path = self.inputs.pop()
        self.file_rotation = 0
        self.base_name = os.path.splitext(os.path.basename(self.pdf_path))[0]
        self.split_dir = f"{self.base_name}_split"
        self.reorder_dir = f"{self.base_name}_reorder"
        # Temporary merged output, moved into place by finish_and_process().
        self.final_file = f"{self.base_name}_temp.pdf"
        self.current_page_index = 0
        self.page_settings = []
        self.processing = False  # Flag to prevent multiple finish calls

        try:
            self.doc = fitz.open(self.pdf_path)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to open PDF file: {e}")
            return False

        self.master.title(f"PDF Splitter - {os.path.basename(self.pdf_path)}")
        return True

    def __init__(self, master, path):
        """Initialize the application and build the UI.

        If *path* does not exist, or no PDF can be opened, an error is
        reported, the window is destroyed and the instance is left only
        partially initialized.

        Args:
            master (tk.Tk): The root Tkinter window.
            path (str): Path to a single PDF file, or to a directory whose
                PDF files (see ``list_pdf_files``) are processed in turn.
        """
        if not os.path.exists(path):
            messagebox.showerror("Error", f"File not found: {path}")
            master.destroy()
            return

        if os.path.isdir(path):
            self.inputs = list_pdf_files(path)
        else:
            # Check for an existing original in the backup directory and
            # restore it if found, so that the file is re-processed from the
            # untouched original.
            dir_name = os.path.dirname(os.path.abspath(path))
            file_name = os.path.basename(path)
            if os.path.basename(dir_name) == self.OUTPUT_DIR_NAME:
                # A file inside "Copies" is a previous result: work on the
                # file of the same name one level up instead.
                dir_name = os.path.dirname(dir_name)
                path = os.path.join(dir_name, file_name)
            backup_path = os.path.join(dir_name, self.BACKUP_DIR_NAME, file_name)
            if os.path.exists(backup_path):
                try:
                    shutil.move(backup_path, path)
                    print(f"Restored original file from: {backup_path}")
                except Exception as e:
                    messagebox.showerror("Error", f"Failed to restore original file: {e}")
                    master.destroy()
                    return
            self.inputs = [path]

        self.output_dir = None
        self.master = master
        self.num = 0
        self.global_rotation = 0  # Rotation applied to every file
        self.history = []  # Paths of the files already completed, oldest first

        if not self.setup_next_file():
            print(f"Aucun fichier PDF valide trouvé dans : {path}")
            master.destroy()
            return

        self._resize_job = None  # For debouncing resize events
        self._initialize_current_page_settings()

        # --- UI Setup ---
        # Set a reasonable initial size for the window
        self.master.geometry("800x1000")
        # The arrow keys move the line by CM_TO_POINTS / 2, i.e. half a centimetre.
        instructions = (
            "← / → : Move line 0.5cm left/right\n"
            "'c': Rotate page 180°, 'C' : rotate all pages, ',' : rotate all files\n"
            "t s r n m: keep left, next page, keep none, keep right, keep as is\n"
            "z: send this page to the end, 'A':pdf arranger 'R':restart file, 'P':back to previous file\n"
        )
        self.info_label = tk.Label(master, text=instructions, justify=tk.LEFT)
        self.info_label.pack(pady=5, side=tk.TOP)
        self.page_label = tk.Label(master, text="", font=("Helvetica", 12))
        self.page_label.pack(pady=5, side=tk.TOP)
        # Canvas for PDF page preview
        self.canvas = tk.Canvas(master, bg="gray")
        self.canvas.pack(fill="both", expand=True)

        # --- Bindings ---
        key_bindings = (
            ("<Left>", self.move_line_left),
            ("<Right>", self.move_line_right),
            ("<Return>", self.confirm_and_next_page),
            ("c", self.rotate_page),
            ("C", self.rotate_all_pages),
            (",", self.rotate_all_files),
            ("t", self.keep_left),
            ("n", self.keep_right),
            ("m", self.keep_as_is),
            ("s", self.confirm_and_next_page),
            ("r", self.discard_page),
            ("z", self.send_page_end),
            ("R", self.restart_current_file),
            ("A", self.start_arranger),
            ("P", self.go_to_previous_file),
        )
        for sequence, handler in key_bindings:
            self.master.bind(sequence, handler)
        # Bind the resize event on the canvas; this also triggers the first
        # rendering once the canvas has a real size.
        self.canvas.bind("<Configure>", self.on_resize)
        self.current_zoom = 1.0

    def start_arranger(self, event=None):
        """Open the current file in the external ``pdf-arranger`` program.

        Args:
            event: Tkinter event, supplied when triggered by a key binding.
                Tkinter always passes it, so the parameter is required for
                the "A" binding to work.
        """
        try:
            subprocess.Popen(["pdf-arranger", self.pdf_path])
        except OSError as e:
            # Typically the executable is not installed or not on PATH.
            messagebox.showerror("Error", f"Failed to start pdf-arranger: {e}")

    def on_resize(self, event):
        """Redraw the page after the canvas has been resized.

        Uses a "debounce" mechanism to avoid excessive redrawing: every new
        event cancels the pending redraw and schedules a fresh one.
        """
        if self._resize_job:
            self.master.after_cancel(self._resize_job)
        # Redraw after 250ms of no resizing
        self._resize_job = self.master.after(250, self.load_page)

    def _initialize_current_page_settings(self):
        """Reset line position and rotation for the current page.

        The line starts at the horizontal centre of the page. Nothing is
        changed when the current index is past the last page.
        """
        if self.current_page_index < len(self.doc):
            page = self.doc.load_page(self.current_page_index)
            self.current_line_x = page.rect.width / 2
            self.current_rotation = 0

    def _close_doc(self):
        """Close the current document if there is one and it is still open."""
        doc = getattr(self, "doc", None)
        if doc is not None and not getattr(doc, "is_closed", False):
            doc.close()

    def _finish_current_file(self):
        """Process the current file once, then move on to the next input.

        Closes the window when no further file can be opened. Guarded by
        ``self.processing`` so that a stray redraw or key press cannot start
        the processing a second time.
        """
        if self.processing:
            return
        self.processing = True
        self.finish_and_process()
        self.history.append(self.pdf_path)
        if self.setup_next_file():
            self._initialize_current_page_settings()
            self.load_page()
        else:
            self.master.destroy()

    def load_page(self):
        """Load and display the current page on the canvas, scaled to fit."""
        if self.processing:
            # The document is being split (and gets closed); nothing to draw.
            return
        if self.current_page_index >= len(self.doc):
            self._finish_current_file()
            return

        page = self.doc.load_page(self.current_page_index)
        self.page_label.config(text=f"Page {self.current_page_index + 1} of {len(self.doc)}")

        # --- Calculate Scaling ---
        canvas_width = self.canvas.winfo_width()
        canvas_height = self.canvas.winfo_height()
        # Don't try to render if the canvas has no size yet.
        if canvas_width <= 1 or canvas_height <= 1:
            return
        page_rect = page.rect
        zoom_x = canvas_width / page_rect.width
        zoom_y = canvas_height / page_rect.height
        # Use 98% of the smallest zoom factor to leave a small margin
        self.current_zoom = min(zoom_x, zoom_y) * 0.98

        # --- Render Page ---
        mat = fitz.Matrix(self.current_zoom, self.current_zoom)
        pix = page.get_pixmap(matrix=mat, alpha=False)
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        # Rotate the preview BEFORE drawing the line: the line position is
        # always expressed in the coordinates of what the user sees.
        if (self.current_rotation + self.file_rotation + self.global_rotation) % 360 != 0:
            img = img.rotate(180, expand=True)

        # --- Draw Line ---
        draw = ImageDraw.Draw(img)
        # The line position is scaled by the same zoom factor
        line_x_scaled = self.current_line_x * self.current_zoom
        draw.line([(line_x_scaled, 0), (line_x_scaled, pix.height)], fill="red", width=3)

        # --- Display on Canvas ---
        # Keep a reference on self, otherwise Tkinter's image is garbage collected.
        self.photo_img = ImageTk.PhotoImage(img)
        self.canvas.delete("all")
        # Center the image on the canvas
        self.canvas.create_image(canvas_width / 2, canvas_height / 2, anchor="center",
                                 image=self.photo_img)

    def restart_current_file(self, event=None):
        """Restart the processing of the current file from its first page."""
        # Close the (possibly modified) in-memory document
        self._close_doc()
        # Re-open the file from disk to reset changes (like moved pages)
        try:
            self.doc = fitz.open(self.pdf_path)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to reopen PDF file: {e}")
            self.master.destroy()
            return
        # Reset state variables for the current file
        self.file_rotation = 0
        self.current_page_index = 0
        self.page_settings = []
        self.processing = False
        # Reload UI
        self._initialize_current_page_settings()
        self.load_page()

    def move_line_left(self, event=None):
        """Move the split line half a centimetre to the left (not below 0)."""
        self.current_line_x = max(0, self.current_line_x - CM_TO_POINTS / 2)
        self.load_page()

    def move_line_right(self, event=None):
        """Move the split line half a centimetre to the right (not past the page width)."""
        page = self.doc.load_page(self.current_page_index)
        self.current_line_x = min(page.rect.width, self.current_line_x + CM_TO_POINTS / 2)
        self.load_page()

    def rotate_page(self, event=None):
        """Toggle the rotation of the current page between 0 and 180 degrees."""
        self.current_rotation = 180 if self.current_rotation == 0 else 0
        self.load_page()

    def rotate_all_pages(self, event=None):
        """Toggle the rotation applied to every page of the current file."""
        self.file_rotation = 180 if self.file_rotation == 0 else 0
        self.load_page()

    def rotate_all_files(self, event=None):
        """Toggle the rotation applied to every page of every file."""
        self.global_rotation = 180 if self.global_rotation == 0 else 0
        self.load_page()

    def keep_left(self, event=None):
        """Confirm the current page, keeping only the part left of the line."""
        self.confirm_and_next_page(keep="left")

    def keep_right(self, event=None):
        """Confirm the current page, keeping only the part right of the line."""
        self.confirm_and_next_page(keep="right")

    def discard_page(self, event=None):
        """Confirm the current page, keeping neither part."""
        self.confirm_and_next_page(keep="none")

    def keep_as_is(self, event=None):
        """Confirm the current page, keeping it whole (no split)."""
        self.confirm_and_next_page(keep="as_is")

    def send_page_end(self, event=None):
        """Move the current page to the end of the in-memory document.

        The change is not written to disk; ``restart_current_file`` undoes it.
        """
        # Do nothing if we are already at or past the last page
        if self.current_page_index >= len(self.doc) - 1:
            return
        # Move the current page to the end of the document
        # -1 as the destination puts it after the last page
        self.doc.move_page(self.current_page_index, -1)
        # Initialize settings for the page that shifted into the current slot
        self._initialize_current_page_settings()
        # Reload the canvas to show the new page
        self.load_page()

    def confirm_and_next_page(self, event=None, keep="both"):
        """Save the settings of the current page and move to the next one.

        After the last page the file is processed and the next input file is
        loaded; the window is closed when there is none.

        Args:
            event: Tkinter event, supplied when triggered by a key binding.
            keep: Which part(s) to keep: "both", "left", "right", "none" or
                "as_is".
        """
        if self.processing:
            return
        self.page_settings.append({
            "line_x": self.current_line_x,
            "rotation": self.current_rotation,
            "keep": keep,
        })
        self.current_page_index += 1
        if self.current_page_index < len(self.doc):
            self._initialize_current_page_settings()
            self.load_page()
        else:
            self._finish_current_file()

    def finish_and_process(self):
        """Split, reorder and merge the current file, then move files into place.

        The original is moved to ``Copies Originales/`` and the merged result
        to ``Copies/``, both next to the original. The intermediate
        directories are removed afterwards.
        """
        self.split_pdf()
        self.reorder_pdfs()
        self.concate_files()
        # Logic to move original to backup and store the new file
        try:
            abs_path = os.path.abspath(self.pdf_path)
            dir_name = os.path.dirname(abs_path)
            file_name = os.path.basename(abs_path)
            backup_dir = os.path.join(dir_name, self.BACKUP_DIR_NAME)
            copies_dir = os.path.join(dir_name, self.OUTPUT_DIR_NAME)
            os.makedirs(backup_dir, exist_ok=True)
            os.makedirs(copies_dir, exist_ok=True)
            backup_path = os.path.join(backup_dir, file_name)
            copies_path = os.path.join(copies_dir, file_name)
            # Remove backup if it already exists (overwrite)
            if os.path.exists(backup_path):
                os.remove(backup_path)
            # Move the original file to "Copies Originales"
            shutil.move(self.pdf_path, backup_path)
            # Move the temp output file to "Copies"
            shutil.move(self.final_file, copies_path)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to move/replace files: {e}")
        self.remove_dirs()

    def _restore_original(self, path):
        """Move the backup of *path* from 'Copies Originales' back to *path*.

        Does nothing when no backup exists. Failures are printed, not raised.
        """
        dir_name = os.path.dirname(os.path.abspath(path))
        file_name = os.path.basename(path)
        backup_path = os.path.join(dir_name, self.BACKUP_DIR_NAME, file_name)
        if os.path.exists(backup_path):
            try:
                # Puts the original back where it was before processing; the
                # generated PDF in "Copies" is left alone.
                shutil.move(backup_path, path)
                print(f"Restored original file from: {backup_path}")
            except Exception as e:
                print(f"Failed to restore original file: {e}")

    def go_to_previous_file(self, event=None):
        """Go back to the beginning of the previously completed file.

        The file currently being edited is pushed back onto the input stack
        (its page choices are discarded) and is presented again afterwards.
        """
        if not self.history:
            return  # Nowhere to go back to
        # Close the currently open document to avoid lock issues
        self._close_doc()
        # 1. Push current file back onto the stack so it processes next
        self.inputs.append(self.pdf_path)
        # 2. Get the previous file, restore its original state, and push to stack
        prev_file = self.history.pop()
        self._restore_original(prev_file)
        self.inputs.append(prev_file)
        # 3. Reload environment (setup_next_file pops prev_file back off the stack)
        if not self.setup_next_file():
            # The error was already reported; there is no usable document left.
            self.master.destroy()
            return
        self._initialize_current_page_settings()
        self.load_page()

    def split_filename_left(self, i):
        """Return the path of the left part (or whole "as_is" page) of page *i* (0-based)."""
        return os.path.join(self.split_dir, f"{self.base_name}_{i+1}l.pdf")

    def split_filename_right(self, i):
        """Return the path of the right part of page *i* (0-based)."""
        return os.path.join(self.split_dir, f"{self.base_name}_{i+1}r.pdf")

    def reorder_filename(self, i):
        """Return the path of the *i*-th (0-based) page of the reordered output."""
        return os.path.join(self.reorder_dir, f"{self.base_name}_{i+1}.pdf")

    def clean_up_dir(self, dir, make=True):
        """Delete every ``*.pdf`` file directly inside *dir*.

        Args:
            dir: Directory to clean.
            make: When True, create the directory first if it is missing.
        """
        if make:
            os.makedirs(dir, exist_ok=True)
        for pdf in glob.glob(os.path.join(dir, "*.pdf")):
            try:
                os.remove(pdf)
            except Exception as e:
                print(f"Error deleting {pdf}: {e}")

    def remove_dirs(self):
        """Remove the intermediate split and reorder directories.

        Best-effort cleanup: a directory that is already gone is not an error.
        """
        shutil.rmtree(self.split_dir, ignore_errors=True)
        shutil.rmtree(self.reorder_dir, ignore_errors=True)

    def _export_page(self, page_index, out_path, rotation, clip=None):
        """Write page *page_index* (or only its *clip* area) to a one-page PDF.

        Args:
            page_index: 0-based page number in ``self.doc``.
            out_path: Destination file.
            rotation: Rotation, in degrees, set on the new page.
            clip: Area of the source page to copy; None copies the whole page.
        """
        area = self.doc.load_page(page_index).rect if clip is None else clip
        out_doc = fitz.open()
        try:
            out_page = out_doc.new_page(width=area.width, height=area.height)
            out_page.show_pdf_page(out_page.rect, self.doc, page_index, clip=clip)
            out_page.set_rotation(rotation)
            out_doc.save(out_path)
        finally:
            # Release the document even when drawing or saving fails.
            out_doc.close()

    def split_pdf(self):
        """Split each confirmed page of the PDF according to the saved settings.

        A part is only written when it is kept AND not empty (line strictly
        inside the page on that side). The source document is closed at the
        end.
        """
        print("Starting PDF processing...")
        self.clean_up_dir(self.split_dir)
        for i, settings in enumerate(self.page_settings):
            page = self.doc.load_page(i)
            line_x = settings['line_x']
            keep = settings['keep']
            rotation = (page.rotation + settings['rotation'] +
                        self.file_rotation + self.global_rotation) % 360

            if keep == "as_is":
                # Whole page, stored under the "left" name; no left/right parts.
                self._export_page(i, self.split_filename_left(i), rotation)
                continue

            width = page.rect.width
            height = page.rect.height
            if rotation == 0:
                rect_left = fitz.Rect(0, 0, line_x, height)
                rect_right = fitz.Rect(line_x, 0, width, height)
            else:
                # The preview was shown upside down, so what the user saw on
                # the left is the right-hand side of the source page.
                # NOTE(review): every non-zero rotation is handled like 180
                # degrees here - confirm 90/270 page rotations never occur.
                rect_left = fitz.Rect(width - line_x, 0, width, height)
                rect_right = fitz.Rect(0, 0, width - line_x, height)

            # --- Create Left Part ---
            if keep in ("both", "left") and line_x > 0:
                self._export_page(i, self.split_filename_left(i), rotation, clip=rect_left)
            # --- Create Right Part ---
            if keep in ("both", "right") and line_x < width:
                self._export_page(i, self.split_filename_right(i), rotation, clip=rect_right)
        self.doc.close()
        print(f"\nProcessing complete. Files are in '{self.split_dir}' directory.")

    def _copy_part(self, source, index):
        """Copy *source* to reorder slot *index* and return the next free slot.

        A missing *source* is skipped (the slot is not consumed): split_pdf
        does not write a part that would be empty because the line sits on
        the page edge.
        """
        if not os.path.exists(source):
            return index
        shutil.copy2(source, self.reorder_filename(index))
        return index + 1

    def reorder_pdfs(self):
        """Reorder the split parts, treating page pairs as folded double sheets.

        Two consecutive splittable pages (anything but "as_is") are taken as
        the outer and the inner side of one double sheet and emitted as:
        outer right, inner left, inner right, outer left. Any other page is
        emitted on its own, left part first.
        """
        self.clean_up_dir(self.reorder_dir)
        ps = self.page_settings
        ri = 0  # next free slot in the reordered output
        i = 0
        while i < len(ps):
            outer = ps[i]['keep']
            is_double_sheet = (
                outer in self._SPLITTABLE
                and i < len(ps) - 1
                and ps[i + 1]['keep'] in self._SPLITTABLE
            )
            if is_double_sheet:
                inner = ps[i + 1]['keep']
                # 1. Front cover (outer right)
                if outer in ("both", "right"):
                    ri = self._copy_part(self.split_filename_right(i), ri)
                # 2. Inner left
                if inner in ("both", "left"):
                    ri = self._copy_part(self.split_filename_left(i + 1), ri)
                # 3. Inner right
                if inner in ("both", "right"):
                    ri = self._copy_part(self.split_filename_right(i + 1), ri)
                # 4. Back of the sheet (outer left)
                if outer in ("both", "left"):
                    ri = self._copy_part(self.split_filename_left(i), ri)
                i += 2
            else:
                # Single page (or "as_is", which is stored under the left name)
                if outer in ("left", "both", "as_is"):
                    ri = self._copy_part(self.split_filename_left(i), ri)
                if outer in ("right", "both"):
                    ri = self._copy_part(self.split_filename_right(i), ri)
                i += 1

    def concate_files(self):
        """Merge the reordered one-page PDFs into ``self.final_file``.

        Files are merged in natural order, so that ``_10`` comes after ``_9``.
        """
        def natural_key(text):
            # Split into digit / non-digit runs so numbers compare numerically.
            return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', text)]

        pdf_files = sorted(
            glob.glob(os.path.join(self.reorder_dir, "*.pdf")),
            key=natural_key,
        )
        writer = PdfWriter()
        for pdf in pdf_files:
            for page in PdfReader(pdf).pages:
                writer.add_page(page)

        if self.output_dir is not None:
            # A bare file name has no directory part; makedirs("") would fail.
            target_dir = os.path.dirname(self.final_file)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
        with open(self.final_file, "wb") as f:
            writer.write(f)
        print(f"Created merged PDF: {self.final_file}")
if __name__ == "__main__":
    # Exactly one argument is expected: a single PDF file, or a directory
    # whose PDF files are processed one after the other.
    if len(sys.argv) != 2:
        print(
            f"Usage: python {os.path.basename(sys.argv[0])} <path_to_pdf_file_or_directory>",
            file=sys.stderr,
        )
        sys.exit(1)
    pdf_file_path = sys.argv[1]
    root = tk.Tk()
    # Keep a reference to the application object for the lifetime of the main loop.
    app = PDFPreviewer(root, pdf_file_path)
    root.mainloop()