Copies/cutleft.py

321 lines
10 KiB
Python

import sys
import os
import time
import json # Added for schema output
import tkinter as tk
from threading import Thread
from queue import Queue, Empty
from pdf2image import convert_from_path
from PIL import Image, ImageTk
# --- Configuration ---
# Width (px) and RGB color of the vertical bar drawn between stitched pages.
DELIMITER_WIDTH = 5
DELIMITER_COLOR = (0, 0, 0)
# (width, height) the on-screen preview image is resized to.
OUTPUT_SIZE = (1000, 1000)

# --- Input resolution (executed at import time) ---
# The single command-line argument is either one PDF file or a directory
# whose PDF files (case-insensitive ".pdf" suffix) are processed in sorted order.
if len(sys.argv) < 2:
    sys.exit("Usage: python script.py <directory_path_or_file_path>")

path_arg = sys.argv[1]
files = []
INPUT_DIR = ""

if os.path.isfile(path_arg) and path_arg.lower().endswith('.pdf'):
    # For a bare filename dirname() is "", which os.path.join treats as the
    # current working directory.
    INPUT_DIR = os.path.dirname(path_arg)
    files = [os.path.basename(path_arg)]
elif os.path.isdir(path_arg):
    INPUT_DIR = path_arg
    files = sorted(f for f in os.listdir(INPUT_DIR) if f.lower().endswith('.pdf'))
else:
    sys.exit("Error: Input must be a directory or a PDF file.")

# All generated images and schema files go into a "Cutleft" sub-directory
# next to the inputs. exist_ok avoids the check-then-create race.
OUTPUT_DIR = os.path.join(INPUT_DIR, 'Cutleft')
os.makedirs(OUTPUT_DIR, exist_ok=True)
# --- Processing Logic ---
def distribute_pages(total_pages, max_per_file=5):
    """
    Split ``total_pages`` into the fewest chunks of at most ``max_per_file``
    pages each, keeping chunk sizes as even as possible.

    Larger chunks come first, and sizes differ by at most one.
    Example: 12 pages, max 5 -> [4, 4, 4]; 11 pages, max 5 -> [4, 4, 3].

    Args:
        total_pages: Number of pages to distribute. Zero or a negative
            value yields an empty list.
        max_per_file: Upper bound on pages per chunk; must be >= 1.

    Returns:
        A list of chunk sizes whose sum equals ``total_pages``.

    Raises:
        ValueError: If ``max_per_file`` is less than 1 while there are
            pages to distribute.
    """
    if total_pages <= 0:
        return []
    if max_per_file < 1:
        raise ValueError("max_per_file must be at least 1")

    # Ceiling division: the minimum number of files that can hold all pages.
    num_files = -(-total_pages // max_per_file)
    base_count, remainder = divmod(total_pages, num_files)

    # The first `remainder` files each take one extra page.
    return [base_count + 1] * remainder + [base_count] * (num_files - remainder)
def stitch_images(image_list):
    """
    Stitch images side by side, left to right, separated by delimiter bars.

    The canvas is white RGB, as tall as the tallest input; every image is
    pasted at the top edge, so shorter images leave white space below them.
    A bar of DELIMITER_WIDTH pixels in DELIMITER_COLOR is drawn between
    neighbouring images (none before the first or after the last).

    Args:
        image_list: Sequence of PIL images to combine.

    Returns:
        The combined PIL image, or None when ``image_list`` is empty.
    """
    if not image_list:
        return None

    num_images = len(image_list)
    total_width = sum(img.width for img in image_list) + (num_images - 1) * DELIMITER_WIDTH
    max_height = max(img.height for img in image_list)
    combined = Image.new('RGB', (total_width, max_height), color=(255, 255, 255))

    # The delimiter is identical for every gap, so build it once instead of
    # once per image; it is not needed at all for a single image.
    delimiter = None
    if num_images > 1:
        delimiter = Image.new('RGB', (DELIMITER_WIDTH, max_height), color=DELIMITER_COLOR)

    x_offset = 0
    for idx, img in enumerate(image_list):
        if idx:
            # Every image except the first is preceded by a delimiter bar.
            combined.paste(delimiter, (x_offset, 0))
            x_offset += DELIMITER_WIDTH
        combined.paste(img, (x_offset, 0))
        x_offset += img.width
    return combined
def process_single_pdf(filename, shift_offset=0):
    """
    Render a PDF from INPUT_DIR, crop a vertical strip from each page and
    stitch the strips into output images.

    The strip starts 100 px (plus ``shift_offset``) from the left page edge
    and extends one third of the page width; it is clamped to the page and
    pages whose clamped strip is empty are skipped.

    Args:
        filename: Name of the PDF file inside INPUT_DIR.
        shift_offset: Horizontal shift in pixels applied to both strip edges.

    Returns:
        A tuple ``(preview_image_resized, list_of_split_images, schema_dict)``,
        or None when no strip could be produced or any error occurred (the
        error is printed, not raised).
    """
    source_path = os.path.join(INPUT_DIR, filename)
    try:
        columns = []
        for page in convert_from_path(source_path):
            page_width, page_height = page.size
            # Clamp the strip to the page so the crop box stays valid.
            x_start = max(0, 100 + shift_offset)
            x_end = min(page_width, (page_width // 3) + 100 + shift_offset)
            if x_end <= x_start:
                continue
            columns.append(page.crop((x_start, 0, x_end, page_height)))

        if not columns:
            return None

        # 1. Decide how many columns go into each output file.
        col_distribution = distribute_pages(len(columns), max_per_file=5)

        # 2. Stitch each consecutive run of columns at full resolution.
        split_images = []
        start = 0
        for count in col_distribution:
            split_images.append(stitch_images(columns[start:start + count]))
            start += count

        # 3. One stitched image of all columns, resized for the GUI preview.
        preview_resized = stitch_images(columns).resize(OUTPUT_SIZE, Image.LANCZOS)

        return (
            preview_resized,
            split_images,
            {
                "original_filename": filename,
                "total_pages": len(columns),
                "number_of_files": len(split_images),
                "columns_per_file": col_distribution,
            },
        )
    except Exception as err:
        print(f"Error processing {filename}: {err}")
        return None
def save_results(result_tuple, filename):
    """
    Write the split images and the schema of one processed PDF to OUTPUT_DIR.

    Images are saved as ``<base>_01.jpg``, ``<base>_02.jpg``, ... (JPEG,
    quality 95) and the schema as ``<base>_schema.json``, where ``<base>``
    is ``filename`` without its extension.

    Args:
        result_tuple: ``(preview, split_images, schema)``; the preview is
            not written.
        filename: Name of the source PDF.
    """
    _preview, split_images, schema = result_tuple
    stem = os.path.splitext(filename)[0]

    # Split images, numbered from 01.
    for number, image in enumerate(split_images, start=1):
        image_name = f"{stem}_{number:02d}.jpg"
        image.save(os.path.join(OUTPUT_DIR, image_name), "JPEG", quality=95)
        print(f"Saved: {image_name}")

    # Schema describing how the pages were distributed.
    schema_name = f"{stem}_schema.json"
    with open(os.path.join(OUTPUT_DIR, schema_name), 'w') as handle:
        json.dump(schema, handle, indent=4)
    print(f"Saved schema: {schema_name}")
# --- GUI Application ---
class ImageReviewer:
    """
    Tk window that steps through PDF files, showing a preview of the cropped
    strips for each one and saving the results as soon as they are computed.

    Keys: Enter moves to the next file; ``n`` / ``N`` shift the crop by
    +50 / +100 px; ``t`` shifts it by -50 px. Every shift re-processes and
    re-saves the current file.

    A daemon thread pre-processes the file after the current one (with shift
    0) so that pressing Enter can usually display it without waiting.
    """

    def __init__(self, file_list):
        """Build the window, start the prefetch thread and run the Tk main
        loop; this call blocks until the window is destroyed."""
        self.files = file_list
        self.index = 0
        self.current_shift = 0
        self.current_preview = None  # Only the resized preview is kept for the GUI
        self.is_processing = False

        # Pre-fetched results as (index, (preview, splits, schema)).
        self.prefetch_queue = Queue(maxsize=1)
        # Results of processing jobs started from the GUI thread.
        self.manual_queue = Queue()

        # Setup GUI
        self.root = tk.Tk()
        self.root.title("PDF Cropper")
        self.root.geometry("+100+100")

        self.label_img = tk.Label(self.root)
        self.label_img.pack()

        self.label_info = tk.Label(self.root, text="", font=("Arial", 12, "bold"))
        self.label_info.pack(pady=5)

        # Bindings
        self.root.bind('<Return>', self.on_next)
        self.root.bind('n', lambda e: self.on_shift(50))
        self.root.bind('N', lambda e: self.on_shift(100))
        self.root.bind('t', lambda e: self.on_shift(-50))

        # Start background pre-fetcher
        self.bg_thread = Thread(target=self.prefetch_worker, daemon=True)
        self.bg_thread.start()

        # Load first image
        self.load_current_image()

        self.root.lift()
        self.root.focus_force()
        self.root.mainloop()

    def prefetch_worker(self):
        """Background loop: process the file after the current one (shift 0)
        and hand the result over through ``prefetch_queue``."""
        last_attempted = 0
        while True:
            target = self.index + 1
            if target < len(self.files) and last_attempted != target:
                result = process_single_pdf(self.files[target], shift_offset=0)
                # If the user already reached this file while it was being
                # rendered, the result would never be consumed; do not
                # occupy the single queue slot with it.
                if result and target > self.index:
                    self.prefetch_queue.put((target, result))  # Blocks if full
                last_attempted = target
            time.sleep(0.1)

    def _take_prefetched(self):
        """Return the prefetched result for ``self.index``, or None.

        Entries for any other index are discarded. Without this, a stale
        entry would stay in the one-slot queue forever and the prefetch
        thread would block on ``put`` for the rest of the session.
        """
        while True:
            try:
                q_idx, q_result = self.prefetch_queue.get_nowait()
            except Empty:
                return None
            if q_idx == self.index:
                return q_result

    def load_current_image(self, use_prefetch=False):
        """Show the file at ``self.index``, or close the window when every
        file has been handled.

        With ``use_prefetch`` a matching prefetched result is used if one is
        available; otherwise the file is processed in a worker thread with
        the current shift.
        """
        if self.index >= len(self.files):
            print("All files processed.")
            self.root.destroy()
            return

        filename = self.files[self.index]
        self.is_processing = False

        result_found = self._take_prefetched() if use_prefetch else None
        if result_found:
            # Prefetched results are always computed with shift 0.
            self.current_shift = 0
            print(f"Loaded {filename} from prefetch.")
            self.handle_processing_result(result_found, filename)
        else:
            # Not in queue (first load or queue mismatch), process manually
            self.trigger_processing(filename, self.current_shift)

    def trigger_processing(self, filename, shift):
        """Starts a thread to process image so GUI doesn't freeze."""
        self.is_processing = True
        self.label_info.configure(text=f"Processing {filename} (Shift {shift})... Please wait.", fg="red")

        def worker():
            res = process_single_pdf(filename, shift)
            self.manual_queue.put(res)

        Thread(target=worker, daemon=True).start()
        self.check_manual_queue(filename)

    def check_manual_queue(self, filename):
        """Poll ``manual_queue`` every 100 ms until the job started by
        ``trigger_processing`` delivers its result, then display it or skip
        the file when processing failed."""
        try:
            result = self.manual_queue.get_nowait()
        except Empty:
            # Check again in 100ms
            self.root.after(100, lambda: self.check_manual_queue(filename))
            return

        # Clear the flag BEFORE moving on: skipping to the next file may start
        # a new job, and that job's flag must not be overwritten afterwards.
        self.is_processing = False
        if result:
            self.handle_processing_result(result, filename)
        else:
            print(f"Failed to process {filename}, skipping.")
            self.index += 1
            # The shift belongs to the skipped file; start the next one at 0,
            # as on_next does.
            self.current_shift = 0
            self.load_current_image(use_prefetch=True)

    def handle_processing_result(self, result, filename):
        """Unpacks result, saves files, and updates display."""
        preview, _splits, schema = result
        self.current_preview = preview
        # Save immediately upon loading/calculating
        save_results(result, filename)
        self.update_display(filename, schema)

    def update_display(self, filename, schema=None):
        """Show the current preview and refresh the status line (position,
        filename, shift, optional schema summary and key help)."""
        if self.current_preview:
            tk_image = ImageTk.PhotoImage(self.current_preview)
            self.label_img.configure(image=tk_image)
            # Keep a reference on the widget so the image object stays alive.
            self.label_img.image = tk_image

        schema_info = ""
        if schema:
            cols = str(schema['columns_per_file'])
            schema_info = f"\nFiles: {schema['number_of_files']} | Cols: {cols}"

        self.label_info.configure(
            text=f"[{self.index+1}/{len(self.files)}] {filename} | Shift: {self.current_shift}px"
                 f"{schema_info}\n"
                 f"Enter: Next | n: +50 | N: +100 | t: -50",
            fg="black"
        )

    def on_shift(self, amount):
        """Shift the crop of the current file by ``amount`` px and re-process it."""
        if self.is_processing:
            return  # Ignore keys while processing
        self.current_shift += amount
        print(f"Applying shift: {self.current_shift}")
        self.trigger_processing(self.files[self.index], self.current_shift)

    def on_next(self, event):
        """Advance to the next file with the shift reset to 0."""
        if self.is_processing:
            return
        self.index += 1
        self.current_shift = 0
        self.load_current_image(use_prefetch=True)
# --- Entry Point ---
if __name__ == "__main__":
    # Open the review window only when there is at least one PDF to show;
    # constructing ImageReviewer runs the GUI until it is closed.
    if files:
        app = ImageReviewer(files)
    else:
        print("No PDF files found.")