"""Crop a vertical strip from every page of a PDF, stitch the strips, and review.

For each input PDF the strip starting 100 px from the left edge and one third
of the page wide is cut from every page; the strips are placed side by side
(separated by a thin delimiter), resized to ``OUTPUT_SIZE`` and saved as a
JPEG in a ``Cutleft`` sub-directory of the input directory.  A small Tk window
shows each result so the crop can be shifted left/right before moving on.
"""

import sys
import os
import time
import tkinter as tk
from threading import Thread
from queue import Queue, Empty, Full

from pdf2image import convert_from_path
from PIL import Image, ImageTk

# --- Configuration ---
DELIMITER_WIDTH = 5           # width in px of the bar drawn between page strips
DELIMITER_COLOR = (0, 0, 0)   # RGB colour of that bar
OUTPUT_SIZE = (1000, 1000)    # final (width, height) of every saved image

if len(sys.argv) < 2:
    sys.exit("Usage: python script.py <pdf_file_or_directory>")

path_arg = sys.argv[1]
files = []
INPUT_DIR = ""

if os.path.isfile(path_arg) and path_arg.lower().endswith('.pdf'):
    # Single PDF: work in its directory with a one-element file list.
    INPUT_DIR = os.path.dirname(path_arg)
    files = [os.path.basename(path_arg)]
elif os.path.isdir(path_arg):
    # Directory: every *.pdf inside it (case-insensitive), in sorted order.
    INPUT_DIR = path_arg
    files = sorted(f for f in os.listdir(INPUT_DIR) if f.lower().endswith('.pdf'))
else:
    sys.exit("Error: Input must be a directory or a PDF file.")

OUTPUT_DIR = os.path.join(INPUT_DIR, 'Cutleft')
os.makedirs(OUTPUT_DIR, exist_ok=True)


# --- Processing Logic ---
def _crop_pages(pages, shift_offset):
    """Return the cropped strip of every page that yields a non-empty crop box."""
    strips = []
    for page in pages:
        width, height = page.size
        # Clamp the requested window to the page so the crop box stays valid.
        left = max(0, 100 + shift_offset)
        right = min(width, (width // 3) + 100 + shift_offset)
        if right > left:
            strips.append(page.crop((left, 0, right, height)))
    return strips


def _stitch(strips):
    """Paste the strips side by side on a white canvas, separated by delimiters."""
    count = len(strips)
    total_width = sum(s.width for s in strips) + (count - 1) * DELIMITER_WIDTH
    max_height = max(s.height for s in strips)
    canvas = Image.new('RGB', (total_width, max_height), color=(255, 255, 255))

    # The delimiter is identical between every pair of strips: build it once,
    # and only when there is more than one strip to separate.
    delimiter = None
    if count > 1:
        delimiter = Image.new('RGB', (DELIMITER_WIDTH, max_height), color=DELIMITER_COLOR)

    x_offset = 0
    for idx, strip in enumerate(strips):
        canvas.paste(strip, (x_offset, 0))
        x_offset += strip.width
        if idx < count - 1:
            canvas.paste(delimiter, (x_offset, 0))
            x_offset += DELIMITER_WIDTH
    return canvas


def process_single_pdf(filename, shift_offset=0):
    """Convert one PDF into a single stitched, resized PIL image.

    Args:
        filename: Name of the PDF inside ``INPUT_DIR``.
        shift_offset: Horizontal shift in pixels applied to the crop window
            (positive moves it right, negative moves it left).

    Returns:
        The stitched image resized to ``OUTPUT_SIZE``, or ``None`` when no page
        produced a non-empty crop or when any error occurred (the error is
        printed, not raised, because this runs inside worker threads).
    """
    pdf_path = os.path.join(INPUT_DIR, filename)
    try:
        strips = _crop_pages(convert_from_path(pdf_path), shift_offset)
        if not strips:
            return None
        return _stitch(strips).resize(OUTPUT_SIZE, Image.LANCZOS)
    except Exception as e:
        print(f"Error processing {filename}: {e}")
        return None


def save_image(pil_img, filename):
    """Save ``pil_img`` as a quality-95 JPEG in ``OUTPUT_DIR``.

    The output name is ``filename`` with its extension replaced by ``.jpg``;
    an existing file of that name is overwritten.
    """
    output_filename = os.path.splitext(filename)[0] + ".jpg"
    output_path = os.path.join(OUTPUT_DIR, output_filename)
    pil_img.save(output_path, "JPEG", quality=95)
    print(f"Saved: {output_filename}")


# --- GUI Application ---
class ImageReviewer:
    """Tk window that shows each processed PDF and lets the user adjust the crop.

    Keys: Enter moves to the next file, ``n``/``N`` shift the crop window right
    by 50/100 px, ``t`` shifts it left by 50 px.  Every displayed image has
    already been saved; shifting re-processes and overwrites it.

    A daemon thread pre-processes the next file (with shift 0) while the user
    looks at the current one.  Constructing the object runs the Tk main loop and
    only returns once the window has been destroyed.
    """

    def __init__(self, file_list):
        self.files = file_list
        self.index = 0              # position in self.files of the file on screen
        self.current_shift = 0      # shift applied to the file on screen
        self.current_pil = None     # PIL image currently displayed
        self.is_processing = False  # True while a manual job is running

        # Holds at most one pre-fetched result as (index, image).
        self.prefetch_queue = Queue(maxsize=1)
        # Receives the result (image or None) of the manual job in flight.
        self.manual_queue = Queue()

        # Setup GUI
        self.root = tk.Tk()
        self.root.title("PDF Cropper")
        self.root.geometry("+100+100")

        self.label_img = tk.Label(self.root)
        self.label_img.pack()

        self.label_info = tk.Label(self.root, text="", font=("Arial", 12, "bold"))
        self.label_info.pack(pady=5)

        # Bindings.  The Enter key needs the '<Return>' event sequence; an
        # empty sequence is not a valid Tk binding.
        self.root.bind('<Return>', self.on_next)
        self.root.bind('n', lambda e: self.on_shift(50))
        self.root.bind('N', lambda e: self.on_shift(100))
        self.root.bind('t', lambda e: self.on_shift(-50))

        # Start background pre-fetcher
        self.bg_thread = Thread(target=self.prefetch_worker, daemon=True)
        self.bg_thread.start()

        # Load first image
        self.load_current_image()

        self.root.lift()
        self.root.focus_force()
        self.root.mainloop()

    def prefetch_worker(self):
        """Background loop that processes the file after the one on screen.

        Each index is attempted at most once, whether or not it succeeds, so a
        PDF that cannot be processed is not retried in a tight loop.
        """
        last_attempted = 0  # targets are always >= 1, so 0 means "none yet"
        while True:
            target = self.index + 1
            if target < len(self.files) and target != last_attempted:
                last_attempted = target
                img = process_single_pdf(self.files[target], shift_offset=0)
                if img is not None:
                    self._offer_prefetched(target, img)
            # Sleep briefly so the polling loop does not hog the CPU.
            time.sleep(0.1)

    def _offer_prefetched(self, target, img):
        """Queue a prefetched image unless the user has already reached ``target``.

        A timed ``put`` is used instead of a blocking one so the worker can give
        up as soon as the result is stale rather than hang on a full queue.
        """
        while target > self.index:
            try:
                self.prefetch_queue.put((target, img), timeout=0.1)
                return
            except Full:
                continue

    def _take_prefetched(self):
        """Return the prefetched image for ``self.index``, or ``None``.

        Entries for indices already passed are discarded so they cannot occupy
        the single queue slot forever; an entry for a later index is left alone.
        Only the GUI thread removes items, so peek-then-get is safe here.
        """
        while True:
            with self.prefetch_queue.mutex:
                pending = self.prefetch_queue.queue
                head_idx = pending[0][0] if pending else None
            if head_idx is None or head_idx > self.index:
                return None
            _, img = self.prefetch_queue.get_nowait()
            if head_idx == self.index:
                return img
            # head_idx < self.index: stale result, dropped; look again.

    def load_current_image(self, use_prefetch=False):
        """Show the file at ``self.index``, or close the window when none remain.

        Args:
            use_prefetch: When true, use the background worker's result for this
                index if one is queued; otherwise (or when none is queued) the
                file is processed on demand with ``self.current_shift``.
        """
        if self.index >= len(self.files):
            print("All files processed.")
            self.root.destroy()
            return

        filename = self.files[self.index]
        self.is_processing = False

        img_found = self._take_prefetched() if use_prefetch else None
        if img_found is not None:
            # Prefetched images are always produced with shift 0.
            self.current_shift = 0
            print(f"Loaded {filename} from prefetch.")
            self.current_pil = img_found
            save_image(self.current_pil, filename)
            self.update_display(filename)
        else:
            # Not in queue (first load or nothing prefetched): process now.
            self.trigger_processing(filename, self.current_shift)

    def trigger_processing(self, filename, shift):
        """Process ``filename`` with ``shift`` in a thread so the GUI stays live."""
        self.is_processing = True
        self.label_info.configure(
            text=f"Processing {filename} (Shift {shift})... Please wait.", fg="red"
        )

        def worker():
            self.manual_queue.put(process_single_pdf(filename, shift))

        Thread(target=worker, daemon=True).start()
        self.check_manual_queue(filename)

    def check_manual_queue(self, filename):
        """Poll for the manual job's result every 100 ms and act on it.

        On success the image is saved and displayed.  On failure the file is
        skipped and the next one is loaded with the shift reset to 0.
        """
        try:
            img = self.manual_queue.get_nowait()
        except Empty:
            # Check again in 100ms
            self.root.after(100, lambda: self.check_manual_queue(filename))
            return

        # Clear the busy flag *before* possibly starting the next job below;
        # clearing it afterwards would re-enable the keys while that job runs.
        self.is_processing = False
        self.current_pil = img
        if img is not None:
            save_image(img, filename)
            self.update_display(filename)
        else:
            print(f"Failed to process {filename}, skipping.")
            self.index += 1
            self.current_shift = 0  # the shift belonged to the skipped file
            self.load_current_image(use_prefetch=True)

    def update_display(self, filename):
        """Show ``self.current_pil`` and refresh the status/help line."""
        if self.current_pil is not None:
            tk_image = ImageTk.PhotoImage(self.current_pil)
            self.label_img.configure(image=tk_image)
            # Keep a reference on the widget so the image is not garbage-collected.
            self.label_img.image = tk_image

        self.label_info.configure(
            text=f"[{self.index+1}/{len(self.files)}] {filename} | Shift: {self.current_shift}px\n"
                 f"Enter: Next | n: +50 | N: +100 | t: -50",
            fg="black"
        )

    def on_shift(self, amount):
        """Move the crop window by ``amount`` px and re-process the current file."""
        if self.is_processing:
            return  # Ignore keys while processing
        self.current_shift += amount
        print(f"Applying shift: {self.current_shift}")
        self.trigger_processing(self.files[self.index], self.current_shift)

    def on_next(self, event):
        """Advance to the next file with the shift reset (ignored while busy)."""
        if self.is_processing:
            return
        self.index += 1
        self.current_shift = 0
        self.load_current_image(use_prefetch=True)


# --- Entry Point ---
if __name__ == "__main__":
    if not files:
        print("No PDF files found.")
    else:
        ImageReviewer(files)