240 lines
7.7 KiB
Python
240 lines
7.7 KiB
Python
import sys
|
|
import os
|
|
import time
|
|
import tkinter as tk
|
|
from threading import Thread
|
|
from queue import Queue, Empty
|
|
from pdf2image import convert_from_path
|
|
from PIL import Image, ImageTk
|
|
|
|
# --- Configuration ---
# Width, in pixels, of the bar drawn between neighbouring page strips.
DELIMITER_WIDTH = 5
# RGB colour of that bar.
DELIMITER_COLOR = (0, 0, 0)
# (width, height) every stitched image is resized to.
OUTPUT_SIZE = (1000, 1000)

# --- Input resolution ---
# The single command-line argument is either one PDF file or a directory
# whose PDF files (matched case-insensitively by extension) are processed
# in sorted order.
if len(sys.argv) < 2:
    sys.exit("Usage: python script.py <directory_path_or_file_path>")

path_arg = sys.argv[1]
files = []
INPUT_DIR = ""

if os.path.isfile(path_arg) and path_arg.lower().endswith('.pdf'):
    INPUT_DIR = os.path.dirname(path_arg)
    files = [os.path.basename(path_arg)]
elif os.path.isdir(path_arg):
    INPUT_DIR = path_arg
    files = sorted(f for f in os.listdir(INPUT_DIR) if f.lower().endswith('.pdf'))
else:
    sys.exit("Error: Input must be a directory or a PDF file.")

# Results are written to a 'Cutleft' folder next to the input PDFs.
OUTPUT_DIR = os.path.join(INPUT_DIR, 'Cutleft')

# exist_ok=True avoids the check-then-create race and fails immediately
# (FileExistsError) if 'Cutleft' exists but is not a directory, instead of
# failing later on the first save.
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
|
|
|
# --- Processing Logic ---
|
|
|
|
def process_single_pdf(filename, shift_offset=0):
    """Rasterise one PDF from INPUT_DIR and stitch a strip of each page.

    For every page a full-height strip is cropped whose left edge sits at
    ``100 + shift_offset`` px and whose right edge sits a third of the page
    width further right; both edges are clamped to the page, and pages whose
    clamped strip would be empty are left out. The strips are placed side by
    side on a white canvas, separated by DELIMITER_WIDTH-wide bars in
    DELIMITER_COLOR, and the canvas is resized to OUTPUT_SIZE.

    Args:
        filename: Name of the PDF inside INPUT_DIR.
        shift_offset: Horizontal displacement of the crop window in pixels;
            may be negative.

    Returns:
        The resized PIL image, or None when no page yielded a strip or when
        any step raised (the error is printed, not propagated).
    """
    source_path = os.path.join(INPUT_DIR, filename)
    try:
        strips = []
        for page in convert_from_path(source_path):
            page_w, page_h = page.size

            # Clamp the requested window to the page bounds.
            left_edge = max(0, 100 + shift_offset)
            right_edge = min(page_w, (page_w // 3) + 100 + shift_offset)
            if right_edge <= left_edge:
                # Window fell entirely outside this page.
                continue

            strips.append(page.crop((left_edge, 0, right_edge, page_h)))

        if not strips:
            return None

        # Canvas is as wide as all strips plus one bar per gap, and as tall
        # as the tallest strip.
        gap_count = len(strips) - 1
        canvas_w = sum(strip.width for strip in strips) + gap_count * DELIMITER_WIDTH
        canvas_h = max(strip.height for strip in strips)
        canvas = Image.new('RGB', (canvas_w, canvas_h), color=(255, 255, 255))

        cursor = 0
        for position, strip in enumerate(strips):
            if position:
                # Every strip except the first is preceded by a delimiter bar.
                bar = Image.new('RGB', (DELIMITER_WIDTH, canvas_h), color=DELIMITER_COLOR)
                canvas.paste(bar, (cursor, 0))
                cursor += DELIMITER_WIDTH
            canvas.paste(strip, (cursor, 0))
            cursor += strip.width

        return canvas.resize(OUTPUT_SIZE, Image.LANCZOS)

    except Exception as e:
        print(f"Error processing {filename}: {e}")
        return None
|
|
|
|
def save_image(pil_img, filename):
    """Save ``pil_img`` into OUTPUT_DIR as a JPEG named after ``filename``.

    The extension of ``filename`` is replaced with ``.jpg``, the image is
    written with quality 95, and the written file name is printed.
    """
    stem, _ext = os.path.splitext(filename)
    jpeg_name = stem + ".jpg"
    pil_img.save(os.path.join(OUTPUT_DIR, jpeg_name), "JPEG", quality=95)
    print(f"Saved: {jpeg_name}")
|
|
|
|
# --- GUI Application ---
|
|
|
|
class ImageReviewer:
    """Tkinter window that shows the stitched image of each PDF in turn.

    Every image that is displayed is also saved via ``save_image``. Keys:
    Enter moves to the next file, ``n``/``N`` re-crop the current file with
    the shift increased by 50/100 px, ``t`` decreases it by 50 px.

    PDF processing never runs on the Tk thread: a daemon thread prefetches
    the file after the current one (with shift 0), and re-crops run in
    short-lived worker threads whose results are polled with ``root.after``.
    All Tk calls are made from the thread that constructed the instance.
    """

    def __init__(self, file_list):
        """Build the window, start prefetching and run the Tk main loop.

        Args:
            file_list: PDF file names (relative to INPUT_DIR) to review in
                order. If empty, a message is printed and no window is made.

        Note: for a non-empty list this call blocks until the window closes.
        """
        self.files = file_list
        self.index = 0              # position of the file being reviewed
        self.current_shift = 0      # shift (px) applied to the current file
        self.current_pil = None     # PIL image currently displayed
        self.is_processing = False  # True while a manual worker is running

        # Holds at most one (index, image) pair produced by the prefetcher.
        self.prefetch_queue = Queue(maxsize=1)
        # Receives the result (image or None) of a manual processing run.
        self.manual_queue = Queue()

        if not self.files:
            # Without this guard the first load would destroy the root
            # window and the lift()/mainloop() calls below would then fail.
            print("No PDF files found.")
            return

        # Setup GUI
        self.root = tk.Tk()
        self.root.title("PDF Cropper")
        self.root.geometry("+100+100")

        self.label_img = tk.Label(self.root)
        self.label_img.pack()

        self.label_info = tk.Label(self.root, text="", font=("Arial", 12, "bold"))
        self.label_info.pack(pady=5)

        # Bindings
        self.root.bind('<Return>', self.on_next)
        self.root.bind('n', lambda e: self.on_shift(50))
        self.root.bind('N', lambda e: self.on_shift(100))
        self.root.bind('t', lambda e: self.on_shift(-50))

        # Start background pre-fetcher
        self.bg_thread = Thread(target=self.prefetch_worker, daemon=True)
        self.bg_thread.start()

        # Load first image
        self.load_current_image()

        self.root.lift()
        self.root.focus_force()
        self.root.mainloop()

    def prefetch_worker(self):
        """Background loop that keeps the file after the current one ready.

        Runs forever in a daemon thread. Each file index is attempted at
        most once, so a PDF that fails to process is not retried.
        """
        last_attempted = 0
        while True:
            target = self.index + 1
            if target < len(self.files) and last_attempted != target:
                img = process_single_pdf(self.files[target], shift_offset=0)
                # If the user already moved on to (or past) `target` while
                # it was being rendered, nobody will ask for this result;
                # enqueuing it would fill the size-1 queue and block every
                # later put().
                if img and target > self.index:
                    self.prefetch_queue.put((target, img))  # Blocks if full
                last_attempted = target

            # Sleep briefly so the polling loop does not hog the CPU.
            time.sleep(0.1)

    def _take_prefetched(self):
        """Return the prefetched image for ``self.index``, or None.

        Entries queued for any other index are discarded so that a stale
        result can never permanently occupy the single queue slot.
        """
        while True:
            try:
                q_idx, q_img = self.prefetch_queue.get_nowait()
            except Empty:
                return None
            if q_idx == self.index:
                return q_img
            # Stale entry: drop it and look at what (if anything) follows.

    def load_current_image(self, use_prefetch=False):
        """Show the file at ``self.index``, or close the window when done.

        Args:
            use_prefetch: When True, use the prefetched image if one is
                available for this index; otherwise (or if none is
                available) the file is processed in a worker thread with
                the current shift.
        """
        if self.index >= len(self.files):
            print("All files processed.")
            self.root.destroy()
            return

        filename = self.files[self.index]
        self.is_processing = False

        img_found = self._take_prefetched() if use_prefetch else None

        if img_found:
            # Prefetched images are always rendered with shift 0.
            self.current_shift = 0
            print(f"Loaded {filename} from prefetch.")
            self.current_pil = img_found
            save_image(self.current_pil, filename)
            self.update_display(filename)
        else:
            # Not in queue (first load or queue mismatch), process manually
            self.trigger_processing(filename, self.current_shift)

    def trigger_processing(self, filename, shift):
        """Process ``filename`` with ``shift`` in a thread so the GUI stays live.

        Sets ``is_processing`` (which makes the key handlers ignore input),
        shows a wait message and starts polling for the worker's result.
        """
        self.is_processing = True
        self.label_info.configure(text=f"Processing {filename} (Shift {shift})... Please wait.", fg="red")

        def worker():
            img = process_single_pdf(filename, shift)
            self.manual_queue.put(img)

        Thread(target=worker, daemon=True).start()
        self.check_manual_queue(filename)

    def check_manual_queue(self, filename):
        """Poll for the manual worker's result; reschedules itself if none.

        On success the image is saved and displayed. On failure (None) the
        file is skipped and the next one is loaded with the shift reset.
        """
        try:
            img = self.manual_queue.get_nowait()
        except Empty:
            # Check again in 100ms
            self.root.after(100, lambda: self.check_manual_queue(filename))
            return

        self.current_pil = img
        # Clear the flag BEFORE possibly chaining to the next file: loading
        # it may start a new worker that sets the flag again, and that must
        # not be overwritten afterwards.
        self.is_processing = False

        if img:
            save_image(img, filename)
            self.update_display(filename)
        else:
            print(f"Failed to process {filename}, skipping.")
            self.index += 1
            # Same as on_next: a new file starts from the default window.
            self.current_shift = 0
            self.load_current_image(use_prefetch=True)

    def update_display(self, filename):
        """Show ``self.current_pil`` and refresh the status/help line."""
        if self.current_pil:
            tk_image = ImageTk.PhotoImage(self.current_pil)
            self.label_img.configure(image=tk_image)
            # Keep a reference on the widget so the image stays alive
            # after this method returns.
            self.label_img.image = tk_image
            self.label_info.configure(
                text=f"[{self.index+1}/{len(self.files)}] {filename} | Shift: {self.current_shift}px\n"
                     "Enter: Next | n: +50 | N: +100 | t: -50",
                fg="black"
            )

    def on_shift(self, amount):
        """Key handler: move the crop window by ``amount`` px and re-process."""
        if self.is_processing:
            return  # Ignore keys while processing
        self.current_shift += amount
        print(f"Applying shift: {self.current_shift}")
        self.trigger_processing(self.files[self.index], self.current_shift)

    def on_next(self, event):
        """Key handler: advance to the next file with the shift reset to 0."""
        if self.is_processing:
            return
        self.index += 1
        self.current_shift = 0
        self.load_current_image(use_prefetch=True)
|
|
|
# --- Entry Point ---
|
|
if __name__ == "__main__":
    # Only open the reviewer when the input resolved to at least one PDF.
    if files:
        app = ImageReviewer(files)
    else:
        print("No PDF files found.")
|
|
|