diff --git a/cutleft.py b/cutleft.py
index 1eb7393..7aa07af 100644
--- a/cutleft.py
+++ b/cutleft.py
@@ -1,4 +1,5 @@
 import sys
+from functools import lru_cache
 import os
 import time
 import json # Added for schema output
@@ -82,14 +83,25 @@ def stitch_images(image_list):
     return combined
 
 
+@lru_cache(maxsize=3)
+def get_pdf_pages(filename):
+    """Return the rendered pages of a PDF in INPUT_DIR, cached per filename.
+
+    Up to three results stay in memory so that revisiting a recently used
+    file skips the heavy rendering step. The cache is keyed on the filename
+    only, and every hit returns the same cached object, so callers must
+    treat the result as read-only.
+    """
+    pdf_path = os.path.join(INPUT_DIR, filename)
+    return convert_from_path(pdf_path)
+
+
 def process_single_pdf(filename, shift_offset=0, max_per_file=5):
     """
     Converts PDF to stitched images.
     Returns a tuple: (preview_image_resized, list_of_split_images, schema_dict)
     """
-    pdf_path = os.path.join(INPUT_DIR, filename)
     try:
-        pages = convert_from_path(pdf_path)
+        pages = get_pdf_pages(filename)
         cropped_images = []
         for img in pages: