From 38ea0069af6e3f05dff1184d45e07b3d6cf8d357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Miquel?= Date: Tue, 17 Mar 2026 14:26:32 +0100 Subject: [PATCH] Caching cutleft.py --- cutleft.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cutleft.py b/cutleft.py index 1eb7393..7aa07af 100644 --- a/cutleft.py +++ b/cutleft.py @@ -1,4 +1,5 @@ import sys +from functools import lru_cache import os import time import json # Added for schema output @@ -82,14 +83,20 @@ def stitch_images(image_list): return combined +@lru_cache(maxsize=3) +def get_pdf_pages(filename): + """Caches the heavy PDF rendering step for the current and next files.""" + pdf_path = os.path.join(INPUT_DIR, filename) + return convert_from_path(pdf_path) + def process_single_pdf(filename, shift_offset=0, max_per_file=5): """ Converts PDF to stitched images. Returns a tuple: (preview_image_resized, list_of_split_images, schema_dict) """ - pdf_path = os.path.join(INPUT_DIR, filename) + # pdf_path = os.path.join(INPUT_DIR, filename) try: - pages = convert_from_path(pdf_path) + pages = get_pdf_pages(filename) cropped_images = [] for img in pages: