Caching cutlef.py

master
Sébastien Miquel 2026-03-17 14:26:32 +01:00
parent ebc7a9aadc
commit 38ea0069af
1 changed files with 9 additions and 2 deletions

View File

@ -1,4 +1,5 @@
import sys
from functools import lru_cache
import os
import time
import json # Added for schema output
@ -82,14 +83,20 @@ def stitch_images(image_list):
return combined
@lru_cache(maxsize=3)
def get_pdf_pages(filename):
    """Render the PDF named *filename* from INPUT_DIR, memoizing the result.

    The rendering is delegated to ``convert_from_path``; its return value
    is passed back unchanged (presumably one image per page -- confirm
    against the converter's documentation).

    Results are kept in an LRU cache keyed on ``filename`` with room for
    three entries, so asking again for a recently rendered file skips the
    expensive conversion. Repeat calls hand back the very same cached
    object, so callers should treat it as read-only.
    """
    return convert_from_path(os.path.join(INPUT_DIR, filename))
def process_single_pdf(filename, shift_offset=0, max_per_file=5):
"""
Converts PDF to stitched images.
Returns a tuple: (preview_image_resized, list_of_split_images, schema_dict)
"""
pdf_path = os.path.join(INPUT_DIR, filename)
# pdf_path = os.path.join(INPUT_DIR, filename)
try:
pages = convert_from_path(pdf_path)
pages = get_pdf_pages(filename)
cropped_images = []
for img in pages: