Caching cutlef.py

master
Sébastien Miquel 2026-03-17 14:26:32 +01:00
parent ebc7a9aadc
commit 38ea0069af
1 changed file with 9 additions and 2 deletions

View File

@ -1,4 +1,5 @@
import sys import sys
from functools import lru_cache
import os import os
import time import time
import json # Added for schema output import json # Added for schema output
@ -82,14 +83,20 @@ def stitch_images(image_list):
return combined return combined
@lru_cache(maxsize=3)
def get_pdf_pages(filename):
    """Return the rendered pages of the PDF named ``filename`` under INPUT_DIR.

    The result of ``convert_from_path`` is memoized per ``filename`` by
    ``lru_cache`` (at most 3 entries, least-recently-used evicted first),
    so repeated requests for the same file skip the conversion step.

    Because the cache hands back the very same object on every hit,
    callers should treat the returned value as read-only.

    NOTE(review): ``convert_from_path`` is defined/imported outside this
    view (presumably pdf2image, yielding a list of page images) -- confirm.
    """
    # Joining happens inside the cached function, so the cache key is the
    # bare filename rather than the full path.
    return convert_from_path(os.path.join(INPUT_DIR, filename))
def process_single_pdf(filename, shift_offset=0, max_per_file=5): def process_single_pdf(filename, shift_offset=0, max_per_file=5):
""" """
Converts PDF to stitched images. Converts PDF to stitched images.
Returns a tuple: (preview_image_resized, list_of_split_images, schema_dict) Returns a tuple: (preview_image_resized, list_of_split_images, schema_dict)
""" """
pdf_path = os.path.join(INPUT_DIR, filename) # pdf_path = os.path.join(INPUT_DIR, filename)
try: try:
pages = convert_from_path(pdf_path) pages = get_pdf_pages(filename)
cropped_images = [] cropped_images = []
for img in pages: for img in pages: