Faster grouping, in order, with threads

master
Sébastien Miquel 2026-02-25 21:07:28 +01:00
parent b13ed34acf
commit f3dc5c452a
1 changed file with 25 additions and 46 deletions

View File

@ -4,6 +4,7 @@ import re
import sys
import shutil
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from PIL import Image, ImageDraw, ImageFont
from pdf2image import convert_from_path, pdfinfo_from_path
@ -105,40 +106,6 @@ def group_files(file_list):
# Return list of lists (strip the metadata)
return [g['items'] for g in groups]
# def group_files(file_list):
# """Groups files based on constraints."""
# sorted_files = sorted(file_list, key=lambda x: x[0])
# groups = []
# current_group = []
# current_height = 0
# for item in sorted_files:
# dd, path, height = item
# # Calculate added height (image + separator + approx text space)
# # We add separator height only if it's not the first image
# added_overhead = SEPARATOR_HEIGHT + 30 if current_group else 0
# # Check conditions
# if (len(current_group) >= MAX_GROUP_COUNT or
# (current_height + height + added_overhead) > MAX_GROUP_HEIGHT):
# # Push current group and start new
# if current_group:
# groups.append(current_group)
# current_group = []
# current_height = 0
# added_overhead = 0 # Reset for first file of new group
# current_group.append(item)
# current_height += height + added_overhead
# if current_group:
# groups.append(current_group)
# return groups
def stitch_pdf_pages(images_list):
"""Vertically concatenates a list of PIL images with no separator."""
if not images_list:
@ -242,19 +209,11 @@ def create_jpg(identifier, group_index, group, root_dir):
print(f"Saved {output_path} with {len(group)} ({os.path.getsize(output_path)/1024/1024:.2f} MB)")
def main():
if len(sys.argv) < 2:
print("Usage: python app.py <Path_to_Dir>")
sys.exit(1)
def natural_key(text):
    """Return a sort key that orders embedded numbers by numeric value.

    The input is converted with ``str()`` and split into alternating
    non-digit and digit runs. Digit runs become ``int`` values and all
    other runs are lower-cased, so ``"a2"`` sorts before ``"a10"`` and
    letter case is ignored.

    Args:
        text: Value to build a key for; any object accepted by ``str()``.

    Returns:
        A list alternating ``str`` and ``int`` parts, always starting with
        a (possibly empty) ``str``, so keys compare position by position
        without mixing types.
    """
    # The capturing group makes re.split keep the digit runs in the result.
    # str.isdecimal() matches the same characters as the regex ``\d``;
    # str.isdigit() would also accept characters such as "²" or "①", which
    # are never split out by ``\d`` and which int() cannot parse.
    return [
        int(part) if part.isdecimal() else part.lower()
        for part in re.split(r'(\d+)', str(text))
    ]
root_dir = sys.argv[1]
print("Scanning files...")
data = collect_files(root_dir)
print(f"Found {len(data)} identifiers. Processing...")
for identifier, files_info in data.items():
def process_identifier(identifier, files_info, root_dir):
# Clear output directory if it exists
target_folder = os.path.join(root_dir, identifier)
if os.path.exists(target_folder):
@ -267,6 +226,26 @@ def main():
for idx, group in enumerate(file_groups):
create_jpg(identifier, idx, group, root_dir)
def main():
    """Command-line entry point: process every identifier under a directory.

    Reads the root directory from ``sys.argv[1]``, collects the files with
    ``collect_files`` and runs ``process_identifier`` for each identifier
    on a pool of 4 worker threads. Work is submitted in natural sort order
    of the identifiers; completion order is up to the thread pool.

    Exits with status 1 when no directory argument is given or when at
    least one identifier failed to process.
    """
    if len(sys.argv) < 2:
        print("Usage: python app.py <Path_to_Dir>")
        sys.exit(1)

    root_dir = sys.argv[1]

    print("Scanning files...")
    # NOTE(review): data is used as a mapping of identifier -> files info;
    # confirm against collect_files.
    data = collect_files(root_dir)
    print(f"Found {len(data)} identifiers. Processing...")

    # Sort identifiers naturally
    sorted_identifiers = sorted(data, key=natural_key)

    failed = []
    # Process using 4 threads
    with ThreadPoolExecutor(max_workers=4) as executor:
        pending = [
            (
                identifier,
                executor.submit(
                    process_identifier, identifier, data[identifier], root_dir
                ),
            )
            for identifier in sorted_identifiers
        ]
        # A discarded Future swallows any exception raised by its task, so
        # each result is retrieved explicitly to surface failures.
        for identifier, future in pending:
            try:
                future.result()
            except Exception as exc:
                print(f"Failed to process {identifier}: {exc}", file=sys.stderr)
                failed.append(identifier)

    if failed:
        print(f"Done with {len(failed)} failure(s).", file=sys.stderr)
        sys.exit(1)

    print("Done.")
if __name__ == "__main__":