diff --git a/grouping.py b/grouping.py index 18300c6..c99d310 100644 --- a/grouping.py +++ b/grouping.py @@ -4,6 +4,7 @@ import re import sys import shutil from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor from PIL import Image, ImageDraw, ImageFont from pdf2image import convert_from_path, pdfinfo_from_path @@ -105,40 +106,6 @@ def group_files(file_list): # Return list of lists (strip the metadata) return [g['items'] for g in groups] -# def group_files(file_list): -# """Groups files based on constraints.""" -# sorted_files = sorted(file_list, key=lambda x: x[0]) - -# groups = [] -# current_group = [] -# current_height = 0 - -# for item in sorted_files: -# dd, path, height = item - -# # Calculate added height (image + separator + approx text space) -# # We add separator height only if it's not the first image -# added_overhead = SEPARATOR_HEIGHT + 30 if current_group else 0 - -# # Check conditions -# if (len(current_group) >= MAX_GROUP_COUNT or -# (current_height + height + added_overhead) > MAX_GROUP_HEIGHT): - -# # Push current group and start new -# if current_group: -# groups.append(current_group) -# current_group = [] -# current_height = 0 -# added_overhead = 0 # Reset for first file of new group - -# current_group.append(item) -# current_height += height + added_overhead - -# if current_group: -# groups.append(current_group) - -# return groups - def stitch_pdf_pages(images_list): """Vertically concatenates a list of PIL images with no separator.""" if not images_list: @@ -242,6 +209,23 @@ def create_jpg(identifier, group_index, group, root_dir): print(f"Saved {output_path} with {len(group)} ({os.path.getsize(output_path)/1024/1024:.2f} MB)") +def natural_key(text): + return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))] + + +def process_identifier(identifier, files_info, root_dir): + # Clear output directory if it exists + target_folder = os.path.join(root_dir, identifier) + if os.path.exists(target_folder): + shutil.rmtree(target_folder) + os.makedirs(target_folder, exist_ok=True) + + # files_info is list of (dd, path, height) + file_groups = group_files(files_info) + + for idx, group in enumerate(file_groups): + create_jpg(identifier, idx, group, root_dir) + def main(): if len(sys.argv) < 2: print("Usage: python app.py ") @@ -254,20 +238,15 @@ def main(): print(f"Found {len(data)} identifiers. Processing...") - for identifier, files_info in data.items(): - # Clear output directory if it exists - target_folder = os.path.join(root_dir, identifier) - if os.path.exists(target_folder): - shutil.rmtree(target_folder) - os.makedirs(target_folder, exist_ok=True) + # Sort identifiers naturally + sorted_identifiers = sorted(data.keys(), key=natural_key) - # files_info is list of (dd, path, height) - file_groups = group_files(files_info) - - for idx, group in enumerate(file_groups): - create_jpg(identifier, idx, group, root_dir) + # Process using 4 threads + with ThreadPoolExecutor(max_workers=4) as executor: + for identifier in sorted_identifiers: + executor.submit(process_identifier, identifier, data[identifier], root_dir) print("Done.") - + if __name__ == "__main__": main()