Faster grouping, in order, with threads
parent
b13ed34acf
commit
f3dc5c452a
71
grouping.py
71
grouping.py
|
|
@ -4,6 +4,7 @@ import re
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
from pdf2image import convert_from_path, pdfinfo_from_path
|
from pdf2image import convert_from_path, pdfinfo_from_path
|
||||||
|
|
||||||
|
|
@ -105,40 +106,6 @@ def group_files(file_list):
|
||||||
# Return list of lists (strip the metadata)
|
# Return list of lists (strip the metadata)
|
||||||
return [g['items'] for g in groups]
|
return [g['items'] for g in groups]
|
||||||
|
|
||||||
# def group_files(file_list):
|
|
||||||
# """Groups files based on constraints."""
|
|
||||||
# sorted_files = sorted(file_list, key=lambda x: x[0])
|
|
||||||
|
|
||||||
# groups = []
|
|
||||||
# current_group = []
|
|
||||||
# current_height = 0
|
|
||||||
|
|
||||||
# for item in sorted_files:
|
|
||||||
# dd, path, height = item
|
|
||||||
|
|
||||||
# # Calculate added height (image + separator + approx text space)
|
|
||||||
# # We add separator height only if it's not the first image
|
|
||||||
# added_overhead = SEPARATOR_HEIGHT + 30 if current_group else 0
|
|
||||||
|
|
||||||
# # Check conditions
|
|
||||||
# if (len(current_group) >= MAX_GROUP_COUNT or
|
|
||||||
# (current_height + height + added_overhead) > MAX_GROUP_HEIGHT):
|
|
||||||
|
|
||||||
# # Push current group and start new
|
|
||||||
# if current_group:
|
|
||||||
# groups.append(current_group)
|
|
||||||
# current_group = []
|
|
||||||
# current_height = 0
|
|
||||||
# added_overhead = 0 # Reset for first file of new group
|
|
||||||
|
|
||||||
# current_group.append(item)
|
|
||||||
# current_height += height + added_overhead
|
|
||||||
|
|
||||||
# if current_group:
|
|
||||||
# groups.append(current_group)
|
|
||||||
|
|
||||||
# return groups
|
|
||||||
|
|
||||||
def stitch_pdf_pages(images_list):
|
def stitch_pdf_pages(images_list):
|
||||||
"""Vertically concatenates a list of PIL images with no separator."""
|
"""Vertically concatenates a list of PIL images with no separator."""
|
||||||
if not images_list:
|
if not images_list:
|
||||||
|
|
@ -242,6 +209,23 @@ def create_jpg(identifier, group_index, group, root_dir):
|
||||||
|
|
||||||
print(f"Saved {output_path} with {len(group)} ({os.path.getsize(output_path)/1024/1024:.2f} MB)")
|
print(f"Saved {output_path} with {len(group)} ({os.path.getsize(output_path)/1024/1024:.2f} MB)")
|
||||||
|
|
||||||
|
def natural_key(text):
    """Return a sort key that orders embedded numbers by value ("a2" < "a10").

    Args:
        text: Value to build the key for; it is converted with ``str()`` first.

    Returns:
        A list that alternates lower-cased text chunks (even positions) and
        ints (odd positions), usable as the ``key=`` of ``sorted``.
    """
    # Splitting on a capturing group keeps the digit runs in the result, and
    # they always land on the odd indices. Deciding by position rather than
    # by ``str.isdigit()`` avoids calling ``int()`` on characters such as
    # "²", which ``isdigit()`` accepts but the regex digit class and
    # ``int()`` do not.
    chunks = re.split(r'(\d+)', str(text))
    return [
        int(chunk) if index % 2 else chunk.lower()
        for index, chunk in enumerate(chunks)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def process_identifier(identifier, files_info, root_dir):
    """Recreate the output folder for one identifier and render its groups.

    Args:
        identifier: Name of the sub-folder under ``root_dir`` to (re)build.
        files_info: Entries handed to ``group_files``; per the original
            comment, a list of ``(dd, path, height)`` tuples.
        root_dir: Directory under which the identifier folder is created.
    """
    out_dir = os.path.join(root_dir, identifier)

    # Wipe any previous output so stale files never survive a re-run.
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir, exist_ok=True)

    # One create_jpg call per group, numbered from zero in grouping order.
    for position, batch in enumerate(group_files(files_info)):
        create_jpg(identifier, position, batch, root_dir)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print("Usage: python app.py <Path_to_Dir>")
|
print("Usage: python app.py <Path_to_Dir>")
|
||||||
|
|
@ -254,20 +238,15 @@ def main():
|
||||||
|
|
||||||
print(f"Found {len(data)} identifiers. Processing...")
|
print(f"Found {len(data)} identifiers. Processing...")
|
||||||
|
|
||||||
for identifier, files_info in data.items():
|
# Sort identifiers naturally
|
||||||
# Clear output directory if it exists
|
sorted_identifiers = sorted(data.keys(), key=natural_key)
|
||||||
target_folder = os.path.join(root_dir, identifier)
|
|
||||||
if os.path.exists(target_folder):
|
|
||||||
shutil.rmtree(target_folder)
|
|
||||||
os.makedirs(target_folder, exist_ok=True)
|
|
||||||
|
|
||||||
# files_info is list of (dd, path, height)
|
# Process using 4 threads
|
||||||
file_groups = group_files(files_info)
|
with ThreadPoolExecutor(max_workers=4) as executor:
|
||||||
|
for identifier in sorted_identifiers:
|
||||||
for idx, group in enumerate(file_groups):
|
executor.submit(process_identifier, identifier, data[identifier], root_dir)
|
||||||
create_jpg(identifier, idx, group, root_dir)
|
|
||||||
|
|
||||||
print("Done.")
|
print("Done.")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue