From 375305439aeadb7c32de32a145eaa3fe54fea5d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Miquel?= Date: Wed, 25 Feb 2026 21:16:12 +0100 Subject: [PATCH] Faster annotating : 2 threads --- annotating.py | 124 ++++++++++++++++++++++++++++---------------------- grouping.py | 2 +- 2 files changed, 70 insertions(+), 56 deletions(-) diff --git a/annotating.py b/annotating.py index 5eb1038..ff20930 100644 --- a/annotating.py +++ b/annotating.py @@ -385,74 +385,88 @@ def compose_label_image(base_img, label, result, hmin, def natural_key(text): return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))] +import concurrent.futures -def process_correction(root_dir, data, all_labels, overwrite=False): - for student_id, labels in data.items(): +def process_student(student_id, labels_data, root_dir, all_labels, overwrite): + """Helper function to process a single student.""" - # Prepare output directory: Dir/Anot_CopieID - output_dir = os.path.join(root_dir, "Anot", f"Copie{student_id}") + # Prepare output directory: Dir/Anot_CopieID + output_dir = os.path.join(root_dir, "Anot", f"Copie{student_id}") - # Check if already processed (Concat.jpg exists) - concat_path = os.path.join(output_dir, "Concat.jpg") - if os.path.exists(concat_path) and not overwrite: - print(f"Skipping Copie {student_id} (Concat.jpg exists)") + # Check if already processed (Concat.jpg exists) + concat_path = os.path.join(output_dir, "Concat.jpg") + if os.path.exists(concat_path) and not overwrite: + print(f"Skipping Copie {student_id} (Concat.jpg exists)") + return + + print("Processing :", student_id) + + # Clean folder if re-processing + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + os.makedirs(output_dir) + + d_notes = dict.fromkeys(all_labels, "") + label_images = [] + + sorted_labels = sorted(list(labels_data.items()), key=natural_key) + + for label, content in sorted_labels: + # 1. Find PDF path + copie_folder = f"Copie{student_id}" + pdf_rel_path = os.path.join(copie_folder, f"{label}.pdf") + pdf_full_path = os.path.join(root_dir, pdf_rel_path) + + if not os.path.exists(pdf_full_path): + print(f"File not found: {pdf_full_path}") continue - print("Processing :", student_id) + # 2. Convert PDF to Image + try: + (base_img, _, _) = make_base_image(pdf_full_path) + except Exception as e: + print(f"Error converting {pdf_full_path}: {e}") + continue - # Clean folder if re-processing - if os.path.exists(output_dir): - shutil.rmtree(output_dir) - os.makedirs(output_dir) + result = content.get('result', {}) + coordinates = content.get('coordinates', (0, 0)) # (hmin, hmax) + score = result.get('score', 0) + d_notes[label] = str(score) - d_notes = dict.fromkeys(all_labels,"") - label_images = [] + final_img, _ = compose_label_image(base_img, label, result, coordinates[0]) - labels = sorted(list(labels.items()), key=natural_key) + # 7. Save Image + save_path = os.path.join(output_dir, f"{label}.jpg") + final_img.save(save_path) + label_images.append(final_img) - for label, content in labels: - # 1. Find PDF path - copie_folder = f"Copie{student_id}" - pdf_rel_path = os.path.join(copie_folder, f"{label}.pdf") - pdf_full_path = os.path.join(root_dir, pdf_rel_path) + # Save scores + with open(os.path.join(output_dir, "score.json"), "w") as f: + json.dump(d_notes, f, indent=4) - if not os.path.exists(pdf_full_path): - print(f"File not found: {pdf_full_path}") - continue + # Concatenate + if label_images: + max_w = max(i.width for i in label_images) + total_h = sum(i.height for i in label_images) + canvas = Image.new('RGB', (max_w, total_h)) + cy = 0 + for img in label_images: + canvas.paste(img, (0, cy)) + cy += img.height + canvas.save(concat_path) - # 2. Convert PDF to Image - try: - (base_img, _, _) = make_base_image(pdf_full_path) - except Exception as e: - print(f"Error converting {pdf_full_path}: {e}") - continue - result = content.get('result', {}) - coordinates = content.get('coordinates', (0, 0)) # (hmin, hmax) - score = result.get('score', 0) - d_notes[label] = str(score) +def process_correction(root_dir, data, all_labels, overwrite=False): + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + # Create a list of futures + futures = [] + for student_id, labels in sorted(data.items()): + futures.append( + executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite) + ) - final_img, _ = compose_label_image(base_img, label, result, coordinates[0]) - - # 7. Save Image - save_path = os.path.join(output_dir, f"{label}.jpg") - final_img.save(save_path) - label_images.append(final_img) - - # Save scores - with open(os.path.join(output_dir, "score.json"), "w") as f: - json.dump(d_notes, f, indent=4) - - # Concatenate - if label_images: - max_w = max(i.width for i in label_images) - total_h = sum(i.height for i in label_images) - canvas = Image.new('RGB', (max_w, total_h)) - cy = 0 - for img in label_images: - canvas.paste(img, (0, cy)) - cy += img.height - canvas.save(concat_path) + # Wait for all threads to complete + concurrent.futures.wait(futures) import argparse if __name__ == "__main__": diff --git a/grouping.py b/grouping.py index c99d310..29a1175 100644 --- a/grouping.py +++ b/grouping.py @@ -247,6 +247,6 @@ def main(): executor.submit(process_identifier, identifier, data[identifier], root_dir) print("Done.") - + if __name__ == "__main__": main()