From 375305439aeadb7c32de32a145eaa3fe54fea5d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Miquel?= <sebastien.miquel@posteo.eu>
Date: Wed, 25 Feb 2026 21:16:12 +0100
Subject: [PATCH] Faster annotating: process students on 2 threads

---
 annotating.py | 124 ++++++++++++++++++++++++++++----------------------
 grouping.py   |   2 +-
 2 files changed, 70 insertions(+), 56 deletions(-)

diff --git a/annotating.py b/annotating.py
index 5eb1038..ff20930 100644
--- a/annotating.py
+++ b/annotating.py
@@ -385,74 +385,88 @@ def compose_label_image(base_img, label, result, hmin,
 def natural_key(text):
     return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))]
 
+import concurrent.futures
 
-def process_correction(root_dir, data, all_labels, overwrite=False):
-    for student_id, labels in data.items():
+def process_student(student_id, labels_data, root_dir, all_labels, overwrite):
+    """Write one student's per-label JPEGs, score.json and Concat.jpg under <root_dir>/Anot."""
 
-        # Prepare output directory: Dir/Anot_CopieID
-        output_dir = os.path.join(root_dir, "Anot", f"Copie{student_id}")
+    # Prepare output directory: <root_dir>/Anot/Copie<student_id>
+    output_dir = os.path.join(root_dir, "Anot", f"Copie{student_id}")
 
-        # Check if already processed (Concat.jpg exists)
-        concat_path = os.path.join(output_dir, "Concat.jpg")
-        if os.path.exists(concat_path) and not overwrite:
-            print(f"Skipping Copie {student_id} (Concat.jpg exists)")
+    # Already processed (Concat.jpg exists): skip unless overwrite is requested
+    concat_path = os.path.join(output_dir, "Concat.jpg")
+    if os.path.exists(concat_path) and not overwrite:
+        print(f"Skipping Copie {student_id} (Concat.jpg exists)")
+        return  # existing output is left untouched
+
+    print("Processing :", student_id)
+
+    # Re-processing: start from an empty output folder
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir)
+
+    d_notes = dict.fromkeys(all_labels, "")  # labels that get no score below stay ""
+    label_images = []
+
+    sorted_labels = sorted(list(labels_data.items()), key=natural_key)  # NOTE(review): key receives the whole (label, content) tuple
+
+    for label, content in sorted_labels:
+        # 1. Locate the source PDF: <root_dir>/Copie<student_id>/<label>.pdf
+        copie_folder = f"Copie{student_id}"
+        pdf_rel_path = os.path.join(copie_folder, f"{label}.pdf")
+        pdf_full_path = os.path.join(root_dir, pdf_rel_path)
+
+        if not os.path.exists(pdf_full_path):
+            print(f"File not found: {pdf_full_path}")
             continue
 
-        print("Processing :", student_id)
+        # 2. Convert PDF to image (a failure is reported and skips this label only)
+        try:
+            (base_img, _, _) = make_base_image(pdf_full_path)
+        except Exception as e:
+            print(f"Error converting {pdf_full_path}: {e}")
+            continue
 
-        # Clean folder if re-processing
-        if os.path.exists(output_dir):
-            shutil.rmtree(output_dir)
-        os.makedirs(output_dir)
+        result = content.get('result', {})
+        coordinates = content.get('coordinates', (0, 0))  # (hmin, hmax)
+        score = result.get('score', 0)
+        d_notes[label] = str(score)
 
-        d_notes = dict.fromkeys(all_labels,"")
-        label_images = []
+        final_img, _ = compose_label_image(base_img, label, result, coordinates[0])  # coordinates[0] is hmin
 
-        labels = sorted(list(labels.items()), key=natural_key)
+        # 3. Save the composed image and keep it for the concatenation below
+        save_path = os.path.join(output_dir, f"{label}.jpg")
+        final_img.save(save_path)
+        label_images.append(final_img)
 
-        for label, content in labels:
-            # 1. Find PDF path
-            copie_folder = f"Copie{student_id}"
-            pdf_rel_path = os.path.join(copie_folder, f"{label}.pdf")
-            pdf_full_path = os.path.join(root_dir, pdf_rel_path)
+    # Save scores (written even when no label image was produced)
+    with open(os.path.join(output_dir, "score.json"), "w") as f:
+        json.dump(d_notes, f, indent=4)
 
-            if not os.path.exists(pdf_full_path):
-                print(f"File not found: {pdf_full_path}")
-                continue
+    # Concatenate: stack the label images vertically, left-aligned; no Concat.jpg if there are none
+    if label_images:
+        max_w = max(i.width for i in label_images)
+        total_h = sum(i.height for i in label_images)
+        canvas = Image.new('RGB', (max_w, total_h))
+        cy = 0
+        for img in label_images:
+            canvas.paste(img, (0, cy))
+            cy += img.height
+        canvas.save(concat_path)
 
-            # 2. Convert PDF to Image
-            try:
-                (base_img, _, _) = make_base_image(pdf_full_path)
-            except Exception as e:
-                print(f"Error converting {pdf_full_path}: {e}")
-                continue
 
-            result = content.get('result', {})
-            coordinates = content.get('coordinates', (0, 0)) # (hmin, hmax)
-            score = result.get('score', 0)
-            d_notes[label] = str(score)
+def process_correction(root_dir, data, all_labels, overwrite=False):
+    """Run process_student for every entry of `data` on two threads; worker errors are re-raised."""
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        futures = [
+            executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite)
+            for student_id, labels in sorted(data.items())
+        ]
 
-            final_img, _ = compose_label_image(base_img, label, result, coordinates[0])
-
-            # 7. Save Image
-            save_path = os.path.join(output_dir, f"{label}.jpg")
-            final_img.save(save_path)
-            label_images.append(final_img)
-
-        # Save scores
-        with open(os.path.join(output_dir, "score.json"), "w") as f:
-            json.dump(d_notes, f, indent=4)
-
-        # Concatenate
-        if label_images:
-            max_w = max(i.width for i in label_images)
-            total_h = sum(i.height for i in label_images)
-            canvas = Image.new('RGB', (max_w, total_h))
-            cy = 0
-            for img in label_images:
-                canvas.paste(img, (0, cy))
-                cy += img.height
-            canvas.save(concat_path)
+        # result() re-raises a worker's exception; wait() alone dropped failures silently.
+        for future in futures:
+            future.result()
 
 import argparse
 if __name__ == "__main__":
diff --git a/grouping.py b/grouping.py
index c99d310..29a1175 100644
--- a/grouping.py
+++ b/grouping.py
@@ -247,6 +247,6 @@ def main():
             executor.submit(process_identifier, identifier, data[identifier], root_dir)
 
     print("Done.")
-    
+
 if __name__ == "__main__":
     main()