Small fixes ; Make annotating not threaded

2026-02-28 14:02:56 +01:00 · 2026-02-28 14:02:56 +01:00 · e610c80a69
parent be390cfbb1
commit e610c80a69
4 changed files with 56 additions and 35 deletions
--- a/annotating.py
+++ b/annotating.py
@ -124,6 +124,11 @@ def normalize_mathtext(text):
    text = text.replace("\\\\", "\\")
    text = text.replace("\\llbracket", "[\\![")
    text = text.replace("\\rrbracket", "]\\!]")
+    text = text.replace("\\R", "\\mathbb{R}")
+    text = text.replace("\\N", "\\mathbb{N}")
+    text = text.replace("\\Z", "\\mathbb{Z}")
+    text = text.replace("\\C", "\\mathbb{C}")
+    text = text.replace("\\Q", "\\mathbb{Q}")
    # Sometimes, Gemini doesn't escape enough. In the json, you should have \\f
    text = text.replace('\f', r'\f')
    text = re.sub('\u0010', "", text)
@ -214,8 +219,10 @@ def render_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_lines=N
    final_img.alpha_composite(img)
    return final_img

+import matplotlib.colors as mcolors
+
 def render_score_text(label, score, error, width_px, fontsize=18,
-                      bg_color=(255, 255, 255, 255)
+                      bg_color=(255, 255, 255, 255),
                      with_error=True):
    # 1. Calculate Color Gradient (0.0=DarkRed -> 4.0=Green)
    # Clamp score between 0 and 4
@ -303,7 +310,7 @@ def compose_label_image(base_img, label, result, hmin,
    header_elements = []

    img_score = render_score_text(label, score, error, base_img.width // 2,
-                                  fontsize=18, with_error)
+                                  fontsize=18, with_error=with_error)
    header_elements.append({"type": "score", "img": img_score, "data": result})

    # Global Feedbacks
@ -466,16 +473,22 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite):


 def process_correction(root_dir, data, all_labels, overwrite=False):
-    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
-        # Create a list of futures
-        futures = []
-        for student_id, labels in sorted(data.items()):
-            futures.append(
-                executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite)
-            )
+    # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+    #     # Create a list of futures
+    #     futures = []
+    #     for student_id, labels in sorted(data.items()):
+    #         futures.append(
+    #             executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite)
+    #         )

-        # Wait for all threads to complete
-        concurrent.futures.wait(futures)
+    #     # Wait for all threads to complete
+    #     concurrent.futures.wait(futures)
+
+    # Do not run this step in threads:
+    #  1. the calls to matplotlib would have to be protected
+    #  2. errors raised in the workers would be lost (futures were only waited on)
+    for student_id, labels in sorted(data.items()):
+        process_student(student_id, labels, root_dir, all_labels, overwrite)

 import argparse
 if __name__ == "__main__":
--- a/correction.py
+++ b/correction.py
@ -254,9 +254,9 @@ def process_single_task(task_tuple):
    d_data = {l[0]: (l[1], l[2], l[3]) for l in group_data}
    total_height = group_data[-1][2]
    use_flash = n >= 4 or total_height <= 500
-    if not use_flash and limit is not None:
+    if not use_flash:
        with pro_lock:
-            if pro_count < limit:
+            if limit is None or pro_count < limit:
                pro_count += 1
            else:
                # Limit reached, force switch to Flash
@ -268,9 +268,9 @@ def process_single_task(task_tuple):
    try:
        contents, config = generate_request(file_path, label)
        if use_flash:
-            print(f"Asking Flash Gemini: {label} {file_path}")
+            print(f"Asking Gemini Flash: {label} {group_name}")
        else:
-            print(f"Asking Gemini: {label} {file_path}")
+            print(f"Asking Gemini Pro  : {label} {group_name}")

        full_response_text = ""
        # Assuming client is thread-safe (usually is).
@ -286,18 +286,13 @@ def process_single_task(task_tuple):
        # Parse JSON
        json_data = json.loads(full_response_text)

-        if use_flash:
-            print(f"Gemini Flash answered for {file_path}")
-        else:
-            print(f"Gemini answered for {file_path}")
-
        # print("Debug : ",  json_data)
        # Ensure consistency of answer placements
        for p in json_data:
            pid = p["id"]
            res = p["result"]
            if res["error"] != "":
-                print("Error :", res["error"], "for Copie", pid, label, group_name)
+                print("\tError :", res["error"], "for Copie", pid, label, group_name)
            for f in res["feedback"]:
                b = f["box_2d"]
                if b:
--- a/gemini_for_labels.py
+++ b/gemini_for_labels.py
@ -180,33 +180,41 @@ def natural_key(text):

 for path_str in args.input_paths:
    input_arg = Path(path_str)
+    target_files = []

+    # 1. Determine which files to process
    if input_arg.is_file():
-        INPUT_DIR = input_arg.parent
+        target_files = [input_arg]
+    elif input_arg.is_dir():
+        target_files = list(input_arg.glob("Copie*.pdf"))
+        if not target_files:
+            print(f"Warning: No Copie*.pdf files found in {input_arg}")
+    else:
+        print(f"Error: {input_arg} is not a valid file or directory.")
+        continue
+
+    # 2. Run the logic for all collected files
+    for target_file in target_files:
+        INPUT_DIR = target_file.parent
        CUTLEFT_DIR = INPUT_DIR / 'Cutleft'

        # Matches stem_01.jpg, stem_02.jpg, etc.
-        found_files = sorted(list(CUTLEFT_DIR.glob(f"{input_arg.stem}_*.jpg")),
-                             key=natural_key)
+        found_files = sorted(
+            CUTLEFT_DIR.glob(f"{target_file.stem}_*.jpg"),
+            key=natural_key
+        )

        if found_files:
            image_files.extend(found_files)
        else:
-            print(f"Warning: No variants found for {input_arg.stem} in {CUTLEFT_DIR}")
-
-    elif input_arg.is_dir():
-        INPUT_DIR = input_arg
-        CUTLEFT_DIR = INPUT_DIR / 'Cutleft'
-        image_files.extend(sorted(list(CUTLEFT_DIR.glob("*.jpg")), key=natural_key))
-
-    else:
-        print(f"Error: {input_arg} is not a valid file or directory.")
+            print(f"Warning: No variants found for {target_file.stem} in {CUTLEFT_DIR}")

 labels_txt = (INPUT_DIR / "labels").read_text()
 valid_labels_set = set(line.strip() for line in labels_txt.splitlines() if line.strip())
 names_txt = (INPUT_DIR / "names").read_text()
 valid_names_set = set(line.strip() for line in names_txt.splitlines() if line.strip())
 valid_names_set.add("Unknown")
+valid_names_set.add("Continued")

 client = genai.Client(api_key=api_key)

@ -256,7 +264,7 @@ def process_copy_group(group_key, files):

        print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...")

-        for attempt in range(2)
+        for attempt in range(2):
            try:
                contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels)

--- a/reading_annotations.py
+++ b/reading_annotations.py
@ -16,7 +16,12 @@ def detect_checks_and_notes(output_dir):
        actions: List of dicts {type, label, ...} for checked boxes
        notes_img: RGBA image of manual notes (checks masked out)
    """
-    pdf_path = os.path.join(output_dir, "Concat_annotated.pdf")
+
+    names = ["Concat_annotated.pdf", "Concat_a.pdf"]
+    for name in names:
+        pdf_path = os.path.join(output_dir, name)
+        if os.path.exists(pdf_path):
+            break
    # ref_path = os.path.join(output_dir, "Reference.png")
    ref_path = os.path.join(output_dir, "Reference.jpg")
    json_path = os.path.join(output_dir, "checkboxes.json")