Using folders "Copies" and "Par label", hopefully

2026-05-17 11:25:36 +02:00 · 2026-05-17 11:25:36 +02:00 · c6c1a052e1
parent 7e7045293a
commit c6c1a052e1
12 changed files with 142 additions and 95 deletions
--- a/Readme.org
+++ b/Readme.org
@ -1,7 +1,7 @@
 #+title:  Script
 #+author: Sébastien Miquel
 #+date:   14-03-2026
-# Time-stamp: <14-05-26 08:55>
+# Time-stamp: <17-05-26 10:51>
 #+OPTIONS:

 * Méta
@ -101,13 +101,13 @@ export GEMINI_API_KEY=…

    Les key bindings ne sont pas adaptés à un clavier azerty… À changer…

-    Fix issues with =python page_splitter.py Interro14/Copie01.pdf=
+    Fix issues with =python page_splitter.py Interro14/Copies/Copie01.pdf=
 4. =python cutleft.py Interro=

    Découpe la partie gauche des copies, là où il devrait y avoir les
    labels des exercices/questions.

-    Rerun on a single file with =python cutleft.py Interro/Copie01.pdf=
+    Rerun on a single file with =python cutleft.py Interro/Copies/Copie01.pdf=

 ** Génération d'information sur l'énoncé

@ -136,7 +136,7 @@ Set proxy with ~export HTTPS_PROXY="http://10.0.0.1:3128"~
      + `|…` n'est pas arrêté verticalement par son type opposé.
      + `…|` est stoppé horizontalement par le `|…` le plus proche.
    Pour modifier une seule copie :
- =python plotting.py Interro/Copie01.pdf=
+ =python plotting.py Interro/Copies/Copie01.pdf=

    It also generates les =Copie01.json=, à partir des =Copie01_01.json=
    1. En cas de soucis, (par exemple les pages ne sont pas dans le bon ordre)
@ -147,6 +147,7 @@ Set proxy with ~export HTTPS_PROXY="http://10.0.0.1:3128"~
 3. =python splitting_int.py Interro=

    Découpe les copies suivant les exercices
+    Peut être appelé avec une seule copie.
 4. =python grouping.py Interro=

    Regroupe les mêmes questions de différentes copies en groupes de
@ -159,7 +160,7 @@ Set proxy with ~export HTTPS_PROXY="http://10.0.0.1:3128"~
 1. Il faut créer des persp, pour indication de comment corriger, et
    relancer =enonce_info.py=
 2. =python correction.py Interro --limit 240= OU
- =python correction.py Interro/Ex\ 2/Group_1.jpg= OU
+ =python correction.py Interro/Par\ label/Ex\ 2/Group_1.jpg= OU
 =python correction.py Interro --overwrite=
 =python correction.py Interro --pro-by-label= (needs `labels_for_pro`)

--- a/annotating.py
+++ b/annotating.py
@ -37,7 +37,7 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
            # Find coordinates
            coordinates = None
            height,width= None, None
-            label_dir = os.path.join(root_dir, label)
+            label_dir = Path(root_dir) / "Par label" / label

            # Search all json files in Dir/label
            json_files = glob.glob(os.path.join(label_dir, "*.json"))
@ -59,7 +59,7 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
                    break

            # Construct PDF path: Dir/Copie{id}/{label}.pdf
-            pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
+            pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"

            # Initialize dictionary structure for this ID if missing
            if student_id not in result_data:
@ -89,8 +89,7 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
                # On ajoute des dummies
                if labels_to_redo: # Si la liste est non vide
                    for lbl in labels_to_redo:
-                        pdf_path = os.path.join(root_dir,
-                                                f"Copie{sid}", f"{lbl}.pdf")
+                        pdf_path = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}.pdf"
                        if not Path(pdf_path).exists():
                            print("Debug : asked to refaire", sid, lbl, "but pdf absent")
                            continue
@ -107,8 +106,7 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
            else: # Ce student id n'a jamais été corrigé
                result_data[sid] = {}
                for lbl in labels_to_redo:
-                    pdf_path = os.path.join(root_dir,
-                                            f"Copie{sid}", f"{lbl}.pdf")
+                    pdf_path = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}.pdf"
                    if not pdf_path.exists():
                        print("Debug : asked to refaire", sid, lbl, "but pdf absent")
                        continue
@ -567,13 +565,13 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite):
    d_notes = dict.fromkeys(all_labels, "")
    label_images = []

+    # !! Trier par l'ordre des labels plutôt
    sorted_labels = sorted(list(labels_data.items()), key=natural_key)

    for label, content in sorted_labels:
        # 1. Find PDF path
        copie_folder = f"Copie{student_id}"
-        pdf_rel_path = os.path.join(copie_folder, f"{label}.pdf")
-        pdf_full_path = os.path.join(root_dir, pdf_rel_path)
+        pdf_full_path = Path(root_dir) / "Copies" / copie_folder / f"{label}.pdf"

        if not os.path.exists(pdf_full_path):
            print(f"File not found: {pdf_full_path}")
@ -629,13 +627,14 @@ def process_correction(root_dir, data, all_labels, overwrite=False):
    #     # Wait for all threads to complete
    #     concurrent.futures.wait(futures)

-    # Ne pas thread cette applications
+    # Ne pas thread cette application
    #  1. Il faut protéger les appels à matplotlib
    #  2. tu vas perdre les erreurs
    for student_id, labels in sorted(data.items()):
        process_student(student_id, labels, root_dir, all_labels, overwrite)

 import argparse
+import utils

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Annotate copies")
@ -644,7 +643,7 @@ if __name__ == "__main__":

    args = parser.parse_args()
    root_dir = args.root_dir
-    labels = list(filter(None, (Path(root_dir) / "labels").read_text().splitlines()))
+    labels = utils.read_all_labels(root_dir)
    results = make_dictionary(root_dir)
    # Results is : Copie id -> label -> {pdf_path, gemini_result, coordinates}
    # Coordinates are the real coordinates (hmin, hmax) of the image in the Group
--- a/correction.py
+++ b/correction.py
@ -38,14 +38,20 @@ for path_str in args.paths:
        # Handle individual file
        # Note: assumes structure InterroTest/Ex 2/Group_1.jpg to get parents[1]
        label = arg_path.parent.name
+        INPUT_DIR = arg_path.parent.parent.parent
+        COPIES_DIR = INPUT_DIR / "Copies"
+        GROUPS_DIR = INPUT_DIR / "Par label"
        tasks.append((str(arg_path), label))
        if label not in results:
            results[label] = []

    elif arg_path.is_dir():
+        INPUT_DIR = arg_path
+        COPIES_DIR = INPUT_DIR / "Copies"
+        GROUPS_DIR = INPUT_DIR / "Par label"
        # Handle directory (original behavior)
-        for sub in arg_path.iterdir():
-            if sub.is_dir() and sub.name.startswith("Ex"):
+        for sub in GROUPS_DIR.iterdir():
+            if sub.is_dir():
                label = sub.name
                if label not in results:
                    results[label] = []
@ -145,7 +151,7 @@ do not score or give feedback to any other question."""

 def make_prompt(full_label):
    def read_longest_prefix_file(subdir):
-        dir_path = Path(INPUT_DIR) / subdir
+        dir_path = INPUT_DIR / subdir
        matches = [f for f in dir_path.iterdir()
                   if f.is_file()
                   and full_label.startswith(f.name)
@ -167,7 +173,6 @@ from google.genai import types
 import base64
 import shlex
 import json
-from pathlib import Path
 import os
 import threading
 import concurrent.futures
@ -210,7 +215,7 @@ def flush_thread_log(tid=None):
    tid = tid or threading.current_thread().name
    with log_lock:
        if thread_logs.get(tid):
-            with open(Path(INPUT_DIR) / "correction_log", "a", encoding="utf-8") as f:
+            with open(INPUT_DIR / "correction_log", "a", encoding="utf-8") as f:
                f.write(f"--- Task Log [{tid}] ---\n")
                f.write("\n".join(thread_logs[tid]) + "\n\n")
            thread_logs[tid].clear()
@ -311,8 +316,8 @@ def generate_request(file, full_label):
    return (contents, generate_content_config)

 client = genai.Client(api_key=api_key)
-output_path = Path(INPUT_DIR) / "correction.json"
-progress_path = Path(INPUT_DIR) / "correction_progress.json"
+output_path = INPUT_DIR / "correction.json"
+progress_path = INPUT_DIR / "correction_progress.json"
 start_time = time.time()
 overwrite = args.overwrite
 limit = args.limit
@ -407,9 +412,9 @@ def get_single_image_bytes(pdf_path):
    return img_byte_arr.getvalue()

 def correct_boxes_with_gemini(pid, label, original_feedbacks,
-                              root_dir, yming, ymaxg, width_r, total_height):
+                              yming, ymaxg, width_r, total_height):
    """Requests corrected bounding boxes from Gemini Flash on the single image."""
-    pdf_path = Path(root_dir) / f"Copie{pid}" / f"{label}.pdf"
+    pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf"
    img_bytes = get_single_image_bytes(pdf_path)

    localized_feedbacks = [f for f in original_feedbacks if f["box_2d"]]
@ -473,9 +478,9 @@ it goes wrong, and the feedback is what went wrong.
 import shutil
 import grouping

-def get_next_group_idx(root_dir, label):
+def get_next_group_idx(label):
    """Finds the next available Group index for a given label."""
-    target_folder = Path(root_dir) / label
+    target_folder = GROUPS_DIR / label
    target_folder.mkdir(exist_ok=True)
    existing = list(target_folder.glob("Group_*.jpg"))
    if not existing: return 0
@ -489,7 +494,7 @@ def handle_label_errors(pid, label, res, pdf_path):
    error_type = res.get("error")

    all_labels = read_all_labels(INPUT_DIR)
-    labels_txt = (Path(INPUT_DIR) / "labels").read_text(encoding="utf-8", errors="replace")
+    labels_txt = (INPUT_DIR / "labels").read_text(encoding="utf-8", errors="replace")
    enonce = enonce_total(INPUT_DIR)

    if error_type == "wrong-label":
@ -523,7 +528,7 @@ Here is a list of all possible labels. You need to answer with one of these :
        if new_label == label:
            res["error"] = ""
            return []
-        new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{new_label}.pdf"
+        new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}.pdf"
        if new_pdf_path.exists():
            tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.")
            res["error"] = f"wrg-lbl:{new_label}?exists"
@ -533,12 +538,12 @@ Here is a list of all possible labels. You need to answer with one of these :
            shutil.move(str(pdf_path), str(new_pdf_path))
            # Since we moved the file, this Copie/label should not be taken
            # into account in the future, I think
-            idx = get_next_group_idx(INPUT_DIR, new_label)
+            idx = get_next_group_idx(new_label)
            height = grouping.get_pdf_height(str(new_pdf_path))
            grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)],
-                                INPUT_DIR)
+                                GROUPS_DIR)
            tprint(f"\t\tMaking {new_label} group {idx+1}")
-            new_tasks.append((str(Path(INPUT_DIR) / new_label / f"Group_{idx+1}.jpg"),
+            new_tasks.append((str(GROUPS_DIR / new_label / f"Group_{idx+1}.jpg"),
                              new_label, False))

    elif error_type == "additional-answer":
@ -580,15 +585,15 @@ Here is a list of all possible labels. You need to answer with a list one of the
                error += f"{add_label}??"
                keep_error = True
                continue
-            new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{add_label}.pdf"
+            new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{add_label}.pdf"
            if not new_pdf_path.exists():
                shutil.copy(str(pdf_path), str(new_pdf_path))
                tprint(f"\t\tCopying Copie{pid} : {label} -> {add_label}")
-                idx = get_next_group_idx(INPUT_DIR, add_label)
+                idx = get_next_group_idx(add_label)
                tprint(f"\t\tMaking {add_label} group {idx+1}")
                height = grouping.get_pdf_height(str(new_pdf_path))
-                grouping.create_jpg(add_label, idx, [(pid, str(new_pdf_path), height)], INPUT_DIR)
-                new_tasks.append((str(Path(INPUT_DIR) / add_label / f"Group_{idx+1}.jpg"),
+                grouping.create_jpg(add_label, idx, [(pid, str(new_pdf_path), height)], GROUPS_DIR)
+                new_tasks.append((str(GROUPS_DIR / add_label / f"Group_{idx+1}.jpg"),
                                  add_label, False))
                error += f"(->){add_label}"
                keep_error = True
@ -657,7 +662,7 @@ def process_single_task(task_tuple, precomputed_response=None):
                res = p["result"]
                yming, ymaxg, width_r = d_data[pid]

-                pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{label}.pdf"
+                pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf"
                if (not can_spawn_tasks) and res["error"] == "additional-answer":
                    tprint("\tSwallowing an additional-answer from a subsequent task.")
                    res["error"]= ""
@ -680,17 +685,22 @@ def process_single_task(task_tuple, precomputed_response=None):
                                  pid, label, group_name)
                            continue

-                        if (ymin < yming - 50 or
-                            ymax > ymaxg + 50 or
-                            xmax / 1000 > width_r):
+                        if (ymin < yming - 50 or ymax > ymaxg + 50 or xmax / 1000 > width_r):
                            needs_correction.append(i)
                            break
+                        if ymin < yming - 5:
+                            ymin = yming - 5
+                            b[0] = ymin * 1000 // total_height
+                        if ymax > ymaxg + 5:
+                            ymax = ymaxg + 5
+                            b[2] = ymax * 1000 // total_height
+

                if needs_correction:
                    tprint(f"\tBox anomalies detected for Copie {pid} {group_name}. \n\tRequesting isolated correction from Gemini Flash...")
                    try:
                        res["feedback"] = correct_boxes_with_gemini(
-                            pid, label, res["feedback"], INPUT_DIR,
+                            pid, label, res["feedback"],
                            yming, ymaxg, width_r, total_height)
                    except Exception as e:
                        tprint(f"\tCorrection failed for Copie {pid}, {group_name} : {e}\n\tRemoving the boxes")
@ -726,8 +736,8 @@ def process_single_task(task_tuple, precomputed_response=None):

 if __name__ == "__main__":
    if args.refaire:
-        refaire_path = Path(INPUT_DIR) / "refaire.json"
-        overwritten_path = Path(INPUT_DIR) / "overwritten_correction.json"
+        refaire_path = INPUT_DIR / "refaire.json"
+        overwritten_path = INPUT_DIR / "overwritten_correction.json"

        if refaire_path.exists():
            with open(refaire_path, "r", encoding="utf-8") as f:
@ -742,7 +752,7 @@ if __name__ == "__main__":

            for copie_name, labels in refaire_list:
                pid = copie_name.replace("Copie", "")
-                copie_dir = Path(INPUT_DIR) / copie_name
+                copie_dir = COPIES_DIR / copie_name

                # If list is empty, redo all labels available for this Copie
                if not labels:
@ -772,10 +782,10 @@ if __name__ == "__main__":
                    # 2. Make new group and add to tasks
                    pdf_path = copie_dir / f"{label}.pdf"
                    if pdf_path.exists():
-                        idx = get_next_group_idx(INPUT_DIR, label)
+                        idx = get_next_group_idx(label)
                        height = grouping.get_pdf_height(str(pdf_path))
-                        grouping.create_jpg(label, idx, [(pid, str(pdf_path), height)], INPUT_DIR)
-                        new_group_path = str(Path(INPUT_DIR) / label / f"Group_{idx+1}.jpg")
+                        grouping.create_jpg(label, idx, [(pid, str(pdf_path), height)], GROUPS_DIR)
+                        new_group_path = str(GROUPS_DIR / label / f"Group_{idx+1}.jpg")
                        tasks_to_process.append((new_group_path, label))

            if dirty_results:
@ -813,8 +823,8 @@ if __name__ == "__main__":
            tasks_to_process = [] # Run nothing live if just `--batch`

        if batch_tasks:
-            batch_flash_file = Path(INPUT_DIR) / "batch_requests_flash.jsonl"
-            batch_pro_file = Path(INPUT_DIR) / "batch_requests_pro.jsonl"
+            batch_flash_file = INPUT_DIR / "batch_requests_flash.jsonl"
+            batch_pro_file = INPUT_DIR / "batch_requests_pro.jsonl"

            count_flash = 0
            count_pro = 0
@ -873,7 +883,7 @@ if __name__ == "__main__":

    batched_responses = {}
    if args.deal_with_batched:
-        batch_results_path = Path(INPUT_DIR) / "batched_correction_result.jsonl"
+        batch_results_path = INPUT_DIR / "batched_correction_result.jsonl"
        if batch_results_path.exists():
            print(f"Loading batch results from {batch_results_path}...")
            with open(batch_results_path, "r", encoding="utf-8") as f:
--- a/cutleft.py
+++ b/cutleft.py
@ -20,17 +20,35 @@ if len(sys.argv) < 2:
 path_arg = sys.argv[1]
 files = []
 INPUT_DIR = ""
+COPIES_DIR = ""

 if os.path.isfile(path_arg) and path_arg.lower().endswith('.pdf'):
-    INPUT_DIR = os.path.dirname(path_arg)
+    COPIES_DIR = os.path.abspath(os.path.dirname(path_arg))
+    # If the file is inside a "Copies" folder, set INPUT_DIR to the parent
+    if os.path.basename(COPIES_DIR).lower() == 'copies':
+        INPUT_DIR = os.path.dirname(COPIES_DIR)
+    else:
+        INPUT_DIR = COPIES_DIR
    files = [os.path.basename(path_arg)]
 elif os.path.isdir(path_arg):
-    INPUT_DIR = path_arg
-    files = sorted([f for f in os.listdir(INPUT_DIR) if f.lower().endswith('.pdf') and
-                    "nonc" not in f.lower()])
+    # Support passing either the base dir or the Copies dir directly
+    abs_path = os.path.abspath(path_arg)
+    if os.path.basename(abs_path).lower() == 'copies':
+        COPIES_DIR = abs_path
+        INPUT_DIR = os.path.dirname(abs_path)
+    else:
+        INPUT_DIR = abs_path
+        COPIES_DIR = os.path.join(INPUT_DIR, 'Copies')
+
+    if os.path.exists(COPIES_DIR):
+        files = sorted([f for f in os.listdir(COPIES_DIR) if f.lower().endswith('.pdf') and
+                        "nonc" not in f.lower()])
+    else:
+        sys.exit(f"Error: Could not find 'Copies' directory inside {INPUT_DIR}")
 else:
    sys.exit("Error: Input must be a directory or a PDF file.")

+
 OUTPUT_DIR = os.path.join(INPUT_DIR, 'Cutleft')

 if not os.path.exists(OUTPUT_DIR):
@ -90,7 +108,7 @@ pdf_cache_lock = threading.Lock()

@lru_cache(maxsize=3)
 def _get_pdf_pages_cached(filename):
-    pdf_path = os.path.join(INPUT_DIR, filename)
+    pdf_path = os.path.join(COPIES_DIR, filename)
    return convert_from_path(pdf_path)

 def get_pdf_pages(filename):
--- a/gemini_for_labels.py
+++ b/gemini_for_labels.py
@ -250,7 +250,7 @@ def process_copy_group(group_key, files):
    for image_file in files:
        start_time = time.time()
        base_name = image_file.stem
-        output_json = INPUT_DIR / f"{base_name}.json"
+        output_json = INPUT_DIR / "Copies" / f"{base_name}.json"

        # Check existing
        if output_json.exists() and not args.overwrite:
--- a/grouping.py
+++ b/grouping.py
@ -3,6 +3,7 @@ import json
 import re
 import sys
 import shutil
+from pathlib import Path
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
 from PIL import Image, ImageDraw, ImageFont
@ -213,9 +214,9 @@ def create_jpg(identifier, group_index, group, root_dir):
 from utils import natural_key


-def process_identifier(identifier, files_info, root_dir):
+def process_identifier(identifier, files_info, output_dir):
    # Clear output directory if it exists
-    target_folder = os.path.join(root_dir, identifier)
+    target_folder = os.path.join(output_dir, identifier)
    if os.path.exists(target_folder):
        shutil.rmtree(target_folder)
    os.makedirs(target_folder, exist_ok=True)
@ -224,27 +225,31 @@ def process_identifier(identifier, files_info, root_dir):
    file_groups = group_files(files_info)

    for idx, group in enumerate(file_groups):
-        create_jpg(identifier, idx, group, root_dir)
+        create_jpg(identifier, idx, group, output_dir)

 def main():
    if len(sys.argv) < 2:
        print("Usage: python app.py <Path_to_Dir>")
        sys.exit(1)

-    root_dir = sys.argv[1]
+    root_dir = Path(sys.argv[1])
+
+    copies_dir = root_dir / "Copies"
+    par_label_dir = root_dir / "Par label"

    print("Scanning files...")
-    data = collect_files(root_dir)
+    data = collect_files(copies_dir)

    print(f"Found {len(data)} identifiers. Processing...")

    # Sort identifiers naturally
    sorted_identifiers = sorted(data.keys(), key=natural_key)

-    # Process using 4 threads
-    with ThreadPoolExecutor(max_workers=4) as executor:
+    # Process using 8 threads
+    with ThreadPoolExecutor(max_workers=8) as executor:
        for identifier in sorted_identifiers:
-            executor.submit(process_identifier, identifier, data[identifier], root_dir)
+            executor.submit(process_identifier, identifier, data[identifier],
+                            par_label_dir)

    print("Done.")

--- a/page_splitter.py
+++ b/page_splitter.py
@ -63,6 +63,9 @@ class PDFPreviewer:
            # Check for existing original in backup and restore if found
            dir_name = os.path.dirname(os.path.abspath(path))
            file_name = os.path.basename(path)
+            if os.path.basename(dir_name) == "Copies":
+                dir_name = os.path.dirname(dir_name)
+                path = os.path.join(dir_name, file_name)
            backup_path = os.path.join(dir_name, "Copies Originales", file_name)

            if os.path.exists(backup_path):
@ -313,9 +316,12 @@ class PDFPreviewer:
            file_name = os.path.basename(abs_path)

            backup_dir = os.path.join(dir_name, "Copies Originales")
+            copies_dir = os.path.join(dir_name, "Copies")
            os.makedirs(backup_dir, exist_ok=True)
+            os.makedirs(copies_dir, exist_ok=True)

            backup_path = os.path.join(backup_dir, file_name)
+            copies_path = os.path.join(copies_dir, file_name)

            # Remove backup if it already exists (overwrite)
            if os.path.exists(backup_path):
@ -325,7 +331,7 @@ class PDFPreviewer:
            shutil.move(self.pdf_path, backup_path)

            # Move the temp output file to replace the original
-            shutil.move(self.final_file, self.pdf_path)
+            shutil.move(self.final_file, copies_path)

            # print(f"Original moved to {backup_path}, new file saved at {self.pdf_path}")

--- a/plotting.py
+++ b/plotting.py
@ -101,7 +101,7 @@ def worker_thread(base_dir, files_to_process, all_labels):
    previous_copie = None
    last_label_index = None
    for img_path in files_to_process:
-        json_path = base_dir / f"{img_path.stem}.json"
+        json_path = base_dir / "Copies" / f"{img_path.stem}.json"
        copie_part = int(img_path.stem[-2:])
        copie = img_path.stem[:-3]
        if copie != previous_copie:
@ -222,7 +222,7 @@ class ImageViewer:
    def save_current_batch(self):
        """Writes the accumulated data to the main JSON file."""
        if self.active_copie_name and self.accumulated_results:
-            main_json_path = self.base_dir / f"{self.active_copie_name}.json"
+            main_json_path = self.base_dir / "Copies" / f"{self.active_copie_name}.json"
            print(f"Writing aggregated result to {main_json_path}")
            with open(main_json_path, 'w') as f:
                json.dump(self.accumulated_results, f)
@ -327,7 +327,7 @@ class ImageViewer:
    def on_open_ori_pdf(self, event):
        if self.is_viewing and self.current_json_path:
            new_filename = self.current_json_path.stem.split('_')[0] + ".pdf"
-            pdf_path = self.current_json_path.parent / "Copies Originales" / new_filename
+            pdf_path = self.base_dir / "Copies Originales" / new_filename
            print(f"Opening {pdf_path}")
            subprocess.Popen(['xdg-open', str(pdf_path.absolute())])

@ -363,20 +363,21 @@ if __name__ == "__main__":
    files_to_process = []

    if input_path.is_file():
+        # Correctly identify base_dir if we are in 'Copies' or 'Cutleft'
+        if input_path.parent.name in ["Copies", "Cutleft"]:
+            base_dir = input_path.parent.parent
+        else:
+            base_dir = input_path.parent

-        base_dir = input_path.parent
        stem = input_path.stem
-        img_path = base_dir / "Cutleft" / f"{stem}.jpg"
-        files_to_process = [img_path]
-        if not img_path.exists() and input_path.parent.name == "Cutleft":
-             base_dir = input_path.parent.parent
-             img_path = input_path
-             files_to_process = [img_path]
-        if not img_path.exists():
-            # We're given Copie01.pdf, look for parts
-            cutleft_dir = base_dir / "Cutleft"
-            files_to_process = sorted(list(cutleft_dir.glob(f"{img_path.stem}_*.jpg")),
-                             key=natural_key)
+        cutleft_dir = base_dir / "Cutleft"
+        img_path = cutleft_dir / f"{stem}.jpg"
+
+        if img_path.exists():
+            files_to_process = [img_path]
+        else:
+            # We're given something like Copie01.pdf, look for its split image parts
+            files_to_process = sorted(list(cutleft_dir.glob(f"{stem}_*.jpg")), key=natural_key)
    else:
        base_dir = input_path
        cutleft_dir = base_dir / "Cutleft"
--- a/reading_annotations.py
+++ b/reading_annotations.py
@ -3,6 +3,7 @@ import os
 import json
 import numpy as np
 import shutil
+from pathlib import Path
 from PIL import Image, ImageChops, ImageFilter
 Image.MAX_IMAGE_PIXELS = None
 from pdf2image import convert_from_path
@ -99,7 +100,7 @@ def detect_checks_and_notes(output_dir):
        density = changed_pixels / roi.size

        if density > DENSITY_THRESHOLD:
-            print("A checked box !", density, b)
+            # print("A checked box !", density, b)
            actions.append(box)
            # It's checked, so we mask this area out for manual notes
            # Expand mask slightly to catch sloppy ticks
@ -254,7 +255,7 @@ def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_laye

        # B. Regenerate Label Image
        # We always regenerate to ensure Concat.jpg is consistent with any modifications
-        pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
+        pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
        if not os.path.exists(pdf_path): continue

        (base_img, _, _) = annotating.make_base_image(pdf_path)
@ -328,7 +329,6 @@ def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_laye
        full_img.save(os.path.join(output_dir, "Concat_F.jpg"))
        print(f"  Saved regenerated Concat_F.jpg")

-from pathlib import Path
 from utils import read_all_labels
 if __name__ == "__main__":
    if len(sys.argv) < 2:
--- a/reading_grouped_annotations.py
+++ b/reading_grouped_annotations.py
@ -85,7 +85,8 @@ def save_paginated_pdf(image_groups, output_path):
    if pages:
        pages[0].save(output_path, "PDF", resolution=100.0, save_all=True, append_images=pages[1:])

-def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, label_notes, all_labels):
+def apply_actions_and_regenerate_grouped(root_dir, data, student_id,
+                                         actions, label_notes, all_labels):
    """
    Modifies data based on actions, pastes label-specific note crops,
    regenerates label images for consistency, saves dirty ones,
@ -161,7 +162,7 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, la
        result = content['result']
        d_notes[label] = str(result.get('score', 0))

-        pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
+        pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
        if not os.path.exists(pdf_path): continue

        (base_img, _, _) = annotating.make_base_image(pdf_path)
@ -204,13 +205,15 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, la
        concat_list.append(final_img)

        perfect_no_comment = True
-        if float(d_notes[label]) != 4.0:
+        if float(d_notes[label]) < 4.0:
            perfect_no_comment = False
        else:
-            if len(result.get('feedback', [])) != 0:
-                perfect_no_comment = False
+            lfb = result.get('feedback', [])
+            for e in lfb:
+                if "to_delete" not in e or not e["to_delete"]:
+                    perfect_no_comment = False

-        if not perfect_no_comment:
+        if not perfect_no_comment or has_notes:
            extras = get_extra_pdfs_as_images(root_dir, label, annotating)
            extras.append(final_img)
            concat_list_F.append(extras)
@ -333,7 +336,8 @@ if __name__ == "__main__":
                if hmax > hmin:
                    crop = notes_img.crop((0, hmin, notes_img.width, hmax))
                    if has_significant_notes(crop):
-                        notes_by_student[sid][lbl] = {'img': crop, 'old_header_h': img_info.get("header_height", 0)}
+                        notes_by_student[sid][lbl] = {'img': crop,
+                                                      'old_header_h': img_info.get("header_height", 0)}


    def process_refaire_entry(sid, r_labels):
@ -364,7 +368,9 @@ if __name__ == "__main__":
                        if hmax > hmin:
                            crop = b_notes_img.crop((0, hmin, b_notes_img.width, hmax))
                            if has_significant_notes(crop):
-                                notes_by_student[sid][lbl] = {'img': crop, 'old_header_h': img_info.get("header_height", 0)}
+                                notes_by_student[sid][lbl] = \
+                                    {'img': crop,
+                                     'old_header_h': img_info.get("header_height", 0)}



--- a/splitting_int.py
+++ b/splitting_int.py
@ -37,7 +37,7 @@ def decode_json(pdf_file):
 def split_an_interro(base_dir, input_pdf, coords_list):
    doc = fitz.open(input_pdf)

-    output_dir = base_dir / input_pdf.stem
+    output_dir = base_dir / "Copies" / input_pdf.stem
    generated_files = set()
    parts_by_label = defaultdict(list)

@ -197,10 +197,13 @@ if __name__ == "__main__":

    if input_arg.is_file():
        base_dir = input_arg.parent
+        if base_dir.name == "Copies":
+            base_dir = base_dir.parent
        pdf_files = [input_arg]
    elif input_arg.is_dir():
        base_dir = input_arg
-        pdf_files = sorted(base_dir.glob("*.pdf"))
+        copies_dir = base_dir / "Copies"
+        pdf_files = sorted(copies_dir.glob("*.pdf"))
    else:
        print(f"Error: {input_arg} is not a valid file or directory.")
        sys.exit(1)
--- a/utils.py
+++ b/utils.py
@ -5,9 +5,7 @@ def natural_key(text):
    return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))]

 def read_all_labels(base_dir):
-    return sorted(list(filter(None,
-                              (Path(base_dir) / "labels").read_text().splitlines())),
-                  key = natural_key)
+    return list(filter(None, (Path(base_dir) / "labels").read_text().splitlines()))

 def enonce_total(base_dir):
    text_dir = Path(base_dir) / 'Text'