Miscs

2026-06-06 22:09:00 +02:00 · 2026-06-06 22:09:00 +02:00 · 27c0dae20e
parent a80187ba80
commit 27c0dae20e
9 changed files with 315 additions and 69 deletions
--- a/Readme.org
+++ b/Readme.org
@ -1,7 +1,7 @@
 #+title:  Script
 #+author: Sébastien Miquel
 #+date:   14-03-2026
-# Time-stamp: <06-06-26 10:10>
+# Time-stamp: <06-06-26 16:23>
 #+OPTIONS:
 * Méta
@ -85,7 +85,17 @@ export GEMINI_API_KEY=…
 6. Suivre les étapes plus bas.
 * Étapes et Script
-** Prétraitement
+** Prétraitement de l'énoncé
 - Dans le dossier de l'évaluation, mettre :
   + `enonce.pdf`
   + `enonce.tex`
   + `correction.tex`.
 - `python gemini_for_enonce.py Interro`
   Se charge de créer des dossiers `Text` et `Sol` avec
   + Le fichier `Text` contient 
 ** Prétraitement des copies
 1. =./rotate_all.sh Interro=
    (facultatif)
@ -248,7 +258,7 @@ OU
 4. On peut faire des changements manuels aux =score.json= ici, puis
    - `python reading_annotations.py --update-score Interro`
    - `python reading_grouped_annotations.py --update-score Interro`
-    pour mettre à jour les scores dans les images. 
+    pour mettre à jour les scores dans les images.
 4. (gestion perso)
    + =gestion_classe ne= pour créer l'interro puis
    + =gestion_classe we= (set barème here)
--- a/correction.py
+++ b/correction.py
@ -186,7 +186,7 @@ def call_gemini_with_retries(model_id, contents, config,
                tprint(f"\tGemini Pro minute limit hit. Waiting {wait_time:.1f}s...")
                time.sleep(wait_time)
                continue # Retry same model
-            
+
            # Immediately fallback to Flash without waiting if it's a Pro quota error
            if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id:
                tprint(f"\tGemini Pro quota hit ({e}). \n\n\tFalling back to Flash permanently...")
@ -269,8 +269,9 @@ def handle_label_errors(pid, label, res, pdf_path):
        new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}_new.pdf"
        if base_new_pdf_path.exists() or new_pdf_path.exists():
-            tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.")
+            tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists. Delaying.")
-            res["error"] = f"wrg-lbl:{new_label}?exists"
+            # res["error"] = f"wrg-lbl:{new_label}?exists"
            res["error"] = f"wrg-lbl:{new_label}?delayed"
        else:
            res["error"] = f"wrg-lbl-moved-to:{new_label}"
            tprint(f"\t\tCopie{pid} : moving wrong {label} to {new_label}.")
@ -323,8 +324,9 @@ def handle_label_errors(pid, label, res, pdf_path):
                keep_error = True
            else:
                keep_error = True
-                error += f"(xx){add_label}"
+                # error += f"(xx){add_label}"
-                tprint(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}")
+                error += f"(delayed){add_label}"
                tprint(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}. Delaying.")
        if not keep_error:
            res["error"] = ""
        else:
@ -487,6 +489,80 @@ def process_single_task(task_tuple, precomputed_response=None):
    finally:
        flush_thread_log()
 def resolve_delayed_moves():
    """Retry the moves/copies that were postponed because the target already existed.

    Walks every entry of the module-level ``results`` mapping (under
    ``io_lock``) and looks for error strings carrying a ``?delayed`` or
    ``(delayed)`` marker:

    * ``wrg-lbl:<new_label>?delayed`` -- the answer's PDF is copied to
      ``<new_label>_new.pdf`` and the source is renamed to ``<label>_old.pdf``.
    * ``al:...(delayed)<add_label>...`` -- the answer's PDF is copied to
      ``<add_label>_new.pdf`` for each delayed label; the source is left in place.

    A delayed entry is only resolved when neither ``<target>.pdf`` nor
    ``<target>_new.pdf`` exists in the copy's directory. For each resolved
    entry a group image is created and a follow-up task is queued. When at
    least one entry was resolved, ``results`` is written back to
    ``output_path``.

    Returns:
        list: one ``(jpg_path, label, False)`` tuple per resolved entry
        (the meaning of the trailing ``False`` is defined by the task
        consumer, not visible here -- TODO confirm).
    """
    # Local import, hoisted out of the per-entry loop where it used to run.
    import re

    delayed_pattern = re.compile(r'\(delayed\)([^?()]+)')
    new_tasks = []
    with io_lock:
        for label, batches in results.items():
            for batch in batches:
                for p in batch:
                    err = p.get("result", {}).get("error", "")
                    if not err or ("?delayed" not in err and "(delayed)" not in err):
                        continue
                    pid = p["id"]
                    copy_dir = COPIES_DIR / f"Copie{pid}"
                    # Locate the source PDF: plain name first, then the
                    # "_new" and "_old" variants.
                    pdf_path = copy_dir / f"{label}.pdf"
                    if not pdf_path.exists():
                        if pdf_path.with_name(f"{label}_new.pdf").exists():
                            pdf_path = pdf_path.with_name(f"{label}_new.pdf")
                        elif pdf_path.with_name(f"{label}_old.pdf").exists():
                            pdf_path = pdf_path.with_name(f"{label}_old.pdf")
                        else:
                            # Without a source file shutil.copy would raise
                            # while io_lock is held; leave the entry delayed.
                            tprint(f"Cannot resolve delayed entry: no source PDF for Copie{pid} {label}")
                            continue
                    # 1. Wrong-label resolution
                    if err.startswith("wrg-lbl:") and "?delayed" in err:
                        new_label = err.split(":")[1].split("?")[0]
                        base_new_pdf_path = copy_dir / f"{new_label}.pdf"
                        new_pdf_path = copy_dir / f"{new_label}_new.pdf"
                        # Only proceed if the slot is free (the previous
                        # occupant has been moved to "_old").
                        if not base_new_pdf_path.exists() and not new_pdf_path.exists():
                            tprint(f"Resolving delayed move: Copie{pid} {label} -> {new_label}")
                            shutil.copy(str(pdf_path), str(new_pdf_path))
                            old_pdf_path = pdf_path.with_name(f"{label}_old.pdf")
                            if pdf_path != old_pdf_path:
                                shutil.move(str(pdf_path), str(old_pdf_path))
                            # Update the record only once the files are in place.
                            p["result"]["error"] = f"wrg-lbl-moved-to:{new_label}"
                            # The original author flags this suffix as essential
                            # so that the entry is ignored afterwards.
                            p["result"]["suffixe"] = "_old"
                            idx = get_next_group_idx(new_label)
                            height = grouping.get_pdf_height(str(new_pdf_path))
                            grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)], GROUPS_DIR)
                            new_tasks.append((str(GROUPS_DIR / new_label / f"Group_{idx+1}.jpg"), new_label, False))
                    # 2. Additional-answer resolution
                    elif err.startswith("al:") and "(delayed)" in err:
                        new_err = err
                        resolved_any = False
                        for add_label in delayed_pattern.findall(err):
                            base_add_pdf_path = copy_dir / f"{add_label}.pdf"
                            add_pdf_path = copy_dir / f"{add_label}_new.pdf"
                            if not base_add_pdf_path.exists() and not add_pdf_path.exists():
                                tprint(f"Resolving delayed additional-answer: Copie{pid} {label} -> {add_label}")
                                shutil.copy(str(pdf_path), str(add_pdf_path))
                                # Swap the delayed marker for the "(->)" marker
                                # once the copy exists.
                                new_err = new_err.replace(f"(delayed){add_label}", f"(->){add_label}")
                                resolved_any = True
                                idx = get_next_group_idx(add_label)
                                height = grouping.get_pdf_height(str(add_pdf_path))
                                grouping.create_jpg(add_label, idx, [(pid, str(add_pdf_path), height)], GROUPS_DIR)
                                new_tasks.append((str(GROUPS_DIR / add_label / f"Group_{idx+1}.jpg"), add_label, False))
                        if resolved_any:
                            p["result"]["error"] = new_err
        if new_tasks:
            # Persist the updated error strings (delayed markers removed).
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(results, f, indent=2)
    return new_tasks
 if __name__ == "__main__":
    if args.refaire:
        refaire_path = INPUT_DIR / "refaire.json"
@ -666,24 +742,37 @@ if __name__ == "__main__":
        else:
            print(f"Warning: Batch results file {batch_results_path} not found.", file=sys.stderr)
-    print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...")
+    made_progress = True
-    with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor:
+    while tasks_to_process or made_progress:
-        futures = {}
+        if tasks_to_process:
-        for task in tasks_to_process:
+            print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...")
-            file_path = task[0]
+            with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor:
-            precomp = batched_responses.get(file_path)
+                futures = {}
-            futures[executor.submit(process_single_task, task, precomp)] = task
+                for task in tasks_to_process:
                    file_path = task[0]
                    precomp = batched_responses.get(file_path)
                    futures[executor.submit(process_single_task, task, precomp)] = task
-        # Process tasks as they complete, allowing dynamic task addition
+                for future in concurrent.futures.as_completed(futures):
-        for future in concurrent.futures.as_completed(futures):
+                    try:
-            try:
+                        new_generated_tasks = future.result()
-                new_generated_tasks = future.result()
+                        if new_generated_tasks:
-                if new_generated_tasks:
+                            for new_task in new_generated_tasks:
-                    for new_task in new_generated_tasks:
+                                futures[executor.submit(process_single_task, new_task)] = new_task
-                        # New tasks from wrong-label/additional-answer will fallback to live API
+                    except Exception as e:
-                        futures[executor.submit(process_single_task, new_task)] = new_task
+                        print(f"Exception during task execution: {e}", file=sys.stderr)
-            except Exception as e:
+
-                print(f"Exception during task execution: {e}", file=sys.stderr)
+            tasks_to_process = [] # Vider la liste une fois traitée
        # Après avoir traité toutes les tâches actuelles (live ou batched),
        # on tente de débloquer les mouvements qui étaient en attente
        delayed_tasks = resolve_delayed_moves()
        if delayed_tasks:
            print(f"Resolved {len(delayed_tasks)} delayed moves! Running executor for new tasks...")
            tasks_to_process.extend(delayed_tasks)
            made_progress = True
        else:
            made_progress = False
    end_time = time.time()
    print("Time elapsed : ", end_time - start_time)
--- a/gemini_for_enonce.py
+++ b/gemini_for_enonce.py
@ -1,4 +1,6 @@
 import shlex
 import os
 import subprocess
 import sys
 import argparse
 from pathlib import Path
@ -7,7 +9,9 @@ from typing import List
 from google import genai
 from google.genai import types
-MODEL_ID = "gemini-3-flash-preview"
+# Bug : l'output est limité à 8k token…
 # MODEL_ID = "gemini-3-flash-preview"
 MODEL_ID = "gemini-3.1-flash-lite"
 api_key = os.environ.get("GEMINI_API_KEY")
 class QuestionItem(BaseModel):
@ -84,14 +88,25 @@ def process_exam(folder_path: str):
        response_json_schema=ExamExtraction.model_json_schema(),
    )
-    print("Sending request to Gemini...")
+    cache_file = folder / "gemini_response.json"
    response = client.models.generate_content(
        model=MODEL_ID,
        contents=contents,
        config=config
    )
-    extracted_data = ExamExtraction.model_validate_json(response.text)
+    if cache_file.is_file():
        print("Loading cached response from gemini_response.json...")
        response_text = cache_file.read_text(encoding="utf-8")
    else:
        print("Sending request to Gemini...")
        response = client.models.generate_content(
            model=MODEL_ID,
            contents=contents,
            config=config
        )
        response_text = response.text
        print("Saving response to cache...")
        cache_file.write_text(response_text, encoding="utf-8")
    # Validate from the text variable (cached or fresh)
    extracted_data = ExamExtraction.model_validate_json(response_text)
    # 2. Setup output directories
    text_dir = folder / "Text"
@ -101,28 +116,80 @@ def process_exam(folder_path: str):
    labels_file = folder / "labels"
-    print("Writing files...")
+    # Step 1: Write initial labels
    print("Writing initial labels file...")
    with open(labels_file, "w", encoding="utf-8") as flabels:
        for q in extracted_data.questions:
-            # Sanitize label for filesystem (prevent directory traversal if label contains '/')
+            flabels.write(f"{q.label}\n")
            safe_label = q.label.replace("/", "_")
-            flabels.write(f"{safe_label}\n")
+    # Step 2: Open labels file for user editing
    print("Opening labels file for editing...")
    editor = os.environ.get("EDITOR")
    try:
        if editor:
            subprocess.run(shlex.split(editor) + [str(labels_file)])
        else:
        # Fallbacks if $EDITOR is not set
            if sys.platform.startswith("linux"):
                subprocess.Popen(["xdg-open", str(labels_file)])
            elif sys.platform == "darwin":
                subprocess.Popen(["open", str(labels_file)])
            else:
                os.startfile(str(labels_file))
-            # Fix double-escaped newlines
+        # xdg-open/open usually do not block, so we wait for user confirmation
-            q_content = q.question_content.replace("\\n", "\n")
+        input("Press ENTER here once you have saved and closed the labels file...")
-            s_content = q.solution_content.replace("\\n", "\n")
+    except Exception:
-            
+        print("Error running editor, using labels as given.")
-            # Write Text/label
+    # Step 3 & 4: Read the edited file back and create a mapping
-            with open(text_dir / safe_label, "w", encoding="utf-8") as f:
+    with open(labels_file, "r", encoding="utf-8") as flabels:
-                f.write(f"{q.label}\n{q.question_content}")
+        edited_lines = [line.strip() for line in flabels if line.strip()]
-            # Write Sol/label
+    mapping = []
-            with open(sol_dir / safe_label, "w", encoding="utf-8") as f:
+    final_labels = []
-                f.write(f"{q.label}\n{q.solution_content}")
+    orig_idx = 0
-    print(f"Success! Processed {len(extracted_data.questions)} questions.")
+    for line in edited_lines:
        if line.startswith("+"):
            new_label = line[1:].lstrip()
            final_labels.append(new_label)
            # New label, no source content
            mapping.append((new_label, None))
        else:
            new_label = line
            final_labels.append(new_label)
            # Map to initial order, advancing index only for non-'+' items
            q_item = extracted_data.questions[orig_idx] if orig_idx < len(extracted_data.questions) else None
            mapping.append((new_label, q_item))
            orig_idx += 1
    # Rewrite the labels file cleanly (removing '+' prefixes)
    with open(labels_file, "w", encoding="utf-8") as flabels:
        for lbl in final_labels:
            flabels.write(f"{lbl}\n")
    # Step 5: Write the final question and solution files
    print("Writing question and solution files...")
    for new_label, q_item in mapping:
        safe_label = new_label.replace("/", "_")
        if q_item:
            q_content = q_item.question_content.replace("\\n", "\n")
            s_content = q_item.solution_content.replace("\\n", "\n")
        else:
            q_content = ""
            s_content = ""
        # Write Text/label
        with open(text_dir / safe_label, "w", encoding="utf-8") as f:
            f.write(f"{new_label}\n{q_content}")
        # Write Sol/label
        with open(sol_dir / safe_label, "w", encoding="utf-8") as f:
            f.write(f"{new_label}\n{s_content}")
    print(f"Success! Processed {len(mapping)} labels.")
 if __name__ == "__main__":
    if not api_key:
--- a/gemini_for_labels.py
+++ b/gemini_for_labels.py
@ -61,6 +61,8 @@ be missing.
 ##labels##
 ##wrong_labels##
 Here's a list of the names of the students, pick the one that matches
 the best or `\"Unknown\"` if you cannot read the name
@ -116,6 +118,8 @@ be missing.
 ##labels##
 ##wrong_labels##
 Since this copy isn't the first part of a sequence, simply set the
 name to `\"Continued\"`."""
@ -128,7 +132,7 @@ class AnnotationData(BaseModel):
    list: List[BoxItem] = Field(description="List of bounding box items")
-def generate_request(file, labels, names, context_labels):
+def generate_request(file, labels, names, context_labels, wrong_labels):
    """Generates request for Gemini with context."""
    image_path = Path(file)
@ -142,6 +146,11 @@ def generate_request(file, labels, names, context_labels):
    else:
        text = my_prompt2.replace("##labels##", labels)\
                         .replace("##prev_context##", context_str)
    if wrong_labels:
        text= text.replace("##wrong_labels##\n\n", f"On a previous request, you answered with the following wrong labels : {wrong_labels}. These are wrong, since they do not exactly match any of the labels in the previous list.")
    else:
        text = text.replace("##wrong_labels##\n\n", "")
    contents = [
        types.Content(
@ -271,12 +280,14 @@ def process_copy_group(group_key, files):
        print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...")
        attempt = -1
        wrong_labels = []
        while True:
            attempt += 1
            if attempt > 0:
                time.sleep(10 * attempt)
            try:
-                contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels)
+                contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels,
                                                    wrong_labels)
                response = client.models.generate_content(
                    model=MODEL_ID,
@ -289,6 +300,7 @@ def process_copy_group(group_key, files):
                name = annota.name
                if unknown:
                    print(f"Error: {image_file.name} contained unknown labels: {unknown}")
                    wrong_labels.extend(unknown)
                    print("Retrying request...")
                    continue  # Retry immediately
--- a/page_splitter.py
+++ b/page_splitter.py
@ -15,7 +15,8 @@ from pypdf import PdfReader, PdfWriter
 CM_TO_POINTS = (1 / 2.54) * 72
 def list_pdf_files(directory):
-    return list(reversed(sorted(glob.glob(os.path.join(directory, "*.pdf")))))
+    l = list(reversed(sorted(glob.glob(os.path.join(directory, "*.pdf")))))
    return [u for u in l if "enonce" not in u]
 class PDFPreviewer:
@ -84,7 +85,10 @@ class PDFPreviewer:
        self.num = 0
        self.global_rotation = 0 # Rotation appliquée à tous les fichiers
        self.history = []
-        self.setup_next_file()
+        if not self.setup_next_file():
            print(f"Aucun fichier PDF valide trouvé dans : {path}")
            master.destroy()
            return
        self._resize_job = None  # For debouncing resize events
@ -462,30 +466,72 @@ class PDFPreviewer:
        ri = 0
        i = 0
        while i < len(ps):
-            # Si c'est une copie double
+            psk = ps[i]['keep']
-            if (ps[i]['keep'] == "both" or ps[i]['keep'] == "right") \
+
-               and i < len(ps)-1 and (ps[i+1]['keep'] != "right"):
+            # Si c'est une copie double (on s'assure qu'on a bien 2 pages consécutives modifiables)
-                shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
+            if psk in ["both", "right", "left", "none"] and i < len(ps)-1 and ps[i+1]['keep'] in ["both", "right", "left", "none"]:
-                ri += 1
+
-                if ps[i+1]['keep'] != "none":
+                # 1. Page de garde (Extérieur Droit)
                if ps[i]['keep'] in ["both", "right"]:
                    shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
                    ri += 1
                # 2. Intérieur Gauche
                if ps[i+1]['keep'] in ["both", "left"]:
                    shutil.copy2(self.split_filename_left(i+1), self.reorder_filename(ri))
                    ri += 1
-                    if ps[i+1]['keep'] != "left":
+
-                        shutil.copy2(self.split_filename_right(i+1), self.reorder_filename(ri))
+                # 3. Intérieur Droit
-                        ri += 1
+                if ps[i+1]['keep'] in ["both", "right"]:
-                        if ps[i]['keep'] == "both":
+                    shutil.copy2(self.split_filename_right(i+1), self.reorder_filename(ri))
-                            shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
+                    ri += 1
-                            ri += 1
+
-                i += 2
+                # 4. Dos de la copie (Extérieur Gauche)
-            else:
+                if ps[i]['keep'] in ["both", "left"]:
                psk = ps[i]['keep']
                if psk == "left" or psk == "both" or psk == "as_is":
                    shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
                    ri += 1
-                if psk == "right" or psk == "both":
+
                i += 2
            else:
                # Si c'est une page simple (ou as_is)
                if psk in ["left", "both", "as_is"]:
                    shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
                    ri += 1
                if psk in ["right", "both"]:
                    shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
                    ri += 1
                i += 1
    # def reorder_pdfs(self):
    #     """Reordonne les pages, si ce sont des copies doubles."""
    #     self.clean_up_dir(self.reorder_dir)
    #     ps = self.page_settings
    #     ri = 0
    #     i = 0
    #     while i < len(ps):
    #         # Si c'est une copie double
    #         if (ps[i]['keep'] == "both" or ps[i]['keep'] == "right") \
    #            and i < len(ps)-1 and (ps[i+1]['keep'] != "right"):
    #             shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
    #             ri += 1
    #             if ps[i+1]['keep'] != "none":
    #                 shutil.copy2(self.split_filename_left(i+1), self.reorder_filename(ri))
    #                 ri += 1
    #                 if ps[i+1]['keep'] != "left":
    #                     shutil.copy2(self.split_filename_right(i+1), self.reorder_filename(ri))
    #                     ri += 1
    #                     if ps[i]['keep'] == "both":
    #                         shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
    #                         ri += 1
    #             i += 2
    #         else:
    #             psk = ps[i]['keep']
    #             if psk == "left" or psk == "both" or psk == "as_is":
    #                 shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
    #                 ri += 1
    #             if psk == "right" or psk == "both":
    #                 shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
    #                 ri += 1
    #             i += 1
    def concate_files(self):
        writer = PdfWriter()
--- a/plotting.py
+++ b/plotting.py
@ -157,6 +157,14 @@ class ImageViewer:
        self.root = root
        self.root.resizable(False, False) # If you resize, coordinates will be wrong
        screen_w = root.winfo_screenwidth()
        screen_h = root.winfo_screenheight()
        x = int(screen_w * 0.1)
        y = int(screen_h * 0.05)
        root.geometry(f"+{x}+{y}")
        self.base_dir = base_dir
        self.root.title("Bounding Box Viewer")
        self.label = tk.Label(root, text="Waiting for images...")
--- a/prompting.py
+++ b/prompting.py
@ -88,6 +88,10 @@ do not score or give feedback to any other question."""
 def make_prompt(input_dir,full_label):
    def read_longest_prefix_file(subdir):
        dir_path = input_dir / subdir
        if not dir_path.exists():
            if subdir != "Persp":
                print("Warning !! Directory doesn't exist : ", dir_path)
            return ""
        matches = [f for f in dir_path.iterdir()
                   if f.is_file()
                   and full_label.startswith(f.name)
--- a/rename_to_copie.sh
+++ b/rename_to_copie.sh
@ -14,6 +14,11 @@ for file in *.pdf; do
    # Handle case where no pdfs exist
    [ -e "$file" ] || continue
    if [ "$file" = "enonce.pdf" ]; then
        echo "Skipping: $file"
        continue
    fi
    # Rename with 0-padding (e.g., Copie01.pdf)
    mv -- "$file" "$(printf "Copie%02d.pdf" "$count")"
    ((count++))
--- a/rotate_all.sh
+++ b/rotate_all.sh
@ -13,6 +13,11 @@ cd "$1" || { echo "Error: Cannot access directory '$1'"; exit 1; }
 shopt -s nullglob
 for file in *.pdf; do
    if [ "$file" = "enonce.pdf" ]; then
        echo "Skipping: $file"
        continue
    fi
    # Rotate to a temporary file
    if qpdf --rotate=+180 "$file" "temp_rotated.pdf"; then
        mv "temp_rotated.pdf" "$file"