Miscs for Interro31

2026-06-06 09:27:48 +02:00 · 2026-06-06 09:27:48 +02:00 · 80d06e4693
parent 9e8b9ac191
commit 80d06e4693
9 changed files with 35 additions and 31 deletions
--- a/Readme.org
+++ b/Readme.org
@ -1,7 +1,7 @@
 #+title:  Script
 #+author: Sébastien Miquel
 #+date:   14-03-2026
-# Time-stamp: <17-05-26 10:51>
+# Time-stamp: <02-06-26 09:26>
 #+OPTIONS:
 * Méta
--- a/annotating.py
+++ b/annotating.py
@ -106,7 +106,6 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
                            "pdf_path": pdf_path,
                            "result": {
                                "score": 0.0,
                                "confidence": 1.0,
                                "feedback": [],
                                "error": "non traité"
                            },
@ -127,7 +126,6 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
                        "pdf_path": pdf_path,
                        "result": {
                            "score": 0.0,
                            "confidence": 1.0,
                            "feedback": [],
                            "error": "non traité"
                        },
--- a/correction.py
+++ b/correction.py
@ -175,6 +175,17 @@ def call_gemini_with_retries(model_id, contents, config,
        except Exception as e:
            error_msg = str(e).lower()
            is_quota_error = "429" in error_msg or "quota" in error_msg or "exhausted" in error_msg
            is_minute_limit = "minute" in error_msg or "rpm" in error_msg or "tpm" in error_msg
            if is_minute_limit:
                import re
                # Extract wait time if present, else use default delay
                retry_match = re.search(r"retry in ([\d.]+)s", error_msg)
                wait_time = float(retry_match.group(1)) + 1.0 if retry_match else delays[attempt]
                tprint(f"\tGemini Pro minute limit hit. Waiting {wait_time:.1f}s...")
                time.sleep(wait_time)
                continue # Retry same model
            # Immediately fallback to Flash without waiting if it's a Pro quota error
            if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id:
@ -552,7 +563,7 @@ if __name__ == "__main__":
            for label in all_labels:
                if label.startswith(args.batch_from):
                    args.batch_from = label
-                    print("Batching from : ", args.batch_from)
+                    input(f"About to batch from: {args.batch_from}. Press Enter to confirm...")
                    break
            if args.batch_from not in all_labels:
                sys.exit(f"Error: Label '{args.batch_from}' not found. Available labels: {all_labels}")
--- a/enonce_info.py
+++ b/enonce_info.py
@ -16,6 +16,7 @@ def compile_to_pdf(text, output_pdf_path): # 21 cm + 3.8 (dimension de la marge
 \\usepackage{{lmodern}}
 \\usepackage{{amsmath, amssymb}}
 \\usepackage{{commands}}
 \\usepackage{{minted}}
 \\usepackage{{graphicx}}
 \\usepackage{{enumitem}}
 \\begin{{document}}
@ -45,6 +46,13 @@ def compile_to_pdf(text, output_pdf_path): # 21 cm + 3.8 (dimension de la marge
                stderr=subprocess.DEVNULL,
                check=False
            )
            if "minted" in text:
                subprocess.run(
                    ['pdflatex', '-interaction=nonstopmode', tex_filename],
                    cwd=temp_dir,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    check=False)                
            generated_pdf = os.path.join(temp_dir, pdf_filename)
            if os.path.exists(generated_pdf):
--- a/plotting.py
+++ b/plotting.py
@ -155,6 +155,8 @@ def worker_thread(base_dir, files_to_process, all_labels):
 class ImageViewer:
    def __init__(self, root, base_dir):
        self.root = root
        self.root.resizable(False, False) # If you resize, coordinates will be wrong
        self.base_dir = base_dir
        self.root.title("Bounding Box Viewer")
        self.label = tk.Label(root, text="Waiting for images...")
--- a/post-correction.py
+++ b/post-correction.py
@ -131,7 +131,13 @@ def clean_obj(obj):
        return [clean_obj(x) for x in obj]
    elif isinstance(obj, dict):
-        return {k: clean_obj(v) for k, v in obj.items()}
+        r = {}
        for k, v in obj.items():
            if k != "suffix":
                r[k] = clean_obj(v)
            else:
                r[k] = v
        return r
    else:
        return obj
--- a/prompting.py
+++ b/prompting.py
@ -39,11 +39,6 @@ Avoid giving feedback about confusing letters `n` with `m`, `x` with
 `n` or `h` with `k`. If it looks wrong, assume you read it wrong,
 unless the distinction is very important.
 You should also give me a measure of confidence, from 0 to 1 that you
 were able to correctly understand the answer. A score below 0.5 means
 that you think it is likely that you couldn't understand an important
 part.
 In some case, you may find that either
 - The student didn't answer the right question. Set the score to 0.
   Since it could be a labeling error, indicate is by setting `error`
@ -57,19 +52,17 @@ In some case, you may find that either
 If there's no error, set `error` to `\"\"`.
 You will answer using json describing a list of dictionary with a key
-\"id\", and a key \"result\" that contains the \"score\", the \"confidence\", a
+\"id\", and a key \"result\" that contains the \"score\", a list
-list \"feedback\", and possibly an \"error\". Like this example :
+\"feedback\", and possibly an \"error\". Like this example :
 [{ \"id\": \"01\",
   \"result\": {\"score\" : 2.5,
              \"confidence\" : 0.8,
              \"feedback\": [{text: \"Un retour générique. Il faut apprendre le cours.\", box_2d: null},
              {text: \"Non, la fonction n'est pas forcément continue\", pos: [145, 280, 340, 500]}],
               \"error\": \"\"}
 },
 { \"id\": \"04\",
   \"result\": {\"score\" : 4.,
              \"confidence\" : 0.9,
              \"feedback\" : []
              \"error\": \"\" }
 }
@ -121,7 +114,6 @@ class FeedbackItem(BaseModel):
 class ResultData(BaseModel):
    score: float = Field(description="The numeric score")
    confidence: float = Field(description="Confidence level")
    feedback: List[FeedbackItem] = Field(description="List of feedback items")
    error: str = Field(description="Indicates if an error occurred")
@ -140,7 +132,6 @@ UNROLLED_SCHEMA = {
                                "type": "OBJECT",
                                "properties": {
                                    "score": {"type": "NUMBER", "description": "The numeric score"},
                                    "confidence": {"type": "NUMBER", "description": "Confidence level"},
                                    "error": {"type": "STRING", "description": "Indicates if an error occurred"},
                                    "feedback": {
                                        "type": "ARRAY",
@ -160,7 +151,7 @@ UNROLLED_SCHEMA = {
                                        }
                                    }
                                },
-                                "required": ["score", "confidence", "feedback", "error"]
+                                "required": ["score", "feedback", "error"]
                            }
                        },
                        "required": ["id", "result"]
--- a/reading_grouped_annotations.py
+++ b/reading_grouped_annotations.py
@ -247,18 +247,6 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id,
        save_paginated_pdf(concat_list_F, pdf_out_path)
        logs.append(f"  Saved regenerated Concat_F.pdf")
        # max_w = max(i.width for i in concat_list_F)
        # total_h = sum(i.height for i in concat_list_F)
        # full_img = Image.new("RGB", (max_w, total_h), "white")
        # y = 0
        # for img in concat_list_F:
        #     full_img.paste(img, (0, y))
        #     y += img.height
        # full_img.save(os.path.join(output_dir, "Concat_F.jpg"))
        # logs.append(f"  Saved regenerated Concat_F.jpg")
    return "\n".join(logs)
 from utils import read_all_labels
--- a/splitting_int.py
+++ b/splitting_int.py
@ -112,8 +112,8 @@ def split_an_interro(base_dir, input_pdf, coords_list):
            if is_stop:
                end_page = n_pn
                # end_y_target_raw = n_y_start
-                # On avait retiré un carreau précédemment, on le rajoute…
+                # On avait retiré un carreau précédemment inutilement, on le rajoute, plus un demi carreau
-                end_y_target_raw = min(n_y_start + int(1.25 * carreau), 1000)
+                end_y_target_raw = min(n_y_start + int(1.5 * carreau), 1000)
                break
        # RULES 3 & 4: Calculate horizontal boundaries (0.0 to 1.0 fraction of local page width)