diff --git a/Readme.org b/Readme.org index 3b87382..5f7731e 100644 --- a/Readme.org +++ b/Readme.org @@ -1,7 +1,7 @@ #+title: Script #+author: Sébastien Miquel #+date: 14-03-2026 -# Time-stamp: <17-05-26 10:51> +# Time-stamp: <02-06-26 09:26> #+OPTIONS: * Méta diff --git a/annotating.py b/annotating.py index 8503637..7e63162 100644 --- a/annotating.py +++ b/annotating.py @@ -106,7 +106,6 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]): "pdf_path": pdf_path, "result": { "score": 0.0, - "confidence": 1.0, "feedback": [], "error": "non traité" }, @@ -127,7 +126,6 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]): "pdf_path": pdf_path, "result": { "score": 0.0, - "confidence": 1.0, "feedback": [], "error": "non traité" }, diff --git a/correction.py b/correction.py index 3b93a5f..2fa2a78 100644 --- a/correction.py +++ b/correction.py @@ -175,7 +175,18 @@ def call_gemini_with_retries(model_id, contents, config, except Exception as e: error_msg = str(e).lower() is_quota_error = "429" in error_msg or "quota" in error_msg or "exhausted" in error_msg + is_minute_limit = "minute" in error_msg or "rpm" in error_msg or "tpm" in error_msg + if is_minute_limit: + import re + # Extract wait time if present, else use default delay + retry_match = re.search(r"retry in ([\d.]+)s", error_msg) + wait_time = float(retry_match.group(1)) + 1.0 if retry_match else delays[attempt] + + tprint(f"\tGemini Pro minute limit hit. Waiting {wait_time:.1f}s...") + time.sleep(wait_time) + continue # Retry same model + # Immediately fallback to Flash without waiting if it's a Pro quota error if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id: tprint(f"\tGemini Pro quota hit ({e}). \n\n\tFalling back to Flash permanently...") @@ -552,7 +563,7 @@ if __name__ == "__main__": for label in all_labels: if label.startswith(args.batch_from): args.batch_from = label - print("Batching from : ", args.batch_from) + input(f"About to batch from: {args.batch_from}. Press Enter to confirm...") break if args.batch_from not in all_labels: sys.exit(f"Error: Label '{args.batch_from}' not found. Available labels: {all_labels}") diff --git a/enonce_info.py b/enonce_info.py index 07ad7c9..e6ea5bb 100644 --- a/enonce_info.py +++ b/enonce_info.py @@ -16,6 +16,7 @@ def compile_to_pdf(text, output_pdf_path): # 21 cm + 3.8 (dimension de la marge \\usepackage{{lmodern}} \\usepackage{{amsmath, amssymb}} \\usepackage{{commands}} +\\usepackage{{minted}} \\usepackage{{graphicx}} \\usepackage{{enumitem}} \\begin{{document}} @@ -45,6 +46,13 @@ def compile_to_pdf(text, output_pdf_path): # 21 cm + 3.8 (dimension de la marge stderr=subprocess.DEVNULL, check=False ) + if "minted" in text: + subprocess.run( + ['pdflatex', '-interaction=nonstopmode', tex_filename], + cwd=temp_dir, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=False) generated_pdf = os.path.join(temp_dir, pdf_filename) if os.path.exists(generated_pdf): diff --git a/plotting.py b/plotting.py index 61fa241..09064dc 100644 --- a/plotting.py +++ b/plotting.py @@ -155,6 +155,8 @@ def worker_thread(base_dir, files_to_process, all_labels): class ImageViewer: def __init__(self, root, base_dir): self.root = root + self.root.resizable(False, False) # If you resize, coordinates will be wrong + self.base_dir = base_dir self.root.title("Bounding Box Viewer") self.label = tk.Label(root, text="Waiting for images...") diff --git a/post-correction.py b/post-correction.py index e8e4af7..6bb5b7b 100644 --- a/post-correction.py +++ b/post-correction.py @@ -131,7 +131,13 @@ def clean_obj(obj): return [clean_obj(x) for x in obj] elif isinstance(obj, dict): - return {k: clean_obj(v) for k, v in obj.items()} + r = {} + for k, v in obj.items(): + if k != "suffix": + r[k] = clean_obj(v) + else: + r[k] = v + return r else: return obj diff --git a/prompting.py b/prompting.py index 1adf2a7..a2cf779 100644 --- a/prompting.py +++ b/prompting.py @@ -39,11 +39,6 @@ Avoid giving feedback about confusing letters `n` with `m`, `x` with `n` or `h` with `k`. If it looks wrong, assume you read it wrong, unless the distinction is very important. -You should also give me a measure of confidence, from 0 to 1 that you -were able to correctly understand the answer. A score below 0.5 means -that you think it is likely that you couldn't understand an important -part. - In some case, you may find that either - The student didn't answer the right question. Set the score to 0. Since it could be a labeling error, indicate is by setting `error` @@ -57,19 +52,17 @@ In some case, you may find that either If there's no error, set `error` to `\"\"`. You will answer using json describing a list of dictionary with a key -\"id\", and a key \"result\" that contains the \"score\", the \"confidence\", a -list \"feedback\", and possibly an \"error\". Like this example : +\"id\", and a key \"result\" that contains the \"score\", a list +\"feedback\", and possibly an \"error\". Like this example : [{ \"id\": \"01\", \"result\": {\"score\" : 2.5, - \"confidence\" : 0.8, \"feedback\": [{text: \"Un retour générique. Il faut apprendre le cours.\", box_2d: null}, {text: \"Non, la fonction n'est pas forcément continue\", pos: [145, 280, 340, 500]}], \"error\": \"\"} }, { \"id\": \"04\", \"result\": {\"score\" : 4., - \"confidence\" : 0.9, \"feedback\" : [] \"error\": \"\" } } @@ -121,7 +114,6 @@ class FeedbackItem(BaseModel): class ResultData(BaseModel): score: float = Field(description="The numeric score") - confidence: float = Field(description="Confidence level") feedback: List[FeedbackItem] = Field(description="List of feedback items") error: str = Field(description="Indicates if an error occurred") @@ -140,7 +132,6 @@ UNROLLED_SCHEMA = { "type": "OBJECT", "properties": { "score": {"type": "NUMBER", "description": "The numeric score"}, - "confidence": {"type": "NUMBER", "description": "Confidence level"}, "error": {"type": "STRING", "description": "Indicates if an error occurred"}, "feedback": { "type": "ARRAY", @@ -160,7 +151,7 @@ UNROLLED_SCHEMA = { } } }, - "required": ["score", "confidence", "feedback", "error"] + "required": ["score", "feedback", "error"] } }, "required": ["id", "result"] diff --git a/reading_grouped_annotations.py b/reading_grouped_annotations.py index d4d5b44..030bceb 100644 --- a/reading_grouped_annotations.py +++ b/reading_grouped_annotations.py @@ -247,18 +247,6 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, save_paginated_pdf(concat_list_F, pdf_out_path) logs.append(f" Saved regenerated Concat_F.pdf") - # max_w = max(i.width for i in concat_list_F) - # total_h = sum(i.height for i in concat_list_F) - # full_img = Image.new("RGB", (max_w, total_h), "white") - - # y = 0 - # for img in concat_list_F: - # full_img.paste(img, (0, y)) - # y += img.height - - # full_img.save(os.path.join(output_dir, "Concat_F.jpg")) - # logs.append(f" Saved regenerated Concat_F.jpg") - return "\n".join(logs) from utils import read_all_labels diff --git a/splitting_int.py b/splitting_int.py index bd25442..94cf318 100644 --- a/splitting_int.py +++ b/splitting_int.py @@ -112,8 +112,8 @@ def split_an_interro(base_dir, input_pdf, coords_list): if is_stop: end_page = n_pn # end_y_target_raw = n_y_start - # On avait retiré un carreau précédemment, on le rajoute… - end_y_target_raw = min(n_y_start + int(1.25 * carreau), 1000) + # On avait retiré un carreau précédemment inutilement, on le rajoute, plus un demi carreau + end_y_target_raw = min(n_y_start + int(1.5 * carreau), 1000) break # RULES 3 & 4: Calculate horizontal boundaries (0.0 to 1.0 fraction of local page width)