Miscs for Interro31

master
Sébastien Miquel 2026-06-06 09:27:48 +02:00
parent 9e8b9ac191
commit 80d06e4693
9 changed files with 35 additions and 31 deletions

View File

@ -1,7 +1,7 @@
#+title: Script #+title: Script
#+author: Sébastien Miquel #+author: Sébastien Miquel
#+date: 14-03-2026 #+date: 14-03-2026
# Time-stamp: <17-05-26 10:51> # Time-stamp: <02-06-26 09:26>
#+OPTIONS: #+OPTIONS:
* Méta * Méta

View File

@ -106,7 +106,6 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
"pdf_path": pdf_path, "pdf_path": pdf_path,
"result": { "result": {
"score": 0.0, "score": 0.0,
"confidence": 1.0,
"feedback": [], "feedback": [],
"error": "non traité" "error": "non traité"
}, },
@ -127,7 +126,6 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
"pdf_path": pdf_path, "pdf_path": pdf_path,
"result": { "result": {
"score": 0.0, "score": 0.0,
"confidence": 1.0,
"feedback": [], "feedback": [],
"error": "non traité" "error": "non traité"
}, },

View File

@ -175,6 +175,17 @@ def call_gemini_with_retries(model_id, contents, config,
except Exception as e: except Exception as e:
error_msg = str(e).lower() error_msg = str(e).lower()
is_quota_error = "429" in error_msg or "quota" in error_msg or "exhausted" in error_msg is_quota_error = "429" in error_msg or "quota" in error_msg or "exhausted" in error_msg
is_minute_limit = "minute" in error_msg or "rpm" in error_msg or "tpm" in error_msg
if is_minute_limit:
import re
# Extract wait time if present, else use default delay
retry_match = re.search(r"retry in ([\d.]+)s", error_msg)
wait_time = float(retry_match.group(1)) + 1.0 if retry_match else delays[attempt]
tprint(f"\tGemini Pro minute limit hit. Waiting {wait_time:.1f}s...")
time.sleep(wait_time)
continue # Retry same model
# Immediately fallback to Flash without waiting if it's a Pro quota error # Immediately fallback to Flash without waiting if it's a Pro quota error
if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id: if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id:
@ -552,7 +563,7 @@ if __name__ == "__main__":
for label in all_labels: for label in all_labels:
if label.startswith(args.batch_from): if label.startswith(args.batch_from):
args.batch_from = label args.batch_from = label
print("Batching from : ", args.batch_from) input(f"About to batch from: {args.batch_from}. Press Enter to confirm...")
break break
if args.batch_from not in all_labels: if args.batch_from not in all_labels:
sys.exit(f"Error: Label '{args.batch_from}' not found. Available labels: {all_labels}") sys.exit(f"Error: Label '{args.batch_from}' not found. Available labels: {all_labels}")

View File

@ -16,6 +16,7 @@ def compile_to_pdf(text, output_pdf_path): # 21 cm + 3.8 (dimension de la marge
\\usepackage{{lmodern}} \\usepackage{{lmodern}}
\\usepackage{{amsmath, amssymb}} \\usepackage{{amsmath, amssymb}}
\\usepackage{{commands}} \\usepackage{{commands}}
\\usepackage{{minted}}
\\usepackage{{graphicx}} \\usepackage{{graphicx}}
\\usepackage{{enumitem}} \\usepackage{{enumitem}}
\\begin{{document}} \\begin{{document}}
@ -45,6 +46,13 @@ def compile_to_pdf(text, output_pdf_path): # 21 cm + 3.8 (dimension de la marge
stderr=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
check=False check=False
) )
if "minted" in text:
subprocess.run(
['pdflatex', '-interaction=nonstopmode', tex_filename],
cwd=temp_dir,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False)
generated_pdf = os.path.join(temp_dir, pdf_filename) generated_pdf = os.path.join(temp_dir, pdf_filename)
if os.path.exists(generated_pdf): if os.path.exists(generated_pdf):

View File

@ -155,6 +155,8 @@ def worker_thread(base_dir, files_to_process, all_labels):
class ImageViewer: class ImageViewer:
def __init__(self, root, base_dir): def __init__(self, root, base_dir):
self.root = root self.root = root
self.root.resizable(False, False) # If you resize, coordinates will be wrong
self.base_dir = base_dir self.base_dir = base_dir
self.root.title("Bounding Box Viewer") self.root.title("Bounding Box Viewer")
self.label = tk.Label(root, text="Waiting for images...") self.label = tk.Label(root, text="Waiting for images...")

View File

@ -131,7 +131,13 @@ def clean_obj(obj):
return [clean_obj(x) for x in obj] return [clean_obj(x) for x in obj]
elif isinstance(obj, dict): elif isinstance(obj, dict):
return {k: clean_obj(v) for k, v in obj.items()} r = {}
for k, v in obj.items():
if k != "suffix":
r[k] = clean_obj(v)
else:
r[k] = v
return r
else: else:
return obj return obj

View File

@ -39,11 +39,6 @@ Avoid giving feedback about confusing letters `n` with `m`, `x` with
`n` or `h` with `k`. If it looks wrong, assume you read it wrong, `n` or `h` with `k`. If it looks wrong, assume you read it wrong,
unless the distinction is very important. unless the distinction is very important.
You should also give me a measure of confidence, from 0 to 1 that you
were able to correctly understand the answer. A score below 0.5 means
that you think it is likely that you couldn't understand an important
part.
In some cases, you may find that either In some cases, you may find that either
- The student didn't answer the right question. Set the score to 0. - The student didn't answer the right question. Set the score to 0.
Since it could be a labeling error, indicate this by setting `error` Since it could be a labeling error, indicate this by setting `error`
@ -57,19 +52,17 @@ In some case, you may find that either
If there's no error, set `error` to `\"\"`. If there's no error, set `error` to `\"\"`.
You will answer using json describing a list of dictionary with a key You will answer using json describing a list of dictionary with a key
\"id\", and a key \"result\" that contains the \"score\", the \"confidence\", a \"id\", and a key \"result\" that contains the \"score\", a list
list \"feedback\", and possibly an \"error\". Like this example : \"feedback\", and possibly an \"error\". Like this example :
[{ \"id\": \"01\", [{ \"id\": \"01\",
\"result\": {\"score\" : 2.5, \"result\": {\"score\" : 2.5,
\"confidence\" : 0.8,
\"feedback\": [{text: \"Un retour générique. Il faut apprendre le cours.\", box_2d: null}, \"feedback\": [{text: \"Un retour générique. Il faut apprendre le cours.\", box_2d: null},
{text: \"Non, la fonction n'est pas forcément continue\", pos: [145, 280, 340, 500]}], {text: \"Non, la fonction n'est pas forcément continue\", pos: [145, 280, 340, 500]}],
\"error\": \"\"} \"error\": \"\"}
}, },
{ \"id\": \"04\", { \"id\": \"04\",
\"result\": {\"score\" : 4., \"result\": {\"score\" : 4.,
\"confidence\" : 0.9,
\"feedback\" : [] \"feedback\" : []
\"error\": \"\" } \"error\": \"\" }
} }
@ -121,7 +114,6 @@ class FeedbackItem(BaseModel):
class ResultData(BaseModel): class ResultData(BaseModel):
score: float = Field(description="The numeric score") score: float = Field(description="The numeric score")
confidence: float = Field(description="Confidence level")
feedback: List[FeedbackItem] = Field(description="List of feedback items") feedback: List[FeedbackItem] = Field(description="List of feedback items")
error: str = Field(description="Indicates if an error occurred") error: str = Field(description="Indicates if an error occurred")
@ -140,7 +132,6 @@ UNROLLED_SCHEMA = {
"type": "OBJECT", "type": "OBJECT",
"properties": { "properties": {
"score": {"type": "NUMBER", "description": "The numeric score"}, "score": {"type": "NUMBER", "description": "The numeric score"},
"confidence": {"type": "NUMBER", "description": "Confidence level"},
"error": {"type": "STRING", "description": "Indicates if an error occurred"}, "error": {"type": "STRING", "description": "Indicates if an error occurred"},
"feedback": { "feedback": {
"type": "ARRAY", "type": "ARRAY",
@ -160,7 +151,7 @@ UNROLLED_SCHEMA = {
} }
} }
}, },
"required": ["score", "confidence", "feedback", "error"] "required": ["score", "feedback", "error"]
} }
}, },
"required": ["id", "result"] "required": ["id", "result"]

View File

@ -247,18 +247,6 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id,
save_paginated_pdf(concat_list_F, pdf_out_path) save_paginated_pdf(concat_list_F, pdf_out_path)
logs.append(f" Saved regenerated Concat_F.pdf") logs.append(f" Saved regenerated Concat_F.pdf")
# max_w = max(i.width for i in concat_list_F)
# total_h = sum(i.height for i in concat_list_F)
# full_img = Image.new("RGB", (max_w, total_h), "white")
# y = 0
# for img in concat_list_F:
# full_img.paste(img, (0, y))
# y += img.height
# full_img.save(os.path.join(output_dir, "Concat_F.jpg"))
# logs.append(f" Saved regenerated Concat_F.jpg")
return "\n".join(logs) return "\n".join(logs)
from utils import read_all_labels from utils import read_all_labels

View File

@ -112,8 +112,8 @@ def split_an_interro(base_dir, input_pdf, coords_list):
if is_stop: if is_stop:
end_page = n_pn end_page = n_pn
# end_y_target_raw = n_y_start # end_y_target_raw = n_y_start
# On avait retiré un carreau précédemment, on le rajoute… # On avait retiré un carreau précédemment inutilement, on le rajoute, plus un demi carreau
end_y_target_raw = min(n_y_start + int(1.25 * carreau), 1000) end_y_target_raw = min(n_y_start + int(1.5 * carreau), 1000)
break break
# RULES 3 & 4: Calculate horizontal boundaries (0.0 to 1.0 fraction of local page width) # RULES 3 & 4: Calculate horizontal boundaries (0.0 to 1.0 fraction of local page width)