Initial support for _old and _new

master
Sébastien Miquel 2026-06-01 09:51:21 +02:00
parent c2e915226e
commit 9e8b9ac191
4 changed files with 86 additions and 41 deletions

View File

@ -34,6 +34,9 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
student_id = item['id']
result_obj = item['result']
if result_obj.get("suffix") == "_old":
continue
# Find coordinates
coordinates = None
height,width= None, None
@ -58,9 +61,11 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
if coordinates:
break
# Construct PDF path: Dir/Copie{id}/{label}.pdf
pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
suffix = result_obj.get("suffix", "")
if suffix == "_new":
pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}_new.pdf"
else:
pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
# Initialize dictionary structure for this ID if missing
if student_id not in result_data:
result_data[student_id] = {}
@ -91,8 +96,12 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
for lbl in labels_to_redo:
pdf_path = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}.pdf"
if not Path(pdf_path).exists():
print("Debug : asked to refaire", sid, lbl, "but pdf absent")
continue
pdf_path_new = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}_new.pdf"
if pdf_path_new.exists():
pdf_path = pdf_path_new
else:
print("Debug : asked to refaire", sid, lbl, "but pdf absent")
continue
result_data[sid][lbl] = {
"pdf_path": pdf_path,
"result": {
@ -108,8 +117,12 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
for lbl in labels_to_redo:
pdf_path = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}.pdf"
if not pdf_path.exists():
print("Debug : asked to refaire", sid, lbl, "but pdf absent")
continue
pdf_path_new = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}_new.pdf"
if pdf_path_new.exists():
pdf_path = pdf_path_new
else:
print("Debug : asked to refaire", sid, lbl, "but pdf absent")
continue
result_data[sid][lbl] = {
"pdf_path": pdf_path,
"result": {
@ -278,7 +291,7 @@ def render_real_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_li
\\usepackage[T1]{{fontenc}}
\\usepackage{{lmodern}} % Enables arbitrary font scaling
\\usepackage{{amsmath, amssymb}}
\\usepackage{{mathabx}} % larger inline operators.
\\usepackage{{mathabx}} % larger inline operators.
\\usepackage{{commands}}
%\\usepackage{{anyfontsize}} % replaced by lmodern
\\begin{{document}}
@ -572,9 +585,9 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite):
for label, content in sorted_labels:
# 1. Find PDF path
copie_folder = f"Copie{student_id}"
pdf_full_path = Path(root_dir) / "Copies" / copie_folder / f"{label}.pdf"
pdf_full_path = content.get('pdf_path')
if not os.path.exists(pdf_full_path):
if not pdf_full_path or not os.path.exists(pdf_full_path):
print(f"File not found: {pdf_full_path}")
continue
@ -616,18 +629,6 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite):
def process_correction(root_dir, data, all_labels, overwrite=False):
# with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
# # Create a list of futures
# futures = []
# for student_id, labels in sorted(data.items()):
# futures.append(
# executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite)
# )
# # Wait for all threads to complete
# concurrent.futures.wait(futures)
# Ne pas thread cette application
# 1. Il faut protéger les appels à matplotlib
# 2. tu vas perdre les erreurs

View File

@ -190,10 +190,10 @@ def call_gemini_with_retries(model_id, contents, config,
tprint(f"\tGemini API failure: {e}. Maximum retries reached.")
raise
def correct_boxes_with_gemini(pid, label, original_feedbacks,
def correct_boxes_with_gemini(pid, label, pdf_path, original_feedbacks,
yming, ymaxg, width_r, total_height):
"""Requests corrected bounding boxes from Gemini Flash on the single image."""
pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf"
# pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf"
contents, config = prompting.request_for_box_correction(pdf_path, original_feedbacks)
response_text = call_gemini_with_retries(MODEL_ID_flash, contents, config)
@ -253,20 +253,26 @@ def handle_label_errors(pid, label, res, pdf_path):
if new_label == label:
res["error"] = ""
return []
new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}.pdf"
if new_pdf_path.exists():
base_new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}.pdf"
new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}_new.pdf"
if base_new_pdf_path.exists() or new_pdf_path.exists():
tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.")
res["error"] = f"wrg-lbl:{new_label}?exists"
else:
res["error"] = f"wrg-lbl-moved-to:{new_label}"
tprint(f"\t\tCopie{pid} : moving wrong {label} to {new_label}.")
shutil.move(str(pdf_path), str(new_pdf_path))
# Since we moved the file, this Copie/label should not be taken
# into account in the future, I think
# Copie vers _new, puis renommage de l'original vers _old
shutil.copy(str(pdf_path), str(new_pdf_path))
old_pdf_path = pdf_path.with_name(f"{label}_old.pdf")
if pdf_path != old_pdf_path:
shutil.move(str(pdf_path), str(old_pdf_path))
idx = get_next_group_idx(new_label)
height = grouping.get_pdf_height(str(new_pdf_path))
grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)],
GROUPS_DIR)
grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)], GROUPS_DIR)
tprint(f"\t\tMaking {new_label} group {idx+1}")
new_tasks.append((str(GROUPS_DIR / new_label / f"Group_{idx+1}.jpg"),
new_label, False))
@ -289,14 +295,17 @@ def handle_label_errors(pid, label, res, pdf_path):
error += f"{add_label}??"
keep_error = True
continue
new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{add_label}.pdf"
if not new_pdf_path.exists():
shutil.copy(str(pdf_path), str(new_pdf_path))
base_add_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{add_label}.pdf"
add_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{add_label}_new.pdf"
if not base_add_pdf_path.exists() and not add_pdf_path.exists():
shutil.copy(str(pdf_path), str(add_pdf_path))
tprint(f"\t\tCopying Copie{pid} : {label} -> {add_label}")
idx = get_next_group_idx(add_label)
tprint(f"\t\tMaking {add_label} group {idx+1}")
height = grouping.get_pdf_height(str(new_pdf_path))
grouping.create_jpg(add_label, idx, [(pid, str(new_pdf_path), height)], GROUPS_DIR)
height = grouping.get_pdf_height(str(add_pdf_path))
grouping.create_jpg(add_label, idx, [(pid, str(add_pdf_path), height)], GROUPS_DIR)
new_tasks.append((str(GROUPS_DIR / add_label / f"Group_{idx+1}.jpg"),
add_label, False))
error += f"(->){add_label}"
@ -305,7 +314,6 @@ def handle_label_errors(pid, label, res, pdf_path):
keep_error = True
error += f"(xx){add_label}"
tprint(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}")
if not keep_error:
res["error"] = ""
else:
@ -367,6 +375,26 @@ def process_single_task(task_tuple, precomputed_response=None):
yming, ymaxg, width_r = d_data[pid]
pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf"
current_suffix = ""
# Détection du vrai fichier s'il a un suffixe
if not pdf_path.exists():
if pdf_path.with_name(f"{label}_new.pdf").exists():
pdf_path = pdf_path.with_name(f"{label}_new.pdf")
current_suffix = "_new"
# NOTE(review): unclear when this _old fallback is ever reached — possibly never.
elif pdf_path.with_name(f"{label}_old.pdf").exists():
pdf_path = pdf_path.with_name(f"{label}_old.pdf")
current_suffix = "_old"
# 1. Gestion de empty-answer
if res.get("error") == "empty-answer":
old_path = pdf_path.with_name(f"{label}_old.pdf")
if pdf_path.exists() and pdf_path != old_path:
shutil.move(str(pdf_path), str(old_path))
pdf_path = old_path
current_suffix = "_old"
if (not can_spawn_tasks) and res["error"] == "additional-answer":
tprint("\tSwallowing an additional-answer from a subsequent task.")
res["error"]= ""
@ -375,6 +403,13 @@ def process_single_task(task_tuple, precomputed_response=None):
if can_spawn_tasks and res.get("error") in ["wrong-label", "additional-answer"]:
new_tasks.extend(handle_label_errors(pid, label, res, pdf_path))
# Si "wrong-label" a déplacé le fichier courant vers _old
if res.get("error", "").startswith("wrg-lbl-moved-to:"):
current_suffix = "_old"
# 5. Enregistrer l'information dans correction.json
if current_suffix:
res["suffix"] = current_suffix
needs_correction = []
for (i,f) in enumerate(res["feedback"]):
@ -403,8 +438,9 @@ def process_single_task(task_tuple, precomputed_response=None):
if needs_correction:
tprint(f"\tBox anomalies detected for Copie {pid} {group_name}. \n\tRequesting isolated correction from Gemini Flash...")
try:
# Pass the resolved pdf_path explicitly so a suffixed file (_new/_old) is used.
res["feedback"] = correct_boxes_with_gemini(
pid, label, res["feedback"],
pid, label, pdf_path, res["feedback"],
yming, ymaxg, width_r, total_height)
except Exception as e:
tprint(f"\tCorrection failed for Copie {pid}, {group_name} : {e}\n\tRemoving the boxes")
@ -430,7 +466,7 @@ def process_single_task(task_tuple, precomputed_response=None):
except json.JSONDecodeError:
tprint(f"Error decoding JSON for {file_path}", file=sys.stderr)
with io_lock:
errors_summary.append(("Error decoding JSON response", file_path))
errors_summary.append(("Error decoding JSON response", file_path))
except Exception as e:
error_msg = f"Exception processing {file_path}: {e}"
print(error_msg, file=sys.stderr)
@ -487,6 +523,12 @@ if __name__ == "__main__":
# 2. Make new group and add to tasks
pdf_path = copie_dir / f"{label}.pdf"
if not pdf_path.exists():
if (copie_dir / f"{label}_new.pdf").exists():
pdf_path = copie_dir / f"{label}_new.pdf"
# elif (copie_dir / f"{label}_old.pdf").exists():
# pdf_path = copie_dir / f"{label}_old.pdf"
if pdf_path.exists():
idx = get_next_group_idx(label)
height = grouping.get_pdf_height(str(pdf_path))

View File

@ -255,7 +255,8 @@ def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_laye
# B. Regenerate Label Image
# We always regenerate to ensure Concat.jpg is consistent with any modifications
pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
# pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
pdf_path = content.get('pdf_path') # Contient le suffixe _new si nécessaire
if not os.path.exists(pdf_path): continue
(base_img, _, _) = annotating.make_base_image(pdf_path)

View File

@ -167,7 +167,8 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id,
result = content['result']
d_notes[label] = str(result.get('score', 0))
pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
# pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
pdf_path = content.get('pdf_path')
if not os.path.exists(pdf_path): continue
(base_img, _, _) = annotating.make_base_image(pdf_path)