diff --git a/annotating.py b/annotating.py index c73568d..8503637 100644 --- a/annotating.py +++ b/annotating.py @@ -34,6 +34,9 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]): student_id = item['id'] result_obj = item['result'] + if result_obj.get("suffix") == "_old": + continue + # Find coordinates coordinates = None height,width= None, None @@ -58,9 +61,11 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]): if coordinates: break - # Construct PDF path: Dir/Copie{id}/{label}.pdf - pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf" - + suffix = result_obj.get("suffix", "") + if suffix == "_new": + pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}_new.pdf" + else: + pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf" # Initialize dictionary structure for this ID if missing if student_id not in result_data: result_data[student_id] = {} @@ -91,8 +96,12 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]): for lbl in labels_to_redo: pdf_path = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}.pdf" if not Path(pdf_path).exists(): - print("Debug : asked to refaire", sid, lbl, "but pdf absent") - continue + pdf_path_new = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}_new.pdf" + if pdf_path_new.exists(): + pdf_path = pdf_path_new + else: + print("Debug : asked to refaire", sid, lbl, "but pdf absent") + continue result_data[sid][lbl] = { "pdf_path": pdf_path, "result": { @@ -108,8 +117,12 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]): for lbl in labels_to_redo: pdf_path = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}.pdf" if not pdf_path.exists(): - print("Debug : asked to refaire", sid, lbl, "but pdf absent") - continue + pdf_path_new = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}_new.pdf" + if pdf_path_new.exists(): + pdf_path = pdf_path_new + else: + print("Debug : asked to refaire", sid, lbl, "but pdf absent") + continue result_data[sid][lbl] = { "pdf_path": pdf_path, "result": { @@ -278,7 +291,7 @@ def render_real_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_li \\usepackage[T1]{{fontenc}} \\usepackage{{lmodern}} % Enables arbitrary font scaling \\usepackage{{amsmath, amssymb}} -\\usepackage{{mathabx}} % larger inline operators. +\\usepackage{{mathabx}} % larger inline operators. \\usepackage{{commands}} %\\usepackage{{anyfontsize}} % replaced by lmodern \\begin{{document}} @@ -572,9 +585,9 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite): for label, content in sorted_labels: # 1. Find PDF path copie_folder = f"Copie{student_id}" - pdf_full_path = Path(root_dir) / "Copies" / copie_folder / f"{label}.pdf" + pdf_full_path = content.get('pdf_path') - if not os.path.exists(pdf_full_path): + if not pdf_full_path or not os.path.exists(pdf_full_path): print(f"File not found: {pdf_full_path}") continue @@ -616,18 +629,6 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite): def process_correction(root_dir, data, all_labels, overwrite=False): - - # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: - # # Create a list of futures - # futures = [] - # for student_id, labels in sorted(data.items()): - # futures.append( - # executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite) - # ) - - # # Wait for all threads to complete - # concurrent.futures.wait(futures) - # Ne pas thread cette application # 1. Il faut protéger les appels à matplotlib # 2. tu vas perdre les erreurs diff --git a/correction.py b/correction.py index 1195c1f..3b93a5f 100644 --- a/correction.py +++ b/correction.py @@ -190,10 +190,10 @@ def call_gemini_with_retries(model_id, contents, config, tprint(f"\tGemini API failure: {e}. Maximum retries reached.") raise -def correct_boxes_with_gemini(pid, label, original_feedbacks, +def correct_boxes_with_gemini(pid, label, pdf_path, original_feedbacks, yming, ymaxg, width_r, total_height): """Requests corrected bounding boxes from Gemini Flash on the single image.""" - pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf" + # pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf" contents, config = prompting.request_for_box_correction(pdf_path, original_feedbacks) response_text = call_gemini_with_retries(MODEL_ID_flash, contents, config) @@ -253,20 +253,26 @@ def handle_label_errors(pid, label, res, pdf_path): if new_label == label: res["error"] = "" return [] - new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}.pdf" - if new_pdf_path.exists(): + + base_new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}.pdf" + new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}_new.pdf" + + if base_new_pdf_path.exists() or new_pdf_path.exists(): tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.") res["error"] = f"wrg-lbl:{new_label}?exists" else: res["error"] = f"wrg-lbl-moved-to:{new_label}" tprint(f"\t\tCopie{pid} : moving wrong {label} to {new_label}.") - shutil.move(str(pdf_path), str(new_pdf_path)) - # Since we moved the file, this Copie/label should not be taken - # into account in the future, I think + + # Copie vers _new, puis renommage de l'original vers _old + shutil.copy(str(pdf_path), str(new_pdf_path)) + old_pdf_path = pdf_path.with_name(f"{label}_old.pdf") + if pdf_path != old_pdf_path: + shutil.move(str(pdf_path), str(old_pdf_path)) + idx = get_next_group_idx(new_label) height = grouping.get_pdf_height(str(new_pdf_path)) - grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)], - GROUPS_DIR) + grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)], GROUPS_DIR) tprint(f"\t\tMaking {new_label} group {idx+1}") new_tasks.append((str(GROUPS_DIR / new_label / f"Group_{idx+1}.jpg"), new_label, False)) @@ -289,14 +295,17 @@ def handle_label_errors(pid, label, res, pdf_path): error += f"{add_label}??" keep_error = True continue - new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{add_label}.pdf" - if not new_pdf_path.exists(): - shutil.copy(str(pdf_path), str(new_pdf_path)) + + base_add_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{add_label}.pdf" + add_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{add_label}_new.pdf" + + if not base_add_pdf_path.exists() and not add_pdf_path.exists(): + shutil.copy(str(pdf_path), str(add_pdf_path)) tprint(f"\t\tCopying Copie{pid} : {label} -> {add_label}") idx = get_next_group_idx(add_label) tprint(f"\t\tMaking {add_label} group {idx+1}") - height = grouping.get_pdf_height(str(new_pdf_path)) - grouping.create_jpg(add_label, idx, [(pid, str(new_pdf_path), height)], GROUPS_DIR) + height = grouping.get_pdf_height(str(add_pdf_path)) + grouping.create_jpg(add_label, idx, [(pid, str(add_pdf_path), height)], GROUPS_DIR) new_tasks.append((str(GROUPS_DIR / add_label / f"Group_{idx+1}.jpg"), add_label, False)) error += f"(->){add_label}" @@ -305,7 +314,6 @@ def handle_label_errors(pid, label, res, pdf_path): keep_error = True error += f"(xx){add_label}" tprint(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}") - if not keep_error: res["error"] = "" else: @@ -367,6 +375,26 @@ def process_single_task(task_tuple, precomputed_response=None): yming, ymaxg, width_r = d_data[pid] pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf" + current_suffix = "" + + # Détection du vrai fichier s'il a un suffixe + if not pdf_path.exists(): + if pdf_path.with_name(f"{label}_new.pdf").exists(): + pdf_path = pdf_path.with_name(f"{label}_new.pdf") + current_suffix = "_new" + # Quand est-ce que ce chemin est utilisé ? Jamais ? + elif pdf_path.with_name(f"{label}_old.pdf").exists(): + pdf_path = pdf_path.with_name(f"{label}_old.pdf") + current_suffix = "_old" + + # 1. Gestion de empty-answer + if res.get("error") == "empty-answer": + old_path = pdf_path.with_name(f"{label}_old.pdf") + if pdf_path.exists() and pdf_path != old_path: + shutil.move(str(pdf_path), str(old_path)) + pdf_path = old_path + current_suffix = "_old" + if (not can_spawn_tasks) and res["error"] == "additional-answer": tprint("\tSwallowing an additional-answer from a subsequent task.") res["error"]= "" @@ -375,6 +403,13 @@ def process_single_task(task_tuple, precomputed_response=None): if can_spawn_tasks and res.get("error") in ["wrong-label", "additional-answer"]: new_tasks.extend(handle_label_errors(pid, label, res, pdf_path)) + # Si "wrong-label" a déplacé le fichier courant vers _old + if res.get("error", "").startswith("wrg-lbl-moved-to:"): + current_suffix = "_old" + + # 5. Enregistrer l'information dans correction.json + if current_suffix: + res["suffix"] = current_suffix needs_correction = [] for (i,f) in enumerate(res["feedback"]): @@ -403,8 +438,9 @@ def process_single_task(task_tuple, precomputed_response=None): if needs_correction: tprint(f"\tBox anomalies detected for Copie {pid} {group_name}. \n\tRequesting isolated correction from Gemini Flash...") try: + # Pensez à passer pdf_path à la fonction modifiée ! res["feedback"] = correct_boxes_with_gemini( - pid, label, res["feedback"], + pid, label, pdf_path, res["feedback"], yming, ymaxg, width_r, total_height) except Exception as e: tprint(f"\tCorrection failed for Copie {pid}, {group_name} : {e}\n\tRemoving the boxes") @@ -430,7 +466,7 @@ def process_single_task(task_tuple, precomputed_response=None): except json.JSONDecodeError: tprint(f"Error decoding JSON for {file_path}", file=sys.stderr) with io_lock: - errors_summary.append(("Error decoding JSON response", file_path)) + errors_summary.append(("Error decoding JSON response", file_path)) except Exception as e: error_msg = f"Exception processing {file_path}: {e}" print(error_msg, file=sys.stderr) @@ -487,6 +523,12 @@ if __name__ == "__main__": # 2. Make new group and add to tasks pdf_path = copie_dir / f"{label}.pdf" + if not pdf_path.exists(): + if (copie_dir / f"{label}_new.pdf").exists(): + pdf_path = copie_dir / f"{label}_new.pdf" + # elif (copie_dir / f"{label}_old.pdf").exists(): + # pdf_path = copie_dir / f"{label}_old.pdf" + if pdf_path.exists(): idx = get_next_group_idx(label) height = grouping.get_pdf_height(str(pdf_path)) diff --git a/reading_annotations.py b/reading_annotations.py index d507649..a639b09 100644 --- a/reading_annotations.py +++ b/reading_annotations.py @@ -255,7 +255,8 @@ def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_laye # B. Regenerate Label Image # We always regenerate to ensure Concat.jpg is consistent with any modifications - pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf" + # pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf" + pdf_path = content.get('pdf_path') # Contient le suffixe _new si nécessaire if not os.path.exists(pdf_path): continue (base_img, _, _) = annotating.make_base_image(pdf_path) diff --git a/reading_grouped_annotations.py b/reading_grouped_annotations.py index 27a2808..d4d5b44 100644 --- a/reading_grouped_annotations.py +++ b/reading_grouped_annotations.py @@ -167,7 +167,8 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, result = content['result'] d_notes[label] = str(result.get('score', 0)) - pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf" + # pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf" + pdf_path = content.get('pdf_path') if not os.path.exists(pdf_path): continue (base_img, _, _) = annotating.make_base_image(pdf_path)