diff --git a/annotating.py b/annotating.py index e5e1fa9..dea9370 100644 --- a/annotating.py +++ b/annotating.py @@ -124,6 +124,11 @@ def normalize_mathtext(text): text = text.replace("\\\\", "\\") text = text.replace("\\llbracket", "[\\![") text = text.replace("\\rrbracket", "]\\!]") + text = text.replace("\\R", "\\mathbb{R}") + text = text.replace("\\N", "\\mathbb{N}") + text = text.replace("\\Z", "\\mathbb{Z}") + text = text.replace("\\C", "\\mathbb{C}") + text = text.replace("\\Q", "\\mathbb{Q}") # Sometimes, Gemini doesn't escape enough. In the json, you should have \\f text = text.replace('\f', r'\f') text = re.sub('\u0010', "", text) @@ -214,8 +219,10 @@ def render_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_lines=N final_img.alpha_composite(img) return final_img +import matplotlib.colors as mcolors + def render_score_text(label, score, error, width_px, fontsize=18, - bg_color=(255, 255, 255, 255) + bg_color=(255, 255, 255, 255), with_error=True): # 1. Calculate Color Gradient (0.0=DarkRed -> 4.0=Green) # Clamp score between 0 and 4 @@ -303,7 +310,7 @@ def compose_label_image(base_img, label, result, hmin, header_elements = [] img_score = render_score_text(label, score, error, base_img.width // 2, - fontsize=18, with_error) + fontsize=18, with_error=with_error) header_elements.append({"type": "score", "img": img_score, "data": result}) # Global Feedbacks @@ -466,16 +473,22 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite): def process_correction(root_dir, data, all_labels, overwrite=False): - with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: - # Create a list of futures - futures = [] - for student_id, labels in sorted(data.items()): - futures.append( - executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite) - ) + # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + # # Create a list of futures + # futures = [] + # for student_id, labels in sorted(data.items()): + # futures.append( + # executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite) + # ) - # Wait for all threads to complete - concurrent.futures.wait(futures) + # # Wait for all threads to complete + # concurrent.futures.wait(futures) + + # Ne pas thread cette applications + # 1. Il faut protéger les appels à matplotlib + # 2. tu vas perdre les erreurs + for student_id, labels in sorted(data.items()): + process_student(student_id, labels, root_dir, all_labels, overwrite) import argparse if __name__ == "__main__": diff --git a/correction.py b/correction.py index 793bf35..7d31f64 100644 --- a/correction.py +++ b/correction.py @@ -254,9 +254,9 @@ def process_single_task(task_tuple): d_data = {l[0]: (l[1], l[2], l[3]) for l in group_data} total_height = group_data[-1][2] use_flash = n >= 4 or total_height <= 500 - if not use_flash and limit is not None: + if not use_flash: with pro_lock: - if pro_count < limit: + if limit is None or pro_count < limit: pro_count += 1 else: # Limit reached, force switch to Flash @@ -268,9 +268,9 @@ def process_single_task(task_tuple): try: contents, config = generate_request(file_path, label) if use_flash: - print(f"Asking Flash Gemini: {label} {file_path}") + print(f"Asking Gemini Flash: {label} {group_name}") else: - print(f"Asking Gemini: {label} {file_path}") + print(f"Asking Gemini Pro : {label} {group_name}") full_response_text = "" # Assuming client is thread-safe (usually is). @@ -286,18 +286,13 @@ def process_single_task(task_tuple): # Parse JSON json_data = json.loads(full_response_text) - if use_flash: - print(f"Gemini Flash answered for {file_path}") - else: - print(f"Gemini answered for {file_path}") - # print("Debug : ", json_data) # Ensure consistency of answer placements for p in json_data: pid = p["id"] res = p["result"] if res["error"] != "": - print("Error :", res["error"], "for Copie", pid, label, group_name) + print("\tError :", res["error"], "for Copie", pid, label, group_name) for f in res["feedback"]: b = f["box_2d"] if b: diff --git a/gemini_for_labels.py b/gemini_for_labels.py index c87895a..5e77b64 100644 --- a/gemini_for_labels.py +++ b/gemini_for_labels.py @@ -180,33 +180,41 @@ def natural_key(text): for path_str in args.input_paths: input_arg = Path(path_str) + target_files = [] + # 1. Determine which files to process if input_arg.is_file(): - INPUT_DIR = input_arg.parent + target_files = [input_arg] + elif input_arg.is_dir(): + target_files = list(input_arg.glob("Copie*.pdf")) + if not target_files: + print(f"Warning: No Copie*.pdf files found in {input_arg}") + else: + print(f"Error: {input_arg} is not a valid file or directory.") + continue + + # 2. Run the logic for all collected files + for target_file in target_files: + INPUT_DIR = target_file.parent CUTLEFT_DIR = INPUT_DIR / 'Cutleft' # Matches stem_01.jpg, stem_02.jpg, etc. - found_files = sorted(list(CUTLEFT_DIR.glob(f"{input_arg.stem}_*.jpg")), - key=natural_key) + found_files = sorted( + CUTLEFT_DIR.glob(f"{target_file.stem}_*.jpg"), + key=natural_key + ) if found_files: image_files.extend(found_files) else: - print(f"Warning: No variants found for {input_arg.stem} in {CUTLEFT_DIR}") - - elif input_arg.is_dir(): - INPUT_DIR = input_arg - CUTLEFT_DIR = INPUT_DIR / 'Cutleft' - image_files.extend(sorted(list(CUTLEFT_DIR.glob("*.jpg")), key=natural_key)) - - else: - print(f"Error: {input_arg} is not a valid file or directory.") + print(f"Warning: No variants found for {target_file.stem} in {CUTLEFT_DIR}") labels_txt = (INPUT_DIR / "labels").read_text() valid_labels_set = set(line.strip() for line in labels_txt.splitlines() if line.strip()) names_txt = (INPUT_DIR / "names").read_text() valid_names_set = set(line.strip() for line in names_txt.splitlines() if line.strip()) valid_names_set.add("Unknown") +valid_names_set.add("Continued") client = genai.Client(api_key=api_key) @@ -256,7 +264,7 @@ def process_copy_group(group_key, files): print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...") - for attempt in range(2) + for attempt in range(2): try: contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels) diff --git a/reading_annotations.py b/reading_annotations.py index 74303ed..2a15f79 100644 --- a/reading_annotations.py +++ b/reading_annotations.py @@ -16,7 +16,12 @@ def detect_checks_and_notes(output_dir): actions: List of dicts {type, label, ...} for checked boxes notes_img: RGBA image of manual notes (checks masked out) """ - pdf_path = os.path.join(output_dir, "Concat_annotated.pdf") + + names = ["Concat_annotated.pdf", "Concat_a.pdf"] + for name in names: + pdf_path = os.path.join(output_dir, name) + if os.path.exists(pdf_path): + break # ref_path = os.path.join(output_dir, "Reference.png") ref_path = os.path.join(output_dir, "Reference.jpg") json_path = os.path.join(output_dir, "checkboxes.json")