Small fixes; make annotating non-threaded

master
Sébastien Miquel 2026-02-28 14:02:56 +01:00
parent be390cfbb1
commit e610c80a69
4 changed files with 56 additions and 35 deletions

View File

@ -124,6 +124,11 @@ def normalize_mathtext(text):
text = text.replace("\\\\", "\\") text = text.replace("\\\\", "\\")
text = text.replace("\\llbracket", "[\\![") text = text.replace("\\llbracket", "[\\![")
text = text.replace("\\rrbracket", "]\\!]") text = text.replace("\\rrbracket", "]\\!]")
text = text.replace("\\R", "\\mathbb{R}")
text = text.replace("\\N", "\\mathbb{N}")
text = text.replace("\\Z", "\\mathbb{Z}")
text = text.replace("\\C", "\\mathbb{C}")
text = text.replace("\\Q", "\\mathbb{Q}")
# Sometimes, Gemini doesn't escape enough. In the json, you should have \\f # Sometimes, Gemini doesn't escape enough. In the json, you should have \\f
text = text.replace('\f', r'\f') text = text.replace('\f', r'\f')
text = re.sub('\u0010', "", text) text = re.sub('\u0010', "", text)
@ -214,8 +219,10 @@ def render_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_lines=N
final_img.alpha_composite(img) final_img.alpha_composite(img)
return final_img return final_img
import matplotlib.colors as mcolors
def render_score_text(label, score, error, width_px, fontsize=18, def render_score_text(label, score, error, width_px, fontsize=18,
bg_color=(255, 255, 255, 255) bg_color=(255, 255, 255, 255),
with_error=True): with_error=True):
# 1. Calculate Color Gradient (0.0=DarkRed -> 4.0=Green) # 1. Calculate Color Gradient (0.0=DarkRed -> 4.0=Green)
# Clamp score between 0 and 4 # Clamp score between 0 and 4
@ -303,7 +310,7 @@ def compose_label_image(base_img, label, result, hmin,
header_elements = [] header_elements = []
img_score = render_score_text(label, score, error, base_img.width // 2, img_score = render_score_text(label, score, error, base_img.width // 2,
fontsize=18, with_error) fontsize=18, with_error=with_error)
header_elements.append({"type": "score", "img": img_score, "data": result}) header_elements.append({"type": "score", "img": img_score, "data": result})
# Global Feedbacks # Global Feedbacks
@ -466,16 +473,22 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite):
def process_correction(root_dir, data, all_labels, overwrite=False): def process_correction(root_dir, data, all_labels, overwrite=False):
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
# Create a list of futures # # Create a list of futures
futures = [] # futures = []
for student_id, labels in sorted(data.items()): # for student_id, labels in sorted(data.items()):
futures.append( # futures.append(
executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite) # executor.submit(process_student, student_id, labels, root_dir, all_labels, overwrite)
) # )
# Wait for all threads to complete # # Wait for all threads to complete
concurrent.futures.wait(futures) # concurrent.futures.wait(futures)
# Ne pas threader cette application
# 1. Il faut protéger les appels à matplotlib
# 2. tu vas perdre les erreurs
for student_id, labels in sorted(data.items()):
process_student(student_id, labels, root_dir, all_labels, overwrite)
import argparse import argparse
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -254,9 +254,9 @@ def process_single_task(task_tuple):
d_data = {l[0]: (l[1], l[2], l[3]) for l in group_data} d_data = {l[0]: (l[1], l[2], l[3]) for l in group_data}
total_height = group_data[-1][2] total_height = group_data[-1][2]
use_flash = n >= 4 or total_height <= 500 use_flash = n >= 4 or total_height <= 500
if not use_flash and limit is not None: if not use_flash:
with pro_lock: with pro_lock:
if pro_count < limit: if limit is None or pro_count < limit:
pro_count += 1 pro_count += 1
else: else:
# Limit reached, force switch to Flash # Limit reached, force switch to Flash
@ -268,9 +268,9 @@ def process_single_task(task_tuple):
try: try:
contents, config = generate_request(file_path, label) contents, config = generate_request(file_path, label)
if use_flash: if use_flash:
print(f"Asking Flash Gemini: {label} {file_path}") print(f"Asking Gemini Flash: {label} {group_name}")
else: else:
print(f"Asking Gemini: {label} {file_path}") print(f"Asking Gemini Pro : {label} {group_name}")
full_response_text = "" full_response_text = ""
# Assuming client is thread-safe (usually is). # Assuming client is thread-safe (usually is).
@ -286,18 +286,13 @@ def process_single_task(task_tuple):
# Parse JSON # Parse JSON
json_data = json.loads(full_response_text) json_data = json.loads(full_response_text)
if use_flash:
print(f"Gemini Flash answered for {file_path}")
else:
print(f"Gemini answered for {file_path}")
# print("Debug : ", json_data) # print("Debug : ", json_data)
# Ensure consistency of answer placements # Ensure consistency of answer placements
for p in json_data: for p in json_data:
pid = p["id"] pid = p["id"]
res = p["result"] res = p["result"]
if res["error"] != "": if res["error"] != "":
print("Error :", res["error"], "for Copie", pid, label, group_name) print("\tError :", res["error"], "for Copie", pid, label, group_name)
for f in res["feedback"]: for f in res["feedback"]:
b = f["box_2d"] b = f["box_2d"]
if b: if b:

View File

@ -180,33 +180,41 @@ def natural_key(text):
for path_str in args.input_paths: for path_str in args.input_paths:
input_arg = Path(path_str) input_arg = Path(path_str)
target_files = []
# 1. Determine which files to process
if input_arg.is_file(): if input_arg.is_file():
INPUT_DIR = input_arg.parent target_files = [input_arg]
elif input_arg.is_dir():
target_files = list(input_arg.glob("Copie*.pdf"))
if not target_files:
print(f"Warning: No Copie*.pdf files found in {input_arg}")
else:
print(f"Error: {input_arg} is not a valid file or directory.")
continue
# 2. Run the logic for all collected files
for target_file in target_files:
INPUT_DIR = target_file.parent
CUTLEFT_DIR = INPUT_DIR / 'Cutleft' CUTLEFT_DIR = INPUT_DIR / 'Cutleft'
# Matches stem_01.jpg, stem_02.jpg, etc. # Matches stem_01.jpg, stem_02.jpg, etc.
found_files = sorted(list(CUTLEFT_DIR.glob(f"{input_arg.stem}_*.jpg")), found_files = sorted(
key=natural_key) CUTLEFT_DIR.glob(f"{target_file.stem}_*.jpg"),
key=natural_key
)
if found_files: if found_files:
image_files.extend(found_files) image_files.extend(found_files)
else: else:
print(f"Warning: No variants found for {input_arg.stem} in {CUTLEFT_DIR}") print(f"Warning: No variants found for {target_file.stem} in {CUTLEFT_DIR}")
elif input_arg.is_dir():
INPUT_DIR = input_arg
CUTLEFT_DIR = INPUT_DIR / 'Cutleft'
image_files.extend(sorted(list(CUTLEFT_DIR.glob("*.jpg")), key=natural_key))
else:
print(f"Error: {input_arg} is not a valid file or directory.")
labels_txt = (INPUT_DIR / "labels").read_text() labels_txt = (INPUT_DIR / "labels").read_text()
valid_labels_set = set(line.strip() for line in labels_txt.splitlines() if line.strip()) valid_labels_set = set(line.strip() for line in labels_txt.splitlines() if line.strip())
names_txt = (INPUT_DIR / "names").read_text() names_txt = (INPUT_DIR / "names").read_text()
valid_names_set = set(line.strip() for line in names_txt.splitlines() if line.strip()) valid_names_set = set(line.strip() for line in names_txt.splitlines() if line.strip())
valid_names_set.add("Unknown") valid_names_set.add("Unknown")
valid_names_set.add("Continued")
client = genai.Client(api_key=api_key) client = genai.Client(api_key=api_key)
@ -256,7 +264,7 @@ def process_copy_group(group_key, files):
print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...") print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...")
for attempt in range(2) for attempt in range(2):
try: try:
contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels) contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels)

View File

@ -16,7 +16,12 @@ def detect_checks_and_notes(output_dir):
actions: List of dicts {type, label, ...} for checked boxes actions: List of dicts {type, label, ...} for checked boxes
notes_img: RGBA image of manual notes (checks masked out) notes_img: RGBA image of manual notes (checks masked out)
""" """
pdf_path = os.path.join(output_dir, "Concat_annotated.pdf")
names = ["Concat_annotated.pdf", "Concat_a.pdf"]
for name in names:
pdf_path = os.path.join(output_dir, name)
if os.path.exists(pdf_path):
break
# ref_path = os.path.join(output_dir, "Reference.png") # ref_path = os.path.join(output_dir, "Reference.png")
ref_path = os.path.join(output_dir, "Reference.jpg") ref_path = os.path.join(output_dir, "Reference.jpg")
json_path = os.path.join(output_dir, "checkboxes.json") json_path = os.path.join(output_dir, "checkboxes.json")