Miscs changes (Interro22)

master
Sébastien Miquel 2026-03-17 14:19:43 +01:00
parent e703662d9e
commit ebc7a9aadc
10 changed files with 129 additions and 95 deletions

View File

@ -301,57 +301,61 @@ def color(score):
green = 150 * t green = 150 * t
return mcolors.to_hex((red/255, green/255, 0)) return mcolors.to_hex((red/255, green/255, 0))
def render_score_text(label, score, error, width_px, fontsize=18, from PIL import Image, ImageDraw, ImageFont
def render_score_text(label, score, error, width_px, fontsize=30,
bg_color=(255, 255, 255, 255), bg_color=(255, 255, 255, 255),
with_error=True, id=None): with_error=True, id=None):
# 2. Build highlight-text String & Properties
# Wrap colored parts in < > # 1. Build text segments: (text, color, is_bold)
score_str = f"{label} Note : <{score}>" parts = []
hl_props = [{"color": color(score), "fontweight": "bold"}] default_color = (0, 0, 0, 255)
prefix = f"{id} " if id else ""
prefix += f"{label} Note : "
parts.append((prefix, default_color, False))
parts.append((str(score), color(score), True))
if error and error != "null" and with_error: if error and error != "null" and with_error:
score_str += f" <{error}>" fontsize=18
hl_props.append({"color": "orange", "fontweight": "bold"}) parts.append((" ", default_color, False))
parts.append((str(error), "orange", True))
if id: # 2. Setup Image
score_str = f"{id} " + score_str height_px = 80 # roughly matches fig_height=0.8 at 100 dpi
img = Image.new("RGBA", (int(width_px), height_px), bg_color)
draw = ImageDraw.Draw(img)
# 3. Wrap Text # 3. Load Fonts
dpi = 100 try:
fig_width = width_px / dpi font_regular = ImageFont.truetype("DejaVuSans.ttf", fontsize)
chars_per_line = int(fig_width * 10) font_bold = ImageFont.truetype("DejaVuSans-Bold.ttf", fontsize)
except IOError:
# Fallback for systems without specific TTFs readily available
print("here")
try:
font_regular = ImageFont.load_default(size=fontsize) # Pillow >= 10.1.0
except TypeError:
print("there")
font_regular = ImageFont.load_default()
font_bold = font_regular
# fig_height = 0.4 + 0.2 # 4. Draw segments horizontally
fig_height = 0.8 x, y = int(width_px * 0.125), int(height_px * 0.2)
fig, ax = plt.subplots(figsize=(fig_width, fig_height), dpi=dpi) for text, text_color, is_bold in parts:
ax.axis('off') f = font_bold if is_bold else font_regular
draw.text((x, y), text, fill=text_color, font=f)
# Replaces plt.text # Advance X position by the width of the drawn text
ax_text(0.02, 0.98, score_str, bbox = draw.textbbox((x, y), text, font=f)
fontsize=fontsize, x = bbox[2]
verticalalignment='top',
horizontalalignment='left',
highlight_textprops=hl_props,
ax=ax)
buf = io.BytesIO() return img
# Issues with tight bbox_inches.
# plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0.05, transparent=True)
plt.savefig(buf, format='png', pad_inches=0.05, transparent=True)
plt.close(fig)
buf.seek(0)
img = Image.open(buf).convert("RGBA")
# Apply background
final_img = Image.new("RGBA", img.size, bg_color)
final_img.alpha_composite(img)
return final_img
def compose_label_image(base_img, label, result, hmin, def compose_label_image(base_img, label, result, hmin,
render_fn=render_latex_text, render_fn=render_real_latex_text,
draw_callback=None, draw_callback=None,
with_error=True, with_error=True,
with_empty=False, with_empty=False,
@ -391,8 +395,7 @@ def compose_label_image(base_img, label, result, hmin,
width = base_img.width // 2 width = base_img.width // 2
else: else:
width = base_img.width // 2 - 150 width = base_img.width // 2 - 150
img_score = render_score_text(label, score, error, width, img_score = render_score_text(label, score, error, width, with_error=with_error,
fontsize=18, with_error=with_error,
id=with_id) id=with_id)
header_elements.append({"type": "score", "img": img_score, "data": result}) header_elements.append({"type": "score", "img": img_score, "data": result})

View File

@ -10,16 +10,16 @@ from reportlab.pdfgen import canvas
import annotating import annotating
import annotating_with_checks import annotating_with_checks
from utils import natural_key
# Roughly 10 A4 pages at 100 DPI # Roughly 10 A4 pages at 100 DPI
# MAX_HEIGHT_PX = 11690 MAX_HEIGHT_PX = 18500 # Can be increased by 10%.
MAX_HEIGHT_PX = 17000 # Can be increased by 10%.
# MAX_HEIGHT_PX = 16000
def render_item(item): def render_item(item):
student_id, label, content = item student_id, label, content = item
pdf_path = content['pdf_path'] pdf_path = content['pdf_path']
if not os.path.exists(pdf_path): if not os.path.exists(pdf_path):
print("no pdf path") print("no pdf path for ", pdf_path)
return None return None
base_img, _, _ = annotating.make_base_image(pdf_path) base_img, _, _ = annotating.make_base_image(pdf_path)
@ -27,7 +27,6 @@ def render_item(item):
final_img, header_h = annotating.compose_label_image( final_img, header_h = annotating.compose_label_image(
base_img, label, content['result'], content['coordinates'][0], base_img, label, content['result'], content['coordinates'][0],
render_fn=annotating_with_checks.safe_render_latex,
draw_callback=cb_renderer.callback, draw_callback=cb_renderer.callback,
more_right=True, more_right=True,
with_id=student_id with_id=student_id
@ -137,10 +136,11 @@ def main():
for line in lines: for line in lines:
labels = [l.strip() for l in line.split(',') if l.strip()] labels = [l.strip() for l in line.split(',') if l.strip()]
safe_labels = [l.replace(":", "").strip() for l in line.split(',') if l.strip()]
if not labels: if not labels:
continue continue
prefix = os.path.commonprefix(labels).strip() prefix = os.path.commonprefix(safe_labels).strip()
if not prefix: if not prefix:
prefix = "Group" prefix = "Group"
@ -151,10 +151,7 @@ def main():
items_to_render.append((sid, lbl, lbls[lbl])) items_to_render.append((sid, lbl, lbls[lbl]))
# Sort structurally: by student id and label # Sort structurally: by student id and label
items_to_render.sort(key=lambda x: ( items_to_render.sort(key=lambda x: (natural_key(x[0]), natural_key(x[1])))
annotating_with_checks.natural_key(x[0]),
annotating_with_checks.natural_key(x[1])
))
# Render images in parallel using the pre-existing lock & render function # Render images in parallel using the pre-existing lock & render function
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
rendered = list(executor.map(render_item, items_to_render)) rendered = list(executor.map(render_item, items_to_render))

View File

@ -132,7 +132,6 @@ def process_student(args):
# Render using the shared engine # Render using the shared engine
final_img, header_h = annotating.compose_label_image( final_img, header_h = annotating.compose_label_image(
base_img, label, content['result'], content['coordinates'][0], base_img, label, content['result'], content['coordinates'][0],
render_fn=safe_render_latex,
draw_callback=cb_renderer.callback draw_callback=cb_renderer.callback
) )
if final_img == None: if final_img == None:

View File

@ -400,14 +400,22 @@ Here is a list of all possible lables. You need to answer with one of these :
types.Part.from_text(text=prompt) ])] types.Part.from_text(text=prompt) ])]
config = types.GenerateContentConfig(temperature=0.0) config = types.GenerateContentConfig(temperature=0.0)
new_label = call_gemini_with_retries(MODEL_ID_flash, contents, config).strip().strip('"\'') new_label = call_gemini_with_retries(MODEL_ID_flash, contents, config).strip().strip('"\'')
if new_label not in all_labels:
print(f"\t\tCopie{pid} returned an incorrect label {new_label} from an initial wrong label {label}. Ignoring")
res["error"] = "wrg-lbl:cldtfix"
return []
if new_label == label:
res["error"] =""
return []
new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{new_label}.pdf" new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{new_label}.pdf"
if new_pdf_path.exists(): if new_pdf_path.exists():
print(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.") print(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.")
res["error"] = f"wrong-label:{new_label}?" res["error"] = f"wrg-lbl:{new_label}?exists"
else: else:
print(f"\t\tCopie{pid} : moving wrong {label} to {new_label}.") print(f"\t\tCopie{pid} : moving wrong {label} to {new_label}.")
shutil.move(str(pdf_path), str(new_pdf_path)) shutil.move(str(pdf_path), str(new_pdf_path))
# Since we moved the file, this Copie/label should not be taken
# into account in the future, I think
idx = get_next_group_idx(INPUT_DIR, new_label) idx = get_next_group_idx(INPUT_DIR, new_label)
height = grouping.get_pdf_height(str(new_pdf_path)) height = grouping.get_pdf_height(str(new_pdf_path))
grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)], INPUT_DIR) grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)], INPUT_DIR)
@ -444,10 +452,14 @@ Here is a list of all possible labels. You need to answer with a list one of the
add_labels = [] add_labels = []
print(f"\tHandling additional-answer for {pid} {label}") print(f"\tHandling additional-answer for {pid} {label}")
some_present = False keep_error = False
for add_label in add_labels: for add_label in add_labels:
if add_label == label: if add_label == label:
continue continue
if add_label not in all_labels:
print(f"\t\t Inexistent label from additional-answer processing {pid} {label}. Ignoring")
keep_error = True
continue
new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{add_label}.pdf" new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{add_label}.pdf"
if not new_pdf_path.exists(): if not new_pdf_path.exists():
shutil.copy(str(pdf_path), str(new_pdf_path)) shutil.copy(str(pdf_path), str(new_pdf_path))
@ -459,11 +471,11 @@ Here is a list of all possible labels. You need to answer with a list one of the
new_tasks.append((str(Path(INPUT_DIR) / add_label / f"Group_{idx+1}.jpg"), new_tasks.append((str(Path(INPUT_DIR) / add_label / f"Group_{idx+1}.jpg"),
add_label, False)) add_label, False))
else: else:
some_present = True keep_error = True
print(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}") print(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}")
if not some_present: if not keep_error:
res["error"] = "" res["error"] = ""
return new_tasks return new_tasks
@ -578,6 +590,11 @@ def process_single_task(task_tuple):
with open(output_path, "w", encoding="utf-8") as f: with open(output_path, "w", encoding="utf-8") as f:
json.dump(results, f, indent=2) json.dump(results, f, indent=2)
# To track progress
completed_tasks.append((file_path, label))
with open(progress_path, "w", encoding="utf-8") as f:
json.dump(completed_tasks, f, indent=2)
except json.JSONDecodeError: except json.JSONDecodeError:
print(f"Error decoding JSON for {file_path}", file=sys.stderr) print(f"Error decoding JSON for {file_path}", file=sys.stderr)
except Exception as e: except Exception as e:
@ -587,28 +604,29 @@ def process_single_task(task_tuple):
errors_summary.append((error_msg, file_path)) errors_summary.append((error_msg, file_path))
return new_tasks return new_tasks
print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...") if __name__ == "__main__":
print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...")
with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor:
futures = {executor.submit(process_single_task, task): task for task in tasks_to_process} futures = {executor.submit(process_single_task, task): task for task in tasks_to_process}
# Process tasks as they complete, allowing dynamic task addition # Process tasks as they complete, allowing dynamic task addition
for future in concurrent.futures.as_completed(futures): for future in concurrent.futures.as_completed(futures):
try: try:
new_generated_tasks = future.result() new_generated_tasks = future.result()
if new_generated_tasks: if new_generated_tasks:
for new_task in new_generated_tasks: for new_task in new_generated_tasks:
futures[executor.submit(process_single_task, new_task)] = new_task futures[executor.submit(process_single_task, new_task)] = new_task
except Exception as e: except Exception as e:
print(f"Exception during task execution: {e}", file=sys.stderr) print(f"Exception during task execution: {e}", file=sys.stderr)
end_time = time.time() end_time = time.time()
print("Time elapsed : ", end_time - start_time) print("Time elapsed : ", end_time - start_time)
print("Requests to pro / flash : ", pro_count, flash_count) print("Requests to pro / flash : ", pro_count, flash_count)
if errors_summary: if errors_summary:
print("\n--- Summary of Exceptions ---", file=sys.stderr) print("\n--- Summary of Exceptions ---", file=sys.stderr)
for (err, file) in errors_summary: for (err, file) in errors_summary:
print(err, file=sys.stderr) print(err, file=sys.stderr)
escaped_path = shlex.quote(str(file)) escaped_path = shlex.quote(str(file))
print(f"Run : python correction.py {escaped_path}") print(f"Run : python correction.py {escaped_path}")

View File

@ -263,7 +263,11 @@ def process_copy_group(group_key, files):
print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...") print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...")
for attempt in range(2): attempt = -1
while True:
attempt += 1
if attempt > 0:
time.sleep(10 * attempt)
try: try:
contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels) contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels)
@ -278,15 +282,16 @@ def process_copy_group(group_key, files):
name = annota.name name = annota.name
if unknown: if unknown:
print(f"Error: {image_file.name} contained unknown labels: {unknown}") print(f"Error: {image_file.name} contained unknown labels: {unknown}")
if attempt == 0: print("Retrying request...")
print("Retrying request...") continue # Retry immediately
continue # Retry immediately
if name not in valid_names_set: if name not in valid_names_set:
print(f"Error: {image_file.name} returned unknown name : {name}") print(f"Error: {image_file.name} returned unknown name : {name}")
if attempt == 0: if attempt == 0:
print("Retrying request...") print("Retrying request...")
continue # Retry immediately continue # Retry immediately
else:
name = "Unknown"
# Save result # Save result
with open(output_json, "w", encoding="utf-8") as f: with open(output_json, "w", encoding="utf-8") as f:
@ -305,7 +310,7 @@ def process_copy_group(group_key, files):
# Run ThreadPool on GROUPS (Copies), not individual files # Run ThreadPool on GROUPS (Copies), not individual files
# Each thread handles one student's full exam copy sequentially # Each thread handles one student's full exam copy sequentially
with ThreadPoolExecutor(max_workers=8) as executor: with ThreadPoolExecutor(max_workers=12) as executor:
# Convert dict items to arguments for map # Convert dict items to arguments for map
# executor.map expects a function and an iterable. # executor.map expects a function and an iterable.
# We use a lambda or separate function to unpack the tuple if needed, # We use a lambda or separate function to unpack the tuple if needed,

View File

@ -68,7 +68,11 @@ def main():
dest_path = os.path.join(target_subdir, dest_folder_name) dest_path = os.path.join(target_subdir, dest_folder_name)
os.makedirs(dest_path, exist_ok=True) os.makedirs(dest_path, exist_ok=True)
print(f"Linking '{source_folder}' -> '{dest_path}'") common = os.path.commonpath([source_folder, dest_path])
s = os.path.relpath(source_folder, common)
d = os.path.relpath(dest_path, common)
print(f"Linking '{s}' -> '{d}'")
# Link configuration: (source_filename, dest_filename) # Link configuration: (source_filename, dest_filename)
links = [ links = [

View File

@ -5,6 +5,7 @@ import re
import queue import queue
import subprocess import subprocess
import tkinter as tk import tkinter as tk
from tkinter import messagebox
from pathlib import Path from pathlib import Path
from PIL import Image, ImageDraw, ImageFont, ImageTk from PIL import Image, ImageDraw, ImageFont, ImageTk
@ -112,22 +113,27 @@ def worker_thread(base_dir, files_to_process, all_labels):
nb_pages = json_schema["columns_per_file"][copie_part-1] nb_pages = json_schema["columns_per_file"][copie_part-1]
if json_path.exists(): if json_path.exists():
# Read strictly for visualization purposes
bb_list = []
json_name = ""
try: try:
# Read strictly for visualization purposes
with open(json_path, 'r') as f: with open(json_path, 'r') as f:
json_result = json.load(f) json_result = json.load(f)
bb_list = json_result.get("list", []) bb_list = json_result.get("list", [])
print(f"Buffering {img_path.name}...") json_name = json_result.get("name", "")
except Exception as e:
print(f"Warning: {json_path.name} is malformed! Loading blank. {e}")
# We do NOT skip; we continue so the user can fix it in the GUI
try:
print(f"Buffering {img_path.name}...")
pil_image = prepare_image(str(img_path), bb_list, all_labels, nb_pages) pil_image = prepare_image(str(img_path), bb_list, all_labels, nb_pages)
# Package metadata needed for final calculation later
metadata = { metadata = {
"copie": copie, "copie": copie,
"part": copie_part, "part": copie_part,
"schema": json_schema, "schema": json_schema,
"name": json_result.get("name", "") "name": json_name
} }
image_queue.put((pil_image, json_path, metadata)) image_queue.put((pil_image, json_path, metadata))
@ -222,7 +228,6 @@ class ImageViewer:
if self.is_viewing: if self.is_viewing:
print(f"Committing data for {self.current_json_path.name}...") print(f"Committing data for {self.current_json_path.name}...")
# --- CRITICAL CHANGE: Re-read JSON here to capture user edits ---
try: try:
with open(self.current_json_path, 'r') as f: with open(self.current_json_path, 'r') as f:
current_data = json.load(f) current_data = json.load(f)
@ -242,7 +247,11 @@ class ImageViewer:
self.accumulated_results["name"] = current_data["name"] self.accumulated_results["name"] = current_data["name"]
except Exception as e: except Exception as e:
print(f"Error re-reading/saving {self.current_json_path}: {e}") # Warn user and STOP (do not advance to next image)
msg = f"Error reading {self.current_json_path.name}:\n\n{e}\n\nPlease press 'e' to fix it, then press Enter again."
print(msg)
messagebox.showerror("JSON Error", msg)
return # Abort advancement
# Advance UI # Advance UI
self.is_viewing = False self.is_viewing = False

View File

@ -321,7 +321,7 @@ def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_laye
print(f" Saved regenerated Concat_F.jpg") print(f" Saved regenerated Concat_F.jpg")
from pathlib import Path from pathlib import Path
from utils import read_all_labelse from utils import read_all_labels
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) < 2: if len(sys.argv) < 2:
print("Usage: python reading_annotations.py <Dir>") print("Usage: python reading_annotations.py <Dir>")

View File

@ -7,7 +7,8 @@ from pathlib import Path
from PIL import Image from PIL import Image
import annotating import annotating
from annotating_with_checks import natural_key
from utils import natural_key
from reading_annotations import detect_checks_and_notes, has_significant_notes from reading_annotations import detect_checks_and_notes, has_significant_notes
def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, label_notes, all_labels): def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, label_notes, all_labels):

View File

@ -8,8 +8,6 @@ import shutil
from pathlib import Path from pathlib import Path
from collections import defaultdict from collections import defaultdict
# input_pdf = "Une Interro/Split.pdf"
def decode_json(pdf_file): def decode_json(pdf_file):
file_path = Path(pdf_file) file_path = Path(pdf_file)
with open(file_path.with_suffix(".json"), "r") as f: with open(file_path.with_suffix(".json"), "r") as f: