diff --git a/annotating.py b/annotating.py index d5ce46f..babb15c 100644 --- a/annotating.py +++ b/annotating.py @@ -301,57 +301,61 @@ def color(score): green = 150 * t return mcolors.to_hex((red/255, green/255, 0)) -def render_score_text(label, score, error, width_px, fontsize=18, +from PIL import Image, ImageDraw, ImageFont + +def render_score_text(label, score, error, width_px, fontsize=30, bg_color=(255, 255, 255, 255), with_error=True, id=None): - # 2. Build highlight-text String & Properties - # Wrap colored parts in < > - score_str = f"{label} Note : <{score}>" - hl_props = [{"color": color(score), "fontweight": "bold"}] + + # 1. Build text segments: (text, color, is_bold) + parts = [] + default_color = (0, 0, 0, 255) + + prefix = f"{id} " if id else "" + prefix += f"{label} Note : " + parts.append((prefix, default_color, False)) + + parts.append((str(score), color(score), True)) if error and error != "null" and with_error: - score_str += f" <{error}>" - hl_props.append({"color": "orange", "fontweight": "bold"}) + fontsize=18 + parts.append((" ", default_color, False)) + parts.append((str(error), "orange", True)) - if id: - score_str = f"{id} " + score_str + # 2. Setup Image + height_px = 80 # roughly matches fig_height=0.8 at 100 dpi + img = Image.new("RGBA", (int(width_px), height_px), bg_color) + draw = ImageDraw.Draw(img) - # 3. Wrap Text - dpi = 100 - fig_width = width_px / dpi - chars_per_line = int(fig_width * 10) + # 3. Load Fonts + try: + font_regular = ImageFont.truetype("DejaVuSans.ttf", fontsize) + font_bold = ImageFont.truetype("DejaVuSans-Bold.ttf", fontsize) + except IOError: + # Fallback for systems without specific TTFs readily available + print("here") + try: + font_regular = ImageFont.load_default(size=fontsize) # Pillow >= 10.1.0 + except TypeError: + print("there") + font_regular = ImageFont.load_default() + font_bold = font_regular - # fig_height = 0.4 + 0.2 - fig_height = 0.8 + # 4. Draw segments horizontally + x, y = int(width_px * 0.125), int(height_px * 0.2) - fig, ax = plt.subplots(figsize=(fig_width, fig_height), dpi=dpi) - ax.axis('off') + for text, text_color, is_bold in parts: + f = font_bold if is_bold else font_regular + draw.text((x, y), text, fill=text_color, font=f) - # Replaces plt.text - ax_text(0.02, 0.98, score_str, - fontsize=fontsize, - verticalalignment='top', - horizontalalignment='left', - highlight_textprops=hl_props, - ax=ax) + # Advance X position by the width of the drawn text + bbox = draw.textbbox((x, y), text, font=f) + x = bbox[2] - buf = io.BytesIO() - # Issues with tight bbox_inches. - # plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0.05, transparent=True) - plt.savefig(buf, format='png', pad_inches=0.05, transparent=True) - plt.close(fig) - buf.seek(0) - - img = Image.open(buf).convert("RGBA") - - # Apply background - final_img = Image.new("RGBA", img.size, bg_color) - final_img.alpha_composite(img) - - return final_img + return img def compose_label_image(base_img, label, result, hmin, - render_fn=render_latex_text, + render_fn=render_real_latex_text, draw_callback=None, with_error=True, with_empty=False, @@ -391,8 +395,7 @@ def compose_label_image(base_img, label, result, hmin, width = base_img.width // 2 else: width = base_img.width // 2 - 150 - img_score = render_score_text(label, score, error, width, - fontsize=18, with_error=with_error, + img_score = render_score_text(label, score, error, width, with_error=with_error, id=with_id) header_elements.append({"type": "score", "img": img_score, "data": result}) diff --git a/annotating_by_label.py b/annotating_by_label.py index 43bc9be..85bd491 100644 --- a/annotating_by_label.py +++ b/annotating_by_label.py @@ -10,16 +10,16 @@ from reportlab.pdfgen import canvas import annotating import annotating_with_checks +from utils import natural_key + # Roughly 10 A4 pages at 100 DPI -# MAX_HEIGHT_PX = 11690 -MAX_HEIGHT_PX = 17000 # Can be increased by 10%. -# MAX_HEIGHT_PX = 16000 +MAX_HEIGHT_PX = 18500 # Can be increased by 10%. def render_item(item): student_id, label, content = item pdf_path = content['pdf_path'] if not os.path.exists(pdf_path): - print("no pdf path") + print("no pdf path for ", pdf_path) return None base_img, _, _ = annotating.make_base_image(pdf_path) @@ -27,7 +27,6 @@ def render_item(item): final_img, header_h = annotating.compose_label_image( base_img, label, content['result'], content['coordinates'][0], - render_fn=annotating_with_checks.safe_render_latex, draw_callback=cb_renderer.callback, more_right=True, with_id=student_id @@ -137,10 +136,11 @@ def main(): for line in lines: labels = [l.strip() for l in line.split(',') if l.strip()] + safe_labels = [l.replace(":", "").strip() for l in line.split(',') if l.strip()] if not labels: continue - prefix = os.path.commonprefix(labels).strip() + prefix = os.path.commonprefix(safe_labels).strip() if not prefix: prefix = "Group" @@ -151,10 +151,7 @@ def main(): items_to_render.append((sid, lbl, lbls[lbl])) # Sort structurally: by student id and label - items_to_render.sort(key=lambda x: ( - annotating_with_checks.natural_key(x[0]), - annotating_with_checks.natural_key(x[1]) - )) + items_to_render.sort(key=lambda x: (natural_key(x[0]), natural_key(x[1]))) # Render images in parallel using the pre-existing lock & render function with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: rendered = list(executor.map(render_item, items_to_render)) diff --git a/annotating_with_checks.py b/annotating_with_checks.py index 56b1cae..d747baa 100644 --- a/annotating_with_checks.py +++ b/annotating_with_checks.py @@ -132,7 +132,6 @@ def process_student(args): # Render using the shared engine final_img, header_h = annotating.compose_label_image( base_img, label, content['result'], content['coordinates'][0], - render_fn=safe_render_latex, draw_callback=cb_renderer.callback ) if final_img == None: diff --git a/correction.py b/correction.py index bf82ec8..cf374a7 100644 --- a/correction.py +++ b/correction.py @@ -400,14 +400,22 @@ Here is a list of all possible lables. You need to answer with one of these : types.Part.from_text(text=prompt) ])] config = types.GenerateContentConfig(temperature=0.0) new_label = call_gemini_with_retries(MODEL_ID_flash, contents, config).strip().strip('"\'') - + if new_label not in all_labels: + print(f"\t\tCopie{pid} returned an incorrect label {new_label} from an initial wrong label {label}. Ignoring") + res["error"] = "wrg-lbl:cldtfix" + return [] + if new_label == label: + res["error"] ="" + return [] new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{new_label}.pdf" if new_pdf_path.exists(): print(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.") - res["error"] = f"wrong-label:{new_label}?" + res["error"] = f"wrg-lbl:{new_label}?exists" else: print(f"\t\tCopie{pid} : moving wrong {label} to {new_label}.") shutil.move(str(pdf_path), str(new_pdf_path)) + # Since we moved the file, this Copie/label should not be taken + # into account in the future, I think idx = get_next_group_idx(INPUT_DIR, new_label) height = grouping.get_pdf_height(str(new_pdf_path)) grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)], INPUT_DIR) @@ -444,10 +452,14 @@ Here is a list of all possible labels. You need to answer with a list one of the add_labels = [] print(f"\tHandling additional-answer for {pid} {label}") - some_present = False + keep_error = False for add_label in add_labels: if add_label == label: continue + if add_label not in all_labels: + print(f"\t\t Inexistent label from additional-answer processing {pid} {label}. Ignoring") + keep_error = True + continue new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{add_label}.pdf" if not new_pdf_path.exists(): shutil.copy(str(pdf_path), str(new_pdf_path)) @@ -459,11 +471,11 @@ Here is a list of all possible labels. You need to answer with a list one of the new_tasks.append((str(Path(INPUT_DIR) / add_label / f"Group_{idx+1}.jpg"), add_label, False)) else: - some_present = True + keep_error = True print(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}") - if not some_present: + if not keep_error: res["error"] = "" return new_tasks @@ -578,6 +590,11 @@ def process_single_task(task_tuple): with open(output_path, "w", encoding="utf-8") as f: json.dump(results, f, indent=2) + # To track progress + completed_tasks.append((file_path, label)) + with open(progress_path, "w", encoding="utf-8") as f: + json.dump(completed_tasks, f, indent=2) + except json.JSONDecodeError: print(f"Error decoding JSON for {file_path}", file=sys.stderr) except Exception as e: @@ -587,28 +604,29 @@ def process_single_task(task_tuple): errors_summary.append((error_msg, file_path)) return new_tasks -print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...") +if __name__ == "__main__": + print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...") -with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor: - futures = {executor.submit(process_single_task, task): task for task in tasks_to_process} + with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor: + futures = {executor.submit(process_single_task, task): task for task in tasks_to_process} - # Process tasks as they complete, allowing dynamic task addition - for future in concurrent.futures.as_completed(futures): - try: - new_generated_tasks = future.result() - if new_generated_tasks: - for new_task in new_generated_tasks: - futures[executor.submit(process_single_task, new_task)] = new_task - except Exception as e: - print(f"Exception during task execution: {e}", file=sys.stderr) + # Process tasks as they complete, allowing dynamic task addition + for future in concurrent.futures.as_completed(futures): + try: + new_generated_tasks = future.result() + if new_generated_tasks: + for new_task in new_generated_tasks: + futures[executor.submit(process_single_task, new_task)] = new_task + except Exception as e: + print(f"Exception during task execution: {e}", file=sys.stderr) -end_time = time.time() -print("Time elapsed : ", end_time - start_time) -print("Requests to pro / flash : ", pro_count, flash_count) -if errors_summary: - print("\n--- Summary of Exceptions ---", file=sys.stderr) - for (err, file) in errors_summary: - print(err, file=sys.stderr) - escaped_path = shlex.quote(str(file)) - print(f"Run : python correction.py {escaped_path}") + end_time = time.time() + print("Time elapsed : ", end_time - start_time) + print("Requests to pro / flash : ", pro_count, flash_count) + if errors_summary: + print("\n--- Summary of Exceptions ---", file=sys.stderr) + for (err, file) in errors_summary: + print(err, file=sys.stderr) + escaped_path = shlex.quote(str(file)) + print(f"Run : python correction.py {escaped_path}") diff --git a/gemini_for_labels.py b/gemini_for_labels.py index 6eefb4d..ffab75e 100644 --- a/gemini_for_labels.py +++ b/gemini_for_labels.py @@ -263,7 +263,11 @@ def process_copy_group(group_key, files): print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...") - for attempt in range(2): + attempt = -1 + while True: + attempt += 1 + if attempt > 0: + time.sleep(10 * attempt) try: contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels) @@ -278,15 +282,16 @@ def process_copy_group(group_key, files): name = annota.name if unknown: print(f"Error: {image_file.name} contained unknown labels: {unknown}") - if attempt == 0: - print("Retrying request...") - continue # Retry immediately + print("Retrying request...") + continue # Retry immediately if name not in valid_names_set: print(f"Error: {image_file.name} returned unknown name : {name}") if attempt == 0: print("Retrying request...") continue # Retry immediately + else: + name = "Unknown" # Save result with open(output_json, "w", encoding="utf-8") as f: @@ -305,7 +310,7 @@ def process_copy_group(group_key, files): # Run ThreadPool on GROUPS (Copies), not individual files # Each thread handles one student's full exam copy sequentially -with ThreadPoolExecutor(max_workers=8) as executor: +with ThreadPoolExecutor(max_workers=12) as executor: # Convert dict items to arguments for map # executor.map expects a function and an iterable. # We use a lambda or separate function to unpack the tuple if needed, diff --git a/giving_names.py b/giving_names.py index 342ad22..98a4654 100644 --- a/giving_names.py +++ b/giving_names.py @@ -68,7 +68,11 @@ def main(): dest_path = os.path.join(target_subdir, dest_folder_name) os.makedirs(dest_path, exist_ok=True) - print(f"Linking '{source_folder}' -> '{dest_path}'") + common = os.path.commonpath([source_folder, dest_path]) + s = os.path.relpath(source_folder, common) + d = os.path.relpath(dest_path, common) + + print(f"Linking '{s}' -> '{d}'") # Link configuration: (source_filename, dest_filename) links = [ diff --git a/plotting.py b/plotting.py index c29e9ba..35d52ab 100644 --- a/plotting.py +++ b/plotting.py @@ -5,6 +5,7 @@ import re import queue import subprocess import tkinter as tk +from tkinter import messagebox from pathlib import Path from PIL import Image, ImageDraw, ImageFont, ImageTk @@ -112,22 +113,27 @@ def worker_thread(base_dir, files_to_process, all_labels): nb_pages = json_schema["columns_per_file"][copie_part-1] if json_path.exists(): + # Read strictly for visualization purposes + bb_list = [] + json_name = "" try: - # Read strictly for visualization purposes with open(json_path, 'r') as f: json_result = json.load(f) - bb_list = json_result.get("list", []) - print(f"Buffering {img_path.name}...") + json_name = json_result.get("name", "") + except Exception as e: + print(f"Warning: {json_path.name} is malformed! Loading blank. {e}") + # We do NOT skip; we continue so the user can fix it in the GUI + try: + print(f"Buffering {img_path.name}...") pil_image = prepare_image(str(img_path), bb_list, all_labels, nb_pages) - # Package metadata needed for final calculation later metadata = { "copie": copie, "part": copie_part, "schema": json_schema, - "name": json_result.get("name", "") + "name": json_name } image_queue.put((pil_image, json_path, metadata)) @@ -222,7 +228,6 @@ class ImageViewer: if self.is_viewing: print(f"Committing data for {self.current_json_path.name}...") - # --- CRITICAL CHANGE: Re-read JSON here to capture user edits --- try: with open(self.current_json_path, 'r') as f: current_data = json.load(f) @@ -242,7 +247,11 @@ class ImageViewer: self.accumulated_results["name"] = current_data["name"] except Exception as e: - print(f"Error re-reading/saving {self.current_json_path}: {e}") + # Warn user and STOP (do not advance to next image) + msg = f"Error reading {self.current_json_path.name}:\n\n{e}\n\nPlease press 'e' to fix it, then press Enter again." + print(msg) + messagebox.showerror("JSON Error", msg) + return # Abort advancement # Advance UI self.is_viewing = False diff --git a/reading_annotations.py b/reading_annotations.py index cc41380..dd0e326 100644 --- a/reading_annotations.py +++ b/reading_annotations.py @@ -321,7 +321,7 @@ def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_laye print(f" Saved regenerated Concat_F.jpg") from pathlib import Path -from utils import read_all_labelse +from utils import read_all_labels if __name__ == "__main__": if len(sys.argv) < 2: print("Usage: python reading_annotations.py