"""Annotate corrected exam copies.

For every student copy, the per-label PDF is rasterised, the correction
result (score, global feedback, located feedback boxes) is drawn around it,
and the per-label images are concatenated into a single ``Concat.jpg``.
"""

import argparse
import concurrent.futures
import glob
import io
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import textwrap
from pathlib import Path

import matplotlib

matplotlib.use('Agg')  # Force headless rendering (kept before the pyplot import)

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import PIL.ImageOps
from highlight_text import ax_text
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont

import utils
from utils import natural_key

# plt.rcParams.update({ "text.usetex": True,
#     "text.latex.preamble": r"\usepackage{bbold}"})

# Width (px) of the white margin added to the left of the copy.
MARGIN_LEFT = 300
# Width (px) requested when rendering a located annotation text.
ANNOT_WIDTH = 600

A4_WIDTH_200DPI = 1654
TARGET_MIN_WIDTH = int(A4_WIDTH_200DPI * 0.9)  # 1488 pixels


def _find_coordinates(root_dir, label, student_id):
    """Look up a student's entry in the group JSON files of ``label``.

    Every ``*.json`` file under ``<root_dir>/Par label/<label>`` is scanned for
    an entry whose first field equals ``student_id``. JSON files that cannot be
    decoded are skipped.

    Returns:
        ``(coordinates, width, height)`` where ``coordinates`` is the pair
        ``(entry[1], entry[2])`` and ``width``/``height`` are the size of the
        ``.jpg`` image sitting next to the JSON file, or ``(None, None, None)``
        when no entry matches.
    """
    label_dir = Path(root_dir) / "Par label" / label
    for json_file in glob.glob(os.path.join(label_dir, "*.json")):
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                coord_list = json.load(f)
        except json.JSONDecodeError:
            continue
        # Format: [["id", x, y, width_r, "label"], ...]
        for entry in coord_list:
            if entry[0] == student_id:
                img_path = os.path.splitext(json_file)[0] + ".jpg"
                with Image.open(img_path) as img:
                    width, height = img.size
                return (entry[1], entry[2]), width, height
    return None, None, None


def _scale_boxes(feedback, width, height, student_id, label):
    """Convert the 0-1000 normalised ``box_2d`` of each feedback to pixels, in place.

    Boxes are ``[ymin, xmin, ymax, xmax]``: indices 0 and 2 are scaled by the
    image height, indices 1 and 3 by its width. When the image size is unknown
    the boxes are left untouched and a warning is printed instead of crashing.
    """
    boxed = [el for el in feedback if el.get("box_2d")]
    if not boxed:
        return
    if width is None or height is None:
        print(f"Warning: no group image found for Copie{student_id} / {label}; "
              "box_2d coordinates left normalized.")
        return
    for el in boxed:
        box = el["box_2d"]
        box[0] = (box[0] * height) // 1000
        box[2] = (box[2] * height) // 1000
        box[1] = (box[1] * width) // 1000
        box[3] = (box[3] * width) // 1000


def _resolve_redo_pdf(root_dir, sid, lbl):
    """Return ``<lbl>.pdf`` or, failing that, ``<lbl>_new.pdf`` for a copy, else None."""
    copie_dir = Path(root_dir) / "Copies" / f"Copie{sid}"
    for candidate in (copie_dir / f"{lbl}.pdf", copie_dir / f"{lbl}_new.pdf"):
        if candidate.exists():
            return candidate
    return None


def _add_redo_placeholders(result_data, root_dir, refaire_list):
    """Insert "non traité" placeholder results for every (copy, labels) to redo."""
    for copie_name, labels_to_redo in refaire_list:
        sid = copie_name.replace("Copie", "")  # Extract "01" from "Copie01"
        # A student that was never corrected still gets an (empty) entry.
        student_entry = result_data.setdefault(sid, {})
        for lbl in labels_to_redo or []:
            pdf_path = _resolve_redo_pdf(root_dir, sid, lbl)
            if pdf_path is None:
                print("Debug : asked to refaire", sid, lbl, "but pdf absent")
                continue
            student_entry[lbl] = {
                "pdf_path": pdf_path,
                "result": {
                    "score": 0.0,
                    "confidence": 1.0,
                    "feedback": [],
                    "error": "non traité",
                },
                "coordinates": (0, 0),
            }


# Results is : Copie id -> label -> {pdf_path, result, coordinates}
# Coordinates are the real coordinates (hmin, hmax) of the image in the Group
# The result box coordinates are un-normalized here (0-1000 -> pixels).
def make_dictionary(root_dir, refaire=False, refaire_list=None):
    """Build the ``student id -> label -> data`` mapping from ``correction.json``.

    Args:
        root_dir: Directory holding ``correction.json``, ``Par label/`` and
            ``Copies/``.
        refaire: When true, placeholder results are added for the entries of
            ``refaire_list``.
        refaire_list: Iterable of ``(copie_name, labels_to_redo)`` pairs, e.g.
            ``("Copie01", ["Ex1"])``. Defaults to no entry.

    Returns:
        A dict ``{student_id: {label: {"pdf_path", "result", "coordinates"}}}``.
        ``coordinates`` is ``None`` when the student was not found in the group
        files of that label.

    Exits the process with status 1 when ``correction.json`` is missing.
    """
    correction_path = os.path.join(root_dir, "correction.json")
    try:
        with open(correction_path, 'r', encoding='utf-8') as f:
            corrections = json.load(f)
    except FileNotFoundError:
        print(f"Error: {correction_path} not found.")
        sys.exit(1)

    result_data = {}
    for label, groups in corrections.items():
        # Each label maps to a list of lists of items.
        for group in groups:
            for item in group:
                student_id = item['id']
                result_obj = item['result']
                suffix = result_obj.get("suffix", "")
                if suffix == "_old":
                    continue

                coordinates, width, height = _find_coordinates(root_dir, label, student_id)

                pdf_name = f"{label}_new.pdf" if suffix == "_new" else f"{label}.pdf"
                pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / pdf_name

                _scale_boxes(result_obj.get("feedback", []), width, height, student_id, label)

                result_data.setdefault(student_id, {})[label] = {
                    "pdf_path": pdf_path,
                    "result": result_obj,
                    "coordinates": coordinates,
                }

    if refaire:
        _add_redo_placeholders(result_data, root_dir, refaire_list or [])
    return result_data


def make_base_image(pdf_path):
    """Rasterise a PDF and stack its pages vertically into one RGBA image.

    Returns:
        ``(base_img, total_h, max_w)``: the stacked image, its height (sum of
        the page heights) and its width (widest page).

    Raises:
        ValueError: if the PDF yields no page.
    """
    pages = convert_from_path(pdf_path)
    if not pages:
        raise ValueError(f"No page could be rendered from {pdf_path}")

    total_h = sum(page.height for page in pages)
    max_w = max(page.width for page in pages)

    base_img = Image.new("RGBA", (max_w, total_h), "white")
    current_y = 0
    for page in pages:
        base_img.paste(page.convert("RGBA"), (0, current_y))
        current_y += page.height
    return (base_img, total_h, max_w)


def normalize_mathtext(text):
    r"""Rewrite LaTeX shortcuts that Matplotlib's mathtext parser does not support.

    e.g. \le -> \leq, \ge -> \geq, \R -> \mathbb{R}.
    The lookahead (?![a-zA-Z]) prevents rewriting the prefix of a longer
    command: \left must not become \leqft, and \Rightarrow or \Re must not
    become \mathbb{R}ightarrow or \mathbb{R}e.
    """
    text = re.sub(r'\\le(?![a-zA-Z])', r'\\leq', text)
    text = re.sub(r'\\ge(?![a-zA-Z])', r'\\geq', text)
    text = re.sub(r'\\implies', r'\\Rightarrow', text)
    # Sometimes, Gemini escapes too much ? Not sure
    text = text.replace("\\\\", "\\")
    text = text.replace("\\llbracket", "[\\![")
    text = text.replace("\\rrbracket", "]\\!]")
    # Blackboard-bold shortcuts, only when the letter is the whole command name.
    text = re.sub(r'\\([RNZCQ])(?![a-zA-Z])', r'\\mathbb{\1}', text)
    # Sometimes, Gemini doesn't escape enough. In the json, you should have \\f:
    # an unescaped "\f" is decoded as a form feed, so turn it back into "\f".
    text = text.replace('\f', r'\f')
    text = re.sub('\u0010', "", text)
    return text


def wrap_latex_text(text, width_chars):
    """Wrap ``text`` to ``width_chars`` columns, keeping ``$...$`` blocks intact.

    Plain text is split on whitespace; each inline math block is a single
    unbreakable token. A token longer than the width gets a line of its own.

    Returns:
        The wrapped text, lines joined with a newline.
    """
    # Split into alternating plain-text / math chunks ($...$, non-greedy).
    parts = re.split(r'(\$[^\$]+\$)', text)

    tokens = []
    for part in parts:
        if part.startswith('$') and part.endswith('$'):
            tokens.append(part)  # Keep the math block whole
        else:
            tokens.extend(part.split())

    lines = []
    current_line = []
    current_length = 0
    for token in tokens:
        token_len = len(token)
        # +1 for the joining space. Only flush a non-empty line, otherwise an
        # over-long first token would produce a spurious empty first line.
        if current_line and current_length + token_len + 1 > width_chars:
            lines.append(" ".join(current_line))
            current_line = [token]
            current_length = token_len
        else:
            current_line.append(token)
            current_length += token_len + 1
    if current_line:
        lines.append(" ".join(current_line))
    return "\n".join(lines)


def render_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_lines=None, fontsize=14):
    """Render ``text`` (with mathtext) to an RGBA image using Matplotlib.

    Args:
        text: Text to render; inline math is written between ``$``.
        width_px: Target width in pixels, used to size the figure and to wrap.
        bg_color: RGBA background composited under the text.
        max_lines: Accepted for signature parity with
            ``render_real_latex_text``; the text is never truncated here.
        fontsize: Matplotlib font size.

    Returns:
        A PIL RGBA image, tightly cropped around the text.
    """
    text = normalize_mathtext(text)

    dpi = 100
    fig_width = width_px / dpi
    # Heuristic: roughly 10 characters per inch.
    chars_per_line = int(fig_width * 10)

    # Wrapping is done here (wrap=False below) so math blocks are never split.
    wrapped_text = wrap_latex_text(text, chars_per_line)
    num_lines = wrapped_text.count('\n') + 1

    # 0.3 inches per line plus a small buffer
    fig_height = num_lines * 0.3 + 0.2

    fig = plt.figure(figsize=(fig_width, fig_height), dpi=dpi)
    buf = io.BytesIO()
    try:
        plt.text(0.01, 0.95, wrapped_text,
                 fontsize=fontsize,
                 verticalalignment='top',
                 horizontalalignment='left',
                 wrap=False)
        plt.axis('off')
        plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0.1, transparent=True)
    finally:
        # Always release the figure, even when mathtext fails to parse.
        plt.close(fig)

    buf.seek(0)
    img = Image.open(buf).convert("RGBA")

    final_img = Image.new("RGBA", img.size, bg_color)
    final_img.alpha_composite(img)
    return final_img


def render_real_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_lines=None, fontsize=19):
    """Render ``text`` to an RGBA image by compiling it with ``pdflatex``.

    Args:
        text: LaTeX source inserted verbatim in the document body.
        width_px: Maximum width in pixels (at 100 dpi) of the text block.
        bg_color: RGBA background composited under the text.
        max_lines: If set, the image is cropped to about that many lines.
        fontsize: Font size in points; the line spacing is 1.2 times that.

    Returns:
        A PIL RGBA image with black text over ``bg_color``.

    Raises:
        RuntimeError: if ``pdflatex`` did not produce a PDF.
    """
    dpi = 100
    width_in = width_px / dpi
    line_spacing = int(fontsize * 1.2)

    # 'standalone' + 'varwidth' crops the height automatically while capping the width.
    latex_template = f"""\\documentclass[varwidth={width_in}in,margin=0.2cm]{{standalone}}
\\usepackage[utf8]{{inputenc}}
\\usepackage[T1]{{fontenc}}
\\usepackage{{lmodern}} % Enables arbitrary font scaling
\\usepackage{{amsmath, amssymb}}
\\usepackage{{mathabx}} % larger inline operators.
\\usepackage{{commands}}
%\\usepackage{{anyfontsize}} % replaced by lmodern
\\begin{{document}}
\\fontsize{{{fontsize}}}{{{line_spacing}}}\\selectfont
{text}
\\end{{document}}
"""

    with tempfile.TemporaryDirectory() as temp_dir:
        tex_path = os.path.join(temp_dir, 'text.tex')
        pdf_path = os.path.join(temp_dir, 'text.pdf')

        with open(tex_path, 'w', encoding='utf-8') as f:
            f.write(latex_template)

        # Compile to PDF; success is judged by the presence of the output file.
        subprocess.run(
            ['pdflatex', '-interaction=nonstopmode', 'text.tex'],
            cwd=temp_dir,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )

        if not os.path.exists(pdf_path):
            raise RuntimeError("LaTeX compilation failed. Check your LaTeX syntax.")

        # Convert PDF to grayscale (ignoring pdf2image's broken transparency)
        images = convert_from_path(pdf_path, dpi=dpi)
        gray_img = images[0].convert("L")

    # 1. Invert grayscale to create an alpha mask (white bg = 0, black text = 255)
    alpha_mask = PIL.ImageOps.invert(gray_img)

    # 2. Create a transparent image with black text using the mask
    text_img = Image.new("RGBA", gray_img.size, (0, 0, 0, 255))
    text_img.putalpha(alpha_mask)

    # 3. Create the requested background and composite the text over it
    final_img = Image.new("RGBA", text_img.size, bg_color)
    final_img.alpha_composite(text_img)

    # (Optional) Truncate image height if max_lines is strictly enforced
    if max_lines:
        max_height_px = int((fontsize * 1.2 / 72.0) * dpi * max_lines)  # Points to pixels
        if final_img.height > max_height_px:
            final_img = final_img.crop((0, 0, final_img.width, max_height_px))

    return final_img


def color(score):
    """Map a score on a 0-4 scale to a hex colour going from red to green.

    Scores that cannot be converted to a number (e.g. ``None``) are treated
    as 0.
    """
    try:
        value = float(score)
    except (TypeError, ValueError):
        value = 0.0
    t = max(0.0, min(1.0, value / 4.0))
    # Stretch the ramp so both ends saturate before the extreme scores.
    t = t * 1.5 - 0.25
    t = max(0.0, min(1.0, t))
    red = 200 * (1 - t)
    green = 150 * t
    return mcolors.to_hex((red / 255, green / 255, 0))


def render_score_text(label, score, error, width_px, fontsize=30, bg_color=(255, 255, 255, 255), with_error=True, id=None):
    """Render the "<id> <label> Note : <score> <error>" header line.

    Args:
        label: Label name shown before the score.
        score: Score, drawn in bold with the colour given by ``color``.
        error: Optional error text, drawn in bold orange when ``with_error``.
        width_px: Width of the produced image; the text starts at 12.5% of it.
        fontsize: Font size; forced to 18 when an error is displayed.
        bg_color: RGBA background colour.
        with_error: Whether the error text is displayed.
        id: Optional identifier prepended to the label.

    Returns:
        An 80 px high PIL RGBA image.
    """
    # 1. Build text segments: (text, color, is_bold)
    default_color = (0, 0, 0, 255)
    prefix = f"{id} " if id else ""
    prefix += f"{label} Note : "
    parts = [
        (prefix, default_color, False),
        (str(score), color(score), True),
    ]
    if error and error != "null" and with_error:
        fontsize = 18  # smaller font so the error text fits on the line
        parts.append((" ", default_color, False))
        parts.append((str(error), "orange", True))

    # 2. Setup Image
    height_px = 80
    img = Image.new("RGBA", (int(width_px), height_px), bg_color)
    draw = ImageDraw.Draw(img)

    # 3. Load Fonts
    try:
        font_regular = ImageFont.truetype("DejaVuSans.ttf", fontsize)
        font_bold = ImageFont.truetype("DejaVuSans-Bold.ttf", fontsize)
    except IOError:
        # Fallback for systems without the DejaVu TTF files
        print("Warning: DejaVuSans fonts not found, using Pillow's default font")
        try:
            font_regular = ImageFont.load_default(size=fontsize)  # Pillow >= 10.1.0
        except TypeError:
            print("Warning: this Pillow version cannot size the default font")
            font_regular = ImageFont.load_default()
        font_bold = font_regular

    # 4. Draw segments horizontally
    x, y = int(width_px * 0.125), int(height_px * 0.2)
    for text, text_color, is_bold in parts:
        font = font_bold if is_bold else font_regular
        draw.text((x, y), text, fill=text_color, font=font)
        # Advance X to the right edge of what was just drawn
        x = draw.textbbox((x, y), text, font=font)[2]

    return img


def compose_label_image(base_img, label, result, hmin, render_fn=render_real_latex_text, draw_callback=None, with_error=True, with_empty=False, more_right=False, with_id=None):
    """
    Composes the final image with annotations.

    Layout: score header and global feedbacks on top, then the copy shifted
    right by MARGIN_LEFT, with located feedbacks drawn as red rectangles on the
    copy and their text pasted near the left edge.

    Args:
        base_img: The source PDF converted to image. It is padded with white
            up to TARGET_MIN_WIDTH when narrower.
        label: Label name (e.g. "Ex1").
        result: The JSON result object (score, error, feedback).
        hmin: Vertical offset subtracted from the box ordinates.
        render_fn: Function to render text to image (allows threading injection).
        draw_callback: Optional function(type, draw_obj, position_dict, data_dict)
            called when elements are placed. Used for checkboxes.
        with_error: Whether the error text is shown in the score header.
        with_empty: Whether results flagged "empty-answer" are still rendered.
        more_right: Use a wider score header and paste it 150 px to the right.
        with_id: Optional identifier shown in the score header.

    Returns:
        ``(image, header_height)``, or ``(None, 0)`` for an "empty-answer"
        result when ``with_empty`` is false.
    """
    left_pad = 0
    if base_img.width < TARGET_MIN_WIDTH:
        total_missing = TARGET_MIN_WIDTH - base_img.width
        # Pad on the left first (at most MARGIN_LEFT); the rest ends up on the right.
        left_pad = min(total_missing, MARGIN_LEFT)
        new_base = Image.new("RGB", (TARGET_MIN_WIDTH, base_img.height), "white")
        new_base.paste(base_img, (left_pad, 0))
        base_img = new_base

    score = result.get('score', 0)
    error = result.get('error', "")
    feedbacks = result.get('feedback', [])

    if error == "empty-answer" and not with_empty:
        return None, 0

    # Filter deleted items (used by reading_annotations.py)
    feedbacks = [f for f in feedbacks if "to_delete" not in f]
    global_fb = [f for f in feedbacks if not f.get('box_2d')]
    local_fb = [f for f in feedbacks if f.get('box_2d')]
    local_fb.sort(key=lambda x: x['box_2d'][0])  # top to bottom

    # 1. Prepare Headers
    if more_right:
        width = base_img.width // 2
    else:
        width = base_img.width // 2 - 150
    img_score = render_score_text(label, score, error, width, with_error=with_error, id=with_id)
    header_elements = [{"type": "score", "img": img_score, "data": result}]

    # Global Feedbacks
    for idx, fb in enumerate(global_fb):
        img_fb = render_fn(fb['text'], base_img.width)
        header_elements.append({"type": "global_fb", "img": img_fb, "data": fb, "index": idx})

    # Calculate Header Height
    header_height = sum(el["img"].height for el in header_elements)
    total_height = base_img.height + header_height

    # Create Canvas
    final_img = Image.new("RGB", (base_img.width + MARGIN_LEFT, total_height), "white")

    # Draw Headers
    current_y = 0
    draw = ImageDraw.Draw(final_img, "RGBA")
    for el in header_elements:
        header_x = 150 if (el["type"] == "score" and more_right) else 0
        final_img.paste(el["img"], (header_x, current_y))

        if draw_callback:
            # Hook for checkboxes
            draw_callback("header_item", draw,
                          {"x": 0, "y": current_y, "w": el["img"].width, "h": el["img"].height},
                          el)
        current_y += el["img"].height

    # Paste Base Image
    image_offset_y = current_y
    final_img.paste(base_img, (MARGIN_LEFT, image_offset_y))

    # 2. Draw Local Annotations
    draw = ImageDraw.Draw(final_img, "RGBA")  # Refresh draw object
    last_text_bottom = 0

    for idx, fb in enumerate(local_fb):
        ymin, xmin, ymax, xmax = fb.get('box_2d')

        # Sort each pair so an inverted box cannot make ImageDraw.rectangle fail.
        target_ymin, target_ymax = sorted(((ymin - hmin) + image_offset_y,
                                           (ymax - hmin) + image_offset_y))
        target_xmin, target_xmax = sorted((xmin + MARGIN_LEFT + left_pad,
                                           xmax + MARGIN_LEFT + left_pad))

        # Draw Rectangle (if not suppressed)
        if "norectangle" not in fb:
            draw.rectangle([target_xmin, target_ymin, target_xmax, target_ymax], outline="red", width=3)
            if draw_callback:
                draw_callback("local_rect", draw,
                              {"box": [target_xmin, target_ymin, target_xmax, target_ymax]},
                              {"data": fb, "index": idx})

        # Render Text
        txt_img = render_fn(fb['text'], width_px=ANNOT_WIDTH, bg_color=(255, 200, 200, 180), max_lines=None)

        # Calculate Position: centred on the box, below the headers and below
        # the previous annotation text.
        center_y = (target_ymin + target_ymax) / 2
        paste_y = center_y - (txt_img.height / 2)
        paste_y = max(paste_y, image_offset_y)
        if paste_y < last_text_bottom:
            paste_y = last_text_bottom + 5

        # Resize canvas if needed
        required_height = int(paste_y + txt_img.height + 20)
        if required_height > final_img.height:
            new_final = Image.new("RGB", (final_img.width, required_height), "white")
            new_final.paste(final_img, (0, 0))
            final_img = new_final
            draw = ImageDraw.Draw(final_img, "RGBA")

        # Paste Text
        final_img.paste(txt_img, (10, int(paste_y)), mask=txt_img)

        if draw_callback:
            draw_callback("local_text", draw,
                          {"x": 10, "y": int(paste_y), "w": txt_img.width, "h": txt_img.height},
                          {"data": fb, "index": idx})

        last_text_bottom = paste_y + txt_img.height

    return final_img, header_height


def process_student(student_id, labels_data, root_dir, all_labels, overwrite):
    """Render every label of one student and concatenate them.

    Writes, under ``<root_dir>/Anot/Copie<student_id>``: one ``<label>.jpg``
    per label, ``score.json`` (label -> score as text, "" when not rendered)
    and ``Concat.jpg`` (all non-empty answers stacked vertically).

    Args:
        student_id: Identifier of the copy.
        labels_data: Mapping label -> {"pdf_path", "result", "coordinates"}.
        root_dir: Root directory of the correction.
        all_labels: Every known label, used to initialise ``score.json``.
        overwrite: Reprocess even if ``Concat.jpg`` already exists.
    """
    # Prepare output directory: Dir/Anot/CopieID
    output_dir = os.path.join(root_dir, "Anot", f"Copie{student_id}")

    # Check if already processed (Concat.jpg exists)
    concat_path = os.path.join(output_dir, "Concat.jpg")
    if os.path.exists(concat_path) and not overwrite:
        print(f"Skipping Copie {student_id} (Concat.jpg exists)")
        return

    print("Processing :", student_id)

    # Clean folder if re-processing
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    d_notes = dict.fromkeys(all_labels, "")
    label_images = []

    # !! Should rather be sorted by the order of the labels.
    # NOTE(review): natural_key receives (label, content) pairs here — confirm
    # that is what utils.natural_key expects.
    sorted_labels = sorted(list(labels_data.items()), key=natural_key)

    for label, content in sorted_labels:
        # 1. Find PDF path
        pdf_full_path = content.get('pdf_path')
        if not pdf_full_path or not os.path.exists(pdf_full_path):
            print(f"File not found: {pdf_full_path}")
            continue

        # 2. Convert PDF to Image
        try:
            (base_img, _, _) = make_base_image(pdf_full_path)
        except Exception as e:
            print(f"Error converting {pdf_full_path}: {e}")
            continue

        result = content.get('result', {})
        # The key may be present with a None value when no coordinates were found.
        coordinates = content.get('coordinates') or (0, 0)  # (hmin, hmax)
        score = result.get('score', 0)
        d_notes[label] = str(score)

        final_img, _ = compose_label_image(base_img, label, result, coordinates[0],
                                           with_empty=True, render_fn=render_real_latex_text)

        # 3. Save Image
        save_path = os.path.join(output_dir, f"{label}.jpg")
        final_img.save(save_path)
        if result.get('error', "") != "empty-answer":
            label_images.append(final_img)

    # Save scores
    with open(os.path.join(output_dir, "score.json"), "w", encoding="utf-8") as f:
        json.dump(d_notes, f, indent=4)

    # Concatenate
    if label_images:
        max_w = max(i.width for i in label_images)
        total_h = sum(i.height for i in label_images)
        # White like every other canvas, so narrower images get no black band.
        canvas = Image.new('RGB', (max_w, total_h), "white")
        cy = 0
        for img in label_images:
            canvas.paste(img, (0, cy))
            cy += img.height
        canvas.save(concat_path)


def process_correction(root_dir, data, all_labels, overwrite=False):
    """Process every student of ``data`` sequentially, in sorted id order."""
    # Do not thread this application:
    # 1. the matplotlib calls would have to be protected;
    # 2. the errors would be lost.
    for student_id, labels in sorted(data.items()):
        process_student(student_id, labels, root_dir, all_labels, overwrite)


def main():
    """Command-line entry point: annotate every copy found under ``root_dir``."""
    parser = argparse.ArgumentParser(description="Annotate copies")
    parser.add_argument("root_dir", help="Directory containing the copies")
    parser.add_argument("--overwrite", action="store_true", help="Reprocess even if Concat.jpg exists")
    args = parser.parse_args()
    root_dir = args.root_dir

    labels = utils.read_all_labels(root_dir)
    results = make_dictionary(root_dir)
    # Results is : Copie id -> label -> {pdf_path, result, coordinates}
    # Coordinates are the real coordinates (hmin, hmax) of the image in the Group
    # print(results,"\n\n\n")
    process_correction(root_dir, results, labels, overwrite=args.overwrite)


if __name__ == "__main__":
    main()