def _locate_answer(label_dir, student_id):
    """Find where a student's answer sits inside the group images of one label.

    Every ``*.json`` file of *label_dir* is expected to hold a list of
    ``[id, hmin, hmax]`` entries; the group image it describes is the file
    with the same stem and a ``.jpg`` extension.

    Returns a ``(coordinates, size)`` pair: *coordinates* is the
    ``(hmin, hmax)`` tuple of the first matching entry and *size* the
    ``(width, height)`` of the corresponding group image.  Each element is
    ``None`` when it could not be determined.
    """
    # Sorted so that the result does not depend on directory listing order.
    for json_file in sorted(glob.glob(os.path.join(label_dir, "*.json"))):
        try:
            with open(json_file, "r", encoding="utf-8") as f:
                coord_list = json.load(f)
        except json.JSONDecodeError:
            continue  # unreadable coordinate file: try the next one

        for entry in coord_list:
            if entry[0] != student_id:
                continue
            coordinates = (entry[1], entry[2])
            img_path = os.path.splitext(json_file)[0] + ".jpg"
            try:
                with Image.open(img_path) as img:
                    size = img.size  # (width, height)
            except OSError as exc:
                # Missing/corrupt image: keep the coordinates, but the boxes
                # of this answer cannot be converted to pixels.
                print(f"Warning: cannot read {img_path}: {exc}")
                size = None
            return coordinates, size

    return None, None


def _unnormalize_boxes(feedback, size, student_id, label):
    """Convert the ``box_2d`` of each feedback item from 0-1000 units to pixels.

    Boxes are ``[ymin, xmin, ymax, xmax]`` and are rewritten in place.  When
    the image *size* is unknown (or a box has fewer than four values) the box
    cannot be placed, so it is reset to ``None``.
    """
    for item in feedback:
        box = item.get("box_2d")
        if not box:
            continue
        if size is None or len(box) < 4:
            print(f"Warning: dropping box of Copie{student_id}/{label} "
                  "(unknown image size or malformed box)")
            item["box_2d"] = None
            continue
        width, height = size
        for index, extent in enumerate((height, width, height, width)):
            box[index] = (box[index] * extent) // 1000


# Results is : Copie id -> label -> {pdf_path, result, coordinates}
# Coordinates are the real coordinates (hmin, hmax) of the image in the Group
def make_dictionary(root_dir):
    """Build the per-student correction dictionary from ``correction.json``.

    ``<root_dir>/correction.json`` maps each label to a list of lists of
    ``{"id": ..., "result": ...}`` items.  For every item the answer is
    located in the group images of ``<root_dir>/<label>`` and the ``box_2d``
    entries of its feedback are converted from normalized (0-1000) units to
    pixels of the group image.

    Returns ``{student_id: {label: {"pdf_path", "result", "coordinates"}}}``
    where ``coordinates`` is ``(hmin, hmax)`` or ``None`` if the answer was
    not found.  In that case the feedback boxes are set to ``None`` instead of
    crashing on the unknown image size.

    Exits the process with status 1 when ``correction.json`` does not exist.
    """
    correction_path = os.path.join(root_dir, "correction.json")
    try:
        with open(correction_path, "r", encoding="utf-8") as f:
            corrections = json.load(f)
    except FileNotFoundError:
        print(f"Error: {correction_path} not found.")
        sys.exit(1)

    result_data = {}
    for label, groups in corrections.items():
        label_dir = os.path.join(root_dir, label)
        # Each label holds a list of lists of items: flatten while iterating.
        for item in (entry for group in groups for entry in group):
            student_id = item["id"]
            result_obj = item["result"]

            coordinates, size = _locate_answer(label_dir, student_id)
            _unnormalize_boxes(result_obj.get("feedback", []), size,
                               student_id, label)

            result_data.setdefault(student_id, {})[label] = {
                # Dir/Copie{id}/{label}.pdf
                "pdf_path": os.path.join(root_dir, f"Copie{student_id}",
                                         f"{label}.pdf"),
                "result": result_obj,
                "coordinates": coordinates,
            }

    return result_data
def wrap_latex_text(text, width_chars):
    """Wrap *text* to lines of at most *width_chars* characters.

    Inline math blocks (``$...$``) are never split: each one is treated as a
    single token.  Plain text is split on whitespace.  A token longer than
    *width_chars* is placed alone on its own line (it is not broken).

    Returns the wrapped text with lines joined by ``"\\n"``; an empty or
    whitespace-only input yields ``""``.
    """
    # Split into alternating plain-text / math chunks; the capturing group
    # keeps the math blocks in the result.
    tokens = []
    for part in re.split(r'(\$[^\$]+\$)', text):
        if part.startswith('$') and part.endswith('$'):
            tokens.append(part)          # keep the math block whole
        else:
            tokens.extend(part.split())  # plain text: one token per word

    lines = []
    current_line = []
    current_length = 0  # exact length of " ".join(current_line)

    for token in tokens:
        token_len = len(token)
        if not current_line:
            # First token of a line: nothing to flush, no separating space.
            current_line = [token]
            current_length = token_len
        elif current_length + 1 + token_len > width_chars:
            # +1 for the space that would precede the token.
            lines.append(" ".join(current_line))
            current_line = [token]
            current_length = token_len
        else:
            current_line.append(token)
            current_length += 1 + token_len

    if current_line:
        lines.append(" ".join(current_line))

    return "\n".join(lines)
def _stack_pdf_pages(pdf_path):
    """Render every page of *pdf_path* and stack them vertically in one RGBA image."""
    pages = convert_from_path(pdf_path)
    stacked = Image.new(
        "RGBA",
        (max(page.width for page in pages), sum(page.height for page in pages)),
        "white",
    )
    offset_y = 0
    for page in pages:
        stacked.paste(page.convert("RGBA"), (0, offset_y))
        offset_y += page.height
    return stacked


def _annotate_answer(base_img, label, content, margin_left):
    """Compose the annotated image of one answer.

    The result is made of header rows (label, score, optional error, then the
    feedback items without a box), followed by *base_img* shifted right by
    *margin_left*.  Every feedback item with a ``box_2d`` gets a red rectangle
    on the answer and its text rendered in the left margin.

    Returns ``(image, score)``.
    """
    # 'coordinates' may be present but None when the answer was not located,
    # so a plain .get() default is not enough.
    coordinates = content.get('coordinates') or (0, 0)  # (hmin, hmax)
    hmin = coordinates[0]
    result = content.get('result') or {}
    score = result.get('score', 0)
    error = result.get('error', "")
    feedbacks = result.get('feedback') or []

    # Feedback without a box goes to the header, the rest is drawn in place,
    # top to bottom.
    global_fb = [fb for fb in feedbacks if not fb.get('box_2d')]
    local_fb = sorted((fb for fb in feedbacks if fb.get('box_2d')),
                      key=lambda fb: fb['box_2d'][0])

    # --- HEADERS ---
    score_text = f"{label} ; Note : {score}"
    if error and error != "null":
        score_text += f" | Error: {error}"
    header_elements = [render_latex_text(score_text, base_img.width, fontsize=18)]
    header_elements.extend(
        render_latex_text(fb['text'], base_img.width)
        for fb in global_fb
        if fb.get('text')  # nothing to render for text-less feedback
    )

    header_height = sum(img.height for img in header_elements)
    final_img = Image.new(
        "RGB",
        (base_img.width + margin_left, base_img.height + header_height),
        "white",
    )

    current_y = 0
    for elem in header_elements:
        final_img.paste(elem, (0, current_y))
        current_y += elem.height

    # current_y is now the vertical offset of the answer itself.
    image_offset_y = current_y
    final_img.paste(base_img, (margin_left, image_offset_y))

    # --- LOCAL ANNOTATIONS ---
    draw = ImageDraw.Draw(final_img, "RGBA")
    last_text_bottom = 0

    for fb in local_fb:
        box = fb.get('box_2d')
        if not box or len(box) < 4:
            continue
        ymin, xmin, ymax, xmax = box[0], box[1], box[2], box[3]

        # Box ordinates are relative to the group image: hmin brings them
        # back to the answer, image_offset_y accounts for the headers.
        target_ymin = (ymin - hmin) + image_offset_y
        target_ymax = (ymax - hmin) + image_offset_y
        target_xmin = xmin + margin_left
        target_xmax = xmax + margin_left

        draw.rectangle([target_xmin, target_ymin, target_xmax, target_ymax],
                       outline="red", width=3)

        text = fb.get('text')
        if not text:
            continue  # the rectangle alone is all we can show

        txt_img = render_latex_text(
            text,
            width_px=500,
            bg_color=(255, 200, 200, 180),  # light red, semi-transparent
            max_lines=3,
        )

        # Vertically centre the text on the box, but never above the answer
        # nor over the previous text.
        txt_h = txt_img.height
        paste_y = (target_ymin + target_ymax) / 2 - txt_h / 2
        paste_y = max(paste_y, image_offset_y)
        if paste_y < last_text_bottom:
            paste_y = last_text_bottom + 5  # move down + padding

        # Grow the canvas when the text would overflow at the bottom.
        required_height = int(paste_y + txt_h + 20)  # +20 for bottom padding
        if required_height > final_img.height:
            taller = Image.new("RGB", (final_img.width, required_height), "white")
            taller.paste(final_img, (0, 0))
            final_img = taller
            # The draw object is bound to one image: rebuild it.
            draw = ImageDraw.Draw(final_img, "RGBA")

        final_img.paste(txt_img, (10, int(paste_y)), mask=txt_img)
        last_text_bottom = paste_y + txt_h

    return final_img, score


def process_correction(root_dir, data, all_labels):
    """Produce the annotated images of every student.

    For each student of *data* (``id -> label -> {result, coordinates, ...}``)
    a directory ``<root_dir>/Anot_Copie<id>`` is (re)created containing one
    ``<label>.jpg`` per answer whose PDF ``<root_dir>/Copie<id>/<label>.pdf``
    could be converted, a ``score.json`` mapping each label of *all_labels* to
    its score as a string (``""`` when the answer was not processed), and the
    concatenation produced by ``concat_display_image``.

    Students whose ``Concat.jpg`` already exists are skipped.  Returns ``None``.
    """
    margin_left = 200

    for student_id, labels in data.items():
        # Output directory: Dir/Anot_CopieID
        output_dir = os.path.join(root_dir, f"Anot_Copie{student_id}")

        # Concat.jpg is written last, so its presence marks a finished copy.
        if os.path.exists(os.path.join(output_dir, "Concat.jpg")):
            print(f"Skipping Copie {student_id} (Concat.jpg exists)")
            continue

        print("Processing :", student_id)

        # Start from a clean folder when re-processing a partial run.
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        d_notes = dict.fromkeys(all_labels, "")

        for label, content in labels.items():
            pdf_full_path = os.path.join(root_dir, f"Copie{student_id}",
                                         f"{label}.pdf")
            if not os.path.exists(pdf_full_path):
                print(f"File not found: {pdf_full_path}")
                continue

            # Best effort: a PDF that cannot be converted only loses this answer.
            try:
                base_img = _stack_pdf_pages(pdf_full_path)
            except Exception as e:
                print(f"Error converting {pdf_full_path}: {e}")
                continue

            final_img, score = _annotate_answer(base_img, label, content,
                                                margin_left)
            d_notes[label] = str(score)
            final_img.save(os.path.join(output_dir, f"{label}.jpg"))

        json_path = os.path.join(output_dir, "score.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(d_notes, f, indent=4)
        concat_display_image(output_dir)
# Script entry point of annotating.py: the first command-line argument is the
# root directory holding correction.json, the `labels` file and the Copie*
# folders.
if len(sys.argv) < 2:
    print("Usage: python script.py ")
    sys.exit(1)

root_dir = sys.argv[1]

# One label per non-empty line of <root_dir>/labels.
labels = [
    line
    for line in (Path(root_dir) / "labels").read_text().splitlines()
    if line
]

# results is : Copie id -> label -> {pdf_path, result, coordinates}
# Coordinates are the real coordinates (hmin, hmax) of the image in the Group
results = make_dictionary(root_dir)
process_correction(root_dir, results, labels)
# concat_anot_images(root_dir)
+ +Each answer is separated by a black horizontal line, and underneath, +to the left, is indicated the ID of the answer, from `01` to `50`. + +I want you to score each answer, from 0 to 4, you may score half +points, such as 2.5. Even if a result is wrong, if the reasoning is +correct and could lead to a right answer, you should give at least +half the points. + +You also need to give feedback to the student, in french : + - which part of his answer is wrong, + - why is it wrong + - possibly, what he should have done instead. +Your feedback may contain LaTeX fragments written like `$a^2 + b^2 = c^2$`. + +If your score is note 4, you should always provide some feedback +explaining what's missing. + +For each piece of feedback, if it is related to a specific part of the +answer that is wrong, you may provide a `box_2d`, to locate this +specific part of the answer. This `box_2d` should be in the form +[ymin, xmin, ymax, xmax] normalized to 0-1000. If you do not provide +one, set `box_2d` to `null`. + +If the answer is correct, there is no need to provide feedback. + +For example, if the student says a function is continuous when it +isn't, provide the coordinates where the word «continuous» is. If a +calculation went wrong, gives the coordinates of the step where it +goes wrong, and as feedback, what went wrong. + +You should also give me a measure of confidence, from 0 to 1 that you +were able to correctly understand the answer. A score below 0.5 means +that you think it is likely that you couldn't understand an important +part. + +In some case, you may find that either + - The student didn't answer the right question. Set the score to 0. + Since it could be a labeling error, indicate is by setting `error` + to \"wrong-label\". + - You can find an answer to another question of the exercice (taking + more than a couple of lines). Score the question you are supposed + to score, but set `error` to \"additional-answer\". +If there's no error, set `error` to `\"\"`. 
+ +You will answer using json describing a list of dictionary with a key +\"id\", and a key \"result\" that contains the \"score\", the \"confidence\", a +list \"feedback\", and possibly an \"error\". Like this example : + +[{ \"id\": \"01\", + \"result\": {\"score\" : 2.5, + \"confidence\" : 0.8, + \"feedback\": [{text: \"Un retour générique. Il faut apprendre le cours.\", box_2d: null}, + {text: \"Non, la fonction n'est pas forcément continue\", pos: [145, 280, 340, 500]}], + \"error\": \"\"} + }, + { \"id\": \"04\", + \"result\": {\"score\" : 4., + \"confidence\" : 0.9, + \"feedback\" : [] + \"error\": \"\" } + } +] + +Here is the text of the exercice of the exam : + +``` +<> +``` + +Here is a possible correct answer : + +``` +<> +``` + +Here is some additional scoring instructions : + +``` +<> +``` + +You are asked to score the question or exercice labeled `<