"""Build annotated images of student papers from a ``correction.json`` file.

Expected layout under the root directory, as read by the code below:

* ``correction.json``: label -> list of lists of ``{"id": ..., "result": ...}``
* ``<label>/*.json``: lists of ``[id, x, y]`` entries, each JSON file having a
  companion ``.jpg`` with the same stem
* ``Copie<id>/<label>.pdf``: the answer of student ``id`` for ``label``
* ``labels``: one label per line

Output goes to ``Anot_Copie<id>/`` (one ``<label>.jpg`` per answer, a
``score.json`` and a ``Concat.jpg`` stacking all annotated answers).
"""
import contextlib
import glob
import io
import json
import os
import re
import shutil
import subprocess
import sys
import textwrap
from pathlib import Path

import matplotlib.pyplot as plt
from pdf2image import convert_from_path
from PIL import Image, ImageDraw, ImageFont

# NOTE: full LaTeX rendering was tried and left disabled:
# plt.rcParams.update({"text.usetex": True,
#                      "text.latex.preamble": r"\usepackage{bbold}"})

# Width in pixels of the blank margin added to the left of every page image;
# feedback text is pasted starting in this margin.
MARGIN_LEFT = 200


# Results is: Copie id -> label -> {pdf_path, result, coordinates}
# Coordinates are the two values stored after the id in the label's JSON entry;
# the original author describes them as the real (hmin, hmax) of the image in
# the group. The "box_2d" values of the result are converted from a 0..1000
# scale to pixels of the companion image.
def _locate_student(json_files, student_id):
    """Find the coordinate entry of ``student_id`` among ``json_files``.

    Returns ``(coordinates, width, height)`` where ``coordinates`` is the
    ``(entry[1], entry[2])`` pair of the first matching entry and
    ``width``/``height`` are the size of the ``.jpg`` next to the JSON file.
    Returns ``(None, None, None)`` when no entry matches, and
    ``(coordinates, None, None)`` when the companion image cannot be read.
    """
    for jf in json_files:
        try:
            with open(jf, 'r', encoding='utf-8') as f:
                coord_list = json.load(f)
        except json.JSONDecodeError:
            # Not every JSON in the label folder has to be a coordinate list.
            continue

        # Format: [["id", x, y], ...]
        for entry in coord_list:
            if entry[0] != student_id:
                continue
            coordinates = (entry[1], entry[2])
            img_path = os.path.splitext(jf)[0] + ".jpg"
            try:
                with Image.open(img_path) as img:
                    width, height = img.size
            except OSError as exc:
                print(f"Warning: cannot read {img_path}: {exc}")
                return coordinates, None, None
            return coordinates, width, height

    return None, None, None


def _scale_boxes(feedback, width, height):
    """Convert every ``box_2d`` of ``feedback`` from 0..1000 units to pixels, in place.

    Boxes are ``[ymin, xmin, ymax, xmax]``. When the image size is unknown the
    box cannot be converted, so it is cleared (set to ``None``); the feedback
    then has no box and is rendered as a general remark instead of being
    drawn at a wrong position.

    Returns the number of boxes that had to be cleared.
    """
    dropped = 0
    for el in feedback:
        box = el.get("box_2d")
        if not box:
            continue
        if width is None or height is None:
            el["box_2d"] = None
            dropped += 1
            continue
        box[0] = (box[0] * height) // 1000
        box[2] = (box[2] * height) // 1000
        box[1] = (box[1] * width) // 1000
        box[3] = (box[3] * width) // 1000
    return dropped


def make_dictionary(root_dir):
    """Load ``correction.json`` and index it by student id, then by label.

    Args:
        root_dir: directory containing ``correction.json`` and one folder per
            label.

    Returns:
        ``{student_id: {label: {"pdf_path", "result", "coordinates"}}}``.
        ``coordinates`` is ``None`` when the student is not listed in any JSON
        file of the label folder. The ``result`` objects are the ones loaded
        from ``correction.json`` with their boxes rescaled in place.

    Exits the process with status 1 when ``correction.json`` is missing.
    """
    correction_path = os.path.join(root_dir, "correction.json")

    try:
        with open(correction_path, 'r', encoding='utf-8') as f:
            corrections = json.load(f)
    except FileNotFoundError:
        print(f"Error: {correction_path} not found.")
        sys.exit(1)

    result_data = {}

    for label, groups in corrections.items():
        # The file listing only depends on the label, not on the item.
        json_files = glob.glob(os.path.join(root_dir, label, "*.json"))

        # Each label maps to a list of lists of items: flatten it.
        items = [item for group in groups for item in group]

        for item in items:
            student_id = item['id']
            result_obj = item['result']

            coordinates, width, height = _locate_student(json_files, student_id)

            # PDF path: Dir/Copie{id}/{label}.pdf
            pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")

            feedback = result_obj.get("feedback") or []
            dropped = _scale_boxes(feedback, width, height)
            if dropped:
                print(
                    f"Warning: image size unknown for Copie {student_id} / {label}; "
                    f"{dropped} box(es) shown as general feedback."
                )

            result_data.setdefault(student_id, {})[label] = {
                "pdf_path": pdf_path,
                "result": result_obj,
                "coordinates": coordinates,
            }

    return result_data


def normalize_mathtext(text):
    r"""Rewrite LaTeX that Matplotlib's mathtext parser does not accept.

    * ``\le`` -> ``\leq`` and ``\ge`` -> ``\geq``; the negative lookahead keeps
      longer commands such as ``\left`` untouched.
    * ``\implies`` -> ``\Rightarrow``.
    * Doubled backslashes are collapsed to one (over-escaped input).
    * ``\llbracket`` / ``\rrbracket`` are emulated with brackets and ``\!``.
    * A form-feed character (an under-escaped ``\f`` in the JSON) is turned
      back into the two characters ``\f``.
    * The U+0010 control character is removed.
    """
    text = re.sub(r'\\le(?![a-zA-Z])', r'\\leq', text)
    text = re.sub(r'\\ge(?![a-zA-Z])', r'\\geq', text)
    text = re.sub(r'\\implies', r'\\Rightarrow', text)

    # Sometimes the input is escaped too much.
    text = text.replace("\\\\", "\\")

    text = text.replace("\\llbracket", "[\\![")
    text = text.replace("\\rrbracket", "]\\!]")

    # Sometimes the input is not escaped enough: the JSON should contain \\f.
    text = text.replace('\f', r'\f')
    text = re.sub('\u0010', "", text)
    return text


def wrap_latex_text(text, width_chars):
    """Wrap ``text`` to about ``width_chars`` characters per line.

    Inline math blocks (``$...$``) are never split. Plain text is split on
    whitespace, so existing line breaks are not preserved. A token longer than
    ``width_chars`` gets a line of its own.

    Returns the lines joined with ``"\\n"`` (an empty string for empty input).
    """
    # 1. Split into alternating plain-text / math chunks.
    parts = re.split(r'(\$[^\$]+\$)', text)

    # 2. Tokenize: math blocks stay whole, plain text is split into words.
    tokens = []
    for part in parts:
        if part.startswith('$') and part.endswith('$'):
            tokens.append(part)
        else:
            tokens.extend(part.split())

    # 3. Greedily fill lines.
    lines = []
    current_line = []
    current_length = 0
    for token in tokens:
        token_len = len(token)  # +1 below accounts for the joining space
        # Only break when the line already holds something; otherwise an
        # over-long first token would emit a spurious empty line.
        if current_line and current_length + token_len + 1 > width_chars:
            lines.append(" ".join(current_line))
            current_line = [token]
            current_length = token_len
        else:
            current_line.append(token)
            current_length += token_len + 1

    if current_line:
        lines.append(" ".join(current_line))

    return "\n".join(lines)


def render_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_lines=None, fontsize=14):
    """Render ``text`` (with mathtext) to an RGBA image using Matplotlib.

    Args:
        text: text to render; it goes through ``normalize_mathtext`` first.
        width_px: target width in pixels, used to size the figure and to pick
            the wrapping width. The saved image is cropped to the text
            (``bbox_inches='tight'``), so the result can be narrower or wider.
        bg_color: RGBA colour placed behind the text.
        max_lines: accepted for API compatibility; currently NOT enforced, the
            image simply grows with the number of lines.
        fontsize: Matplotlib font size.

    Returns:
        A ``PIL.Image`` in RGBA mode.
    """
    text = normalize_mathtext(text)

    dpi = 100
    fig_width = width_px / dpi

    # Heuristic: about 10 characters per inch (the font size is not taken
    # into account).
    chars_per_line = int(fig_width * 10)
    wrapped_text = wrap_latex_text(text, chars_per_line)

    # 0.3 inch per line plus a small buffer.
    num_lines = wrapped_text.count('\n') + 1
    fig_height = num_lines * 0.3 + 0.2

    fig = plt.figure(figsize=(fig_width, fig_height), dpi=dpi)
    buf = io.BytesIO()
    try:
        # wrap=False because wrapping was done above.
        plt.text(0.01, 0.95, wrapped_text,
                 fontsize=fontsize,
                 verticalalignment='top',
                 horizontalalignment='left',
                 wrap=False)
        plt.axis('off')
        plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0.1, transparent=True)
    finally:
        # Close even if rendering fails, otherwise figures pile up.
        plt.close(fig)

    buf.seek(0)
    img = Image.open(buf).convert("RGBA")

    final_img = Image.new("RGBA", img.size, bg_color)
    final_img.alpha_composite(img)
    return final_img


def _pdf_to_image(pdf_path):
    """Render all pages of ``pdf_path`` and stack them vertically (RGBA, white)."""
    pages = convert_from_path(pdf_path)

    total_h = sum(page.height for page in pages)
    max_w = max(page.width for page in pages)

    base_img = Image.new("RGBA", (max_w, total_h), "white")
    current_y = 0
    for page in pages:
        base_img.paste(page.convert("RGBA"), (0, current_y))
        current_y += page.height
    return base_img


def _draw_local_feedback(final_img, local_fb, hmin, image_offset_y):
    """Draw boxed feedback on ``final_img`` and return the (possibly taller) image.

    Each box ``[ymin, xmin, ymax, xmax]`` is shifted by ``-hmin`` vertically and
    by the header height / left margin, outlined in red, and its text is pasted
    at x=10, vertically centred on the box, pushed down when it would overlap
    the previous text. The canvas grows when a text would fall off the bottom.
    """
    draw = ImageDraw.Draw(final_img, "RGBA")
    last_text_bottom = 0

    for fb in local_fb:
        box = fb.get('box_2d')
        if not box or len(box) < 4:
            continue

        ymin, xmin, ymax, xmax = box[0], box[1], box[2], box[3]
        target_ymin = (ymin - hmin) + image_offset_y
        target_ymax = (ymax - hmin) + image_offset_y
        target_xmin = xmin + MARGIN_LEFT
        target_xmax = xmax + MARGIN_LEFT

        # Recent Pillow versions reject rectangles whose corners are inverted.
        left, right = sorted((target_xmin, target_xmax))
        top, bottom = sorted((target_ymin, target_ymax))
        draw.rectangle([left, top, right, bottom], outline="red", width=3)

        text = fb.get('text')
        if not text:
            continue

        txt_img = render_latex_text(
            text,
            width_px=600,
            bg_color=(255, 200, 200, 180),  # light red, semi-transparent
            max_lines=3
        )

        txt_h = txt_img.height
        center_y = (target_ymin + target_ymax) / 2
        paste_y = center_y - (txt_h / 2)
        paste_y = max(paste_y, image_offset_y)

        # Prevent overlap with the previous text.
        if paste_y < last_text_bottom:
            paste_y = last_text_bottom + 5

        # Grow the canvas when the text would overflow (+20 bottom padding).
        required_height = int(paste_y + txt_h + 20)
        if required_height > final_img.height:
            taller = Image.new("RGB", (final_img.width, required_height), "white")
            taller.paste(final_img, (0, 0))
            final_img = taller
            # The draw object is bound to an image: rebind it to the new one.
            draw = ImageDraw.Draw(final_img, "RGBA")

        final_img.paste(txt_img, (10, int(paste_y)), mask=txt_img)
        last_text_bottom = paste_y + txt_h

    return final_img


def _annotate_answer(label, content, base_img):
    """Build the annotated image of one answer.

    Returns ``(image, score)`` where ``image`` is an RGB image made of header
    rows (label/score/error, then feedback without box), the page image
    shifted right by ``MARGIN_LEFT``, and the boxed feedback drawn on top.
    """
    # `coordinates` may be stored as None when the student was not located.
    coordinates = content.get('coordinates') or (0, 0)
    hmin = coordinates[0]

    result = content.get('result') or {}
    score = result.get('score', 0)
    error = result.get('error', "")
    feedbacks = result.get('feedback') or []

    global_fb = [f for f in feedbacks if not f.get('box_2d')]
    local_fb = [f for f in feedbacks if f.get('box_2d')]
    local_fb.sort(key=lambda x: x['box_2d'][0])  # top to bottom

    # --- Headers ---
    score_text = f"{label} ; Note : {score}"
    if error and error != "null":
        score_text += f" | Error: {error}"

    header_elements = [render_latex_text(score_text, base_img.width, fontsize=18)]
    for fb in global_fb:
        text = fb.get('text')
        if text:
            header_elements.append(render_latex_text(text, base_img.width))

    header_height = sum(img.height for img in header_elements)
    total_height = base_img.height + header_height

    final_img = Image.new("RGB", (base_img.width + MARGIN_LEFT, total_height), "white")

    current_y = 0
    for elem in header_elements:
        final_img.paste(elem, (0, current_y))
        current_y += elem.height

    # current_y is now the vertical offset of the page content.
    image_offset_y = current_y
    final_img.paste(base_img, (MARGIN_LEFT, image_offset_y))

    final_img = _draw_local_feedback(final_img, local_fb, hmin, image_offset_y)
    return final_img, score


def process_correction(root_dir, data, all_labels):
    """Write the annotated images, ``score.json`` and ``Concat.jpg`` of every student.

    Args:
        root_dir: directory holding the ``Copie<id>`` folders; the
            ``Anot_Copie<id>`` output folders are created next to them.
        data: mapping produced by ``make_dictionary``.
        all_labels: every known label; ``score.json`` contains one key per
            label, with ``""`` for labels that could not be processed.

    Students whose ``Concat.jpg`` already exists are skipped. Otherwise the
    output folder is deleted and rebuilt. Answers whose PDF is missing or
    cannot be converted are reported and skipped.
    """
    for student_id, labels in data.items():
        output_dir = os.path.join(root_dir, f"Anot_Copie{student_id}")

        concat_path = os.path.join(output_dir, "Concat.jpg")
        if os.path.exists(concat_path):
            print(f"Skipping Copie {student_id} (Concat.jpg exists)")
            continue

        print("Processing :", student_id)

        # Clean folder if re-processing.
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        d_notes = dict.fromkeys(all_labels, "")

        for label, content in labels.items():
            pdf_full_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
            if not os.path.exists(pdf_full_path):
                print(f"File not found: {pdf_full_path}")
                continue

            try:
                base_img = _pdf_to_image(pdf_full_path)
            except Exception as e:  # conversion is best-effort: report and go on
                print(f"Error converting {pdf_full_path}: {e}")
                continue

            final_img, score = _annotate_answer(label, content, base_img)
            d_notes[label] = str(score)

            save_path = os.path.join(output_dir, f"{label}.jpg")
            final_img.save(save_path)

        json_path = os.path.join(output_dir, "score.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(d_notes, f, indent=4)

        concat_display_image(output_dir)


def concat_display_image(subdir):
    """Stack every ``*.jpg`` of ``subdir`` (except ``Concat.jpg``) into ``Concat.jpg``.

    Images are ordered by the numbers found in their file name, then by path.
    Nothing is written when the folder holds no image.
    """
    subdir = Path(subdir)

    def sort_key(path):
        return [int(n) for n in re.findall(r'\d+', path.name)], path

    images = sorted(
        (f for f in subdir.glob("*.jpg") if f.name != "Concat.jpg"),
        key=sort_key,
    )
    if not images:
        return

    save_path = subdir / "Concat.jpg"

    # ExitStack closes every opened image, even if saving fails.
    with contextlib.ExitStack() as stack:
        opened_imgs = [stack.enter_context(Image.open(img)) for img in images]

        max_w = max(i.width for i in opened_imgs)
        total_h = sum(i.height for i in opened_imgs)

        # White like the other canvases, so narrower images get no black band.
        canvas = Image.new('RGB', (max_w, total_h), "white")
        current_y = 0
        for img in opened_imgs:
            canvas.paste(img, (0, current_y))
            current_y += img.height

        canvas.save(save_path)

    print(f"Saved: {save_path}")
    # To open the result automatically:
    # subprocess.call(('xdg-open', save_path))


def main():
    """Command-line entry point: ``python script.py <root_dir>``."""
    if len(sys.argv) < 2:
        print("Usage: python script.py <root_dir>")
        sys.exit(1)

    root_dir = sys.argv[1]
    labels_text = (Path(root_dir) / "labels").read_text(encoding="utf-8")
    labels = [line for line in labels_text.splitlines() if line]

    # Results is: Copie id -> label -> {pdf_path, result, coordinates}
    results = make_dictionary(root_dir)
    process_correction(root_dir, results, labels)


if __name__ == "__main__":
    main()