"""Read back a teacher-annotated PDF strip and apply its edits.

For every ``Bnot/Copie<id>`` folder, the annotated PDF is compared with the
reference image that was originally generated.  Ticked checkboxes become
"actions" (score change, feedback deletion, ...), every other pixel difference
is kept as a transparent "manual notes" layer, and the per-label images plus
the concatenated strips are regenerated accordingly.
"""

import json
import os
import shutil
import sys
from pathlib import Path

import numpy as np
from pdf2image import convert_from_path
from PIL import Image, ImageChops, ImageDraw, ImageFilter

import annotating  # Reuse rendering logic
from annotating import ANNOT_WIDTH, MARGIN_LEFT
from utils import natural_key, read_all_labels

# Disable Pillow's pixel-count limit so that very large strips can be opened.
Image.MAX_IMAGE_PIXELS = None

DPI = 100


def _stack_vertically(images, width, fill=0):
    """Return a new RGB image of *width* with *images* pasted top to bottom."""
    total_height = sum(img.height for img in images)
    canvas = Image.new("RGB", (width, total_height), fill)
    offset = 0
    for img in images:
        canvas.paste(img, (0, offset))
        offset += img.height
    return canvas


def detect_checks_and_notes(output_dir):
    """Compare the annotated PDF of *output_dir* with its reference image.

    Args:
        output_dir: Folder expected to contain ``Concat_annotated.pdf``,
            ``Reference.jpg`` and ``checkboxes.json``.

    Returns:
        A tuple ``(actions, notes_img)``:
            actions: list of the checkbox dicts (as read from
                ``checkboxes.json``) that were detected as checked.
            notes_img: RGBA copy of the annotated image whose alpha channel
                keeps only the manual notes (checkbox areas masked out).
        ``([], None)`` is returned when an input file is missing or the PDF
        cannot be rendered.
    """
    candidate_names = ["Concat_annotated.pdf"]
    pdf_path = None
    for name in candidate_names:
        pdf_path = os.path.join(output_dir, name)
        if os.path.exists(pdf_path):
            break
    ref_path = os.path.join(output_dir, "Reference.jpg")
    json_path = os.path.join(output_dir, "checkboxes.json")

    # All three inputs are required; the JSON file used to be opened unchecked.
    required_paths = (pdf_path, ref_path, json_path)
    if not all(p is not None and os.path.exists(p) for p in required_paths):
        print(f"\tMissing annotated file in {output_dir}")
        return [], None

    # Load coordinates
    with open(json_path, 'r') as f:
        boxes = json.load(f)

    # Load reference; convert() returns an independent image, so the file
    # handle can be released immediately.
    with Image.open(ref_path) as ref_file:
        ref_img = ref_file.convert("RGB")

    # Load the user PDF.
    # Warning: if the PDF is huge, pdf2image might split pages or run out of
    # memory.  We assume the user did not change page dimensions/order.
    try:
        # Rendering at 72 dpi avoids size mismatches with the reference
        # (compared with rendering at DPI) but yields a noisier diff.
        user_pages = convert_from_path(pdf_path, dpi=72)
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return [], None
    if not user_pages:
        print("Error reading PDF: no page could be rendered")
        return [], None

    # Concatenate PDF pages back to one image if the user saved several pages
    user_img = _stack_vertically(user_pages, user_pages[0].width)

    # Resize user_img to match ref_img if slight mismatch (DPI export diffs)
    if user_img.size != ref_img.size:
        print("Debug : size mismatch : ", user_img.size, ref_img.size)
        user_img = user_img.resize(ref_img.size, Image.Resampling.LANCZOS)

    # --- Detection Phase ---
    actions = []

    # Absolute per-channel difference; int16 is wide enough for uint8 operands.
    ref_arr = np.array(ref_img).astype(np.int16)
    user_arr = np.array(user_img).astype(np.int16)
    diff = np.abs(ref_arr - user_arr).astype(np.uint8)
    # Average the channels to get one intensity difference per pixel
    diff_gray = np.mean(diff, axis=2)

    # Thresholds for "checked"
    CHECK_THRESHOLD = 30  # intensity diff
    DENSITY_THRESHOLD = 0.05  # 5% of pixels changed
    ROI_INSET = 5  # pixels trimmed on each side of a box before measuring

    # Mask hiding checkmarks from the "notes" extraction:
    # white (255) = keep, black (0) = hide
    mask_img = Image.new("L", ref_img.size, 255)
    mask_draw = ImageDraw.Draw(mask_img)

    for box in boxes:
        # global_box: [x1, y1, x2, y2]
        raw_box = box['global_box']
        x1, y1, x2, y2 = map(int, raw_box)

        # Ensure bounds
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(ref_img.width, x2), min(ref_img.height, y2)

        # Analyze ROI
        roi = diff_gray[y1 + ROI_INSET:y2 - ROI_INSET,
                        x1 + ROI_INSET:x2 - ROI_INSET]
        if roi.size == 0:
            continue

        changed_pixels = np.sum(roi > CHECK_THRESHOLD)
        density = changed_pixels / roi.size

        if density > DENSITY_THRESHOLD:
            print("A checked box !", density, raw_box)
            actions.append(box)
            # It's checked, so we mask this area out for manual notes.
            # Expand the mask slightly to catch sloppy ticks.
            mask_draw.rectangle([x1 - 15, y1 - 15, x2 + 15, y2 + 15], fill=0)
        else:
            mask_draw.rectangle([x1 - 2, y1 - 2, x2 + 2, y2 + 2], fill=0)

        # NOTE(review): the original indentation was lost; this is applied
        # whether or not the box is checked - confirm.
        if box["type"] == "score" and box["value"] == 0.0:
            # Mask the whole line
            mask_draw.rectangle([0, y1 - 10, ref_img.width, y2 + 10], fill=0)

    # --- Extraction Phase ---
    # Tuning notes (alpha threshold + blur radius):
    #   150 + no blur is alright, with some lines at the end
    #   100 + 2 px blur is too clean: annotations get fragmented
    #   50 + 2 px blur seems good
    ref_blur = ref_img.filter(ImageFilter.GaussianBlur(2))
    user_blur = user_img.filter(ImageFilter.GaussianBlur(2))

    # 1. Difference image, binarised into an alpha channel
    diff_img = ImageChops.difference(ref_blur, user_blur).convert("L")
    diff_data = np.array(diff_img)
    alpha = np.where(diff_data > 50, 255, 0).astype(np.uint8)

    notes = user_img.convert("RGBA")

    # 2. Combine the diff-based alpha with the box mask
    mask_arr = np.array(mask_img)
    final_alpha = np.minimum(alpha, mask_arr)
    notes.putalpha(Image.fromarray(final_alpha))

    return actions, notes


def has_significant_notes(note_img, threshold=20):
    """Tell whether the note layer has visible (non-transparent) content.

    Args:
        note_img: Image to inspect; anything that is ``None`` or not in
            ``RGBA`` mode is reported as having no notes.
        threshold: Number of pixels with alpha > 50 that must be exceeded.

    Returns:
        ``True`` when more than *threshold* pixels have alpha above 50.
    """
    if note_img is None or note_img.mode != 'RGBA':
        return False

    alpha = np.array(note_img)[:, :, 3]
    # Count pixels with significant opacity
    visible_pixels = np.sum(alpha > 50)
    return bool(visible_pixels > threshold)


def _apply_actions(labels_data, actions):
    """Mutate *labels_data* according to *actions*; return the changed labels."""
    actions_by_label = {}
    for action in actions:
        actions_by_label.setdefault(action['label'], []).append(action)

    dirty_labels = set()
    for label, acts in actions_by_label.items():
        if label not in labels_data:
            continue

        result = labels_data[label]['result']
        feedbacks = result.get('feedback', [])

        # Index helpers: the dicts are the very objects stored in `feedbacks`,
        # so flagging them here modifies the result in place.
        global_fb = [fb for fb in feedbacks if not fb.get('box_2d')]
        local_fb = [fb for fb in feedbacks if fb.get('box_2d')]
        local_fb.sort(key=lambda fb: fb['box_2d'][0])

        for act in acts:
            kind = act['type']
            if kind == 'score':
                result['score'] = act['value']
                dirty_labels.add(label)
                print(f" > Updated score for {label} to {act['value']}")
            elif kind == 'del_global':
                if act['index'] < len(global_fb):
                    global_fb[act['index']]["to_delete"] = True
                    dirty_labels.add(label)
                    print(f" > Deleted global feedback in {label}")
            elif kind in ('del_local', 'del_local_rect'):
                if act['index'] < len(local_fb):
                    target = local_fb[act['index']]
                    if kind == 'del_local':
                        target["to_delete"] = True
                        print(f" > Deleted local feedback in {label}")
                    else:
                        target["norectangle"] = True
                        print(f" > Deleted rect in {label}")
                    # NOTE(review): the original indentation was lost; the
                    # label is marked dirty only when the index is valid,
                    # mirroring the 'del_global' branch - confirm.
                    dirty_labels.add(label)
    return dirty_labels


def _overlay_notes(final_img, sub_note, old_header_h, new_header_h):
    """Paste the manual-note layer *sub_note* onto *final_img* in place.

    The part above *old_header_h* is pasted at the top; the rest is pasted
    starting at *new_header_h*.
    """
    w, h = sub_note.size

    # 1. Paste header ink at the top
    if old_header_h > 0:
        header_crop = sub_note.crop((0, 0, w, min(h, old_header_h)))
        final_img.paste(header_crop, (0, 0), mask=header_crop)

    # 2. Paste student-content ink at the new header height
    if h > old_header_h:
        body_crop = sub_note.crop((0, old_header_h, w, h))
        final_img.paste(body_crop, (0, new_header_h), mask=body_crop)


def _save_concat(images, path):
    """Stack *images* vertically on a white canvas and save it to *path*."""
    max_w = max(img.width for img in images)
    _stack_vertically(images, max_w, "white").save(path)


def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer, all_labels):
    """Apply detected actions and rebuild the images of one student.

    Modifies ``data[student_id]`` according to *actions*, reads ``bnote.json``,
    cuts *notes_layer* per label, regenerates every label image for
    consistency, saves the modified ones, then writes ``score.json``,
    ``Concat.jpg`` and ``Concat_F.jpg`` in ``<root_dir>/Bnot/Copie<student_id>``.

    Args:
        root_dir: Root folder of the correction.
        data: Mapping ``student_id -> {label: content}``; modified in place.
        student_id: Key of the student in *data*.
        actions: Checked-box dicts as returned by ``detect_checks_and_notes``.
        notes_layer: RGBA image of manual notes, or ``None`` when there is none.
        all_labels: Labels used to pre-populate ``score.json`` with ``""``.

    Returns:
        None.  Returns early (after printing an error) if ``bnote.json`` is
        missing.
    """
    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
    bnote_path = os.path.join(output_dir, "bnote.json")
    score_path = os.path.join(output_dir, "score.json")

    if not os.path.exists(bnote_path):
        print(f"Error: bnote.json not found in {output_dir}")
        return

    with open(bnote_path, 'r') as f:
        bnote_data = json.load(f)

    labels_data = data[student_id]

    # --- 1. Apply Actions to Data (Update scores / Flags for deletion) ---
    dirty_labels = _apply_actions(labels_data, actions)

    # --- 2. Process Images (Cut notes, Regenerate, Concatenate) ---
    concat_list = []
    concat_list_F = []  # same strip without perfect, comment-free labels
    d_notes = dict.fromkeys(all_labels, "")

    # Iterate over images defined in bnote.json to maintain order/geometry
    for img_info in bnote_data.get("images", []):
        label = img_info["label"]
        if label not in labels_data:
            continue

        # Update scores dict
        content = labels_data[label]
        result = content['result']
        d_notes[label] = str(result.get('score', 0))

        # A. Cut manual notes
        hmin, hmax = img_info["hmin"], img_info["hmax"]
        sub_note = None
        if notes_layer is not None:
            sub_note = notes_layer.crop((0, hmin, notes_layer.width, hmax))
        # Always defined, even without a notes layer.
        has_notes = sub_note is not None and has_significant_notes(sub_note)

        # B. Regenerate label image
        # We always regenerate so that Concat.jpg reflects any modification.
        pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
        if not os.path.exists(pdf_path):
            continue

        (base_img, _, _) = annotating.make_base_image(pdf_path)

        # Compose uses the result object we modified in step 1
        final_img, new_header_h = annotating.compose_label_image(
            base_img,
            label,
            content['result'],
            content['coordinates'][0],
            with_error=False,
        )
        if final_img is None:
            continue

        # Overlay manual notes
        if has_notes:
            old_header_h = int(img_info.get("header_height", 0))
            _overlay_notes(final_img, sub_note, old_header_h, new_header_h)

        # C. Save individual file if modified (dirty logic or visual notes)
        if (label in dirty_labels) or has_notes:
            save_path = os.path.join(output_dir, f"{label}.jpg")
            final_img.save(save_path)
            print(f" Saved dirty image: {label}.jpg")

        concat_list.append(final_img)

        # A label scored 4.0 with no feedback is left out of Concat_F.jpg
        perfect_no_comment = (
            float(d_notes[label]) == 4.0
            and len(result.get('feedback', [])) == 0
        )
        if not perfect_no_comment:
            concat_list_F.append(final_img)

    # --- 3. Save Final Outputs ---
    with open(score_path, "w") as f:
        json.dump(d_notes, f, indent=4)
    print(f" Saved {score_path}")

    if concat_list:
        _save_concat(concat_list, os.path.join(output_dir, "Concat.jpg"))
        print(" Saved regenerated Concat.jpg")

    if concat_list_F:
        _save_concat(concat_list_F, os.path.join(output_dir, "Concat_F.jpg"))
        print(" Saved regenerated Concat_F.jpg")


def main():
    """Process every ``Bnot/Copie<id>`` folder under the given root directory."""
    if len(sys.argv) < 2:
        print("Usage: python reading_annotations.py <root_dir>")
        sys.exit(1)

    root_dir = sys.argv[1]

    try:
        all_labels = read_all_labels(Path(root_dir))
    except FileNotFoundError:
        all_labels = []

    # Load original data
    original_data = annotating.make_dictionary(root_dir)

    # Process each Bnot folder
    for student_id in original_data:
        bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
        if not os.path.exists(bnot_dir):
            continue

        print(f"Processing annotations for: {student_id}")
        actions, notes = detect_checks_and_notes(bnot_dir)

        if actions or notes is not None:
            apply_actions_and_regenerate(
                root_dir, original_data, student_id, actions, notes, all_labels
            )
        else:
            print(" No changes detected or missing files.")


if __name__ == "__main__":
    main()