"""Read hand-made annotations back from a corrected sheet and regenerate it.

For every student found by ``annotating.make_dictionary(root_dir)`` the script
looks in ``ROOT_DIR/Bnot/Copie{student_id}`` and:

1. compares ``Concat_annotated.pdf`` (the sheet after manual annotation) with
   ``Reference.jpg`` (the sheet before annotation);
2. decides which checkboxes listed in ``checkboxes.json`` were ticked and
   turns each ticked box into an action;
3. extracts everything else that changed as a transparent "manual notes" layer;
4. applies the actions to the correction data, re-renders the labels through
   ``annotating`` and pastes the notes layer on top, saving ``Concat.jpg``.
"""

import json
import os
import re
import shutil
import sys
from collections import defaultdict

import numpy as np
from pdf2image import convert_from_path
from PIL import Image, ImageChops, ImageDraw, ImageFilter

import annotating  # Reuse rendering logic
from annotating import MARGIN_LEFT, ANNOT_WIDTH

# Disable Pillow's decompression-bomb size limit; the sheets handled here may
# be one very long strip.
Image.MAX_IMAGE_PIXELS = None

DPI = 100

# Resolution used to rasterise the annotated PDF.  Rendering at 72 dpi avoids
# size mismatches with the reference image, at the price of a noisier diff.
USER_PDF_DPI = 72

# A pixel counts as "changed" inside a checkbox when its mean per-channel
# difference exceeds CHECK_THRESHOLD; a box counts as ticked when more than
# DENSITY_THRESHOLD of its inner pixels changed.
CHECK_THRESHOLD = 30
DENSITY_THRESHOLD = 0.05

# Number of pixels trimmed on each side of a checkbox before measuring it, so
# that the printed frame of the box does not take part in the measurement.
ROI_INSET = 5

# Minimum grayscale difference for a pixel to be kept in the notes layer.
# A high value filters out JPEG noise on the (not perfectly white) background.
NOTE_THRESHOLD = 100


def _stack_pages(pages):
    """Stack rendered PDF pages vertically into a single RGB image.

    The width of the first page is used for the whole strip; ``pages`` must
    not be empty.
    """
    strip = Image.new("RGB", (pages[0].width, sum(p.height for p in pages)))
    offset = 0
    for page in pages:
        strip.paste(page, (0, offset))
        offset += page.height
    return strip


def _find_checked_boxes(boxes, ref_img, user_img):
    """Return ``(actions, mask_img)`` for the checkboxes described in *boxes*.

    ``actions`` is the list of box dicts considered ticked.  ``mask_img`` is an
    "L" image the size of *ref_img*: 255 where manual notes may be kept, 0 over
    every measured checkbox (and over the full row of a 0.0 score box).
    """
    # int16 is wide enough for differences of 8-bit channels and uses a
    # quarter of the memory of the platform int on large sheets.
    ref_arr = np.array(ref_img).astype(np.int16)
    user_arr = np.array(user_img).astype(np.int16)
    diff_gray = np.abs(ref_arr - user_arr).mean(axis=2)

    mask_img = Image.new("L", ref_img.size, 255)  # 255 = keep, 0 = hide
    mask_draw = ImageDraw.Draw(mask_img)

    actions = []
    for box in boxes:
        # global_box: [x1, y1, x2, y2]
        b = box['global_box']
        x1, y1, x2, y2 = map(int, b)

        # Clamp to the image.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(ref_img.width, x2), min(ref_img.height, y2)

        roi = diff_gray[y1 + ROI_INSET:y2 - ROI_INSET,
                        x1 + ROI_INSET:x2 - ROI_INSET]
        if roi.size == 0:
            # Box too small (or outside the image) to be measured.
            continue

        density = np.sum(roi > CHECK_THRESHOLD) / roi.size
        if density > DENSITY_THRESHOLD:
            print("A checked box !", density, b)
            actions.append(box)
            # Ticked: hide a slightly larger area to catch sloppy ticks.
            mask_draw.rectangle([x1 - 5, y1 - 5, x2 + 5, y2 + 5], fill=0)
        else:
            mask_draw.rectangle([x1 - 2, y1 - 2, x2 + 2, y2 + 2], fill=0)

        # NOTE(review): the nesting of this statement could not be recovered
        # from the flattened source; it is assumed to apply to every measured
        # box, ticked or not -- confirm against the original file.
        if box["type"] == "score" and box["value"] == 0.0:
            # Mask the whole line
            mask_draw.rectangle([0, y1 - 5, ref_img.width, y2 + 5], fill=0)

    return actions, mask_img


def _extract_notes(ref_img, user_img, mask_img):
    """Return an RGBA copy of *user_img* that is opaque only on manual notes.

    A pixel is kept when it differs strongly from *ref_img* and is not hidden
    by *mask_img*; kept pixels carry the user's own colours.
    """
    diff_img = ImageChops.difference(ref_img, user_img).convert("L")
    alpha = np.where(np.array(diff_img) > NOTE_THRESHOLD, 255, 0).astype(np.uint8)

    # Combine the diff-based alpha with the checkbox mask.
    final_alpha = np.minimum(alpha, np.array(mask_img))

    notes = user_img.convert("RGBA")
    notes.putalpha(Image.fromarray(final_alpha))
    return notes


def detect_checks_and_notes(output_dir):
    """Detect ticked checkboxes and extract manual notes for one sheet.

    Reads ``Concat_annotated.pdf``, ``Reference.jpg`` and ``checkboxes.json``
    from *output_dir*.

    Returns:
        actions: List of dicts {type, label, ...} for checked boxes
        notes_img: RGBA image of manual notes (checks masked out)

    ``([], None)`` is returned, after printing a message, when one of the
    three files is missing or the PDF cannot be rendered.
    """
    pdf_path = os.path.join(output_dir, "Concat_annotated.pdf")
    ref_path = os.path.join(output_dir, "Reference.jpg")
    json_path = os.path.join(output_dir, "checkboxes.json")

    # All three inputs are required; checking the JSON as well avoids an
    # unhandled FileNotFoundError further down.
    if not all(os.path.exists(p) for p in (pdf_path, ref_path, json_path)):
        print(f"Missing files in {output_dir}")
        return [], None

    # Load checkbox coordinates.
    with open(json_path, 'r', encoding="utf-8") as f:
        boxes = json.load(f)

    # Load the reference; convert() returns a loaded copy, so the file can be
    # closed right away.
    with Image.open(ref_path) as src:
        ref_img = src.convert("RGB")

    # Load the user PDF.  Assumes the user did not change page dimensions or
    # order.  Warning: a huge PDF may be split into pages or exhaust memory.
    try:
        user_pages = convert_from_path(pdf_path, dpi=USER_PDF_DPI)
    except Exception as e:  # best-effort boundary: report and skip this sheet
        print(f"Error reading PDF: {e}")
        return [], None
    if not user_pages:
        print(f"Error reading PDF: no pages in {pdf_path}")
        return [], None

    # Concatenate the pages back into one image if the user saved several.
    user_img = _stack_pages(user_pages)

    # Resize to the reference if the export resolution differs slightly.
    if user_img.size != ref_img.size:
        print("Debug : size mismatch : ", user_img.size, ref_img.size)
        user_img = user_img.resize(ref_img.size, Image.Resampling.LANCZOS)

    actions, mask_img = _find_checked_boxes(boxes, ref_img, user_img)
    notes = _extract_notes(ref_img, user_img, mask_img)
    return actions, notes


def natural_key(text):
    """Return a sort key that orders embedded numbers numerically.

    Digit runs become ints and the rest is lower-cased, so that for example
    "Q10" sorts after "Q2".
    """
    return [int(c) if c.isdigit() else c.lower()
            for c in re.split(r'(\d+)', str(text))]


def _apply_label_actions(label, result, acts):
    """Apply the actions *acts* of one label to its *result* dict, in place.

    ``score`` actions overwrite ``result['score']``.  The ``del_*`` actions do
    not remove anything: they add a marker key to the targeted feedback entry
    (presumably honoured by ``annotating`` when rendering -- verify there).
    """
    feedbacks = result.get('feedback', [])

    # Action indices refer to positions inside these two sub-lists, not to
    # positions in `feedbacks` itself.
    global_idx = [i for i, fb in enumerate(feedbacks) if not fb.get('box_2d')]
    # Local feedbacks are ordered by the first box_2d coordinate to match the
    # generation order in annotating.py.
    local_idx = sorted(
        (i for i, fb in enumerate(feedbacks) if fb.get('box_2d')),
        key=lambda i: feedbacks[i]['box_2d'][0],
    )

    # action type: (index table, marker key, log text)
    markers = {
        'del_global': (global_idx, "to_delete", "Deleted global feedback"),
        'del_local': (local_idx, "to_delete", "Deleted local feedback"),
        'del_local_rect': (local_idx, "norectangle",
                           "Deleted rect of local feedback"),
    }

    for act in acts:
        kind = act['type']
        if kind == 'score':
            result['score'] = act['value']
            print(f" > Updated score for {label} to {act['value']}")
            continue
        if kind not in markers:
            continue
        table, marker, text = markers[kind]
        # Out-of-range (including negative) indices are ignored.
        if 0 <= act['index'] < len(table):
            feedbacks[table[act['index']]][marker] = None
            print(f" > {text} in {label}")


def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer):
    """Apply checkbox actions to *data* and rebuild the student's Concat.jpg.

    ``data[student_id]`` is modified in place according to *actions*.  Every
    label whose ``ROOT_DIR/Copie{student_id}/{label}.pdf`` exists is then
    re-rendered through ``annotating``, the label images are stacked
    vertically in natural label order, *notes_layer* (if not None) is pasted
    on top using its own alpha, and the result is saved as
    ``ROOT_DIR/Bnot/Copie{student_id}/Concat.jpg``.

    Nothing is saved when no label could be rendered.
    """
    labels = data[student_id]

    # 1. Apply actions to the data, grouped by label.
    actions_by_label = defaultdict(list)
    for action in actions:
        actions_by_label[action['label']].append(action)

    for label in sorted(actions_by_label, key=natural_key):
        if label in labels:
            _apply_label_actions(label, labels[label]['result'],
                                 actions_by_label[label])

    # 2. Regenerate the clean image from the updated data.
    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
    copie_dir = os.path.join(root_dir, f"Copie{student_id}")

    rendered = []
    for label, content in sorted(labels.items(),
                                 key=lambda item: natural_key(item[0])):
        pdf_path = os.path.join(copie_dir, f"{label}.pdf")
        if not os.path.exists(pdf_path):
            continue
        base_img, _total_h, _max_w = annotating.make_base_image(pdf_path)
        rendered.append(annotating.compose_label_image(
            base_img, label, content['result'], content['coordinates'][0]
        ))

    if not rendered:
        return

    full_clean_img = Image.new(
        "RGB",
        (max(img.width for img in rendered), sum(img.height for img in rendered)),
        "white",
    )
    offset = 0
    for img in rendered:
        full_clean_img.paste(img, (0, offset))
        offset += img.height

    # 3. Overlay the manual notes.
    if notes_layer is not None:
        full_clean_img.paste(notes_layer, (0, 0), mask=notes_layer)

    out_path = os.path.join(output_dir, "Concat.jpg")
    full_clean_img.save(out_path)
    print(f"Saved regenerated: {out_path}")


def main():
    """Process every student's Bnot folder under the root given on the CLI."""
    if len(sys.argv) < 2:
        print("Usage: python reading_annotations.py ROOT_DIR")
        sys.exit(1)

    root_dir = sys.argv[1]

    # Load original data
    original_data = annotating.make_dictionary(root_dir)

    for student_id in original_data:
        bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
        if not os.path.exists(bnot_dir):
            continue

        print(f"Processing annotations for: {student_id}")
        actions, notes = detect_checks_and_notes(bnot_dir)

        if actions or notes is not None:
            apply_actions_and_regenerate(root_dir, original_data, student_id,
                                         actions, notes)
        else:
            print(" No changes detected or missing files.")


if __name__ == "__main__":
    main()