import json
import os
import shutil
import sys

import numpy as np
from pdf2image import convert_from_path
from PIL import Image, ImageChops, ImageDraw

import annotating  # Reuse rendering logic

# Disable Pillow's decompression-bomb guard: the concatenated strips handled
# here can exceed the default pixel limit.
Image.MAX_IMAGE_PIXELS = None
def detect_checks_and_notes(
    output_dir,
    *,
    check_threshold=30,
    density_threshold=0.05,
    note_threshold=20,
    inset=5,
):
    """Detect ticked checkboxes and extract hand-written notes.

    Compares the user-annotated ``Concat_annotated.pdf`` against the
    pristine ``Reference.png`` found in *output_dir*.  Every box listed in
    ``checkboxes.json`` whose interior changed enough is reported as an
    action; everything else that changed is returned as a transparent
    overlay, with the ticked boxes masked out.

    Args:
        output_dir: Directory holding ``Concat_annotated.pdf``,
            ``Reference.png`` and ``checkboxes.json``.
        check_threshold: Minimum per-pixel mean channel difference (0-255)
            for a pixel inside a box to count as changed.
        density_threshold: Fraction of changed pixels inside a box above
            which the box is considered ticked.
        note_threshold: Minimum grayscale difference (0-255) for a pixel
            to be kept in the notes overlay.
        inset: Margin in pixels removed on each side of a box before it is
            analysed, and added on each side when a ticked box is masked.

    Returns:
        A ``(actions, notes_img)`` tuple.  ``actions`` is the list of box
        dicts from ``checkboxes.json`` that were detected as ticked.
        ``notes_img`` is an RGBA image carrying the user's pixels where
        they differ from the reference (alpha 255) and transparent
        elsewhere.  ``([], None)`` is returned when an input file is
        missing or the PDF cannot be rendered.
    """
    pdf_path = os.path.join(output_dir, "Concat_annotated.pdf")
    ref_path = os.path.join(output_dir, "Reference.png")
    json_path = os.path.join(output_dir, "checkboxes.json")

    # All three inputs are required; the JSON file is checked as well so a
    # missing coordinates file is reported instead of raising.
    if not all(os.path.exists(p) for p in (pdf_path, ref_path, json_path)):
        print(f"Missing files in {output_dir}")
        return [], None

    with open(json_path, "r", encoding="utf-8") as f:
        boxes = json.load(f)

    with Image.open(ref_path) as ref_file:
        ref_img = ref_file.convert("RGB")

    # Rendering is delegated to pdf2image; any failure there is reported and
    # treated as "nothing detected" (best effort, as before).
    try:
        user_pages = convert_from_path(pdf_path)
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return [], None
    print("Debug : user_pages", len(user_pages))
    if not user_pages:
        print(f"Error reading PDF: no pages in {pdf_path}")
        return [], None

    # Stitch the pages back into one vertical strip in case the annotated
    # PDF was saved as several pages.  The canvas is as wide as the widest
    # page and white-filled so narrower pages do not introduce dark bands.
    strip_w = max(p.width for p in user_pages)
    strip_h = sum(p.height for p in user_pages)
    user_img = Image.new("RGB", (strip_w, strip_h), "white")
    y = 0
    for page in user_pages:
        user_img.paste(page, (0, y))
        y += page.height

    # Bring the user image to the reference geometry so that the box
    # coordinates and the pixel-wise diff line up.
    if user_img.size != ref_img.size:
        print("Debug : size mismatch : ", user_img.size, ref_img.size)
        user_img = user_img.resize(ref_img.size, Image.Resampling.LANCZOS)

    # --- Detection phase ---
    # int16 is wide enough for a difference of two uint8 values and keeps
    # the temporary arrays small on very large strips.
    ref_arr = np.array(ref_img).astype(np.int16)
    user_arr = np.array(user_img).astype(np.int16)
    diff = np.abs(ref_arr - user_arr).astype(np.uint8)
    diff_gray = np.mean(diff, axis=2)

    # Mask used later to hide ticks from the notes layer:
    # 255 = keep the pixel, 0 = hide it.
    mask_img = Image.new("L", ref_img.size, 255)
    mask_draw = ImageDraw.Draw(mask_img)

    actions = []
    for box in boxes:
        raw_box = box['global_box']  # [x1, y1, x2, y2]
        x1, y1, x2, y2 = map(int, raw_box)

        # Clamp to the image.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(ref_img.width, x2), min(ref_img.height, y2)

        # Shrink the region to ignore the printed box outline.  Boxes too
        # small for the inset are skipped explicitly: a negative stop index
        # would otherwise wrap around and select an unrelated region.
        ix1, ix2 = x1 + inset, x2 - inset
        iy1, iy2 = y1 + inset, y2 - inset
        if ix2 <= ix1 or iy2 <= iy1:
            continue
        roi = diff_gray[iy1:iy2, ix1:ix2]
        if roi.size == 0:
            continue

        density = np.count_nonzero(roi > check_threshold) / roi.size
        if density > density_threshold:
            print("A checked box !", density, raw_box)
            actions.append(box)
            # Hide the tick (slightly enlarged to catch sloppy strokes) so
            # it does not end up in the notes layer.
            mask_draw.rectangle(
                [x1 - inset, y1 - inset, x2 + inset, y2 + inset], fill=0
            )

    # --- Extraction phase ---
    # Pixels that differ enough from the reference become opaque; the
    # threshold suppresses rendering/compression noise.
    diff_img = ImageChops.difference(ref_img, user_img).convert("L")
    alpha = np.where(np.array(diff_img) > note_threshold, 255, 0).astype(np.uint8)

    # Keep the user's own pixel colours and combine the diff-based alpha
    # with the checkbox mask.
    notes = user_img.convert("RGBA")
    final_alpha = np.minimum(alpha, np.array(mask_img))
    notes.putalpha(Image.fromarray(final_alpha))

    return actions, notes
def _apply_actions_to_labels(labels, actions):
    """Apply checkbox *actions* to the per-label correction data in place.

    Supported action types are ``set_score`` (overwrite the score),
    ``del_global`` (drop the n-th feedback without ``box_2d``) and
    ``del_local`` (drop the n-th feedback with ``box_2d``, counted in
    ascending order of ``box_2d[0]``).  Actions for unknown labels and
    out-of-range indices are ignored.
    """
    actions_by_label = {}
    for act in actions:
        actions_by_label.setdefault(act['label'], []).append(act)

    for label, acts in actions_by_label.items():
        if label not in labels:
            continue
        result = labels[label]['result']
        feedbacks = result.get('feedback') or []

        # Action indices refer to positions in the global / sorted-local
        # sub-lists, so map them back to positions in the full list.
        global_idx = [i for i, f in enumerate(feedbacks) if not f.get('box_2d')]
        local_idx = sorted(
            (i for i, f in enumerate(feedbacks) if f.get('box_2d')),
            key=lambda i: feedbacks[i]['box_2d'][0],
        )

        to_remove = set()
        for act in acts:
            kind = act['type']
            if kind == 'set_score':
                result['score'] = act['value']
                print(f" > Updated score for {label} to {act['value']}")
            elif kind == 'del_global':
                # The lower bound prevents a negative index from silently
                # deleting an entry counted from the end.
                if 0 <= act['index'] < len(global_idx):
                    to_remove.add(global_idx[act['index']])
                    print(f" > Deleted global feedback in {label}")
            elif kind == 'del_local':
                if 0 <= act['index'] < len(local_idx):
                    to_remove.add(local_idx[act['index']])
                    print(f" > Deleted local feedback in {label}")

        # Delete from the end so the remaining indices stay valid.
        for idx in sorted(to_remove, reverse=True):
            del feedbacks[idx]


def _render_label(root_dir, student_id, label, content, margin_left=200):
    """Render one label's pages with its header and feedback annotations.

    Returns the composed RGB image, or ``None`` when the label's PDF does
    not exist or yields no pages.
    """
    pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
    if not os.path.exists(pdf_path):
        return None

    pages = annotating.convert_from_path(pdf_path)
    if not pages:
        return None

    # Stack the PDF pages vertically on a white canvas.
    base_img = Image.new(
        "RGBA",
        (max(p.width for p in pages), sum(p.height for p in pages)),
        "white",
    )
    y = 0
    for page in pages:
        base_img.paste(page.convert("RGBA"), (0, y))
        y += page.height

    result = content['result']
    coordinates = content.get('coordinates', (0, 0))
    # NOTE(review): hmin is subtracted from the box y-coordinates below;
    # presumably the vertical offset of this label's crop - confirm.
    hmin = coordinates[0]

    score_text = f"{label} ; Note : {result.get('score', 0)}"
    if result.get('error') and result.get('error') != "null":
        score_text += f" | Error: {result.get('error')}"
    header_imgs = [
        annotating.render_latex_text(score_text, base_img.width, fontsize=18)
    ]

    # Feedback without a box goes into the header; boxed feedback is drawn
    # on the page, processed top to bottom.
    feedbacks = result.get('feedback') or []
    global_fb = [f for f in feedbacks if not f.get('box_2d')]
    local_fb = sorted(
        (f for f in feedbacks if f.get('box_2d')),
        key=lambda f: f['box_2d'][0],
    )
    for fb in global_fb:
        header_imgs.append(annotating.render_latex_text(fb['text'], base_img.width))

    total_h = base_img.height + sum(img.height for img in header_imgs)
    label_img = Image.new("RGB", (base_img.width + margin_left, total_h), "white")
    cy = 0
    for img in header_imgs:
        label_img.paste(img, (0, cy))
        cy += img.height
    img_offset_y = cy
    label_img.paste(base_img, (margin_left, img_offset_y))

    draw = ImageDraw.Draw(label_img, "RGBA")
    last_bot = 0
    for fb in local_fb:
        ymin, xmin, ymax, xmax = fb['box_2d']
        t_ymin = (ymin - hmin) + img_offset_y
        t_ymax = (ymax - hmin) + img_offset_y
        draw.rectangle(
            [xmin + margin_left, t_ymin, xmax + margin_left, t_ymax],
            outline="red",
            width=3,
        )

        txt = annotating.render_latex_text(
            fb['text'], 500, (255, 200, 200, 180), max_lines=3
        )
        # Centre the comment on its box, but never above the page area and
        # never overlapping the previous comment.
        py = max((t_ymin + t_ymax) / 2 - txt.height / 2, img_offset_y)
        if py < last_bot:
            py = last_bot + 5

        # Grow the canvas when the comment would run past the bottom.
        if py + txt.height + 20 > label_img.height:
            grown = Image.new(
                "RGB", (label_img.width, int(py + txt.height + 20)), "white"
            )
            grown.paste(label_img, (0, 0))
            label_img = grown
            draw = ImageDraw.Draw(label_img, "RGBA")

        label_img.paste(txt, (10, int(py)), mask=txt)
        last_bot = py + txt.height

    return label_img


def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer):
    """Apply checkbox actions, re-render the copy and overlay manual notes.

    The entry ``data[student_id]`` is modified in place according to
    *actions*.  Every label whose PDF exists under
    ``<root_dir>/Copie<student_id>/`` is then re-rendered, the label
    images are stacked vertically, *notes_layer* is pasted on top using
    its own alpha channel, and the result is written to
    ``<root_dir>/Bnot/Copie<student_id>/Concat.jpg``.

    Args:
        root_dir: Root directory containing the ``Copie<student_id>``
            folders.
        data: Mapping of student id to a mapping of label to content; each
            content is read through its ``'result'`` and optional
            ``'coordinates'`` keys.
        student_id: Key of the student to process in *data*.
        actions: Action dicts, each with at least ``'type'`` and
            ``'label'`` (plus ``'value'`` or ``'index'`` depending on the
            type).
        notes_layer: Image with an alpha channel to overlay at the
            top-left corner, or ``None`` for no overlay.

    Returns:
        None.  Nothing is written when no label could be rendered.
    """
    labels = data[student_id]

    # 1. Apply the actions to the data.
    _apply_actions_to_labels(labels, actions)

    # 2. Regenerate a clean image for every label that has a PDF.
    rendered = []
    for label, content in labels.items():
        label_img = _render_label(root_dir, student_id, label, content)
        if label_img is not None:
            rendered.append(label_img)
    if not rendered:
        return

    full_clean_img = Image.new(
        "RGB",
        (max(img.width for img in rendered), sum(img.height for img in rendered)),
        "white",
    )
    y = 0
    for img in rendered:
        full_clean_img.paste(img, (0, y))
        y += img.height

    # 3. Overlay the manual notes, aligned on the top-left corner.  The
    # regenerated image may be shorter than the notes layer when feedback
    # was removed.
    if notes_layer is not None:
        full_clean_img.paste(notes_layer, (0, 0), mask=notes_layer)

    # Make sure the destination exists before saving.
    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
    os.makedirs(output_dir, exist_ok=True)
    final_path = os.path.join(output_dir, "Concat.jpg")
    full_clean_img.save(final_path)
    print(f"Saved regenerated: {final_path}")