# Copies/reading_annotations.py

import sys
import os
import json
import numpy as np
import shutil
from PIL import Image, ImageChops
Image.MAX_IMAGE_PIXELS = None
from pdf2image import convert_from_path
import annotating # Reuse rendering logic

def detect_checks_and_notes(output_dir):
    """
    Detect ticked checkboxes and extract the manual notes layer.

    Compares the user-annotated "Concat_annotated.pdf" with the untouched
    "Reference.png" (both read from ``output_dir``), using the box
    coordinates listed in "checkboxes.json".

    Args:
        output_dir: Directory containing the three files named above.

    Returns:
        actions: List of dicts {type, label, ...} for checked boxes
            (the matching entries of checkboxes.json, unmodified).
        notes_img: RGBA image of manual notes (checks masked out).

        ``([], None)`` is returned when one of the input files is missing,
        or when the PDF cannot be read or contains no page.
    """
    pdf_path = os.path.join(output_dir, "Concat_annotated.pdf")
    ref_path = os.path.join(output_dir, "Reference.png")
    json_path = os.path.join(output_dir, "checkboxes.json")

    # All three inputs are required; without checkboxes.json the open()
    # below would raise instead of reporting the missing file.
    if not all(os.path.exists(p) for p in (pdf_path, ref_path, json_path)):
        print(f"Missing files in {output_dir}")
        return [], None

    # Load box coordinates
    with open(json_path, 'r') as f:
        boxes = json.load(f)

    # Load reference render
    ref_img = Image.open(ref_path).convert("RGB")

    # Load the user PDF. Assumes the user did not change page
    # dimensions/order; very large PDFs may be split into pages or OOM.
    try:
        user_pages = convert_from_path(pdf_path)
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return [], None
    print("Debug : user_pages", len(user_pages))
    if not user_pages:
        print(f"Error reading PDF: no pages in {pdf_path}")
        return [], None

    # Stack the PDF pages back into one tall image (the annotation tool may
    # have kept the long format or split it). The widest page sets the
    # canvas width so that no page is cropped.
    total_h = sum(p.height for p in user_pages)
    max_w = max(p.width for p in user_pages)
    user_img = Image.new("RGB", (max_w, total_h), "white")
    y = 0
    for p in user_pages:
        user_img.paste(p, (0, y))
        y += p.height

    # Resize user_img to match ref_img if slight mismatch (DPI export diffs)
    if user_img.size != ref_img.size:
        print("Debug : size mismatch : ", user_img.size, ref_img.size)
        user_img = user_img.resize(ref_img.size, Image.Resampling.LANCZOS)

    # --- Detection Phase ---
    actions = []

    # Per-pixel absolute difference averaged over the colour channels.
    # int16 is wide enough for differences of uint8 values and much smaller
    # than the platform default int on large images.
    ref_arr = np.array(ref_img)
    user_arr = np.array(user_img)
    diff = np.abs(ref_arr.astype(np.int16) - user_arr.astype(np.int16))
    diff_gray = diff.mean(axis=2)

    CHECK_THRESHOLD = 30  # intensity diff for a pixel to count as changed
    DENSITY_THRESHOLD = 0.05  # share of changed pixels for a box to be checked
    INSET = 5  # pixels ignored on each side of a box when analysing it
    MASK_PAD = 5  # extra pixels masked around a checked box (sloppy ticks)

    # Mask hiding checkmarks from the notes extraction
    mask_img = Image.new("L", ref_img.size, 255)  # White = keep, Black = hide
    mask_draw = ImageDraw.Draw(mask_img)

    for box in boxes:
        # global_box: [x1, y1, x2, y2]
        coords = box['global_box']
        x1, y1, x2, y2 = map(int, coords)

        # Ensure bounds
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(ref_img.width, x2), min(ref_img.height, y2)

        # Inner region of the box. Skip degenerate regions explicitly: a
        # negative stop index would otherwise wrap around in the slice and
        # select an unrelated part of the image.
        rx1, ry1 = x1 + INSET, y1 + INSET
        rx2, ry2 = x2 - INSET, y2 - INSET
        if rx2 <= rx1 or ry2 <= ry1:
            continue

        roi = diff_gray[ry1:ry2, rx1:rx2]
        if roi.size == 0:
            continue

        changed_pixels = np.sum(roi > CHECK_THRESHOLD)
        density = changed_pixels / roi.size

        # Only checked boxes are masked; unchecked boxes are left in the
        # notes layer untouched.
        if density > DENSITY_THRESHOLD:
            print("A checked box !", density, coords)
            actions.append(box)
            mask_draw.rectangle(
                [x1 - MASK_PAD, y1 - MASK_PAD, x2 + MASK_PAD, y2 + MASK_PAD],
                fill=0,
            )

    # --- Extraction Phase ---
    # Build the "manual notes" layer: the user's pixels, made opaque only
    # where they differ from the reference and are not inside a checked box.

    # 1. Grayscale difference image
    diff_img = ImageChops.difference(ref_img, user_img).convert("L")

    # 2. Threshold away small differences: alpha is 255 where the images
    #    differ enough, 0 elsewhere.
    alpha = np.where(np.array(diff_img) > 20, 255, 0).astype(np.uint8)

    # 3. Keep the user's own colours and combine the diff-based alpha with
    #    the checked-box mask.
    notes = user_img.convert("RGBA")
    final_alpha = np.minimum(alpha, np.array(mask_img))
    notes.putalpha(Image.fromarray(final_alpha))

    return actions, notes

from PIL import ImageDraw

def _apply_actions_to_labels(labels, actions):
    """Apply checkbox actions (score updates, feedback deletions) to ``labels`` in place."""
    # Group actions by the label they target
    actions_by_label = {}
    for act in actions:
        actions_by_label.setdefault(act['label'], []).append(act)

    for label, acts in actions_by_label.items():
        if label not in labels:
            continue

        result = labels[label]['result']
        feedbacks = result.get('feedback', [])

        # Action indices refer to positions in the "global" list (feedback
        # without box_2d) or in the "local" list (feedback with box_2d,
        # sorted by box_2d[0]); map them back to positions in `feedbacks`.
        global_fb_indices = [i for i, f in enumerate(feedbacks) if not f.get('box_2d')]
        local_fb_sorted_map = sorted(
            (i for i, f in enumerate(feedbacks) if f.get('box_2d')),
            key=lambda i: feedbacks[i]['box_2d'][0],
        )

        items_to_remove = set()
        for act in acts:
            kind = act['type']
            if kind == 'set_score':
                result['score'] = act['value']
                print(f"  > Updated score for {label} to {act['value']}")

            elif kind == 'del_global':
                # The lower bound keeps a negative index from wrapping
                # around and deleting an unrelated feedback.
                if 0 <= act['index'] < len(global_fb_indices):
                    items_to_remove.add(global_fb_indices[act['index']])
                    print(f"  > Deleted global feedback in {label}")

            elif kind == 'del_local':
                if 0 <= act['index'] < len(local_fb_sorted_map):
                    items_to_remove.add(local_fb_sorted_map[act['index']])
                    print(f"  > Deleted local feedback in {label}")

        # Delete from the end so the remaining indices stay valid
        for idx in sorted(items_to_remove, reverse=True):
            del feedbacks[idx]


def _render_label_image(root_dir, student_id, label, content):
    """Render one label's PDF with header, global and local feedback; None if there is no PDF/page."""
    pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
    if not os.path.exists(pdf_path):
        return None

    pages = annotating.convert_from_path(pdf_path)
    if not pages:
        # Nothing to draw on; max() below would raise on an empty list.
        return None

    # Stack the PDF pages into one tall image
    base_img = Image.new(
        "RGBA",
        (max(p.width for p in pages), sum(p.height for p in pages)),
        "white",
    )
    y = 0
    for p in pages:
        base_img.paste(p.convert("RGBA"), (0, y))
        y += p.height

    margin_left = 200  # width of the left margin added beside the pages
    result = content['result']
    coordinates = content.get('coordinates', (0, 0))
    hmin = coordinates[0]  # subtracted from the vertical box coordinates below

    # Header: score line (plus error, if any) followed by global feedback
    score_text = f"{label}   ;   Note : {result.get('score', 0)}"
    if result.get('error') and result.get('error') != "null":
        score_text += f" | Error: {result.get('error')}"

    header_imgs = [annotating.render_latex_text(score_text, base_img.width, fontsize=18)]

    feedbacks = result.get('feedback', [])
    global_fb = [f for f in feedbacks if not f.get('box_2d')]
    local_fb = sorted(
        (f for f in feedbacks if f.get('box_2d')),
        key=lambda f: f['box_2d'][0],
    )

    for fb in global_fb:
        header_imgs.append(annotating.render_latex_text(fb['text'], base_img.width))

    total_h = base_img.height + sum(i.height for i in header_imgs)
    label_img = Image.new("RGB", (base_img.width + margin_left, total_h), "white")

    cy = 0
    for header in header_imgs:
        label_img.paste(header, (0, cy))
        cy += header.height
    img_offset_y = cy
    label_img.paste(base_img, (margin_left, img_offset_y))

    # Local feedback: red rectangle on the page, text box in the left margin
    draw = ImageDraw.Draw(label_img, "RGBA")
    last_bot = 0
    for fb in local_fb:
        ymin, xmin, ymax, xmax = fb['box_2d']
        t_ymin = (ymin - hmin) + img_offset_y
        t_ymax = (ymax - hmin) + img_offset_y
        draw.rectangle(
            [xmin + margin_left, t_ymin, xmax + margin_left, t_ymax],
            outline="red",
            width=3,
        )

        txt = annotating.render_latex_text(fb['text'], 500, (255,200,200,180), max_lines=3)
        # Centre the text on the box vertically, never above the page area
        # and never overlapping the previous text box.
        py = max((t_ymin + t_ymax) / 2 - txt.height / 2, img_offset_y)
        if py < last_bot:
            py = last_bot + 5

        # Grow the canvas when the text box would run past the bottom
        if py + txt.height + 20 > label_img.height:
            taller = Image.new("RGB", (label_img.width, int(py + txt.height + 20)), "white")
            taller.paste(label_img, (0, 0))
            label_img = taller
            draw = ImageDraw.Draw(label_img, "RGBA")

        label_img.paste(txt, (10, int(py)), mask=txt)
        last_bot = py + txt.height

    return label_img


def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer):
    """
    Apply checkbox actions to a student's data and regenerate Concat.jpg.

    Steps:
      1. Update ``data[student_id]`` in place from ``actions`` (score
         changes and feedback deletions).
      2. Re-render every label whose "<root_dir>/Copie<id>/<label>.pdf"
         exists, using the ``annotating`` rendering helpers.
      3. Stack the label images vertically, paste ``notes_layer`` on top at
         the top-left corner (using its own alpha as mask), and save the
         result as "<root_dir>/Bnot/Copie<id>/Concat.jpg".

    Args:
        root_dir: Root directory of the copies.
        data: Mapping student id -> {label -> content}; modified in place.
        student_id: Key of the student to process in ``data``.
        actions: Action dicts with 'type' and 'label' keys, plus 'value'
            (set_score) or 'index' (del_global / del_local).
        notes_layer: RGBA image of manual notes, or None for no overlay.

    Returns:
        None. Nothing is saved when no label PDF could be rendered.
    """
    labels = data[student_id]

    # 1. Apply actions to the data
    _apply_actions_to_labels(labels, actions)

    # 2. Regenerate the clean per-label images
    final_concats = []
    for label, content in labels.items():
        label_img = _render_label_image(root_dir, student_id, label, content)
        if label_img is not None:
            final_concats.append(label_img)

    if not final_concats:
        return

    # Concatenate the labels vertically
    mw = max(i.width for i in final_concats)
    th = sum(i.height for i in final_concats)
    full_clean_img = Image.new("RGB", (mw, th), "white")
    y = 0
    for img in final_concats:
        full_clean_img.paste(img, (0, y))
        y += img.height

    # 3. Overlay manual notes. The notes layer was extracted against the
    # reference render, so its size may differ from the regenerated image
    # (e.g. after deleting feedback); it is aligned on the top-left corner.
    if notes_layer is not None:
        full_clean_img.paste(notes_layer, (0, 0), mask=notes_layer)

    # Save final Concat.jpg
    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
    os.makedirs(output_dir, exist_ok=True)
    final_path = os.path.join(output_dir, "Concat.jpg")
    full_clean_img.save(final_path)
    print(f"Saved regenerated: {final_path}")

if __name__ == "__main__":
    # Command-line entry point: the single argument is the root directory.
    if len(sys.argv) <= 1:
        print("Usage: python reading_annotations.py <Dir>")
        sys.exit(1)

    root_dir = sys.argv[1]

    # Original data, keyed by student id
    original_data = annotating.make_dictionary(root_dir)

    # Only students that have a Bnot/Copie<id> folder are processed
    for student_id in original_data:
        bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
        if not os.path.exists(bnot_dir):
            continue

        print(f"Processing annotations for: {student_id}")
        actions, notes = detect_checks_and_notes(bnot_dir)
        if not (actions or notes):
            print("  No changes detected or missing files.")
            continue

        apply_actions_and_regenerate(root_dir, original_data, student_id, actions, notes)