# Copies/reading_annotations.py

import sys
import os
import json
import numpy as np
import shutil
from PIL import Image, ImageChops, ImageFilter
# Setting the limit to None turns off Pillow's maximum-pixel-count check
# (decompression-bomb protection), so arbitrarily large images can be opened.
Image.MAX_IMAGE_PIXELS = None
from pdf2image import convert_from_path
import annotating # Reuse rendering logic

# Rasterisation resolution in dots per inch.
# NOTE(review): the live convert_from_path call in detect_checks_and_notes
# passes dpi=72 rather than this constant -- confirm whether DPI is still used
# elsewhere (e.g. by the rendering side).
DPI = 100

def detect_checks_and_notes(output_dir):
    """
    Detect ticked checkboxes and extract handwritten notes from an annotated PDF.

    The annotated PDF (``Concat_annotated.pdf``) is rasterised and compared
    pixel by pixel with the untouched reference rendering (``Reference.jpg``).
    Checkbox descriptions are read from ``checkboxes.json``; every entry must
    carry a ``global_box`` of the form ``[x1, y1, x2, y2]``.

    Args:
        output_dir: Directory containing the three files named above.

    Returns:
        actions: List of dicts {type, label, ...} for checked boxes
        notes_img: RGBA image of manual notes (checks masked out)

        ``([], None)`` is returned when one of the input files is missing or
        when the PDF cannot be rendered to at least one page.
    """
    pdf_path = os.path.join(output_dir, "Concat_annotated.pdf")
    ref_path = os.path.join(output_dir, "Reference.jpg")
    json_path = os.path.join(output_dir, "checkboxes.json")

    # All three inputs are required. checkboxes.json is part of the check so
    # that its absence is reported like the others instead of raising when it
    # is opened below.
    if not all(os.path.exists(p) for p in (pdf_path, ref_path, json_path)):
        print(f"Missing files in {output_dir}")
        return [], None

    # Load coordinates
    with open(json_path, 'r') as f:
        boxes = json.load(f)

    # Load reference
    ref_img = Image.open(ref_path).convert("RGB")

    # Load the user PDF. It is expected to be one long strip, but it may have
    # been saved as several pages; page dimensions/order are assumed unchanged.
    # dpi=72 (rather than the module-level DPI) avoids size mismatches with the
    # reference image, at the price of a noisier diff.
    try:
        user_pages = convert_from_path(pdf_path, dpi=72)
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return [], None
    if not user_pages:
        # Nothing to compare against; indexing user_pages[0] would fail.
        print(f"Error reading PDF: no pages in {pdf_path}")
        return [], None

    # Stack the PDF pages vertically back into a single image.
    total_h = sum(p.height for p in user_pages)
    user_img = Image.new("RGB", (user_pages[0].width, total_h))
    y = 0
    for p in user_pages:
        user_img.paste(p, (0, y))
        y += p.height

    # Resize user_img to match ref_img if slight mismatch (DPI export diffs)
    if user_img.size != ref_img.size:
        print("Debug : size mismatch : ", user_img.size, ref_img.size)
        user_img = user_img.resize(ref_img.size, Image.Resampling.LANCZOS)

    # --- Detection Phase ---
    actions = []

    # Per-pixel absolute difference, averaged over the colour channels.
    ref_arr = np.array(ref_img)
    user_arr = np.array(user_img)
    diff = np.abs(ref_arr.astype(int) - user_arr.astype(int)).astype(np.uint8)
    diff_gray = np.mean(diff, axis=2)

    # A box counts as checked when more than DENSITY_THRESHOLD of its inner
    # pixels differ from the reference by more than CHECK_THRESHOLD.
    CHECK_THRESHOLD = 30  # intensity diff
    DENSITY_THRESHOLD = 0.05  # 5% of pixels changed

    # Mask to hide checkmarks from the "Notes" extraction
    mask_img = Image.new("L", ref_img.size, 255)  # White (255) = keep, Black (0) = hide
    mask_draw = ImageDraw.Draw(mask_img)

    for box in boxes:
        # global_box: [x1, y1, x2, y2]
        raw_box = box['global_box']
        x1, y1, x2, y2 = map(int, raw_box)

        # Clamp to the image bounds
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(ref_img.width, x2), min(ref_img.height, y2)

        # Analyse the box shrunk by 5 px on every side (presumably to ignore
        # noise along the box border).
        ix1, iy1, ix2, iy2 = x1 + 5, y1 + 5, x2 - 5, y2 - 5
        # Test emptiness explicitly: a negative stop index would make numpy
        # count from the far edge of the image instead of giving an empty slice.
        if ix2 <= ix1 or iy2 <= iy1:
            continue
        roi = diff_gray[iy1:iy2, ix1:ix2]

        density = np.count_nonzero(roi > CHECK_THRESHOLD) / roi.size

        if density > DENSITY_THRESHOLD:
            print("A checked box !", density, raw_box)
            actions.append(box)
            # It's checked, so we mask this area out for manual notes.
            # Expand the mask slightly to catch sloppy ticks.
            mask_draw.rectangle([x1-5, y1-5, x2+5, y2+5], fill=0)
        else:
            mask_draw.rectangle([x1-2, y1-2, x2+2, y2+2], fill=0)

        if box.get("type") == "score" and box.get("value") == 0.0:
            # Mask the whole line
            mask_draw.rectangle([0, y1-5, ref_img.width, y2+5], fill=0)

    # --- Extraction Phase ---

    # Tuning notes (alpha threshold + blur radius):
    #   150 + no blur is alright, with some lines at the end
    #   100 + 2 px blur is too clean: the annotations get fragmented
    #   50 + 2 px blur seems good
    ref_blur = ref_img.filter(ImageFilter.GaussianBlur(2))
    user_blur = user_img.filter(ImageFilter.GaussianBlur(2))

    # Difference of the blurred images, thresholded into a binary alpha channel.
    diff_img = ImageChops.difference(ref_blur, user_blur).convert("L")
    alpha = np.where(np.array(diff_img) > 50, 255, 0).astype(np.uint8)

    notes = user_img.convert("RGBA")

    # Combine the diff-based alpha with the box mask: a pixel stays visible
    # only if it differs from the reference AND is not inside a masked area.
    final_alpha = np.minimum(alpha, np.array(mask_img))
    notes.putalpha(Image.fromarray(final_alpha))

    return actions, notes

from PIL import ImageDraw

import re
def natural_key(text):
    """
    Return a sort key that orders embedded numbers numerically.

    ``text`` is converted with ``str()`` and split into alternating
    non-numeric / numeric chunks; numeric chunks become ``int`` and the others
    are lower-cased, so that e.g. "a2" sorts before "a10".
    """
    # isdecimal() matches exactly what the regex \d matches and what int()
    # accepts; isdigit() would also accept characters such as "²" that int()
    # rejects.
    return [
        int(chunk) if chunk.isdecimal() else chunk.lower()
        for chunk in re.split(r'(\d+)', str(text))
    ]

from annotating import MARGIN_LEFT, ANNOT_WIDTH

def has_significant_notes(note_img, threshold=20):
    """
    Tell whether the note layer has visible content.

    Args:
        note_img: RGBA image of the notes, or None when there is no layer.
        threshold: Number of visible pixels that must be exceeded.

    Returns:
        True when more than ``threshold`` pixels have an alpha value above 50;
        False otherwise, including when ``note_img`` is None or not RGBA.
    """
    # No layer at all (or a non-RGBA one) carries no usable alpha channel.
    if note_img is None or getattr(note_img, "mode", None) != 'RGBA':
        return False

    alpha = np.array(note_img)[:, :, 3]
    # Count pixels with significant opacity
    visible_pixels = int(np.count_nonzero(alpha > 50))
    if visible_pixels > 0:
        print(f"Debug : visible pixels is {visible_pixels}")
    return visible_pixels > threshold

def _apply_actions_to_labels(labels_data, actions):
    """
    Apply checkbox actions to the per-label results, in place.

    Returns the set of labels whose data was modified.
    """
    actions_by_label = {}
    for act in actions:
        actions_by_label.setdefault(act['label'], []).append(act)

    dirty_labels = set()

    for label, acts in actions_by_label.items():
        if label not in labels_data:
            continue

        result = labels_data[label]['result']
        feedbacks = result.get('feedback', [])

        # Action indices address these two views; the dicts are the same
        # objects as in `feedbacks`, so flags set here are visible there.
        global_fb = [f for f in feedbacks if not f.get('box_2d')]
        local_fb = sorted(
            (f for f in feedbacks if f.get('box_2d')),
            key=lambda fb: fb['box_2d'][0],
        )

        for act in acts:
            kind = act['type']
            if kind == 'score':
                result['score'] = act['value']
                dirty_labels.add(label)
                print(f"  > Updated score for {label} to {act['value']}")

            elif kind == 'del_global':
                # The lower bound keeps a negative index from silently
                # selecting an entry counted from the end of the list.
                if 0 <= act['index'] < len(global_fb):
                    global_fb[act['index']]["to_delete"] = True
                    dirty_labels.add(label)
                    print(f"  > Deleted global feedback in {label}")

            elif kind in ('del_local', 'del_local_rect'):
                if 0 <= act['index'] < len(local_fb):
                    target = local_fb[act['index']]
                    if kind == 'del_local':
                        target["to_delete"] = True
                        print(f"  > Deleted local feedback in {label}")
                    else:
                        target["norectangle"] = True
                        print(f"  > Deleted rect in {label}")
                    dirty_labels.add(label)

    return dirty_labels


def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer):
    """
    Apply detected actions to a student's data and rebuild the output images.

    Reads ``bnote.json`` from ``<root_dir>/Bnot/Copie<student_id>``, updates
    ``data[student_id]`` in place according to ``actions``, re-renders every
    label image listed in ``bnote.json`` (overlaying the matching slice of
    ``notes_layer`` when it has visible content), saves the images that
    changed, and writes ``score.json`` and ``Concat.jpg`` to the same folder.

    Args:
        root_dir: Root directory holding the ``Copie<id>`` and ``Bnot`` folders.
        data: Mapping of student id to per-label data; modified in place.
        student_id: Id of the student to process.
        actions: Checked-box dicts; each has 'type' and 'label', plus 'value'
            (score) or 'index' (deletions).
        notes_layer: RGBA image of the manual notes, or None.

    Returns:
        None. An error is printed and nothing is written when ``bnote.json``
        or the student's data is missing.
    """
    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
    bnote_path = os.path.join(output_dir, "bnote.json")
    score_path = os.path.join(output_dir, "score.json")

    if not os.path.exists(bnote_path):
        print(f"Error: bnote.json not found in {output_dir}")
        return

    with open(bnote_path, 'r') as f:
        bnote_data = json.load(f)

    if student_id not in data:
        print(f"Error: no data for student {student_id}")
        return
    labels_data = data[student_id]

    # --- 1. Apply Actions to Data (Update scores / Flags for deletion) ---
    dirty_labels = _apply_actions_to_labels(labels_data, actions)

    # --- 2. Process Images (Cut notes, Regenerate, Concatenate) ---
    concat_list = []
    d_notes = {}

    # Iterate over images defined in bnote.json to maintain order/geometry
    for img_info in bnote_data.get("images", []):
        label = img_info["label"]
        if label not in labels_data:
            continue

        # The score is recorded even when the label image cannot be rebuilt.
        content = labels_data[label]
        d_notes[label] = str(content['result'].get('score', 0))

        pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
        if not os.path.exists(pdf_path):
            continue

        # A. Cut manual notes: the horizontal band [hmin, hmax) of the layer.
        sub_note = None
        if notes_layer is not None:
            sub_note = notes_layer.crop(
                (0, img_info["hmin"], notes_layer.width, img_info["hmax"])
            )
        # Without a layer there is nothing to inspect or overlay.
        has_notes = sub_note is not None and has_significant_notes(sub_note)

        # B. Regenerate the label image.
        # Always regenerated so Concat.jpg stays consistent with any change.
        (base_img, _, _) = annotating.make_base_image(pdf_path)

        # Compose uses the result object modified in step 1
        final_img, _ = annotating.compose_label_image(
            base_img, label, content['result'], content['coordinates'][0]
        )

        # Overlay manual notes, using their own alpha channel as the mask.
        if has_notes:
            final_img.paste(sub_note, (0, 0), mask=sub_note)

        # C. Save the individual file if modified (data change or visual notes)
        if (label in dirty_labels) or has_notes:
            save_path = os.path.join(output_dir, f"{label}.jpg")
            final_img.save(save_path)
            print(f"  Saved dirty image: {label}.jpg")

        concat_list.append(final_img)

    # --- 3. Save Final Outputs ---
    with open(score_path, "w") as f:
        json.dump(d_notes, f, indent=4)
    print(f"  Saved {score_path}")

    if concat_list:
        # Stack all label images vertically on a white canvas.
        max_w = max(i.width for i in concat_list)
        total_h = sum(i.height for i in concat_list)
        full_img = Image.new("RGB", (max_w, total_h), "white")

        y = 0
        for img in concat_list:
            full_img.paste(img, (0, y))
            y += img.height

        full_img.save(os.path.join(output_dir, "Concat.jpg"))
        print("  Saved regenerated Concat.jpg")

if __name__ == "__main__":
    # Command-line entry point: the single required argument is the root
    # directory that contains the "Bnot" folder.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python reading_annotations.py <Dir>")
        sys.exit(1)

    root_dir = cli_args[0]
    bnot_root = os.path.join(root_dir, "Bnot")

    # Original (un-annotated) data, keyed by student id
    original_data = annotating.make_dictionary(root_dir)

    # Handle every student that has a Bnot/Copie<id> folder
    for student_id in original_data.keys():
        bnot_dir = os.path.join(bnot_root, f"Copie{student_id}")
        if not os.path.exists(bnot_dir):
            continue

        print(f"Processing annotations for: {student_id}")
        actions, notes = detect_checks_and_notes(bnot_dir)

        if not (actions or notes):
            print("  No changes detected or missing files.")
            continue

        apply_actions_and_regenerate(
            root_dir, original_data, student_id, actions, notes
        )