From 2e1c519dce870891b031e379024c5a47bcfdeeb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Miquel?= <sebastien.miquel@posteo.eu>
Date: Sun, 15 Feb 2026 14:24:25 +0100
Subject: [PATCH] Working state, with single images regeneration and better
 notes detection

---
 annotating.py             |   4 +-
 annotating_with_checks.py |  25 ++++-
 reading_annotations.py    | 197 ++++++++++++++++++++++----------------
 3 files changed, 138 insertions(+), 88 deletions(-)

diff --git a/annotating.py b/annotating.py
index 4a4ecfc..70c92d6 100644
--- a/annotating.py
+++ b/annotating.py
@@ -332,7 +332,7 @@ def compose_label_image(base_img, label, result, hmin,
 
         last_text_bottom = paste_y + txt_img.height
 
-    return final_img
+    return final_img, header_height
 
 def natural_key(text):
     return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))]
@@ -384,7 +384,7 @@ def process_correction(root_dir, data, all_labels, overwrite=False):
             score = result.get('score', 0)
             d_notes[label] = str(score)
 
-            final_img = compose_label_image(base_img, label, result, coordinates[0])
+            final_img, _ = compose_label_image(base_img, label, result, coordinates[0])
 
             # 7. Save Image
             save_path = os.path.join(output_dir, f"{label}.jpg")
diff --git a/annotating_with_checks.py b/annotating_with_checks.py
index 10835ae..6d9a2c9 100644
--- a/annotating_with_checks.py
+++ b/annotating_with_checks.py
@@ -110,6 +110,8 @@ def process_student(args):
 
     label_images = []
     all_checkboxes = []
+    bnote_entries = [] # For bnote.json
+
     sorted_labels = sorted(labels.items(), key=lambda x: natural_key(x[0]))
 
     for label, content in sorted_labels:
@@ -122,7 +124,7 @@ def process_student(args):
         cb_renderer = CheckboxRenderer(label)
 
         # Render using the shared engine
-        final_img = annotating.compose_label_image(
+        final_img, header_h = annotating.compose_label_image(
             base_img, label, content['result'], content['coordinates'][0],
             render_fn=safe_render_latex,
             draw_callback=cb_renderer.callback
@@ -130,6 +132,13 @@ def process_student(args):
 
         label_images.append(final_img)
         all_checkboxes.append(cb_renderer.checkboxes)
+        bnote_entries.append({
+            "id": student_id,
+            "label": label,
+            "header_height": header_h,
+            # hmin/hmax will be filled during concatenation
+            "img_h": final_img.height
+        })
 
     if not label_images: return
 
@@ -141,9 +150,13 @@ def process_student(args):
     final_json_map = []
     current_y = 0
 
-    for img, boxes in zip(label_images, all_checkboxes):
+    for idx, (img, boxes) in enumerate(zip(label_images, all_checkboxes)):
         concat_img.paste(img, (0, current_y))
 
+        bnote_entries[idx]["hmin"] = current_y
+        bnote_entries[idx]["hmax"] = current_y + img.height
+        del bnote_entries[idx]["img_h"] # Clean up temp data
+
         # Adjust coordinates for concatenated image
         for item in boxes:
             # item might have 'rel_box' (header) or 'final_box' (local)
@@ -154,6 +167,14 @@ def process_student(args):
 
         current_y += img.height
 
+    bnote_data = {
+        "width": max_w,
+        "height": total_h,
+        "images": bnote_entries
+    }
+    with open(os.path.join(output_dir, "bnote.json"), "w") as f:
+        json.dump(bnote_data, f, indent=2)
+
     with open(os.path.join(output_dir, "checkboxes.json"), "w") as f:
         json.dump(final_json_map, f, indent=2)
 
diff --git a/reading_annotations.py b/reading_annotations.py
index d27f932..e9c55de 100644
--- a/reading_annotations.py
+++ b/reading_annotations.py
@@ -107,26 +107,21 @@ def detect_checks_and_notes(output_dir):
             mask_draw.rectangle([0, y1-5, ref_img.width, y2+5], fill=0)
 
     # --- Extraction Phase ---
-    # Create the "Manual Notes" layer
-    # Logic: User - Ref. If Diff is dark -> Note.
-    # We want a transparent image with just the pen strokes.
 
-    # Try Gaussian Blur, peut-être inutile.
-    ref_blur = ref_img.filter(ImageFilter.GaussianBlur(5))
-    user_blur = user_img.filter(ImageFilter.GaussianBlur(5))
+    # Tuning notes (diff threshold + blur radius): 150 + no blur is alright, but leaves some stray lines at the end
+    # 100 + 2 px blur is too clean: the annotations come out fragmented
+    # 50 + 2 px blur seems good (the values used below)
+
+    ref_blur = ref_img.filter(ImageFilter.GaussianBlur(2))
+    user_blur = user_img.filter(ImageFilter.GaussianBlur(2))
 
     # 1. Get difference image
-    diff_img = ImageChops.difference(ref_img, user_img).convert("L")
+    # diff_img = ImageChops.difference(ref_img, user_img).convert("L")
+    diff_img = ImageChops.difference(ref_blur, user_blur).convert("L")
 
-    # 2. Threshold to remove JPEG noise (white background isn't perfect)
-    # Pixels that are different enough:
     diff_data = np.array(diff_img)
-    # Create alpha channel: 0 where no diff, 255 where diff
-    # Higher treshold is better
-    alpha = np.where(diff_data > 100, 255, 0).astype(np.uint8)
+    alpha = np.where(diff_data > 50, 255, 0).astype(np.uint8)
 
-    # 3. Create output image (Black strokes, variable alpha)
-    # Or Copy user colors? Better to copy user pixels.
     notes = user_img.convert("RGBA")
     r, g, b, a = notes.split()
 
@@ -148,110 +143,144 @@ def natural_key(text):
 
 from annotating import MARGIN_LEFT, ANNOT_WIDTH
 
+def has_significant_notes(note_img, threshold=20, alpha_min=50):
+    """Return True when an RGBA note layer holds more than `threshold` visible pixels.
+
+    A pixel counts as visible when its alpha exceeds `alpha_min`. A missing layer
+    (None) or a non-RGBA image is reported as having no notes.
+    """
+    if note_img is None or note_img.mode != 'RGBA':
+        return False
+    alpha = np.array(note_img)[:, :, 3]
+    # Count pixels with significant opacity
+    visible_pixels = int(np.sum(alpha > alpha_min))
+    if visible_pixels > 0:
+        print(f"Debug : visible pixels is {visible_pixels}")
+    return visible_pixels > threshold
+
 def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer):
     """
-    Modifies data based on actions, calls annotating.process_correction logic,
-    overlays notes, saves Concat.jpg.
+    Apply review actions to data[student_id], then, following bnote.json,
+    regenerate every label image with its slice of notes_layer overlaid, save
+    the modified ones, and write score.json and Concat.jpg.
     """
-    labels = data[student_id]
+    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
+    bnote_path = os.path.join(output_dir, "bnote.json")
+    score_path = os.path.join(output_dir, "score.json")
 
-    # 1. Apply Actions to Data
-    # Sort actions to handle indices correctly (delete from end?)
-    # But we regenerate from dictionary, so modifying the dictionary is fine.
+    if not os.path.exists(bnote_path):
+        print(f"Error: bnote.json not found in {output_dir}")
+        return
 
-    # Separate actions by label
+    with open(bnote_path, 'r') as f:
+        bnote_data = json.load(f)
+
+    labels_data = data[student_id]
+
+    # --- 1. Apply Actions to Data (Update scores / Flags for deletion) ---
     actions_by_label = {}
     for a in actions:
-        l = a['label']
-        if l not in actions_by_label:
-            actions_by_label[l] = []
-        actions_by_label[l].append(a)
+        actions_by_label.setdefault(a['label'], []).append(a)
 
-    for label, acts in sorted(actions_by_label.items(), key=lambda x: natural_key(x[0])):
-        if label not in labels: continue
+    dirty_labels = set() # Labels whose score or feedback an action changed
 
-        content = labels[label]
+    for label, acts in actions_by_label.items():
+        if label not in labels_data: continue
+
+        content = labels_data[label]
         result = content['result']
         feedbacks = result.get('feedback', [])
 
-        # Split feedbacks again to match indices
-        global_fb_indices = [i for i, f in enumerate(feedbacks) if not f.get('box_2d')]
-        local_fb_indices = [i for i, f in enumerate(feedbacks) if f.get('box_2d')]
-        # Sort local by Y to match generation order in annotating.py
-        local_fb_sorted_map = sorted(local_fb_indices,
-                                     key=lambda i: feedbacks[i]['box_2d'][0])
-
-        items_to_remove = set()
+        # Index-addressable views; entries are the same dict objects as in feedbacks
+        global_fb = [f for f in feedbacks if not f.get('box_2d')]
+        local_fb = [f for f in feedbacks if f.get('box_2d')]
+        local_fb.sort(key=lambda x: x['box_2d'][0])
 
         for act in acts:
             if act['type'] == 'score':
                 result['score'] = act['value']
+                dirty_labels.add(label)
                 print(f"  > Updated score for {label} to {act['value']}")
 
             elif act['type'] == 'del_global':
-                # act['index'] is the index within the global_fb list
-                # We need to find the actual index in the main list
-                if act['index'] < len(global_fb_indices):
-                    real_idx = global_fb_indices[act['index']]
-                    feedbacks[real_idx]["to_delete"] = None
+                if 0 <= act['index'] < len(global_fb):
+                    global_fb[act['index']]["to_delete"] = True
+                    dirty_labels.add(label)
                     print(f"  > Deleted global feedback in {label}")
-            elif act['type'] == 'del_local':
-                # act['index'] is index in sorted local list
-                if act['index'] < len(local_fb_sorted_map):
-                    real_idx = local_fb_sorted_map[act['index']]
-                    feedbacks[real_idx]["to_delete"] = None
-                    print(f"  > Deleted local feedback in {label}")
-            elif act['type'] == 'del_local_rect':
-                # act['index'] is index in sorted local list
-                if act['index'] < len(local_fb_sorted_map):
-                    real_idx = local_fb_sorted_map[act['index']]
-                    feedbacks[real_idx]["norectangle"] = None
-                    print(f"  > Deleted rect of local feedback in {label}")
 
+            elif act['type'] in ('del_local', 'del_local_rect'):
+                if 0 <= act['index'] < len(local_fb):
+                    target = local_fb[act['index']]
+                    if act['type'] == 'del_local':
+                        target["to_delete"] = True
+                        print(f"  > Deleted local feedback in {label}")
+                    else:
+                        target["norectangle"] = True
+                        print(f"  > Deleted rect in {label}")
+                    dirty_labels.add(label)
 
-        # Remove feedbacks (in reverse to preserve indices)
-        # for idx in sorted(list(items_to_remove), reverse=True):
-            # del feedbacks[idx]
+    # --- 2. Process Images (Cut notes, Regenerate, Concatenate) ---
+    concat_list = []
+    d_notes = {}
 
-    # 2. Regenerate Clean Image
-    # We use a temporary modified dictionary
-    temp_data = {student_id: labels}
+    # Iterate over images defined in bnote.json to maintain order/geometry
+    for img_info in bnote_data.get("images", []):
+        label = img_info["label"]
+        if label not in labels_data: continue
 
-    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
-    final_concats = []
+        # Update scores dict
+        content = labels_data[label]
+        d_notes[label] = str(content['result'].get('score', 0))
 
-    sorted_labels = sorted(labels.items(), key=lambda x: natural_key(x[0]))
-    for label, content in sorted_labels:
-        # ... [PDF to Image Conversion] ...
-        copie_folder = f"Copie{student_id}"
-        pdf_path = os.path.join(root_dir, copie_folder, f"{label}.pdf")
+        # A. Cut Manual Notes
+        hmin, hmax = img_info["hmin"], img_info["hmax"]
+        sub_note = None
+        if notes_layer:
+            sub_note = notes_layer.crop((0, hmin, notes_layer.width, hmax))
+
+        has_notes = sub_note is not None and has_significant_notes(sub_note)
+
+        # B. Regenerate Label Image
+        # We always regenerate to ensure Concat.jpg is consistent with any modifications
+        pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
         if not os.path.exists(pdf_path): continue
 
-        (base_img, _total_h, _max_w) = annotating.make_base_image(pdf_path)
-        img = annotating.compose_label_image(
+        (base_img, _, _) = annotating.make_base_image(pdf_path)
+
+        # Compose uses the result object we modified in step 1
+        final_img, _ = annotating.compose_label_image(
             base_img, label, content['result'], content['coordinates'][0]
         )
 
-        final_concats.append(img)
+        # Overlay manual notes
+        if has_notes:
+            final_img.paste(sub_note, (0, 0), mask=sub_note)
 
-    # Concatenate Labels
-    if not final_concats: return
+        # C. Save individual file if Modified (Dirty logic or visual notes)
+        if (label in dirty_labels) or has_notes:
+            save_path = os.path.join(output_dir, f"{label}.jpg")
+            final_img.save(save_path)
+            print(f"  Saved dirty image: {label}.jpg")
 
-    mw = max(i.width for i in final_concats)
-    th = sum(i.height for i in final_concats)
-    full_clean_img = Image.new("RGB", (mw, th), "white")
-    y=0
-    for i in final_concats:
-        full_clean_img.paste(i, (0,y))
-        y+=i.height
+        concat_list.append(final_img)
 
-    # 3. Overlay Manual Notes
-    if notes_layer:
-        full_clean_img.paste(notes_layer, (0,0), mask=notes_layer)
+    # --- 3. Save Final Outputs ---
+    with open(score_path, "w") as f:
+        json.dump(d_notes, f, indent=4)
+    print(f"  Saved {score_path}")
 
-    # Save final Concat.jpg
-    full_clean_img.save(os.path.join(output_dir, "Concat.jpg"))
-    print(f"Saved regenerated: {os.path.join(output_dir, 'Concat.jpg')}")
+    if concat_list:
+        max_w = max(i.width for i in concat_list)
+        total_h = sum(i.height for i in concat_list)
+        full_img = Image.new("RGB", (max_w, total_h), "white")
+
+        y = 0
+        for img in concat_list:
+            full_img.paste(img, (0, y))
+            y += img.height
+
+        full_img.save(os.path.join(output_dir, "Concat.jpg"))
+        print("  Saved regenerated Concat.jpg")
 
 if __name__ == "__main__":
     if len(sys.argv) < 2: