From 2e1c519dce870891b031e379024c5a47bcfdeeb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Miquel?= Date: Sun, 15 Feb 2026 14:24:25 +0100 Subject: [PATCH] Working state, with single images regeneration and better notes detection --- annotating.py | 4 +- annotating_with_checks.py | 25 ++++- reading_annotations.py | 197 ++++++++++++++++++++++---------------- 3 files changed, 138 insertions(+), 88 deletions(-) diff --git a/annotating.py b/annotating.py index 4a4ecfc..70c92d6 100644 --- a/annotating.py +++ b/annotating.py @@ -332,7 +332,7 @@ def compose_label_image(base_img, label, result, hmin, last_text_bottom = paste_y + txt_img.height - return final_img + return final_img, header_height def natural_key(text): return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))] @@ -384,7 +384,7 @@ def process_correction(root_dir, data, all_labels, overwrite=False): score = result.get('score', 0) d_notes[label] = str(score) - final_img = compose_label_image(base_img, label, result, coordinates[0]) + final_img, _ = compose_label_image(base_img, label, result, coordinates[0]) # 7. 
Save Image save_path = os.path.join(output_dir, f"{label}.jpg") diff --git a/annotating_with_checks.py b/annotating_with_checks.py index 10835ae..6d9a2c9 100644 --- a/annotating_with_checks.py +++ b/annotating_with_checks.py @@ -110,6 +110,8 @@ def process_student(args): label_images = [] all_checkboxes = [] + bnote_entries = [] # For bnote.json + sorted_labels = sorted(labels.items(), key=lambda x: natural_key(x[0])) for label, content in sorted_labels: @@ -122,7 +124,7 @@ def process_student(args): cb_renderer = CheckboxRenderer(label) # Render using the shared engine - final_img = annotating.compose_label_image( + final_img, header_h = annotating.compose_label_image( base_img, label, content['result'], content['coordinates'][0], render_fn=safe_render_latex, draw_callback=cb_renderer.callback @@ -130,6 +132,13 @@ def process_student(args): label_images.append(final_img) all_checkboxes.append(cb_renderer.checkboxes) + bnote_entries.append({ + "id": student_id, + "label": label, + "header_height": header_h, + # hmin/hmax will be filled during concatenation + "img_h": final_img.height + }) if not label_images: return @@ -141,9 +150,13 @@ def process_student(args): final_json_map = [] current_y = 0 - for img, boxes in zip(label_images, all_checkboxes): + for idx, (img, boxes) in enumerate(zip(label_images, all_checkboxes)): concat_img.paste(img, (0, current_y)) + bnote_entries[idx]["hmin"] = current_y + bnote_entries[idx]["hmax"] = current_y + img.height + del bnote_entries[idx]["img_h"] # Clean up temp data + # Adjust coordinates for concatenated image for item in boxes: # item might have 'rel_box' (header) or 'final_box' (local) @@ -154,6 +167,14 @@ def process_student(args): current_y += img.height + bnote_data = { + "width": max_w, + "height": total_h, + "images": bnote_entries + } + with open(os.path.join(output_dir, "bnote.json"), "w") as f: + json.dump(bnote_data, f, indent=2) + with open(os.path.join(output_dir, "checkboxes.json"), "w") as f: 
json.dump(final_json_map, f, indent=2) diff --git a/reading_annotations.py b/reading_annotations.py index d27f932..e9c55de 100644 --- a/reading_annotations.py +++ b/reading_annotations.py @@ -107,26 +107,21 @@ def detect_checks_and_notes(output_dir): mask_draw.rectangle([0, y1-5, ref_img.width, y2+5], fill=0) # --- Extraction Phase --- - # Create the "Manual Notes" layer - # Logic: User - Ref. If Diff is dark -> Note. - # We want a transparent image with just the pen strokes. - # Try Gaussian Blur, peut-être inutile. - ref_blur = ref_img.filter(ImageFilter.GaussianBlur(5)) - user_blur = user_img.filter(ImageFilter.GaussianBlur(5)) + # 150 + no blur is alright, with some lines at the end + # 100 + 2 px blur is too clean : tes annotations sont morcelées + # 50 + 2 px blur seems good + + ref_blur = ref_img.filter(ImageFilter.GaussianBlur(2)) + user_blur = user_img.filter(ImageFilter.GaussianBlur(2)) # 1. Get difference image - diff_img = ImageChops.difference(ref_img, user_img).convert("L") + # diff_img = ImageChops.difference(ref_img, user_img).convert("L") + diff_img = ImageChops.difference(ref_blur, user_blur).convert("L") - # 2. Threshold to remove JPEG noise (white background isn't perfect) - # Pixels that are different enough: diff_data = np.array(diff_img) - # Create alpha channel: 0 where no diff, 255 where diff - # Higher treshold is better - alpha = np.where(diff_data > 100, 255, 0).astype(np.uint8) + alpha = np.where(diff_data > 50, 255, 0).astype(np.uint8) - # 3. Create output image (Black strokes, variable alpha) - # Or Copy user colors? Better to copy user pixels. notes = user_img.convert("RGBA") r, g, b, a = notes.split() @@ -148,110 +143,144 @@ def natural_key(text): from annotating import MARGIN_LEFT, ANNOT_WIDTH +def has_significant_notes(note_img, threshold=20): + """Checks if the note layer has visible content (non-transparent pixels).""" + # Assuming note_img is RGBA. 
+ # We check alpha channel for non-zero values (or low transparency) + # Since we generated notes with variable alpha based on diff, checking alpha sum is good. + if note_img.mode != 'RGBA': + return False + alpha = np.array(note_img)[:, :, 3] + # Count pixels with significant opacity + visible_pixels = np.sum(alpha > 50) + # visible_pixels_bis = np.sum(alpha > 200) + if visible_pixels > 0: + print(f"Debug : visible pixels is {visible_pixels}") + return visible_pixels > threshold + def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer): """ - Modifies data based on actions, calls annotating.process_correction logic, - overlays notes, saves Concat.jpg. + Modifies data based on actions, reads bnote.json, cuts notes, + regenerates all label images for consistency, saves dirty ones, + and generates Concat.jpg. """ - labels = data[student_id] + output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}") + bnote_path = os.path.join(output_dir, "bnote.json") + score_path = os.path.join(output_dir, "score.json") - # 1. Apply Actions to Data - # Sort actions to handle indices correctly (delete from end?) - # But we regenerate from dictionary, so modifying the dictionary is fine. + if not os.path.exists(bnote_path): + print(f"Error: bnote.json not found in {output_dir}") + return - # Separate actions by label + with open(bnote_path, 'r') as f: + bnote_data = json.load(f) + + labels_data = data[student_id] + + # --- 1. 
Apply Actions to Data (Update scores / Flags for deletion) --- actions_by_label = {} for a in actions: - l = a['label'] - if l not in actions_by_label: - actions_by_label[l] = [] - actions_by_label[l].append(a) + actions_by_label.setdefault(a['label'], []).append(a) - for label, acts in sorted(actions_by_label.items(), key=lambda x: natural_key(x[0])): - if label not in labels: continue + dirty_labels = set() # Labels that logic says changed - content = labels[label] + for label, acts in actions_by_label.items(): + if label not in labels_data: continue + + content = labels_data[label] result = content['result'] feedbacks = result.get('feedback', []) - # Split feedbacks again to match indices - global_fb_indices = [i for i, f in enumerate(feedbacks) if not f.get('box_2d')] - local_fb_indices = [i for i, f in enumerate(feedbacks) if f.get('box_2d')] - # Sort local by Y to match generation order in annotating.py - local_fb_sorted_map = sorted(local_fb_indices, - key=lambda i: feedbacks[i]['box_2d'][0]) - - items_to_remove = set() + # Helpers to find objects by index (references match those in feedbacks list) + global_fb = [f for f in feedbacks if not f.get('box_2d')] + local_fb = [f for f in feedbacks if f.get('box_2d')] + local_fb.sort(key=lambda x: x['box_2d'][0]) for act in acts: if act['type'] == 'score': result['score'] = act['value'] + dirty_labels.add(label) print(f" > Updated score for {label} to {act['value']}") elif act['type'] == 'del_global': - # act['index'] is the index within the global_fb list - # We need to find the actual index in the main list - if act['index'] < len(global_fb_indices): - real_idx = global_fb_indices[act['index']] - feedbacks[real_idx]["to_delete"] = None + if act['index'] < len(global_fb): + global_fb[act['index']]["to_delete"] = True + dirty_labels.add(label) print(f" > Deleted global feedback in {label}") - elif act['type'] == 'del_local': - # act['index'] is index in sorted local list - if act['index'] < 
len(local_fb_sorted_map): - real_idx = local_fb_sorted_map[act['index']] - feedbacks[real_idx]["to_delete"] = None - print(f" > Deleted local feedback in {label}") - elif act['type'] == 'del_local_rect': - # act['index'] is index in sorted local list - if act['index'] < len(local_fb_sorted_map): - real_idx = local_fb_sorted_map[act['index']] - feedbacks[real_idx]["norectangle"] = None - print(f" > Deleted rect of local feedback in {label}") + elif act['type'] in ('del_local', 'del_local_rect'): + if act['index'] < len(local_fb): + target = local_fb[act['index']] + if act['type'] == 'del_local': + target["to_delete"] = True + print(f" > Deleted local feedback in {label}") + else: + target["norectangle"] = True + print(f" > Deleted rect in {label}") + dirty_labels.add(label) - # Remove feedbacks (in reverse to preserve indices) - # for idx in sorted(list(items_to_remove), reverse=True): - # del feedbacks[idx] + # --- 2. Process Images (Cut notes, Regenerate, Concatenate) --- + concat_list = [] + d_notes = {} - # 2. Regenerate Clean Image - # We use a temporary modified dictionary - temp_data = {student_id: labels} + # Iterate over images defined in bnote.json to maintain order/geometry + for img_info in bnote_data.get("images", []): + label = img_info["label"] + if label not in labels_data: continue - output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}") - final_concats = [] + # Update scores dict + content = labels_data[label] + d_notes[label] = str(content['result'].get('score', 0)) - sorted_labels = sorted(labels.items(), key=lambda x: natural_key(x[0])) - for label, content in sorted_labels: - # ... [PDF to Image Conversion] ... - copie_folder = f"Copie{student_id}" - pdf_path = os.path.join(root_dir, copie_folder, f"{label}.pdf") + # A. 
Cut Manual Notes + hmin, hmax = img_info["hmin"], img_info["hmax"] + sub_note = None + if notes_layer: + sub_note = notes_layer.crop((0, hmin, notes_layer.width, hmax)) + + has_notes = has_significant_notes(sub_note) + + # B. Regenerate Label Image + # We always regenerate to ensure Concat.jpg is consistent with any modifications + pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf") if not os.path.exists(pdf_path): continue - (base_img, _total_h, _max_w) = annotating.make_base_image(pdf_path) - img = annotating.compose_label_image( + (base_img, _, _) = annotating.make_base_image(pdf_path) + + # Compose uses the result object we modified in step 1 + final_img, _ = annotating.compose_label_image( base_img, label, content['result'], content['coordinates'][0] ) - final_concats.append(img) + # Overlay manual notes + if has_notes: + final_img.paste(sub_note, (0, 0), mask=sub_note) - # Concatenate Labels - if not final_concats: return + # C. Save individual file if Modified (Dirty logic or visual notes) + if (label in dirty_labels) or has_notes: + save_path = os.path.join(output_dir, f"{label}.jpg") + final_img.save(save_path) + print(f" Saved dirty image: {label}.jpg") - mw = max(i.width for i in final_concats) - th = sum(i.height for i in final_concats) - full_clean_img = Image.new("RGB", (mw, th), "white") - y=0 - for i in final_concats: - full_clean_img.paste(i, (0,y)) - y+=i.height + concat_list.append(final_img) - # 3. Overlay Manual Notes - if notes_layer: - full_clean_img.paste(notes_layer, (0,0), mask=notes_layer) + # --- 3. 
Save Final Outputs --- + with open(score_path, "w") as f: + json.dump(d_notes, f, indent=4) + print(f" Saved {score_path}") - # Save final Concat.jpg - full_clean_img.save(os.path.join(output_dir, "Concat.jpg")) - print(f"Saved regenerated: {os.path.join(output_dir, 'Concat.jpg')}") + if concat_list: + max_w = max(i.width for i in concat_list) + total_h = sum(i.height for i in concat_list) + full_img = Image.new("RGB", (max_w, total_h), "white") + + y = 0 + for img in concat_list: + full_img.paste(img, (0, y)) + y += img.height + + full_img.save(os.path.join(output_dir, "Concat.jpg")) + print(f" Saved regenerated Concat.jpg") if __name__ == "__main__": if len(sys.argv) < 2: