# Copies/reading_annotations.py
# (file-viewer metadata: 350 lines, 12 KiB, Python)

import sys
import os
import json
import numpy as np
import shutil
from PIL import Image, ImageChops, ImageFilter
Image.MAX_IMAGE_PIXELS = None
from pdf2image import convert_from_path
import annotating # Reuse rendering logic
DPI = 100
def detect_checks_and_notes(output_dir):
    """Detect ticked checkboxes and extract manual notes from an annotated PDF.

    The annotated ``Concat_annotated.pdf`` found in *output_dir* is rendered
    and compared pixel-wise with ``Reference.jpg``; the checkbox geometry is
    read from ``checkboxes.json`` (each entry carries a ``global_box`` of
    ``[x1, y1, x2, y2]``).

    Args:
        output_dir: Directory holding the annotated PDF, the reference image
            and the checkbox description file.

    Returns:
        A tuple ``(actions, notes_img)``:
            actions: the entries of ``checkboxes.json`` whose box is
                considered checked.
            notes_img: RGBA copy of the rendered user page whose alpha is 255
                where the blurred difference with the reference exceeds 50
                and the pixel is not hidden by a checkbox mask, 0 elsewhere.
        ``([], None)`` is returned when a required file is missing or the PDF
        cannot be rendered.
    """
    names = ["Concat_annotated.pdf"]
    pdf_path = None
    for name in names:
        pdf_path = os.path.join(output_dir, name)
        if os.path.exists(pdf_path):
            break

    ref_path = os.path.join(output_dir, "Reference.jpg")
    json_path = os.path.join(output_dir, "checkboxes.json")

    # All three inputs are required; checkboxes.json used to be opened
    # unconditionally and raised FileNotFoundError when absent.
    required = (pdf_path, ref_path, json_path)
    if not all(os.path.exists(p) for p in required):
        print(f"\tMissing annotated file in {output_dir}")
        return [], None

    # Load coordinates
    with open(json_path, 'r') as f:
        boxes = json.load(f)

    # Load reference
    ref_img = Image.open(ref_path).convert("RGB")

    # Load the user PDF. Rendering at 72 DPI (rather than DPI) avoids size
    # mismatches with the reference, at the price of a noisier difference.
    # Assumes the user did not change page dimensions/order.
    try:
        user_pages = convert_from_path(pdf_path, dpi=72)
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return [], None
    if not user_pages:
        # Nothing was rendered: user_pages[0] below would raise IndexError.
        print(f"Error reading PDF: no pages in {pdf_path}")
        return [], None

    # Concatenate PDF pages back to one image if saved as multiple pages
    total_h = sum(p.height for p in user_pages)
    user_img = Image.new("RGB", (user_pages[0].width, total_h))
    y = 0
    for p in user_pages:
        user_img.paste(p, (0, y))
        y += p.height

    # Resize user_img to match ref_img if slight mismatch (DPI export diffs)
    if user_img.size != ref_img.size:
        print("Debug : size mismatch : ", user_img.size, ref_img.size)
        user_img = user_img.resize(ref_img.size, Image.Resampling.LANCZOS)

    # --- Detection Phase ---
    actions = []

    # Per-pixel absolute difference, averaged over the colour channels.
    # int16 is wide enough for differences of 8-bit values and uses far
    # less memory than the platform int on very large strips.
    ref_arr = np.array(ref_img)
    user_arr = np.array(user_img)
    diff = np.abs(
        ref_arr.astype(np.int16) - user_arr.astype(np.int16)
    ).astype(np.uint8)
    diff_gray = np.mean(diff, axis=2)

    CHECK_THRESHOLD = 30      # intensity diff for a pixel to count as changed
    DENSITY_THRESHOLD = 0.05  # fraction of changed pixels for a box to be checked

    # Mask hiding checkbox areas from the notes extraction:
    # white (255) = keep, black (0) = hide.
    mask_img = Image.new("L", ref_img.size, 255)
    mask_draw = ImageDraw.Draw(mask_img)

    for box in boxes:
        # global_box: [x1, y1, x2, y2]
        raw_box = box['global_box']
        x1, y1, x2, y2 = map(int, raw_box)

        # Ensure bounds
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(ref_img.width, x2), min(ref_img.height, y2)

        # Analyze the interior of the box (5 px inset). The upper bounds are
        # clamped at 0 so that a tiny box near the origin yields an empty ROI
        # instead of a negative index wrapping around to the far edge.
        roi = diff_gray[y1 + 5:max(0, y2 - 5), x1 + 5:max(0, x2 - 5)]
        if roi.size == 0:
            continue

        changed_pixels = np.sum(roi > CHECK_THRESHOLD)
        density = changed_pixels / roi.size

        if density > DENSITY_THRESHOLD:
            print("A checked box !", density, raw_box)
            actions.append(box)
            # It's checked, so mask this area out of the manual notes.
            # Expand the mask slightly to catch sloppy ticks.
            mask_draw.rectangle([x1 - 15, y1 - 15, x2 + 15, y2 + 15], fill=0)
        else:
            mask_draw.rectangle([x1 - 2, y1 - 2, x2 + 2, y2 + 2], fill=0)

        # NOTE(review): nesting reconstructed from a de-indented source; this
        # is assumed to apply to every score-0 box, checked or not — confirm.
        if box.get("type") == "score" and box.get("value") == 0.0:
            # Mask the whole line
            mask_draw.rectangle([0, y1 - 10, ref_img.width, y2 + 10], fill=0)

    # --- Extraction Phase ---
    # Tuning notes (threshold + blur radius):
    #   150 + no blur is alright, with some lines at the end
    #   100 + 2 px blur is too clean: the annotations get fragmented
    #   50 + 2 px blur seems good
    ref_blur = ref_img.filter(ImageFilter.GaussianBlur(2))
    user_blur = user_img.filter(ImageFilter.GaussianBlur(2))

    # 1. Difference image, binarised into an alpha channel
    diff_img = ImageChops.difference(ref_blur, user_blur).convert("L")
    diff_data = np.array(diff_img)
    alpha = np.where(diff_data > 50, 255, 0).astype(np.uint8)

    # 2. Combine the diff-based alpha with the box mask
    notes = user_img.convert("RGBA")
    mask_arr = np.array(mask_img)
    final_alpha = np.minimum(alpha, mask_arr)
    notes.putalpha(Image.fromarray(final_alpha))

    return actions, notes
from PIL import ImageDraw
from utils import natural_key
from annotating import MARGIN_LEFT, ANNOT_WIDTH
def has_significant_notes(note_img, threshold=20):
    """Tell whether an RGBA note layer contains enough opaque pixels.

    A pixel counts as visible when its alpha value is above 50. The layer is
    considered significant when strictly more than *threshold* pixels are
    visible. Any image whose mode is not ``'RGBA'`` yields ``False``.
    """
    if note_img.mode == 'RGBA':
        # Fourth channel of the pixel array is the alpha plane.
        opacity = np.array(note_img)[:, :, 3]
        opaque_count = (opacity > 50).sum()
        return opaque_count > threshold
    return False
def _apply_checkbox_actions(labels_data, actions):
    """Apply checked-box actions to *labels_data* in place; return the set of modified labels."""
    actions_by_label = {}
    for a in actions:
        actions_by_label.setdefault(a['label'], []).append(a)

    dirty_labels = set()
    for label, acts in actions_by_label.items():
        if label not in labels_data:
            continue
        result = labels_data[label]['result']
        feedbacks = result.get('feedback', [])

        # The lists below hold references to the dicts stored in `feedbacks`,
        # so flagging an element here flags it in the result as well.
        global_fb = [f for f in feedbacks if not f.get('box_2d')]
        local_fb = [f for f in feedbacks if f.get('box_2d')]
        local_fb.sort(key=lambda x: x['box_2d'][0])

        for act in acts:
            kind = act['type']
            if kind == 'score':
                result['score'] = act['value']
                dirty_labels.add(label)
                print(f" > Updated score for {label} to {act['value']}")
            elif kind == 'del_global':
                # Negative indices are rejected: they would silently address
                # feedback counted from the end of the list.
                if 0 <= act['index'] < len(global_fb):
                    global_fb[act['index']]["to_delete"] = True
                    dirty_labels.add(label)
                    print(f" > Deleted global feedback in {label}")
            elif kind in ('del_local', 'del_local_rect'):
                if 0 <= act['index'] < len(local_fb):
                    target = local_fb[act['index']]
                    if kind == 'del_local':
                        target["to_delete"] = True
                        print(f" > Deleted local feedback in {label}")
                    else:
                        target["norectangle"] = True
                        print(f" > Deleted rect in {label}")
                    dirty_labels.add(label)
    return dirty_labels


def _save_stacked_image(images, save_path):
    """Stack *images* top to bottom on a white RGB canvas and save it to *save_path*."""
    canvas = Image.new(
        "RGB",
        (max(i.width for i in images), sum(i.height for i in images)),
        "white",
    )
    y = 0
    for img in images:
        canvas.paste(img, (0, y))
        y += img.height
    canvas.save(save_path)


def apply_actions_and_regenerate(root_dir, data, student_id, actions,
                                 notes_layer, all_labels, perfect_score=4.0):
    """Apply detected actions to one student's data and rebuild the images.

    Reads ``bnote.json`` from ``<root_dir>/Bnot/Copie<student_id>``, applies
    *actions* to ``data[student_id]`` (mutated in place), regenerates every
    label image listed in ``bnote.json``, overlays the manual notes, saves the
    modified label images, and writes ``score.json``, ``Concat.jpg`` and
    ``Concat_F.jpg`` in that same directory.

    Args:
        root_dir: Root directory of the grading session.
        data: Mapping ``student_id -> {label: {'result': ..., 'coordinates': ...}}``.
        student_id: Identifier used to build the ``Copie<student_id>`` paths.
        actions: List of action dicts with at least ``label`` and ``type``
            (``score``, ``del_global``, ``del_local`` or ``del_local_rect``).
        notes_layer: RGBA image holding the manual notes, or ``None``.
        all_labels: Labels used to pre-fill ``score.json`` with empty strings.
        perfect_score: Score at which a label without feedback is left out of
            ``Concat_F.jpg``.

    Returns:
        None. Returns early (after printing an error) when ``bnote.json`` is
        missing.
    """
    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
    bnote_path = os.path.join(output_dir, "bnote.json")
    score_path = os.path.join(output_dir, "score.json")

    if not os.path.exists(bnote_path):
        print(f"Error: bnote.json not found in {output_dir}")
        return

    with open(bnote_path, 'r') as f:
        bnote_data = json.load(f)

    labels_data = data[student_id]

    # --- 1. Apply Actions to Data (update scores / flags for deletion) ---
    dirty_labels = _apply_checkbox_actions(labels_data, actions)

    # --- 2. Process Images (cut notes, regenerate, concatenate) ---
    concat_list = []
    concat_list_F = []
    d_notes = dict.fromkeys(all_labels, "")

    # Iterate over images defined in bnote.json to maintain order/geometry
    for img_info in bnote_data.get("images", []):
        label = img_info["label"]
        if label not in labels_data:
            continue

        # Update scores dict
        content = labels_data[label]
        result = content['result']
        d_notes[label] = str(result.get('score', 0))

        # A. Cut manual notes. `has_notes` is always defined, so a missing
        # notes layer simply means "no notes" instead of an error.
        hmin, hmax = img_info["hmin"], img_info["hmax"]
        sub_note = None
        has_notes = False
        if notes_layer is not None:
            sub_note = notes_layer.crop((0, hmin, notes_layer.width, hmax))
            has_notes = has_significant_notes(sub_note)

        # B. Regenerate label image.
        # Always regenerated so Concat.jpg is consistent with any modification.
        pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
        if not os.path.exists(pdf_path):
            continue

        (base_img, _, _) = annotating.make_base_image(pdf_path)

        # Compose uses the result object modified in step 1
        final_img, new_header_h = annotating.compose_label_image(
            base_img, label, content['result'], content['coordinates'][0],
            with_error=False
        )
        if final_img is None:
            continue

        # Overlay manual notes
        if has_notes:
            old_header_h = int(img_info.get("header_height", 0))
            w, h = sub_note.size

            # 1. Paste header ink at the top
            if old_header_h > 0:
                header_crop = sub_note.crop((0, 0, w, min(h, old_header_h)))
                final_img.paste(header_crop, (0, 0), mask=header_crop)

            # 2. Paste student-content ink at the new header height
            if h > old_header_h:
                body_crop = sub_note.crop((0, old_header_h, w, h))
                final_img.paste(body_crop, (0, new_header_h), mask=body_crop)

        # C. Save individual file if modified (dirty logic or visual notes)
        if (label in dirty_labels) or has_notes:
            save_path = os.path.join(output_dir, f"{label}.jpg")
            final_img.save(save_path)
            print(f" Saved dirty image: {label}.jpg")

        concat_list.append(final_img)

        # Labels with a perfect score and no feedback are left out of the
        # filtered concatenation. A non-numeric score is never "perfect".
        try:
            score_value = float(result.get('score', 0))
        except (TypeError, ValueError):
            score_value = None
        perfect_no_comment = (
            score_value == perfect_score and not result.get('feedback', [])
        )
        if not perfect_no_comment:
            concat_list_F.append(final_img)

    # --- 3. Save Final Outputs ---
    with open(score_path, "w") as f:
        json.dump(d_notes, f, indent=4)
    print(f" Saved {score_path}")

    if concat_list:
        _save_stacked_image(concat_list, os.path.join(output_dir, "Concat.jpg"))
        print(f" Saved regenerated Concat.jpg")

    if concat_list_F:
        _save_stacked_image(concat_list_F, os.path.join(output_dir, "Concat_F.jpg"))
        print(f" Saved regenerated Concat_F.jpg")
from pathlib import Path
from utils import read_all_labels
if __name__ == "__main__":
    # Command-line entry point: expects the session root directory as the
    # single argument and processes every student found in the original data.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python reading_annotations.py <Dir>")
        sys.exit(1)
    root_dir = cli_args[0]

    # The label list is optional: fall back to an empty list when the file
    # read by read_all_labels is not found.
    try:
        all_labels = read_all_labels(Path(root_dir))
    except FileNotFoundError:
        all_labels = []

    # Load original data
    original_data = annotating.make_dictionary(root_dir)

    # Process each student that has a Bnot folder
    for student_id in original_data:
        bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
        if not os.path.exists(bnot_dir):
            continue

        print(f"Processing annotations for: {student_id}")
        actions, notes = detect_checks_and_notes(bnot_dir)
        if not (actions or notes):
            print(" No changes detected or missing files.")
            continue

        apply_actions_and_regenerate(
            root_dir, original_data, student_id, actions, notes, all_labels
        )