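# reading_annotations.py (358 lines, 13 KiB, Python)
# Reads a hand-annotated correction PDF back in: detects ticked checkboxes and
# free-hand notes, applies them to the grading data, and regenerates Concat.jpg.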
import sys
|
|
import os
|
|
import json
|
|
import numpy as np
|
|
import shutil
|
|
from PIL import Image, ImageChops, ImageFilter
|
|
# Lift Pillow's pixel-count safety limit (per Pillow's documentation, None
# disables the decompression-bomb check): the reference image and the stacked
# PDF pages handled below form one long strip that can be very large.
Image.MAX_IMAGE_PIXELS = None
|
|
from pdf2image import convert_from_path
|
|
import annotating # Reuse rendering logic
|
|
|
|
# Nominal rasterisation resolution.
# NOTE(review): detect_checks_and_notes renders the annotated PDF with a
# hard-coded dpi=72 rather than this constant -- confirm which one is intended.
DPI = 100
|
|
|
|
def detect_checks_and_notes(output_dir, show_preview=True):
    """Detect ticked checkboxes and extract hand-written notes from an annotated PDF.

    The annotated ``Concat_annotated.pdf`` found in *output_dir* is rasterised,
    its pages are stacked into one strip, and the strip is compared pixel-wise
    with ``Reference.jpg``. Box coordinates come from ``checkboxes.json``.

    Args:
        output_dir: Folder containing ``Concat_annotated.pdf``,
            ``Reference.jpg`` and ``checkboxes.json``.
        show_preview: When true (the default, which matches the historical
            behaviour) the extracted notes layer is opened with
            ``Image.show()`` for visual debugging. Pass ``False`` for
            unattended batch runs.

    Returns:
        A tuple ``(actions, notes_img)``:

        * ``actions`` -- the entries of ``checkboxes.json`` whose box was
          detected as ticked.
        * ``notes_img`` -- an RGBA copy of the annotated strip whose alpha
          channel is opaque only where the strip differs strongly from the
          reference and no checkbox mask applies.

        ``([], None)`` is returned when any input file is missing or the PDF
        cannot be rendered.
    """
    pdf_path = os.path.join(output_dir, "Concat_annotated.pdf")
    ref_path = os.path.join(output_dir, "Reference.jpg")
    json_path = os.path.join(output_dir, "checkboxes.json")

    # All three inputs are required; the JSON file used to be opened unchecked.
    if not all(os.path.exists(p) for p in (pdf_path, ref_path, json_path)):
        print(f"Missing files in {output_dir}")
        return [], None

    # Load checkbox coordinates.
    with open(json_path, 'r', encoding="utf-8") as f:
        boxes = json.load(f)

    # Load the reference render.
    ref_img = Image.open(ref_path).convert("RGB")

    # Rasterise the user's PDF. dpi=72 avoids size mismatches with the
    # reference at the cost of more noise (rendering at DPI was tried before).
    # The page dimensions/order are assumed to be unchanged by the user.
    try:
        user_pages = convert_from_path(pdf_path, dpi=72)
    except Exception as e:  # pdf2image raises several unrelated error types
        print(f"Error reading PDF: {e}")
        return [], None
    if not user_pages:
        print(f"Error reading PDF: no pages in {pdf_path}")
        return [], None

    # Stack the pages back into one strip in case the PDF was saved as
    # multiple pages.
    total_h = sum(p.height for p in user_pages)
    user_img = Image.new("RGB", (user_pages[0].width, total_h))
    y = 0
    for p in user_pages:
        user_img.paste(p, (0, y))
        y += p.height

    # Resize to the reference if the export resolution differs slightly.
    if user_img.size != ref_img.size:
        print("Debug : size mismatch : ", user_img.size, ref_img.size)
        user_img = user_img.resize(ref_img.size, Image.Resampling.LANCZOS)

    # --- Detection phase ---
    actions = []

    ref_arr = np.array(ref_img)
    user_arr = np.array(user_img)

    # Absolute per-channel difference. int16 is wide enough for uint8
    # subtraction and uses a quarter of the memory of the default int64.
    diff = np.abs(ref_arr.astype(np.int16) - user_arr.astype(np.int16)).astype(np.uint8)
    # Average the channels to get one intensity difference per pixel.
    diff_gray = np.mean(diff, axis=2)

    CHECK_THRESHOLD = 30      # minimum intensity difference for a "changed" pixel
    DENSITY_THRESHOLD = 0.05  # fraction of changed pixels that marks a box as ticked

    # Mask hiding the checkbox areas from the notes layer:
    # white (255) = keep, black (0) = hide.
    mask_img = Image.new("L", ref_img.size, 255)
    mask_draw = ImageDraw.Draw(mask_img)

    for box in boxes:
        # global_box: [x1, y1, x2, y2]
        x1, y1, x2, y2 = map(int, box['global_box'])

        # Clamp to the image bounds.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(ref_img.width, x2), min(ref_img.height, y2)

        # Analyse the interior of the box only (5 px inset skips the border).
        # The emptiness test is done on the coordinates: a negative stop index
        # would otherwise wrap around and select an unrelated region.
        in_x1, in_y1, in_x2, in_y2 = x1 + 5, y1 + 5, x2 - 5, y2 - 5
        if in_x2 <= in_x1 or in_y2 <= in_y1:
            continue
        roi = diff_gray[in_y1:in_y2, in_x1:in_x2]

        changed_pixels = np.sum(roi > CHECK_THRESHOLD)
        density = changed_pixels / roi.size

        if density > DENSITY_THRESHOLD:
            print("A checked box !", density, box['global_box'])
            actions.append(box)
            # Ticked: hide the area from the notes, slightly enlarged to
            # catch sloppy ticks.
            mask_draw.rectangle([x1-5, y1-5, x2+5, y2+5], fill=0)
        else:
            mask_draw.rectangle([x1-2, y1-2, x2+2, y2+2], fill=0)

        if box["type"] == "score" and box["value"] == 0.0:
            # Hide the whole horizontal band of this box.
            mask_draw.rectangle([0, y1-5, ref_img.width, y2+5], fill=0)

    # --- Extraction phase ---
    # Build the "manual notes" layer: the user's pixels, visible only where
    # they differ from the reference.

    # 1. Difference image, reduced to one channel.
    diff_img = ImageChops.difference(ref_img, user_img).convert("L")

    # 2. Threshold away JPEG noise (the white background is not perfectly
    #    white); a high threshold gave the best results.
    diff_data = np.array(diff_img)
    alpha = np.where(diff_data > 100, 255, 0).astype(np.uint8)

    # 3. Keep the user's own colours and combine the difference-based alpha
    #    with the checkbox mask.
    notes = user_img.convert("RGBA")
    mask_arr = np.array(mask_img)
    final_alpha = np.minimum(alpha, mask_arr)
    notes.putalpha(Image.fromarray(final_alpha))

    # Known issues noted while debugging: artifacts increase towards the
    # bottom of the image, and all rectangles are still present.
    if show_preview:
        notes.show()

    return actions, notes
|
|
|
|
from PIL import ImageDraw
|
|
|
|
import re
|
|
def natural_key(text):
    """Return a sort key that orders embedded numbers numerically.

    ``str(text)`` is split into alternating text and digit runs; digit runs
    become ``int`` and text runs are lower-cased, so that for example
    ``"Ex2"`` sorts before ``"Ex10"``.

    Args:
        text: Any object; it is converted with ``str()`` first.

    Returns:
        A list alternating ``str`` (even positions) and ``int`` (odd positions).
    """
    tokens = re.split(r'(\d+)', str(text))
    # With a single capturing group, re.split puts the matched digit runs at
    # the odd positions. Classifying by position instead of str.isdigit()
    # avoids int() failing on characters such as "²", which isdigit() accepts
    # but \d does not match.
    return [int(tok) if pos % 2 else tok.lower() for pos, tok in enumerate(tokens)]
|
|
|
|
from annotating import MARGIN_LEFT, ANNOT_WIDTH
|
|
|
|
def _apply_actions_to_labels(labels, actions):
    """Record the detected checkbox actions on *labels*, modifying it in place."""
    # Group the actions by the label they belong to.
    actions_by_label = {}
    for action in actions:
        actions_by_label.setdefault(action['label'], []).append(action)

    for label, acts in sorted(actions_by_label.items(), key=lambda kv: natural_key(kv[0])):
        if label not in labels:
            continue

        result = labels[label]['result']
        feedbacks = result.get('feedback', [])

        # Rebuild the two index spaces the checkbox indices refer to:
        # feedback without a box ("global"), and boxed feedback ("local")
        # ordered by box_2d[0], the same order the renderer uses.
        global_indices = [i for i, f in enumerate(feedbacks) if not f.get('box_2d')]
        local_indices = sorted(
            (i for i, f in enumerate(feedbacks) if f.get('box_2d')),
            key=lambda i: feedbacks[i]['box_2d'][0],
        )

        # action type -> (index space, flag set on the feedback, log wording)
        deletions = {
            'del_global': (global_indices, "to_delete", "global feedback"),
            'del_local': (local_indices, "to_delete", "local feedback"),
            'del_local_rect': (local_indices, "norectangle", "rect of local feedback"),
        }

        for act in acts:
            kind = act['type']
            if kind == 'score':
                result['score'] = act['value']
                print(f" > Updated score for {label} to {act['value']}")
                continue
            if kind not in deletions:
                continue
            index_map, flag, wording = deletions[kind]
            # Feedback is flagged rather than removed, so the renderer can
            # still see it. Out-of-range indices are ignored.
            if 0 <= act['index'] < len(index_map):
                feedbacks[index_map[act['index']]][flag] = None
                print(f" > Deleted {wording} in {label}")


def _render_label_image(pdf_path, label, content):
    """Render one label: header texts, the scanned pages, and local annotations."""
    base_img, _unused_h, _unused_w = annotating.make_base_image(pdf_path)

    margin_left = MARGIN_LEFT
    result = content['result']
    hmin = content.get('coordinates', (0, 0))[0]

    score_text = f"{label} ; Note : {result.get('score', 0)}"
    error = result.get('error')
    if error and error != "null":
        score_text += f" | Error: {error}"

    # Header entries are (image, keep) pairs; the score line is always kept.
    header_imgs = [(annotating.render_latex_text(score_text, base_img.width, fontsize=18), True)]

    feedbacks = result.get('feedback', [])
    global_fb = [f for f in feedbacks if not f.get('box_2d')]
    local_fb = [f for f in feedbacks if f.get('box_2d')]
    local_fb.sort(key=lambda f: f['box_2d'][0])

    for fb in global_fb:
        rendered = annotating.render_latex_text(fb['text'], base_img.width)
        header_imgs.append((rendered, "to_delete" not in fb))

    total_h = base_img.height + sum(img.height for img, _ in header_imgs)
    label_img = Image.new("RGB", (base_img.width + margin_left, total_h), "white")

    cy = 0
    for img, keep in header_imgs:
        # A deleted header keeps its vertical space but stays blank; the
        # canvas is already white there, so nothing needs to be pasted.
        if keep:
            label_img.paste(img, (0, cy))
        cy += img.height
    img_offset_y = cy
    label_img.paste(base_img, (margin_left, img_offset_y))

    draw = ImageDraw.Draw(label_img, "RGBA")
    last_bot = 0
    for fb in local_fb:
        ymin, xmin, ymax, xmax = fb['box_2d']
        t_ymin = (ymin - hmin) + img_offset_y
        t_ymax = (ymax - hmin) + img_offset_y
        if "norectangle" not in fb:
            draw.rectangle([xmin+margin_left, t_ymin, xmax+margin_left, t_ymax],
                           outline="red", width=3)

        txt = annotating.render_latex_text(fb['text'], ANNOT_WIDTH,
                                           (255,200,200,180), max_lines=3)
        # Centre the text on its box, but never above the page area nor
        # overlapping the previous annotation.
        py = max((t_ymin+t_ymax)/2 - txt.height/2, img_offset_y)
        if py < last_bot:
            py = last_bot + 5

        # Grow the canvas when the annotation would run past the bottom.
        if py + txt.height + 20 > label_img.height:
            taller = Image.new("RGB", (label_img.width, int(py+txt.height+20)), "white")
            taller.paste(label_img, (0,0))
            label_img = taller
            draw = ImageDraw.Draw(label_img, "RGBA")

        if "to_delete" not in fb:
            label_img.paste(txt, (10, int(py)), mask=txt)
        # NOTE(review): the slot is reserved even for deleted feedback so the
        # following annotations keep their position -- confirm this matches
        # the layout produced by annotating.py.
        last_bot = py + txt.height

    return label_img


def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer):
    """Apply detected actions to a student's data and regenerate ``Concat.jpg``.

    Args:
        root_dir: Project root; label PDFs are read from
            ``<root_dir>/Copie<student_id>/<label>.pdf`` and the result is
            written to ``<root_dir>/Bnot/Copie<student_id>/Concat.jpg``.
        data: Mapping ``student_id -> {label -> content}``. The entry for
            *student_id* is modified in place (scores updated, feedback
            flagged with ``"to_delete"`` / ``"norectangle"``).
        student_id: Key of the student to process.
        actions: Checked-box entries, each with ``label`` and ``type`` and,
            depending on the type, ``value`` or ``index``.
        notes_layer: RGBA image of the manual notes to overlay, or ``None``.

    Returns:
        ``None``. Nothing is written when no label PDF could be rendered.
    """
    labels = data[student_id]

    # 1. Apply the actions to the data.
    _apply_actions_to_labels(labels, actions)

    # 2. Re-render every label without the checkboxes.
    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
    copie_dir = os.path.join(root_dir, f"Copie{student_id}")

    final_concats = []
    for label, content in sorted(labels.items(), key=lambda kv: natural_key(kv[0])):
        pdf_path = os.path.join(copie_dir, f"{label}.pdf")
        if not os.path.exists(pdf_path):
            continue
        final_concats.append(_render_label_image(pdf_path, label, content))

    if not final_concats:
        return

    # Stack the per-label images vertically.
    mw = max(img.width for img in final_concats)
    th = sum(img.height for img in final_concats)
    full_clean_img = Image.new("RGB", (mw, th), "white")
    y = 0
    for img in final_concats:
        full_clean_img.paste(img, (0, y))
        y += img.height

    # 3. Overlay the manual notes, aligned on the top-left corner. The notes
    #    come from the reference render, so the sizes should normally match.
    if notes_layer is not None:
        full_clean_img.paste(notes_layer, (0,0), mask=notes_layer)

    # Save the final Concat.jpg.
    os.makedirs(output_dir, exist_ok=True)
    final_path = os.path.join(output_dir, "Concat.jpg")
    full_clean_img.save(final_path)
    print(f"Saved regenerated: {final_path}")
|
|
|
|
if __name__ == "__main__":
    # Exactly one positional argument is required: the project root directory.
    if len(sys.argv) < 2:
        print("Usage: python reading_annotations.py <Dir>")
        sys.exit(1)

    root_dir = sys.argv[1]

    # Grading data as originally produced, keyed by student id.
    original_data = annotating.make_dictionary(root_dir)

    # Handle every student that has a Bnot/Copie<id> folder.
    for student_id in original_data:
        bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
        if not os.path.exists(bnot_dir):
            continue

        print(f"Processing annotations for: {student_id}")
        actions, notes = detect_checks_and_notes(bnot_dir)
        if not (actions or notes):
            print(" No changes detected or missing files.")
            continue

        apply_actions_and_regenerate(root_dir, original_data, student_id, actions, notes)
|