Thread reading grouped annotations

master
Sébastien Miquel 2026-03-31 21:08:28 +02:00
parent 92d23a82ad
commit c1524a99c3
1 changed files with 66 additions and 83 deletions

View File

@ -174,6 +174,8 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, la
from utils import read_all_labels from utils import read_all_labels
import argparse import argparse
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Read grouped annotations and compile PDFs") parser = argparse.ArgumentParser(description="Read grouped annotations and compile PDFs")
parser.add_argument("input_path", help="Directory path") parser.add_argument("input_path", help="Directory path")
@ -195,9 +197,67 @@ if __name__ == "__main__":
# Load original data # Load original data
original_data = annotating.make_dictionary(root_dir) original_data = annotating.make_dictionary(root_dir)
lock = threading.Lock()
actions_by_student = collections.defaultdict(list) actions_by_student = collections.defaultdict(list)
notes_by_student = collections.defaultdict(dict) notes_by_student = collections.defaultdict(dict)
def process_bgnot_entry(entry):
    """Collect checkbox actions and note crops from one grouped BGnot folder.

    ``entry`` is a name inside ``bgnot_dir``.  Entries that are not
    directories, or whose name starts with ``"Copie"``, are ignored.  The
    folder is also skipped when it has no ``bnote.json`` or when
    ``detect_checks_and_notes`` returns no notes image.

    Results are merged into the shared ``actions_by_student`` and
    ``notes_by_student`` mappings under ``lock``; nothing is returned.

    NOTE(review): the visible call site hands this function to
    ``executor.map`` without consuming the returned iterator, so an
    exception raised here would not be re-raised to the caller.
    """
    gdir = os.path.join(bgnot_dir, entry)
    if not os.path.isdir(gdir) or entry.startswith("Copie"):
        return

    actions, notes_img = detect_checks_and_notes(gdir)
    bnote_path = os.path.join(gdir, "bnote.json")
    if not os.path.exists(bnote_path) or notes_img is None:
        return
    with open(bnote_path, "r", encoding="utf-8") as f:
        bnote_data = json.load(f)

    # Crop and filter the note strips BEFORE taking the lock: notes_img is
    # local to this call, so only the final dictionary updates need to be
    # serialised between worker threads.
    kept_notes = []
    for img_info in bnote_data.get("images", []):
        hmin, hmax = img_info.get("hmin", 0), img_info.get("hmax", 0)
        if hmax <= hmin:
            continue
        crop = notes_img.crop((0, hmin, notes_img.width, hmax))
        if has_significant_notes(crop):
            kept_notes.append((
                str(img_info.get("id")),
                img_info.get("label"),
                {'img': crop, 'old_header_h': img_info.get("header_height", 0)},
            ))

    with lock:
        for act in actions:
            raw_sid = act.get("student_id")
            # Test the raw value: str(None) is the truthy string "None", so
            # checking after conversion would never skip a missing id.
            if raw_sid is None or raw_sid == "":
                continue
            actions_by_student[str(raw_sid)].append(act)
        for sid, lbl, note in kept_notes:
            notes_by_student[sid][lbl] = note
def process_refaire_entry(sid, r_labels):
    """Replace a student's grouped results with those read from ``Bnot/Copie<sid>``.

    ``sid`` is the student identifier used as key in the shared mappings.
    ``r_labels`` lists the labels to redo; an empty value means every label
    found for ``sid`` in ``original_data``.

    Nothing happens if the ``Bnot/Copie<sid>`` folder does not exist.
    Otherwise the existing actions and notes for the targeted labels are
    dropped first, then the folder is rescanned with
    ``detect_checks_and_notes``.  The rescanned actions and note crops are
    only injected when the folder contains a ``bnote.json``; without it the
    targeted labels simply stay cleared.
    """
    s_bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{sid}")
    if not os.path.exists(s_bnot_dir):
        return
    if not r_labels:
        r_labels = list(original_data.get(sid, {}).keys())

    # Clear whatever the grouped scan produced for the labels being redone.
    with lock:
        actions_by_student[sid] = [
            a for a in actions_by_student[sid] if a.get('label') not in r_labels
        ]
        for lbl in r_labels:
            notes_by_student[sid].pop(lbl, None)

    b_actions, b_notes_img = detect_checks_and_notes(s_bnot_dir)
    b_bnote_path = os.path.join(s_bnot_dir, "bnote.json")
    if not os.path.exists(b_bnote_path):
        return
    with open(b_bnote_path, "r", encoding="utf-8") as f:
        b_bnote_data = json.load(f)

    with lock:
        for act in b_actions:
            # The per-student scan is tagged here with the student it belongs to.
            act["student_id"] = sid
            actions_by_student[sid].append(act)
        # Explicit None check (same test as process_bgnot_entry) instead of
        # relying on the truthiness of an image object.
        if b_notes_img is not None:
            for img_info in b_bnote_data.get("images", []):
                lbl = img_info.get("label")
                hmin, hmax = img_info.get("hmin", 0), img_info.get("hmax", 0)
                if hmax > hmin:
                    crop = b_notes_img.crop((0, hmin, b_notes_img.width, hmax))
                    if has_significant_notes(crop):
                        notes_by_student[sid][lbl] = {
                            'img': crop,
                            'old_header_h': img_info.get("header_height", 0),
                        }
# --- 0. Read refaire.json if requested --- # --- 0. Read refaire.json if requested ---
refaire_dict = {} refaire_dict = {}
if args.with_refaire: if args.with_refaire:
@ -210,92 +270,15 @@ if __name__ == "__main__":
refaire_dict[sid] = labels refaire_dict[sid] = labels
else: else:
print(f"Warning: --with-refaire flag used, but {refaire_path} not found.") print(f"Warning: --with-refaire flag used, but {refaire_path} not found.")
# --- 1. Scan BGnot grouped directories and extract all checks & notes ---
for entry in os.listdir(bgnot_dir):
gdir = os.path.join(bgnot_dir, entry)
if not os.path.isdir(gdir) or entry.startswith("Copie"): # Part 1 : lecture des bgnot
continue # Ignore files and already compiled student folders with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
executor.map(process_bgnot_entry, os.listdir(bgnot_dir))
print(f"\nScanning grouped annotations in {entry}") # Part 1.5: Refaire
actions, notes_img = detect_checks_and_notes(gdir)
bnote_path = os.path.join(gdir, "bnote.json")
if not os.path.exists(bnote_path) or notes_img is None:
continue
with open(bnote_path, "r") as f:
bnote_data = json.load(f)
# Route actions to specific students
for act in actions:
sid = str(act.get("student_id"))
if sid:
actions_by_student[sid].append(act)
# Route manual note crops to specific students and labels
for img_info in bnote_data.get("images", []):
sid = str(img_info.get("id"))
lbl = img_info.get("label")
hmin = img_info.get("hmin", 0)
hmax = img_info.get("hmax", 0)
if hmax > hmin:
crop = notes_img.crop((0, hmin, notes_img.width, hmax))
# Store it if there are pen marks on it
if has_significant_notes(crop):
notes_by_student[sid][lbl] = {
'img': crop,
'old_header_h': img_info.get("header_height", 0)
}
# --- 1.5. Override BGnot actions with Bnot actions for Refaire targets ---
if args.with_refaire and refaire_dict: if args.with_refaire and refaire_dict:
bnot_dir = os.path.join(root_dir, "Bnot") for sid, labels in refaire_dict.items():
for sid, r_labels in refaire_dict.items(): process_refaire_entry(sid, labels)
s_bnot_dir = os.path.join(bnot_dir, f"Copie{sid}")
if not os.path.exists(s_bnot_dir):
continue
# If empty list, it targets all labels for this Copie
if not r_labels:
r_labels = list(original_data.get(sid, {}).keys())
# Clear out existing BGnot actions & notes for the refaire labels
actions_by_student[sid] = [
a for a in actions_by_student[sid] if a.get('label') not in r_labels
]
for lbl in r_labels:
notes_by_student[sid].pop(lbl, None)
print(f"\nScanning refaire annotations in Bnot for Copie{sid}")
b_actions, b_notes_img = detect_checks_and_notes(s_bnot_dir)
b_bnote_path = os.path.join(s_bnot_dir, "bnote.json")
if os.path.exists(b_bnote_path):
with open(b_bnote_path, "r") as f:
b_bnote_data = json.load(f)
# Inject actions
for act in b_actions:
# Bnot checkboxes don't natively have student_id, so we inject it
act["student_id"] = sid
actions_by_student[sid].append(act)
# Inject notes
if b_notes_img is not None:
for img_info in b_bnote_data.get("images", []):
lbl = img_info.get("label")
hmin = img_info.get("hmin", 0)
hmax = img_info.get("hmax", 0)
if hmax > hmin:
crop = b_notes_img.crop((0, hmin, b_notes_img.width, hmax))
if has_significant_notes(crop):
notes_by_student[sid][lbl] = {
'img': crop,
'old_header_h': img_info.get("header_height", 0)
}
def process_student(sid): def process_student(sid):