diff --git a/reading_grouped_annotations.py b/reading_grouped_annotations.py
index 09eea71..cbe7dc0 100644
--- a/reading_grouped_annotations.py
+++ b/reading_grouped_annotations.py
@@ -174,6 +174,8 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, la
 from utils import read_all_labels
 import argparse
 
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Read grouped annotations and compile PDFs")
     parser.add_argument("input_path", help="Directory path")
@@ -195,9 +197,85 @@ if __name__ == "__main__":
     # Load original data
     original_data = annotating.make_dictionary(root_dir)
 
     actions_by_student = collections.defaultdict(list)
     notes_by_student = collections.defaultdict(dict)
+
+    def collect_note_crops(notes_img, bnote_data):
+        """Return (img_info, note) pairs for each band of notes_img that has pen marks.
+
+        Every entry of bnote_data["images"] describes a horizontal band
+        (hmin..hmax) of the notes image. Empty bands and bands without
+        significant notes are skipped.
+        """
+        found = []
+        for img_info in bnote_data.get("images", []):
+            hmin, hmax = img_info.get("hmin", 0), img_info.get("hmax", 0)
+            if hmax <= hmin:
+                continue
+            crop = notes_img.crop((0, hmin, notes_img.width, hmax))
+            if has_significant_notes(crop):
+                note = {'img': crop, 'old_header_h': img_info.get("header_height", 0)}
+                found.append((img_info, note))
+        return found
+
+    def process_bgnot_entry(entry):
+        """Scan one BGnot entry; return (actions, note_crops), or None if skipped.
+
+        Meant to run in a worker thread: it touches no shared state, and the
+        caller merges the results so the outcome does not depend on scheduling.
+        """
+        gdir = os.path.join(bgnot_dir, entry)
+        # Ignore files and already compiled student folders
+        if not os.path.isdir(gdir) or entry.startswith("Copie"):
+            return None
+
+        actions, notes_img = detect_checks_and_notes(gdir)
+        bnote_path = os.path.join(gdir, "bnote.json")
+        if not os.path.exists(bnote_path) or notes_img is None:
+            return None
+
+        with open(bnote_path, "r") as f:
+            bnote_data = json.load(f)
+
+        return actions, collect_note_crops(notes_img, bnote_data)
+
+    def process_refaire_entry(sid, r_labels):
+        """Override the collected data of student sid with what Bnot/Copie{sid} holds.
+
+        Actions and notes already gathered for r_labels are dropped first; an
+        empty r_labels targets all labels of this Copie.
+        """
+        s_bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{sid}")
+        if not os.path.exists(s_bnot_dir):
+            return
+        if not r_labels:
+            r_labels = list(original_data.get(sid, {}).keys())
+
+        # Clear out existing BGnot actions & notes for the refaire labels
+        actions_by_student[sid] = [
+            a for a in actions_by_student[sid] if a.get('label') not in r_labels
+        ]
+        for lbl in r_labels:
+            notes_by_student[sid].pop(lbl, None)
+
+        b_actions, b_notes_img = detect_checks_and_notes(s_bnot_dir)
+        b_bnote_path = os.path.join(s_bnot_dir, "bnote.json")
+        if not os.path.exists(b_bnote_path):
+            return
+        with open(b_bnote_path, "r") as f:
+            b_bnote_data = json.load(f)
+
+        for act in b_actions:
+            # Bnot checkboxes don't natively have student_id, so we inject it
+            act["student_id"] = sid
+            actions_by_student[sid].append(act)
+
+        # Explicit None test: do not rely on the truthiness of an image object
+        if b_notes_img is not None:
+            for img_info, note in collect_note_crops(b_notes_img, b_bnote_data):
+                notes_by_student[sid][img_info.get("label")] = note
+
 
     # --- 0. Read refaire.json if requested ---
     refaire_dict = {}
     if args.with_refaire:
@@ -210,92 +288,31 @@ if __name__ == "__main__":
                 refaire_dict[sid] = labels
         else:
             print(f"Warning: --with-refaire flag used, but {refaire_path} not found.")
 
-    # --- 1. Scan BGnot grouped directories and extract all checks & notes ---
-    for entry in os.listdir(bgnot_dir):
-        gdir = os.path.join(bgnot_dir, entry)
-        if not os.path.isdir(gdir) or entry.startswith("Copie"):
-            continue  # Ignore files and already compiled student folders
+    # Part 1: read the grouped BGnot annotations
+    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+        # Iterating map() re-raises worker exceptions and yields results in
+        # input order, so the merge follows listdir order like a plain loop.
+        for result in executor.map(process_bgnot_entry, os.listdir(bgnot_dir)):
+            if result is None:
+                continue
+            actions, note_crops = result
+            # Route actions to specific students
+            for act in actions:
+                raw_sid = act.get("student_id")
+                # str(None) is the truthy string "None", so test before converting
+                if raw_sid is None or raw_sid == "":
+                    continue
+                actions_by_student[str(raw_sid)].append(act)
+            # Route manual note crops to specific students and labels
+            for img_info, note in note_crops:
+                sid, lbl = str(img_info.get("id")), img_info.get("label")
+                notes_by_student[sid][lbl] = note
 
-        print(f"\nScanning grouped annotations in {entry}")
-        actions, notes_img = detect_checks_and_notes(gdir)
-
-        bnote_path = os.path.join(gdir, "bnote.json")
-        if not os.path.exists(bnote_path) or notes_img is None:
-            continue
-
-        with open(bnote_path, "r") as f:
-            bnote_data = json.load(f)
-
-        # Route actions to specific students
-        for act in actions:
-            sid = str(act.get("student_id"))
-            if sid:
-                actions_by_student[sid].append(act)
-
-        # Route manual note crops to specific students and labels
-        for img_info in bnote_data.get("images", []):
-            sid = str(img_info.get("id"))
-            lbl = img_info.get("label")
-            hmin = img_info.get("hmin", 0)
-            hmax = img_info.get("hmax", 0)
-
-            if hmax > hmin:
-                crop = notes_img.crop((0, hmin, notes_img.width, hmax))
-                # Store it if there are pen marks on it
-                if has_significant_notes(crop):
-                    notes_by_student[sid][lbl] = {
-                        'img': crop,
-                        'old_header_h': img_info.get("header_height", 0)
-                    }
-
-    # --- 1.5. Override BGnot actions with Bnot actions for Refaire targets ---
+    # Part 1.5: Refaire
     if args.with_refaire and refaire_dict:
-        bnot_dir = os.path.join(root_dir, "Bnot")
-        for sid, r_labels in refaire_dict.items():
-            s_bnot_dir = os.path.join(bnot_dir, f"Copie{sid}")
-            if not os.path.exists(s_bnot_dir):
-                continue
-
-            # If empty list, it targets all labels for this Copie
-            if not r_labels:
-                r_labels = list(original_data.get(sid, {}).keys())
-
-            # Clear out existing BGnot actions & notes for the refaire labels
-            actions_by_student[sid] = [
-                a for a in actions_by_student[sid] if a.get('label') not in r_labels
-            ]
-            for lbl in r_labels:
-                notes_by_student[sid].pop(lbl, None)
-
-            print(f"\nScanning refaire annotations in Bnot for Copie{sid}")
-            b_actions, b_notes_img = detect_checks_and_notes(s_bnot_dir)
-
-            b_bnote_path = os.path.join(s_bnot_dir, "bnote.json")
-            if os.path.exists(b_bnote_path):
-                with open(b_bnote_path, "r") as f:
-                    b_bnote_data = json.load(f)
-
-                # Inject actions
-                for act in b_actions:
-                    # Bnot checkboxes don't natively have student_id, so we inject it
-                    act["student_id"] = sid
-                    actions_by_student[sid].append(act)
-
-                # Inject notes
-                if b_notes_img is not None:
-                    for img_info in b_bnote_data.get("images", []):
-                        lbl = img_info.get("label")
-                        hmin = img_info.get("hmin", 0)
-                        hmax = img_info.get("hmax", 0)
-
-                        if hmax > hmin:
-                            crop = b_notes_img.crop((0, hmin, b_notes_img.width, hmax))
-                            if has_significant_notes(crop):
-                                notes_by_student[sid][lbl] = {
-                                    'img': crop,
-                                    'old_header_h': img_info.get("header_height", 0)
-                                }
+        for sid, labels in refaire_dict.items():
+            process_refaire_entry(sid, labels)
 
 
     def process_student(sid):