Rudimentary support for --refaire

master
Sébastien Miquel 2026-04-01 22:20:11 +02:00
parent c1524a99c3
commit 95db769751
6 changed files with 155 additions and 46 deletions

View File

@ -12,7 +12,7 @@ ANNOT_WIDTH = 600
# The result is: Copie id -> label -> {pdf_path, gemini_result, coordinates}
# Coordinates are the real coordinates (hmin, hmax) of the image in the Group.
# The gemini_result coordinates should be un-normalized!
def make_dictionary(root_dir):
def make_dictionary(root_dir, refaire=False, refaire_list=[]):
correction_path = os.path.join(root_dir, "correction.json")
# Load correction data
@ -81,6 +81,48 @@ def make_dictionary(root_dir):
"coordinates": coordinates
}
if refaire:
    # Refaire mode: every (copy, label) pair listed in refaire_list whose PDF
    # exists on disk gets a placeholder entry in result_data.
    # refaire_list is iterated as (copie_name, labels_to_redo) pairs.
    for copie_name, labels_to_redo in refaire_list:
        sid = copie_name.replace("Copie", "")  # "Copie01" -> "01"
        # A student id that was never corrected starts from an empty entry.
        student_entry = result_data.setdefault(sid, {})
        # An empty (or null) label list adds nothing here.
        for lbl in labels_to_redo or ():
            pdf_path = os.path.join(root_dir, f"Copie{sid}", f"{lbl}.pdf")
            # pdf_path is a plain str: wrap it in Path before calling
            # exists(), for known and unknown student ids alike.
            if not Path(pdf_path).exists():
                print("Debug : asked to refaire", sid, lbl, "but pdf absent")
                continue
            # Placeholder result; it replaces any entry already stored for
            # this label.
            student_entry[lbl] = {
                "pdf_path": pdf_path,
                "result": {
                    "score": 0.0,
                    "confidence": 1.0,
                    "feedback": [],
                    "error": "non traité",
                },
                "coordinates": (0, 0),
            }
return result_data
def make_base_image(pdf_path):

View File

@ -108,9 +108,9 @@ from utils import natural_key
def process_student(args):
"""Thread worker: Processes one student."""
root_dir, student_id, labels, overwrite = args
root_dir, student_id, labels, overwrite, sub_folder = args
output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
output_dir = os.path.join(root_dir, sub_folder, f"Copie{student_id}")
if os.path.exists(output_dir):
if not overwrite:
@ -230,14 +230,16 @@ if __name__ == "__main__":
else:
root_dir = input_path
results = annotating.make_dictionary(root_dir)
if not args.refaire:
results = annotating.make_dictionary(root_dir)
# --- Refaire mode: load refaire.json and rebuild the results for the listed copies ---
if args.refaire:
refaire_path = os.path.join(root_dir, "refaire.json")
if os.path.exists(refaire_path):
with open(refaire_path, "r", encoding="utf-8") as f:
refaire_list = json.load(f)
results = annotating.make_dictionary(root_dir,
refaire=True,refaire_list=refaire_list)
filtered_results = {}
for copie_name, labels_to_redo in refaire_list:
@ -262,7 +264,10 @@ if __name__ == "__main__":
print(f"Student ID {target_id} not found in directory scan.")
results = {}
tasks = sorted([(root_dir, sid, lbls, overwrite) for sid, lbls in results.items()])
sub_folder = "BRnot" if args.refaire else "Bnot"
tasks = sorted([(root_dir, sid, lbls, overwrite, sub_folder)
for sid, lbls in results.items()])
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
results = executor.map(process_student, tasks)

View File

@ -2,8 +2,12 @@ import sys
import shutil
from pathlib import Path
def sync_annotated(dir_arg):
bgnot_dir = Path(dir_arg) / "BGnot"
def sync_annotated(dir_arg, refaire):
if not refaire:
bgnot_dir = Path(dir_arg) / "BGnot"
else:
bgnot_dir = Path(dir_arg) / "BRnot"
annotated_dir = Path.home() / "SyncCopies" / "Annotées"
if not annotated_dir.is_dir():
@ -22,9 +26,16 @@ def sync_annotated(dir_arg):
print("copying ", pdf_file, " to ", dest_file)
shutil.copy2(pdf_file, dest_file)
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: python script.py <dir>")
sys.exit(1)
import argparse
sync_annotated(sys.argv[1])
if __name__ == "__main__":
    # Command-line entry point: one positional directory plus the optional
    # --refaire switch, both handed to sync_annotated(). With --refaire,
    # sync_annotated() reads from "BRnot" instead of "BGnot".
    arg_parser = argparse.ArgumentParser(description="Move to tablette folder.")
    arg_parser.add_argument("dir", help="The directory to process")
    arg_parser.add_argument(
        "--refaire",
        action="store_true",
        help="Process only copies/labels defined in refaire.json",
    )
    args = arg_parser.parse_args()

    root_dir = args.dir
    sync_annotated(root_dir, args.refaire)

View File

@ -5,6 +5,7 @@ import collections
import concurrent.futures
from pathlib import Path
from PIL import Image
import threading
import annotating
@ -179,7 +180,7 @@ import argparse
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Read grouped annotations and compile PDFs")
parser.add_argument("input_path", help="Directory path")
parser.add_argument("--with-refaire", action="store_true", help="Merge refaire annotations from Bnot")
parser.add_argument("--refaire", action="store_true", help="Merge refaire annotations from Bnot")
args = parser.parse_args()
root_dir = sys.argv[1]
@ -194,26 +195,52 @@ if __name__ == "__main__":
except FileNotFoundError:
all_labels = []
# Refaire request, read from <root_dir>/refaire.json when --refaire is given.
# refaire_list keeps the raw JSON content, iterated as (copie_name, labels)
# pairs; refaire_dict maps the student id ("Copie01" -> "01") to its labels.
# Both are initialised up front so that the code below can test them even
# when --refaire is absent or refaire.json is missing (otherwise
# `args.refaire and refaire_list` raises NameError).
refaire_list = []
refaire_dict = {}
if args.refaire:
    refaire_path = os.path.join(root_dir, "refaire.json")
    if os.path.exists(refaire_path):
        with open(refaire_path, "r", encoding="utf-8") as f:
            refaire_list = json.load(f)
        for c_name, labels in refaire_list:
            sid = c_name.replace("Copie", "")
            refaire_dict[sid] = labels
    else:
        print(f"Warning: --refaire flag used, but {refaire_path} not found.")
# Load original data
original_data = annotating.make_dictionary(root_dir)
if args.refaire and refaire_list:
original_data = annotating.make_dictionary(root_dir,
refaire=True,
refaire_list=refaire_list)
else:
original_data = annotating.make_dictionary(root_dir)
lock = threading.Lock()
actions_by_student = collections.defaultdict(list)
notes_by_student = collections.defaultdict(dict)
def process_bgnot_entry(entry):
def process_bgnot_entry(entry, only_ids=None):
gdir = os.path.join(bgnot_dir, entry)
if not os.path.isdir(gdir) or entry.startswith("Copie"):
return
bnote_path = os.path.join(gdir, "bnote.json")
with open(bnote_path, "r") as f:
bnote_data = json.load(f)
if only_ids:
id_found = False
for d in bnote_data["images"]:
if d["id"] in only_ids:
id_found = True
if not id_found:
return
actions, notes_img = detect_checks_and_notes(gdir)
bnote_path = os.path.join(gdir, "bnote.json")
if not os.path.exists(bnote_path) or notes_img is None:
return
with open(bnote_path, "r") as f:
bnote_data = json.load(f)
with lock:
for act in actions:
@ -230,13 +257,16 @@ if __name__ == "__main__":
def process_refaire_entry(sid, r_labels):
s_bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{sid}")
s_bnot_dir = os.path.join(root_dir, "BRnot", f"Copie{sid}")
if not os.path.exists(s_bnot_dir): return
if not r_labels: r_labels = list(original_data.get(sid, {}).keys())
if not r_labels:
r_labels = list(original_data.get(sid, {}).keys())
with lock:
actions_by_student[sid] = [a for a in actions_by_student[sid] if a.get('label') not in r_labels]
for lbl in r_labels: notes_by_student[sid].pop(lbl, None)
actions_by_student[sid] = [a for a in actions_by_student[sid]
if a.get('label') not in r_labels]
for lbl in r_labels:
notes_by_student[sid].pop(lbl, None)
b_actions, b_notes_img = detect_checks_and_notes(s_bnot_dir)
b_bnote_path = os.path.join(s_bnot_dir, "bnote.json")
@ -259,24 +289,20 @@ if __name__ == "__main__":
# --- 0. Read refaire.json if requested ---
refaire_dict = {}
if args.with_refaire:
refaire_path = os.path.join(root_dir, "refaire.json")
if os.path.exists(refaire_path):
with open(refaire_path, "r", encoding="utf-8") as f:
refaire_list = json.load(f)
for c_name, labels in refaire_list:
sid = c_name.replace("Copie", "")
refaire_dict[sid] = labels
else:
print(f"Warning: --with-refaire flag used, but {refaire_path} not found.")
# Part 1 : lecture des bgnot
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
executor.map(process_bgnot_entry, os.listdir(bgnot_dir))
if refaire_dict:
only_ids = [ids for ids in refaire_dict]
else:
only_ids = None
# Part 1.5: Refaire
if args.with_refaire and refaire_dict:
# Lecture des bgnot
with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
executor.map(lambda x: process_bgnot_entry(x, only_ids=only_ids),
os.listdir(bgnot_dir))
# Refaire
if args.refaire and refaire_dict:
for sid, labels in refaire_dict.items():
process_refaire_entry(sid, labels)
@ -296,7 +322,11 @@ if __name__ == "__main__":
# --- 2. Process each student concurrently using 4 threads ---
sids = sorted(original_data.keys(), key=natural_key)
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = {executor.submit(process_student, sid): sid for sid in sids}
if refaire_dict:
futures = {executor.submit(process_student, sid): sid for sid in refaire_dict}
else:
futures = {executor.submit(process_student, sid): sid for sid in sids}
for future in concurrent.futures.as_completed(futures):
output = future.result()
if output:

View File

@ -150,7 +150,7 @@ def split_an_interro(base_dir, input_pdf, coords_list):
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: python scrit.py <directory or pdf_file>")
print("Usage: python script.py <directory or pdf_file>")
sys.exit(1)
input_arg = Path(sys.argv[1])
@ -167,6 +167,7 @@ if __name__ == "__main__":
for pdf_path in pdf_files:
json_path = pdf_path.with_suffix(".json")
# print("Debug :", json_path)
if json_path.exists():
(name, coords) = decode_json(pdf_path)
print("Decoded name : ", name)

View File

@ -44,9 +44,29 @@ def process_directory(dir_arg):
# Create a hard link (os.link) to the absolute path of the concatenated file
os.link(concat_file.absolute(), symlink_path)
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: python script.py <dir>")
sys.exit(1)
import argparse
process_directory(sys.argv[1])
if __name__ == "__main__":
    # Command-line entry point.
    #   dir        directory to process
    #   --refaire  link the Concat.pdf of every BRnot/<copy> folder into the
    #              sync folder instead of running process_directory()
    parser = argparse.ArgumentParser(description="Move to tablette folder.")
    parser.add_argument("dir", help="The directory to process")
    parser.add_argument(
        "--refaire",
        action="store_true",
        help="Process only copies/labels defined in refaire.json",
    )
    args = parser.parse_args()
    root_dir = args.dir

    if args.refaire:
        brnot_dir = Path(root_dir) / "BRnot"
        # Fail early, before creating anything, when there is nothing to link.
        if not brnot_dir.is_dir():
            print(f"Error: --refaire flag used, but {brnot_dir} not found.",
                  file=sys.stderr)
            sys.exit(1)

        # NOTE(review): joining an absolute `dir` argument discards the
        # "SyncCopies/À Annoter" prefix (pathlib semantics), so sync_dir would
        # be root_dir itself — confirm callers always pass a relative path.
        sync_dir = Path.home() / "SyncCopies" / "À Annoter" / root_dir
        sync_dir.mkdir(parents=True, exist_ok=True)

        for copie_dir in brnot_dir.iterdir():
            concat_file = copie_dir / "Concat.pdf"
            if not (copie_dir.is_dir() and concat_file.is_file()):
                continue
            # os.link creates a hard link; an existing target is replaced.
            link_path = sync_dir / f"{copie_dir.name}.pdf"
            if link_path.exists():
                link_path.unlink()
            os.link(concat_file.absolute(), link_path)
    else:
        process_directory(root_dir)