diff --git a/annotating_with_checks.py b/annotating_with_checks.py
index 08e8000..0fbeb3c 100644
--- a/annotating_with_checks.py
+++ b/annotating_with_checks.py
@@ -208,6 +208,8 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Generate annotated PDFs.")
     parser.add_argument("input_path", help="Directory or specific file path")
     parser.add_argument("--overwrite", action="store_true", help="Overwrite existing output files")
+    parser.add_argument("--refaire", action="store_true", help="Process only copies/labels defined in refaire.json")  # ADD THIS LINE
+
     args = parser.parse_args()
 
 
@@ -230,8 +232,30 @@ if __name__ == "__main__":
 
     results = annotating.make_dictionary(root_dir)
 
-    # Filter results if a specific target ID was requested
-    if target_id:
+    # --- ADD THE REFAIRE BLOCK HERE ---
+    if args.refaire:
+        refaire_path = os.path.join(root_dir, "refaire.json")
+        if os.path.exists(refaire_path):
+            with open(refaire_path, "r", encoding="utf-8") as f:
+                refaire_list = json.load(f)
+
+            filtered_results = {}
+            for copie_name, labels_to_redo in refaire_list:
+                sid = copie_name.replace("Copie", "")  # Extract "01" from "Copie01"
+                if sid in results:
+                    if not labels_to_redo:
+                        # Empty list: keep all labels for this Copie
+                        filtered_results[sid] = results[sid]
+                    else:
+                        # Keep only the requested labels
+                        filtered_results[sid] = {
+                            lbl: data for lbl, data in results[sid].items()
+                            if lbl in labels_to_redo
+                        }
+            results = filtered_results
+        else:
+            print(f"Warning: --refaire flag used, but {refaire_path} not found.")
+    elif target_id:
         if target_id in results:
             results = {target_id: results[target_id]}
         else:
diff --git a/correction.py b/correction.py
index ec56a1c..9cddb14 100644
--- a/correction.py
+++ b/correction.py
@@ -16,6 +16,8 @@ parser = argparse.ArgumentParser()
 parser.add_argument("--overwrite", action="store_true", help="Force redo requests even if output exists")
 parser.add_argument("--limit", type=int, help="limit calls to gemini rpo integer")
 
+parser.add_argument("--refaire", action="store_true",
+                    help="Redo specific copies/labels defined in refaire.json")
 args, _ = parser.parse_known_args()
 
 
@@ -675,6 +677,67 @@ def process_single_task(task_tuple):
         flush_thread_log()
 
 if __name__ == "__main__":
+    if args.refaire:
+        refaire_path = Path(INPUT_DIR) / "refaire.json"
+        overwritten_path = Path(INPUT_DIR) / "overwritten_correction.json"
+
+        if refaire_path.exists():
+            with open(refaire_path, "r", encoding="utf-8") as f:
+                refaire_list = json.load(f)
+
+            overwritten_data = []
+            if overwritten_path.exists():
+                with open(overwritten_path, "r", encoding="utf-8") as f:
+                    overwritten_data = json.load(f)
+
+            dirty_results = False
+
+            for copie_name, labels in refaire_list:
+                pid = copie_name.replace("Copie", "")
+                copie_dir = Path(INPUT_DIR) / copie_name
+
+                # If list is empty, redo all labels available for this Copie
+                if not labels:
+                    labels = [p.stem for p in copie_dir.glob("*.pdf")]
+
+                for label in labels:
+                    # 1. Extract and backup old corrections
+                    if label in results:
+                        for batch in results[label]:
+                            to_remove = None
+                            for item in batch:
+                                if item.get("id") == pid:
+                                    to_remove = item
+                                    break
+                            if to_remove:
+                                batch.remove(to_remove)
+                                overwritten_data.append({
+                                    "pid": pid,
+                                    "label": label,
+                                    "data": to_remove,
+                                    "timestamp": time.time()
+                                })
+                                dirty_results = True
+                        # Clean up empty batches
+                        results[label] = [b for b in results[label] if b]
+
+                    # 2. Make new group and add to tasks
+                    pdf_path = copie_dir / f"{label}.pdf"
+                    if pdf_path.exists():
+                        idx = get_next_group_idx(INPUT_DIR, label)
+                        height = grouping.get_pdf_height(str(pdf_path))
+                        grouping.create_jpg(label, idx, [(pid, str(pdf_path), height)], INPUT_DIR)
+                        new_group_path = str(Path(INPUT_DIR) / label / f"Group_{idx+1}.jpg")
+                        tasks_to_process.append((new_group_path, label))
+
+            if dirty_results:
+                with open(output_path, "w", encoding="utf-8") as f:
+                    json.dump(results, f, indent=2)
+                with open(overwritten_path, "w", encoding="utf-8") as f:
+                    json.dump(overwritten_data, f, indent=2)
+        else:
+            print(f"Warning: --refaire flag used, but {refaire_path} not found.", file=sys.stderr)
+
     print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...")
 
     with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor:
diff --git a/reading_grouped_annotations.py b/reading_grouped_annotations.py
index f782636..09eea71 100644
--- a/reading_grouped_annotations.py
+++ b/reading_grouped_annotations.py
@@ -57,7 +57,7 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, la
                     fb["norectangle"] = True
                 dirty_labels.add(label)
                 logs.append(f" > Cleared all feedbacks in {label}")
-                
+
         elif act['type'] == 'del_global':
             if act['index'] < len(global_fb):
                 global_fb[act['index']]["to_delete"] = True
@@ -171,13 +171,14 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, la
 
     return "\n".join(logs)
 
-
 from utils import read_all_labels
+import argparse
 
 if __name__ == "__main__":
-    if len(sys.argv) < 2:
-        print("Usage: python reading_grouped_annotations.py