Using folders "Copies" and "Par label", hopefully
parent
7e7045293a
commit
c6c1a052e1
11
Readme.org
11
Readme.org
|
|
@ -1,7 +1,7 @@
|
||||||
#+title: Script
|
#+title: Script
|
||||||
#+author: Sébastien Miquel
|
#+author: Sébastien Miquel
|
||||||
#+date: 14-03-2026
|
#+date: 14-03-2026
|
||||||
# Time-stamp: <14-05-26 08:55>
|
# Time-stamp: <17-05-26 10:51>
|
||||||
#+OPTIONS:
|
#+OPTIONS:
|
||||||
|
|
||||||
* Méta
|
* Méta
|
||||||
|
|
@ -101,13 +101,13 @@ export GEMINI_API_KEY=…
|
||||||
|
|
||||||
Les key bindings ne sont pas adaptés à un clavier azerty… À changer…
|
Les key bindings ne sont pas adaptés à un clavier azerty… À changer…
|
||||||
|
|
||||||
Fix issues with =python page_splitter.py Interro14/Copie01.pdf=
|
Fix issues with =python page_splitter.py Interro14/Copies/Copie01.pdf=
|
||||||
4. =python cutleft.py Interro=
|
4. =python cutleft.py Interro=
|
||||||
|
|
||||||
Découpe la partie gauche des copies, là où il devrait y avoir les
|
Découpe la partie gauche des copies, là où il devrait y avoir les
|
||||||
labels des exercices/questions.
|
labels des exercices/questions.
|
||||||
|
|
||||||
Rerun on a single file with =python cutleft.py Interro/Copie01.pdf=
|
Rerun on a single file with =python cutleft.py Interro/Copies/Copie01.pdf=
|
||||||
|
|
||||||
** Génération d'information sur l'énoncé
|
** Génération d'information sur l'énoncé
|
||||||
|
|
||||||
|
|
@ -136,7 +136,7 @@ Set proxy with ~export HTTPS_PROXY="http://10.0.0.1:3128"~
|
||||||
+ `|…` n'est pas arrêté verticalement par son type opposé.
|
+ `|…` n'est pas arrêté verticalement par son type opposé.
|
||||||
+ `…|` est stoppé horizontalement par le `|…` le plus proche.
|
+ `…|` est stoppé horizontalement par le `|…` le plus proche.
|
||||||
Pour modifier une seule copie :
|
Pour modifier une seule copie :
|
||||||
=python plotting.py Interro/Copie01.pdf=
|
=python plotting.py Interro/Copies/Copie01.pdf=
|
||||||
|
|
||||||
Génère aussi les =Copie01.json=, à partir des =Copie01_01.json=
|
Génère aussi les =Copie01.json=, à partir des =Copie01_01.json=
|
||||||
1. En cas de souci (par exemple, les pages ne sont pas dans le bon ordre)
|
1. En cas de souci (par exemple, les pages ne sont pas dans le bon ordre)
|
||||||
|
|
@ -147,6 +147,7 @@ Set proxy with ~export HTTPS_PROXY="http://10.0.0.1:3128"~
|
||||||
3. =python splitting_int.py Interro=
|
3. =python splitting_int.py Interro=
|
||||||
|
|
||||||
Découpe les copies suivant les exercices
|
Découpe les copies suivant les exercices
|
||||||
|
Peut être appelé avec une seule copie.
|
||||||
4. =python grouping.py Interro=
|
4. =python grouping.py Interro=
|
||||||
|
|
||||||
Regroupe les mêmes questions de différentes copies en groupes de
|
Regroupe les mêmes questions de différentes copies en groupes de
|
||||||
|
|
@ -159,7 +160,7 @@ Set proxy with ~export HTTPS_PROXY="http://10.0.0.1:3128"~
|
||||||
1. Il faut créer des persp, pour indication de comment corriger, et
|
1. Il faut créer des persp, pour indication de comment corriger, et
|
||||||
relancer =enonce_info.py=
|
relancer =enonce_info.py=
|
||||||
2. =python correction.py Interro --limit 240= OU
|
2. =python correction.py Interro --limit 240= OU
|
||||||
=python correction.py Interro/Ex\ 2/Group_1.jpg= OU
|
=python correction.py Interro/Par\ label/Ex\ 2/Group_1.jpg= OU
|
||||||
=python correction.py Interro --overwrite=
|
=python correction.py Interro --overwrite=
|
||||||
=python correction.py Interro --pro-by-label= (needs `labels_for_pro`)
|
=python correction.py Interro --pro-by-label= (needs `labels_for_pro`)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
|
||||||
# Find coordinates
|
# Find coordinates
|
||||||
coordinates = None
|
coordinates = None
|
||||||
height,width= None, None
|
height,width= None, None
|
||||||
label_dir = os.path.join(root_dir, label)
|
label_dir = Path(root_dir) / "Par label" / label
|
||||||
|
|
||||||
# Search all json files in Dir/label
|
# Search all json files in Dir/label
|
||||||
json_files = glob.glob(os.path.join(label_dir, "*.json"))
|
json_files = glob.glob(os.path.join(label_dir, "*.json"))
|
||||||
|
|
@ -59,7 +59,7 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
|
||||||
break
|
break
|
||||||
|
|
||||||
# Construct PDF path: Dir/Copie{id}/{label}.pdf
|
# Construct PDF path: Dir/Copie{id}/{label}.pdf
|
||||||
pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
|
pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
|
||||||
|
|
||||||
# Initialize dictionary structure for this ID if missing
|
# Initialize dictionary structure for this ID if missing
|
||||||
if student_id not in result_data:
|
if student_id not in result_data:
|
||||||
|
|
@ -89,8 +89,7 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
|
||||||
# On ajoute des dummies
|
# On ajoute des dummies
|
||||||
if labels_to_redo: # Si la liste est non vide
|
if labels_to_redo: # Si la liste est non vide
|
||||||
for lbl in labels_to_redo:
|
for lbl in labels_to_redo:
|
||||||
pdf_path = os.path.join(root_dir,
|
pdf_path = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}.pdf"
|
||||||
f"Copie{sid}", f"{lbl}.pdf")
|
|
||||||
if not Path(pdf_path).exists():
|
if not Path(pdf_path).exists():
|
||||||
print("Debug : asked to refaire", sid, lbl, "but pdf absent")
|
print("Debug : asked to refaire", sid, lbl, "but pdf absent")
|
||||||
continue
|
continue
|
||||||
|
|
@ -107,8 +106,7 @@ def make_dictionary(root_dir, refaire=False, refaire_list=[]):
|
||||||
else: # Ce student id n'a jamais été corrigé
|
else: # Ce student id n'a jamais été corrigé
|
||||||
result_data[sid] = {}
|
result_data[sid] = {}
|
||||||
for lbl in labels_to_redo:
|
for lbl in labels_to_redo:
|
||||||
pdf_path = os.path.join(root_dir,
|
pdf_path = Path(root_dir) / "Copies" / f"Copie{sid}" / f"{lbl}.pdf"
|
||||||
f"Copie{sid}", f"{lbl}.pdf")
|
|
||||||
if not pdf_path.exists():
|
if not pdf_path.exists():
|
||||||
print("Debug : asked to refaire", sid, lbl, "but pdf absent")
|
print("Debug : asked to refaire", sid, lbl, "but pdf absent")
|
||||||
continue
|
continue
|
||||||
|
|
@ -567,13 +565,13 @@ def process_student(student_id, labels_data, root_dir, all_labels, overwrite):
|
||||||
d_notes = dict.fromkeys(all_labels, "")
|
d_notes = dict.fromkeys(all_labels, "")
|
||||||
label_images = []
|
label_images = []
|
||||||
|
|
||||||
|
# !! Trier par l'ordre des labels plutôt
|
||||||
sorted_labels = sorted(list(labels_data.items()), key=natural_key)
|
sorted_labels = sorted(list(labels_data.items()), key=natural_key)
|
||||||
|
|
||||||
for label, content in sorted_labels:
|
for label, content in sorted_labels:
|
||||||
# 1. Find PDF path
|
# 1. Find PDF path
|
||||||
copie_folder = f"Copie{student_id}"
|
copie_folder = f"Copie{student_id}"
|
||||||
pdf_rel_path = os.path.join(copie_folder, f"{label}.pdf")
|
pdf_full_path = Path(root_dir) / "Copies" / copie_folder / f"{label}.pdf"
|
||||||
pdf_full_path = os.path.join(root_dir, pdf_rel_path)
|
|
||||||
|
|
||||||
if not os.path.exists(pdf_full_path):
|
if not os.path.exists(pdf_full_path):
|
||||||
print(f"File not found: {pdf_full_path}")
|
print(f"File not found: {pdf_full_path}")
|
||||||
|
|
@ -629,13 +627,14 @@ def process_correction(root_dir, data, all_labels, overwrite=False):
|
||||||
# # Wait for all threads to complete
|
# # Wait for all threads to complete
|
||||||
# concurrent.futures.wait(futures)
|
# concurrent.futures.wait(futures)
|
||||||
|
|
||||||
# Ne pas thread cette applications
|
# Ne pas thread cette application
|
||||||
# 1. Il faut protéger les appels à matplotlib
|
# 1. Il faut protéger les appels à matplotlib
|
||||||
# 2. tu vas perdre les erreurs
|
# 2. tu vas perdre les erreurs
|
||||||
for student_id, labels in sorted(data.items()):
|
for student_id, labels in sorted(data.items()):
|
||||||
process_student(student_id, labels, root_dir, all_labels, overwrite)
|
process_student(student_id, labels, root_dir, all_labels, overwrite)
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import utils
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description="Annotate copies")
|
parser = argparse.ArgumentParser(description="Annotate copies")
|
||||||
|
|
@ -644,7 +643,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
root_dir = args.root_dir
|
root_dir = args.root_dir
|
||||||
labels = list(filter(None, (Path(root_dir) / "labels").read_text().splitlines()))
|
labels = utils.read_all_labels(root_dir)
|
||||||
results = make_dictionary(root_dir)
|
results = make_dictionary(root_dir)
|
||||||
# Results is : Copie id -> label -> {pdf_path, gemini_result, coordinates}
|
# Results is : Copie id -> label -> {pdf_path, gemini_result, coordinates}
|
||||||
# Coordinates are the real coordinates (hmin, hmax) of the image in the Group
|
# Coordinates are the real coordinates (hmin, hmax) of the image in the Group
|
||||||
|
|
|
||||||
|
|
@ -38,14 +38,20 @@ for path_str in args.paths:
|
||||||
# Handle individual file
|
# Handle individual file
|
||||||
# Note: assumes structure InterroTest/Ex 2/Group_1.jpg to get parents[1]
|
# Note: assumes structure InterroTest/Ex 2/Group_1.jpg to get parents[1]
|
||||||
label = arg_path.parent.name
|
label = arg_path.parent.name
|
||||||
|
INPUT_DIR = arg_path.parent.parent.parent
|
||||||
|
COPIES_DIR = INPUT_DIR / "Copies"
|
||||||
|
GROUPS_DIR = INPUT_DIR / "Par label"
|
||||||
tasks.append((str(arg_path), label))
|
tasks.append((str(arg_path), label))
|
||||||
if label not in results:
|
if label not in results:
|
||||||
results[label] = []
|
results[label] = []
|
||||||
|
|
||||||
elif arg_path.is_dir():
|
elif arg_path.is_dir():
|
||||||
|
INPUT_DIR = arg_path
|
||||||
|
COPIES_DIR = INPUT_DIR / "Copies"
|
||||||
|
GROUPS_DIR = INPUT_DIR / "Par label"
|
||||||
# Handle directory (original behavior)
|
# Handle directory (original behavior)
|
||||||
for sub in arg_path.iterdir():
|
for sub in GROUPS_DIR.iterdir():
|
||||||
if sub.is_dir() and sub.name.startswith("Ex"):
|
if sub.is_dir():
|
||||||
label = sub.name
|
label = sub.name
|
||||||
if label not in results:
|
if label not in results:
|
||||||
results[label] = []
|
results[label] = []
|
||||||
|
|
@ -145,7 +151,7 @@ do not score or give feedback to any other question."""
|
||||||
|
|
||||||
def make_prompt(full_label):
|
def make_prompt(full_label):
|
||||||
def read_longest_prefix_file(subdir):
|
def read_longest_prefix_file(subdir):
|
||||||
dir_path = Path(INPUT_DIR) / subdir
|
dir_path = INPUT_DIR / subdir
|
||||||
matches = [f for f in dir_path.iterdir()
|
matches = [f for f in dir_path.iterdir()
|
||||||
if f.is_file()
|
if f.is_file()
|
||||||
and full_label.startswith(f.name)
|
and full_label.startswith(f.name)
|
||||||
|
|
@ -167,7 +173,6 @@ from google.genai import types
|
||||||
import base64
|
import base64
|
||||||
import shlex
|
import shlex
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
|
||||||
import os
|
import os
|
||||||
import threading
|
import threading
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
|
@ -210,7 +215,7 @@ def flush_thread_log(tid=None):
|
||||||
tid = tid or threading.current_thread().name
|
tid = tid or threading.current_thread().name
|
||||||
with log_lock:
|
with log_lock:
|
||||||
if thread_logs.get(tid):
|
if thread_logs.get(tid):
|
||||||
with open(Path(INPUT_DIR) / "correction_log", "a", encoding="utf-8") as f:
|
with open(INPUT_DIR / "correction_log", "a", encoding="utf-8") as f:
|
||||||
f.write(f"--- Task Log [{tid}] ---\n")
|
f.write(f"--- Task Log [{tid}] ---\n")
|
||||||
f.write("\n".join(thread_logs[tid]) + "\n\n")
|
f.write("\n".join(thread_logs[tid]) + "\n\n")
|
||||||
thread_logs[tid].clear()
|
thread_logs[tid].clear()
|
||||||
|
|
@ -311,8 +316,8 @@ def generate_request(file, full_label):
|
||||||
return (contents, generate_content_config)
|
return (contents, generate_content_config)
|
||||||
|
|
||||||
client = genai.Client(api_key=api_key)
|
client = genai.Client(api_key=api_key)
|
||||||
output_path = Path(INPUT_DIR) / "correction.json"
|
output_path = INPUT_DIR / "correction.json"
|
||||||
progress_path = Path(INPUT_DIR) / "correction_progress.json"
|
progress_path = INPUT_DIR / "correction_progress.json"
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
overwrite = args.overwrite
|
overwrite = args.overwrite
|
||||||
limit = args.limit
|
limit = args.limit
|
||||||
|
|
@ -407,9 +412,9 @@ def get_single_image_bytes(pdf_path):
|
||||||
return img_byte_arr.getvalue()
|
return img_byte_arr.getvalue()
|
||||||
|
|
||||||
def correct_boxes_with_gemini(pid, label, original_feedbacks,
|
def correct_boxes_with_gemini(pid, label, original_feedbacks,
|
||||||
root_dir, yming, ymaxg, width_r, total_height):
|
yming, ymaxg, width_r, total_height):
|
||||||
"""Requests corrected bounding boxes from Gemini Flash on the single image."""
|
"""Requests corrected bounding boxes from Gemini Flash on the single image."""
|
||||||
pdf_path = Path(root_dir) / f"Copie{pid}" / f"{label}.pdf"
|
pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf"
|
||||||
img_bytes = get_single_image_bytes(pdf_path)
|
img_bytes = get_single_image_bytes(pdf_path)
|
||||||
|
|
||||||
localized_feedbacks = [f for f in original_feedbacks if f["box_2d"]]
|
localized_feedbacks = [f for f in original_feedbacks if f["box_2d"]]
|
||||||
|
|
@ -473,9 +478,9 @@ it goes wrong, and the feedback is what went wrong.
|
||||||
import shutil
|
import shutil
|
||||||
import grouping
|
import grouping
|
||||||
|
|
||||||
def get_next_group_idx(root_dir, label):
|
def get_next_group_idx(label):
|
||||||
"""Finds the next available Group index for a given label."""
|
"""Finds the next available Group index for a given label."""
|
||||||
target_folder = Path(root_dir) / label
|
target_folder = GROUPS_DIR / label
|
||||||
target_folder.mkdir(exist_ok=True)
|
target_folder.mkdir(exist_ok=True)
|
||||||
existing = list(target_folder.glob("Group_*.jpg"))
|
existing = list(target_folder.glob("Group_*.jpg"))
|
||||||
if not existing: return 0
|
if not existing: return 0
|
||||||
|
|
@ -489,7 +494,7 @@ def handle_label_errors(pid, label, res, pdf_path):
|
||||||
error_type = res.get("error")
|
error_type = res.get("error")
|
||||||
|
|
||||||
all_labels = read_all_labels(INPUT_DIR)
|
all_labels = read_all_labels(INPUT_DIR)
|
||||||
labels_txt = (Path(INPUT_DIR) / "labels").read_text(encoding="utf-8", errors="replace")
|
labels_txt = (INPUT_DIR / "labels").read_text(encoding="utf-8", errors="replace")
|
||||||
enonce = enonce_total(INPUT_DIR)
|
enonce = enonce_total(INPUT_DIR)
|
||||||
|
|
||||||
if error_type == "wrong-label":
|
if error_type == "wrong-label":
|
||||||
|
|
@ -523,7 +528,7 @@ Here is a list of all possible labels. You need to answer with one of these :
|
||||||
if new_label == label:
|
if new_label == label:
|
||||||
res["error"] = ""
|
res["error"] = ""
|
||||||
return []
|
return []
|
||||||
new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{new_label}.pdf"
|
new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}.pdf"
|
||||||
if new_pdf_path.exists():
|
if new_pdf_path.exists():
|
||||||
tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.")
|
tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.")
|
||||||
res["error"] = f"wrg-lbl:{new_label}?exists"
|
res["error"] = f"wrg-lbl:{new_label}?exists"
|
||||||
|
|
@ -533,12 +538,12 @@ Here is a list of all possible labels. You need to answer with one of these :
|
||||||
shutil.move(str(pdf_path), str(new_pdf_path))
|
shutil.move(str(pdf_path), str(new_pdf_path))
|
||||||
# Since we moved the file, this Copie/label should not be taken
|
# Since we moved the file, this Copie/label should not be taken
|
||||||
# into account in the future, I think
|
# into account in the future, I think
|
||||||
idx = get_next_group_idx(INPUT_DIR, new_label)
|
idx = get_next_group_idx(new_label)
|
||||||
height = grouping.get_pdf_height(str(new_pdf_path))
|
height = grouping.get_pdf_height(str(new_pdf_path))
|
||||||
grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)],
|
grouping.create_jpg(new_label, idx, [(pid, str(new_pdf_path), height)],
|
||||||
INPUT_DIR)
|
GROUPS_DIR)
|
||||||
tprint(f"\t\tMaking {new_label} group {idx+1}")
|
tprint(f"\t\tMaking {new_label} group {idx+1}")
|
||||||
new_tasks.append((str(Path(INPUT_DIR) / new_label / f"Group_{idx+1}.jpg"),
|
new_tasks.append((str(GROUPS_DIR / new_label / f"Group_{idx+1}.jpg"),
|
||||||
new_label, False))
|
new_label, False))
|
||||||
|
|
||||||
elif error_type == "additional-answer":
|
elif error_type == "additional-answer":
|
||||||
|
|
@ -580,15 +585,15 @@ Here is a list of all possible labels. You need to answer with a list one of the
|
||||||
error += f"{add_label}??"
|
error += f"{add_label}??"
|
||||||
keep_error = True
|
keep_error = True
|
||||||
continue
|
continue
|
||||||
new_pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{add_label}.pdf"
|
new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{add_label}.pdf"
|
||||||
if not new_pdf_path.exists():
|
if not new_pdf_path.exists():
|
||||||
shutil.copy(str(pdf_path), str(new_pdf_path))
|
shutil.copy(str(pdf_path), str(new_pdf_path))
|
||||||
tprint(f"\t\tCopying Copie{pid} : {label} -> {add_label}")
|
tprint(f"\t\tCopying Copie{pid} : {label} -> {add_label}")
|
||||||
idx = get_next_group_idx(INPUT_DIR, add_label)
|
idx = get_next_group_idx(add_label)
|
||||||
tprint(f"\t\tMaking {add_label} group {idx+1}")
|
tprint(f"\t\tMaking {add_label} group {idx+1}")
|
||||||
height = grouping.get_pdf_height(str(new_pdf_path))
|
height = grouping.get_pdf_height(str(new_pdf_path))
|
||||||
grouping.create_jpg(add_label, idx, [(pid, str(new_pdf_path), height)], INPUT_DIR)
|
grouping.create_jpg(add_label, idx, [(pid, str(new_pdf_path), height)], GROUPS_DIR)
|
||||||
new_tasks.append((str(Path(INPUT_DIR) / add_label / f"Group_{idx+1}.jpg"),
|
new_tasks.append((str(GROUPS_DIR / add_label / f"Group_{idx+1}.jpg"),
|
||||||
add_label, False))
|
add_label, False))
|
||||||
error += f"(->){add_label}"
|
error += f"(->){add_label}"
|
||||||
keep_error = True
|
keep_error = True
|
||||||
|
|
@ -657,7 +662,7 @@ def process_single_task(task_tuple, precomputed_response=None):
|
||||||
res = p["result"]
|
res = p["result"]
|
||||||
yming, ymaxg, width_r = d_data[pid]
|
yming, ymaxg, width_r = d_data[pid]
|
||||||
|
|
||||||
pdf_path = Path(INPUT_DIR) / f"Copie{pid}" / f"{label}.pdf"
|
pdf_path = COPIES_DIR / f"Copie{pid}" / f"{label}.pdf"
|
||||||
if (not can_spawn_tasks) and res["error"] == "additional-answer":
|
if (not can_spawn_tasks) and res["error"] == "additional-answer":
|
||||||
tprint("\tSwallowing an additional-answer from a subsequent task.")
|
tprint("\tSwallowing an additional-answer from a subsequent task.")
|
||||||
res["error"]= ""
|
res["error"]= ""
|
||||||
|
|
@ -680,17 +685,22 @@ def process_single_task(task_tuple, precomputed_response=None):
|
||||||
pid, label, group_name)
|
pid, label, group_name)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if (ymin < yming - 50 or
|
if (ymin < yming - 50 or ymax > ymaxg + 50 or xmax / 1000 > width_r):
|
||||||
ymax > ymaxg + 50 or
|
|
||||||
xmax / 1000 > width_r):
|
|
||||||
needs_correction.append(i)
|
needs_correction.append(i)
|
||||||
break
|
break
|
||||||
|
if ymin < yming - 5:
|
||||||
|
ymin = yming - 5
|
||||||
|
b[0] = ymin * 1000 // total_height
|
||||||
|
if ymax > ymaxg + 5:
|
||||||
|
ymax = ymaxg + 5
|
||||||
|
b[2] = ymax * 1000 // total_height
|
||||||
|
|
||||||
|
|
||||||
if needs_correction:
|
if needs_correction:
|
||||||
tprint(f"\tBox anomalies detected for Copie {pid} {group_name}. \n\tRequesting isolated correction from Gemini Flash...")
|
tprint(f"\tBox anomalies detected for Copie {pid} {group_name}. \n\tRequesting isolated correction from Gemini Flash...")
|
||||||
try:
|
try:
|
||||||
res["feedback"] = correct_boxes_with_gemini(
|
res["feedback"] = correct_boxes_with_gemini(
|
||||||
pid, label, res["feedback"], INPUT_DIR,
|
pid, label, res["feedback"],
|
||||||
yming, ymaxg, width_r, total_height)
|
yming, ymaxg, width_r, total_height)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
tprint(f"\tCorrection failed for Copie {pid}, {group_name} : {e}\n\tRemoving the boxes")
|
tprint(f"\tCorrection failed for Copie {pid}, {group_name} : {e}\n\tRemoving the boxes")
|
||||||
|
|
@ -726,8 +736,8 @@ def process_single_task(task_tuple, precomputed_response=None):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if args.refaire:
|
if args.refaire:
|
||||||
refaire_path = Path(INPUT_DIR) / "refaire.json"
|
refaire_path = INPUT_DIR / "refaire.json"
|
||||||
overwritten_path = Path(INPUT_DIR) / "overwritten_correction.json"
|
overwritten_path = INPUT_DIR / "overwritten_correction.json"
|
||||||
|
|
||||||
if refaire_path.exists():
|
if refaire_path.exists():
|
||||||
with open(refaire_path, "r", encoding="utf-8") as f:
|
with open(refaire_path, "r", encoding="utf-8") as f:
|
||||||
|
|
@ -742,7 +752,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
for copie_name, labels in refaire_list:
|
for copie_name, labels in refaire_list:
|
||||||
pid = copie_name.replace("Copie", "")
|
pid = copie_name.replace("Copie", "")
|
||||||
copie_dir = Path(INPUT_DIR) / copie_name
|
copie_dir = COPIES_DIR / copie_name
|
||||||
|
|
||||||
# If list is empty, redo all labels available for this Copie
|
# If list is empty, redo all labels available for this Copie
|
||||||
if not labels:
|
if not labels:
|
||||||
|
|
@ -772,10 +782,10 @@ if __name__ == "__main__":
|
||||||
# 2. Make new group and add to tasks
|
# 2. Make new group and add to tasks
|
||||||
pdf_path = copie_dir / f"{label}.pdf"
|
pdf_path = copie_dir / f"{label}.pdf"
|
||||||
if pdf_path.exists():
|
if pdf_path.exists():
|
||||||
idx = get_next_group_idx(INPUT_DIR, label)
|
idx = get_next_group_idx(label)
|
||||||
height = grouping.get_pdf_height(str(pdf_path))
|
height = grouping.get_pdf_height(str(pdf_path))
|
||||||
grouping.create_jpg(label, idx, [(pid, str(pdf_path), height)], INPUT_DIR)
|
grouping.create_jpg(label, idx, [(pid, str(pdf_path), height)], GROUPS_DIR)
|
||||||
new_group_path = str(Path(INPUT_DIR) / label / f"Group_{idx+1}.jpg")
|
new_group_path = str(GROUPS_DIR / label / f"Group_{idx+1}.jpg")
|
||||||
tasks_to_process.append((new_group_path, label))
|
tasks_to_process.append((new_group_path, label))
|
||||||
|
|
||||||
if dirty_results:
|
if dirty_results:
|
||||||
|
|
@ -813,8 +823,8 @@ if __name__ == "__main__":
|
||||||
tasks_to_process = [] # Run nothing live if just `--batch`
|
tasks_to_process = [] # Run nothing live if just `--batch`
|
||||||
|
|
||||||
if batch_tasks:
|
if batch_tasks:
|
||||||
batch_flash_file = Path(INPUT_DIR) / "batch_requests_flash.jsonl"
|
batch_flash_file = INPUT_DIR / "batch_requests_flash.jsonl"
|
||||||
batch_pro_file = Path(INPUT_DIR) / "batch_requests_pro.jsonl"
|
batch_pro_file = INPUT_DIR / "batch_requests_pro.jsonl"
|
||||||
|
|
||||||
count_flash = 0
|
count_flash = 0
|
||||||
count_pro = 0
|
count_pro = 0
|
||||||
|
|
@ -873,7 +883,7 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
batched_responses = {}
|
batched_responses = {}
|
||||||
if args.deal_with_batched:
|
if args.deal_with_batched:
|
||||||
batch_results_path = Path(INPUT_DIR) / "batched_correction_result.jsonl"
|
batch_results_path = INPUT_DIR / "batched_correction_result.jsonl"
|
||||||
if batch_results_path.exists():
|
if batch_results_path.exists():
|
||||||
print(f"Loading batch results from {batch_results_path}...")
|
print(f"Loading batch results from {batch_results_path}...")
|
||||||
with open(batch_results_path, "r", encoding="utf-8") as f:
|
with open(batch_results_path, "r", encoding="utf-8") as f:
|
||||||
|
|
|
||||||
26
cutleft.py
26
cutleft.py
|
|
@ -20,17 +20,35 @@ if len(sys.argv) < 2:
|
||||||
path_arg = sys.argv[1]
|
path_arg = sys.argv[1]
|
||||||
files = []
|
files = []
|
||||||
INPUT_DIR = ""
|
INPUT_DIR = ""
|
||||||
|
COPIES_DIR = ""
|
||||||
|
|
||||||
if os.path.isfile(path_arg) and path_arg.lower().endswith('.pdf'):
|
if os.path.isfile(path_arg) and path_arg.lower().endswith('.pdf'):
|
||||||
INPUT_DIR = os.path.dirname(path_arg)
|
COPIES_DIR = os.path.abspath(os.path.dirname(path_arg))
|
||||||
|
# If the file is inside a "Copies" folder, set INPUT_DIR to the parent
|
||||||
|
if os.path.basename(COPIES_DIR).lower() == 'copies':
|
||||||
|
INPUT_DIR = os.path.dirname(COPIES_DIR)
|
||||||
|
else:
|
||||||
|
INPUT_DIR = COPIES_DIR
|
||||||
files = [os.path.basename(path_arg)]
|
files = [os.path.basename(path_arg)]
|
||||||
elif os.path.isdir(path_arg):
|
elif os.path.isdir(path_arg):
|
||||||
INPUT_DIR = path_arg
|
# Support passing either the base dir or the Copies dir directly
|
||||||
files = sorted([f for f in os.listdir(INPUT_DIR) if f.lower().endswith('.pdf') and
|
abs_path = os.path.abspath(path_arg)
|
||||||
|
if os.path.basename(abs_path).lower() == 'copies':
|
||||||
|
COPIES_DIR = abs_path
|
||||||
|
INPUT_DIR = os.path.dirname(abs_path)
|
||||||
|
else:
|
||||||
|
INPUT_DIR = abs_path
|
||||||
|
COPIES_DIR = os.path.join(INPUT_DIR, 'Copies')
|
||||||
|
|
||||||
|
if os.path.exists(COPIES_DIR):
|
||||||
|
files = sorted([f for f in os.listdir(COPIES_DIR) if f.lower().endswith('.pdf') and
|
||||||
"nonc" not in f.lower()])
|
"nonc" not in f.lower()])
|
||||||
|
else:
|
||||||
|
sys.exit(f"Error: Could not find 'Copies' directory inside {INPUT_DIR}")
|
||||||
else:
|
else:
|
||||||
sys.exit("Error: Input must be a directory or a PDF file.")
|
sys.exit("Error: Input must be a directory or a PDF file.")
|
||||||
|
|
||||||
|
|
||||||
OUTPUT_DIR = os.path.join(INPUT_DIR, 'Cutleft')
|
OUTPUT_DIR = os.path.join(INPUT_DIR, 'Cutleft')
|
||||||
|
|
||||||
if not os.path.exists(OUTPUT_DIR):
|
if not os.path.exists(OUTPUT_DIR):
|
||||||
|
|
@ -90,7 +108,7 @@ pdf_cache_lock = threading.Lock()
|
||||||
|
|
||||||
@lru_cache(maxsize=3)
|
@lru_cache(maxsize=3)
|
||||||
def _get_pdf_pages_cached(filename):
|
def _get_pdf_pages_cached(filename):
|
||||||
pdf_path = os.path.join(INPUT_DIR, filename)
|
pdf_path = os.path.join(COPIES_DIR, filename)
|
||||||
return convert_from_path(pdf_path)
|
return convert_from_path(pdf_path)
|
||||||
|
|
||||||
def get_pdf_pages(filename):
|
def get_pdf_pages(filename):
|
||||||
|
|
|
||||||
|
|
@ -250,7 +250,7 @@ def process_copy_group(group_key, files):
|
||||||
for image_file in files:
|
for image_file in files:
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
base_name = image_file.stem
|
base_name = image_file.stem
|
||||||
output_json = INPUT_DIR / f"{base_name}.json"
|
output_json = INPUT_DIR / "Copies" / f"{base_name}.json"
|
||||||
|
|
||||||
# Check existing
|
# Check existing
|
||||||
if output_json.exists() and not args.overwrite:
|
if output_json.exists() and not args.overwrite:
|
||||||
|
|
|
||||||
21
grouping.py
21
grouping.py
|
|
@ -3,6 +3,7 @@ import json
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
@ -213,9 +214,9 @@ def create_jpg(identifier, group_index, group, root_dir):
|
||||||
from utils import natural_key
|
from utils import natural_key
|
||||||
|
|
||||||
|
|
||||||
def process_identifier(identifier, files_info, root_dir):
|
def process_identifier(identifier, files_info, output_dir):
|
||||||
# Clear output directory if it exists
|
# Clear output directory if it exists
|
||||||
target_folder = os.path.join(root_dir, identifier)
|
target_folder = os.path.join(output_dir, identifier)
|
||||||
if os.path.exists(target_folder):
|
if os.path.exists(target_folder):
|
||||||
shutil.rmtree(target_folder)
|
shutil.rmtree(target_folder)
|
||||||
os.makedirs(target_folder, exist_ok=True)
|
os.makedirs(target_folder, exist_ok=True)
|
||||||
|
|
@ -224,27 +225,31 @@ def process_identifier(identifier, files_info, root_dir):
|
||||||
file_groups = group_files(files_info)
|
file_groups = group_files(files_info)
|
||||||
|
|
||||||
for idx, group in enumerate(file_groups):
|
for idx, group in enumerate(file_groups):
|
||||||
create_jpg(identifier, idx, group, root_dir)
|
create_jpg(identifier, idx, group, output_dir)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print("Usage: python app.py <Path_to_Dir>")
|
print("Usage: python app.py <Path_to_Dir>")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
root_dir = sys.argv[1]
|
root_dir = Path(sys.argv[1])
|
||||||
|
|
||||||
|
copies_dir = root_dir / "Copies"
|
||||||
|
par_label_dir = root_dir / "Par label"
|
||||||
|
|
||||||
print("Scanning files...")
|
print("Scanning files...")
|
||||||
data = collect_files(root_dir)
|
data = collect_files(copies_dir)
|
||||||
|
|
||||||
print(f"Found {len(data)} identifiers. Processing...")
|
print(f"Found {len(data)} identifiers. Processing...")
|
||||||
|
|
||||||
# Sort identifiers naturally
|
# Sort identifiers naturally
|
||||||
sorted_identifiers = sorted(data.keys(), key=natural_key)
|
sorted_identifiers = sorted(data.keys(), key=natural_key)
|
||||||
|
|
||||||
# Process using 4 threads
|
# Process using 8 threads
|
||||||
with ThreadPoolExecutor(max_workers=4) as executor:
|
with ThreadPoolExecutor(max_workers=8) as executor:
|
||||||
for identifier in sorted_identifiers:
|
for identifier in sorted_identifiers:
|
||||||
executor.submit(process_identifier, identifier, data[identifier], root_dir)
|
executor.submit(process_identifier, identifier, data[identifier],
|
||||||
|
par_label_dir)
|
||||||
|
|
||||||
print("Done.")
|
print("Done.")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,9 @@ class PDFPreviewer:
|
||||||
# Check for existing original in backup and restore if found
|
# Check for existing original in backup and restore if found
|
||||||
dir_name = os.path.dirname(os.path.abspath(path))
|
dir_name = os.path.dirname(os.path.abspath(path))
|
||||||
file_name = os.path.basename(path)
|
file_name = os.path.basename(path)
|
||||||
|
if os.path.basename(dir_name) == "Copies":
|
||||||
|
dir_name = os.path.dirname(dir_name)
|
||||||
|
path = os.path.join(dir_name, file_name)
|
||||||
backup_path = os.path.join(dir_name, "Copies Originales", file_name)
|
backup_path = os.path.join(dir_name, "Copies Originales", file_name)
|
||||||
|
|
||||||
if os.path.exists(backup_path):
|
if os.path.exists(backup_path):
|
||||||
|
|
@ -313,9 +316,12 @@ class PDFPreviewer:
|
||||||
file_name = os.path.basename(abs_path)
|
file_name = os.path.basename(abs_path)
|
||||||
|
|
||||||
backup_dir = os.path.join(dir_name, "Copies Originales")
|
backup_dir = os.path.join(dir_name, "Copies Originales")
|
||||||
|
copies_dir = os.path.join(dir_name, "Copies")
|
||||||
os.makedirs(backup_dir, exist_ok=True)
|
os.makedirs(backup_dir, exist_ok=True)
|
||||||
|
os.makedirs(copies_dir, exist_ok=True)
|
||||||
|
|
||||||
backup_path = os.path.join(backup_dir, file_name)
|
backup_path = os.path.join(backup_dir, file_name)
|
||||||
|
copies_path = os.path.join(copies_dir, file_name)
|
||||||
|
|
||||||
# Remove backup if it already exists (overwrite)
|
# Remove backup if it already exists (overwrite)
|
||||||
if os.path.exists(backup_path):
|
if os.path.exists(backup_path):
|
||||||
|
|
@ -325,7 +331,7 @@ class PDFPreviewer:
|
||||||
shutil.move(self.pdf_path, backup_path)
|
shutil.move(self.pdf_path, backup_path)
|
||||||
|
|
||||||
# Move the temp output file to replace the original
|
# Move the temp output file to replace the original
|
||||||
shutil.move(self.final_file, self.pdf_path)
|
shutil.move(self.final_file, copies_path)
|
||||||
|
|
||||||
# print(f"Original moved to {backup_path}, new file saved at {self.pdf_path}")
|
# print(f"Original moved to {backup_path}, new file saved at {self.pdf_path}")
|
||||||
|
|
||||||
|
|
|
||||||
31
plotting.py
31
plotting.py
|
|
@ -101,7 +101,7 @@ def worker_thread(base_dir, files_to_process, all_labels):
|
||||||
previous_copie = None
|
previous_copie = None
|
||||||
last_label_index = None
|
last_label_index = None
|
||||||
for img_path in files_to_process:
|
for img_path in files_to_process:
|
||||||
json_path = base_dir / f"{img_path.stem}.json"
|
json_path = base_dir / "Copies" / f"{img_path.stem}.json"
|
||||||
copie_part = int(img_path.stem[-2:])
|
copie_part = int(img_path.stem[-2:])
|
||||||
copie = img_path.stem[:-3]
|
copie = img_path.stem[:-3]
|
||||||
if copie != previous_copie:
|
if copie != previous_copie:
|
||||||
|
|
@ -222,7 +222,7 @@ class ImageViewer:
|
||||||
def save_current_batch(self):
|
def save_current_batch(self):
|
||||||
"""Writes the accumulated data to the main JSON file."""
|
"""Writes the accumulated data to the main JSON file."""
|
||||||
if self.active_copie_name and self.accumulated_results:
|
if self.active_copie_name and self.accumulated_results:
|
||||||
main_json_path = self.base_dir / f"{self.active_copie_name}.json"
|
main_json_path = self.base_dir / "Copies" / f"{self.active_copie_name}.json"
|
||||||
print(f"Writing aggregated result to {main_json_path}")
|
print(f"Writing aggregated result to {main_json_path}")
|
||||||
with open(main_json_path, 'w') as f:
|
with open(main_json_path, 'w') as f:
|
||||||
json.dump(self.accumulated_results, f)
|
json.dump(self.accumulated_results, f)
|
||||||
|
|
@ -327,7 +327,7 @@ class ImageViewer:
|
||||||
def on_open_ori_pdf(self, event):
|
def on_open_ori_pdf(self, event):
|
||||||
if self.is_viewing and self.current_json_path:
|
if self.is_viewing and self.current_json_path:
|
||||||
new_filename = self.current_json_path.stem.split('_')[0] + ".pdf"
|
new_filename = self.current_json_path.stem.split('_')[0] + ".pdf"
|
||||||
pdf_path = self.current_json_path.parent / "Copies Originales" / new_filename
|
pdf_path = self.base_dir / "Copies Originales" / new_filename
|
||||||
print(f"Opening {pdf_path}")
|
print(f"Opening {pdf_path}")
|
||||||
subprocess.Popen(['xdg-open', str(pdf_path.absolute())])
|
subprocess.Popen(['xdg-open', str(pdf_path.absolute())])
|
||||||
|
|
||||||
|
|
@ -363,20 +363,21 @@ if __name__ == "__main__":
|
||||||
files_to_process = []
|
files_to_process = []
|
||||||
|
|
||||||
if input_path.is_file():
|
if input_path.is_file():
|
||||||
|
# Correctly identify base_dir if we are in 'Copies' or 'Cutleft'
|
||||||
base_dir = input_path.parent
|
if input_path.parent.name in ["Copies", "Cutleft"]:
|
||||||
stem = input_path.stem
|
|
||||||
img_path = base_dir / "Cutleft" / f"{stem}.jpg"
|
|
||||||
files_to_process = [img_path]
|
|
||||||
if not img_path.exists() and input_path.parent.name == "Cutleft":
|
|
||||||
base_dir = input_path.parent.parent
|
base_dir = input_path.parent.parent
|
||||||
img_path = input_path
|
else:
|
||||||
files_to_process = [img_path]
|
base_dir = input_path.parent
|
||||||
if not img_path.exists():
|
|
||||||
# We're given Copie01.pdf, look for parts
|
stem = input_path.stem
|
||||||
cutleft_dir = base_dir / "Cutleft"
|
cutleft_dir = base_dir / "Cutleft"
|
||||||
files_to_process = sorted(list(cutleft_dir.glob(f"{img_path.stem}_*.jpg")),
|
img_path = cutleft_dir / f"{stem}.jpg"
|
||||||
key=natural_key)
|
|
||||||
|
if img_path.exists():
|
||||||
|
files_to_process = [img_path]
|
||||||
|
else:
|
||||||
|
# We're given something like Copie01.pdf, look for its split image parts
|
||||||
|
files_to_process = sorted(list(cutleft_dir.glob(f"{stem}_*.jpg")), key=natural_key)
|
||||||
else:
|
else:
|
||||||
base_dir = input_path
|
base_dir = input_path
|
||||||
cutleft_dir = base_dir / "Cutleft"
|
cutleft_dir = base_dir / "Cutleft"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import os
|
||||||
import json
|
import json
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import shutil
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
from PIL import Image, ImageChops, ImageFilter
|
from PIL import Image, ImageChops, ImageFilter
|
||||||
Image.MAX_IMAGE_PIXELS = None
|
Image.MAX_IMAGE_PIXELS = None
|
||||||
from pdf2image import convert_from_path
|
from pdf2image import convert_from_path
|
||||||
|
|
@ -99,7 +100,7 @@ def detect_checks_and_notes(output_dir):
|
||||||
density = changed_pixels / roi.size
|
density = changed_pixels / roi.size
|
||||||
|
|
||||||
if density > DENSITY_THRESHOLD:
|
if density > DENSITY_THRESHOLD:
|
||||||
print("A checked box !", density, b)
|
# print("A checked box !", density, b)
|
||||||
actions.append(box)
|
actions.append(box)
|
||||||
# It's checked, so we mask this area out for manual notes
|
# It's checked, so we mask this area out for manual notes
|
||||||
# Expand mask slightly to catch sloppy ticks
|
# Expand mask slightly to catch sloppy ticks
|
||||||
|
|
@ -254,7 +255,7 @@ def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_laye
|
||||||
|
|
||||||
# B. Regenerate Label Image
|
# B. Regenerate Label Image
|
||||||
# We always regenerate to ensure Concat.jpg is consistent with any modifications
|
# We always regenerate to ensure Concat.jpg is consistent with any modifications
|
||||||
pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
|
pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
|
||||||
if not os.path.exists(pdf_path): continue
|
if not os.path.exists(pdf_path): continue
|
||||||
|
|
||||||
(base_img, _, _) = annotating.make_base_image(pdf_path)
|
(base_img, _, _) = annotating.make_base_image(pdf_path)
|
||||||
|
|
@ -328,7 +329,6 @@ def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_laye
|
||||||
full_img.save(os.path.join(output_dir, "Concat_F.jpg"))
|
full_img.save(os.path.join(output_dir, "Concat_F.jpg"))
|
||||||
print(f" Saved regenerated Concat_F.jpg")
|
print(f" Saved regenerated Concat_F.jpg")
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from utils import read_all_labels
|
from utils import read_all_labels
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
|
|
|
||||||
|
|
@ -85,7 +85,8 @@ def save_paginated_pdf(image_groups, output_path):
|
||||||
if pages:
|
if pages:
|
||||||
pages[0].save(output_path, "PDF", resolution=100.0, save_all=True, append_images=pages[1:])
|
pages[0].save(output_path, "PDF", resolution=100.0, save_all=True, append_images=pages[1:])
|
||||||
|
|
||||||
def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, label_notes, all_labels):
|
def apply_actions_and_regenerate_grouped(root_dir, data, student_id,
|
||||||
|
actions, label_notes, all_labels):
|
||||||
"""
|
"""
|
||||||
Modifies data based on actions, pastes label-specific note crops,
|
Modifies data based on actions, pastes label-specific note crops,
|
||||||
regenerates label images for consistency, saves dirty ones,
|
regenerates label images for consistency, saves dirty ones,
|
||||||
|
|
@ -161,7 +162,7 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, la
|
||||||
result = content['result']
|
result = content['result']
|
||||||
d_notes[label] = str(result.get('score', 0))
|
d_notes[label] = str(result.get('score', 0))
|
||||||
|
|
||||||
pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
|
pdf_path = Path(root_dir) / "Copies" / f"Copie{student_id}" / f"{label}.pdf"
|
||||||
if not os.path.exists(pdf_path): continue
|
if not os.path.exists(pdf_path): continue
|
||||||
|
|
||||||
(base_img, _, _) = annotating.make_base_image(pdf_path)
|
(base_img, _, _) = annotating.make_base_image(pdf_path)
|
||||||
|
|
@ -204,13 +205,15 @@ def apply_actions_and_regenerate_grouped(root_dir, data, student_id, actions, la
|
||||||
concat_list.append(final_img)
|
concat_list.append(final_img)
|
||||||
|
|
||||||
perfect_no_comment = True
|
perfect_no_comment = True
|
||||||
if float(d_notes[label]) != 4.0:
|
if float(d_notes[label]) < 4.0:
|
||||||
perfect_no_comment = False
|
perfect_no_comment = False
|
||||||
else:
|
else:
|
||||||
if len(result.get('feedback', [])) != 0:
|
lfb = result.get('feedback', [])
|
||||||
|
for e in lfb:
|
||||||
|
if "to_delete" not in e or not e["to_delete"]:
|
||||||
perfect_no_comment = False
|
perfect_no_comment = False
|
||||||
|
|
||||||
if not perfect_no_comment:
|
if not perfect_no_comment or has_notes:
|
||||||
extras = get_extra_pdfs_as_images(root_dir, label, annotating)
|
extras = get_extra_pdfs_as_images(root_dir, label, annotating)
|
||||||
extras.append(final_img)
|
extras.append(final_img)
|
||||||
concat_list_F.append(extras)
|
concat_list_F.append(extras)
|
||||||
|
|
@ -333,7 +336,8 @@ if __name__ == "__main__":
|
||||||
if hmax > hmin:
|
if hmax > hmin:
|
||||||
crop = notes_img.crop((0, hmin, notes_img.width, hmax))
|
crop = notes_img.crop((0, hmin, notes_img.width, hmax))
|
||||||
if has_significant_notes(crop):
|
if has_significant_notes(crop):
|
||||||
notes_by_student[sid][lbl] = {'img': crop, 'old_header_h': img_info.get("header_height", 0)}
|
notes_by_student[sid][lbl] = {'img': crop,
|
||||||
|
'old_header_h': img_info.get("header_height", 0)}
|
||||||
|
|
||||||
|
|
||||||
def process_refaire_entry(sid, r_labels):
|
def process_refaire_entry(sid, r_labels):
|
||||||
|
|
@ -364,7 +368,9 @@ if __name__ == "__main__":
|
||||||
if hmax > hmin:
|
if hmax > hmin:
|
||||||
crop = b_notes_img.crop((0, hmin, b_notes_img.width, hmax))
|
crop = b_notes_img.crop((0, hmin, b_notes_img.width, hmax))
|
||||||
if has_significant_notes(crop):
|
if has_significant_notes(crop):
|
||||||
notes_by_student[sid][lbl] = {'img': crop, 'old_header_h': img_info.get("header_height", 0)}
|
notes_by_student[sid][lbl] = \
|
||||||
|
{'img': crop,
|
||||||
|
'old_header_h': img_info.get("header_height", 0)}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ def decode_json(pdf_file):
|
||||||
def split_an_interro(base_dir, input_pdf, coords_list):
|
def split_an_interro(base_dir, input_pdf, coords_list):
|
||||||
doc = fitz.open(input_pdf)
|
doc = fitz.open(input_pdf)
|
||||||
|
|
||||||
output_dir = base_dir / input_pdf.stem
|
output_dir = base_dir / "Copies" / input_pdf.stem
|
||||||
generated_files = set()
|
generated_files = set()
|
||||||
parts_by_label = defaultdict(list)
|
parts_by_label = defaultdict(list)
|
||||||
|
|
||||||
|
|
@ -197,10 +197,13 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
if input_arg.is_file():
|
if input_arg.is_file():
|
||||||
base_dir = input_arg.parent
|
base_dir = input_arg.parent
|
||||||
|
if base_dir.name == "Copies":
|
||||||
|
base_dir = base_dir.parent
|
||||||
pdf_files = [input_arg]
|
pdf_files = [input_arg]
|
||||||
elif input_arg.is_dir():
|
elif input_arg.is_dir():
|
||||||
base_dir = input_arg
|
base_dir = input_arg
|
||||||
pdf_files = sorted(base_dir.glob("*.pdf"))
|
copies_dir = base_dir / "Copies"
|
||||||
|
pdf_files = sorted(copies_dir.glob("*.pdf"))
|
||||||
else:
|
else:
|
||||||
print(f"Error: {input_arg} is not a valid file or directory.")
|
print(f"Error: {input_arg} is not a valid file or directory.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
|
||||||
4
utils.py
4
utils.py
|
|
@ -5,9 +5,7 @@ def natural_key(text):
|
||||||
return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))]
|
return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))]
|
||||||
|
|
||||||
def read_all_labels(base_dir):
|
def read_all_labels(base_dir):
|
||||||
return sorted(list(filter(None,
|
return list(filter(None, (Path(base_dir) / "labels").read_text().splitlines()))
|
||||||
(Path(base_dir) / "labels").read_text().splitlines())),
|
|
||||||
key = natural_key)
|
|
||||||
|
|
||||||
def enonce_total(base_dir):
|
def enonce_total(base_dir):
|
||||||
text_dir = Path(base_dir) / 'Text'
|
text_dir = Path(base_dir) / 'Text'
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue