Miscs
parent
a80187ba80
commit
27c0dae20e
16
Readme.org
16
Readme.org
|
|
@ -1,7 +1,7 @@
|
||||||
#+title: Script
|
#+title: Script
|
||||||
#+author: Sébastien Miquel
|
#+author: Sébastien Miquel
|
||||||
#+date: 14-03-2026
|
#+date: 14-03-2026
|
||||||
# Time-stamp: <06-06-26 10:10>
|
# Time-stamp: <06-06-26 16:23>
|
||||||
#+OPTIONS:
|
#+OPTIONS:
|
||||||
|
|
||||||
* Méta
|
* Méta
|
||||||
|
|
@ -85,7 +85,17 @@ export GEMINI_API_KEY=…
|
||||||
6. Suivre les étapes plus bas.
|
6. Suivre les étapes plus bas.
|
||||||
|
|
||||||
* Étapes et Script
|
* Étapes et Script
|
||||||
** Prétraitement
|
** Prétraitement de l'énoncé
|
||||||
|
|
||||||
|
- Dans le dossier de l'évaluation, mettre :
|
||||||
|
+ `enonce.pdf`
|
||||||
|
+ `enonce.tex`
|
||||||
|
+ `correction.tex`.
|
||||||
|
- `python gemini_for_enonce.py Interro`
|
||||||
|
Se charge de créer des dossiers `Text` et `Sol` avec
|
||||||
|
+ Le fichier `Text` contient
|
||||||
|
|
||||||
|
** Prétraitement des copies
|
||||||
|
|
||||||
1. =./rotate_all.sh Interro=
|
1. =./rotate_all.sh Interro=
|
||||||
(facultatif)
|
(facultatif)
|
||||||
|
|
@ -248,7 +258,7 @@ OU
|
||||||
4. On peut faire des changements manuels aux =score.json= ici, puis
|
4. On peut faire des changements manuels aux =score.json= ici, puis
|
||||||
- `python reading_annotations.py --update-score Interro`
|
- `python reading_annotations.py --update-score Interro`
|
||||||
- `python reading_grouped_annotations.py --update-score Interro`
|
- `python reading_grouped_annotations.py --update-score Interro`
|
||||||
pour mettre à jour les scores dans les images.
|
pour mettre à jour les scores dans les images.
|
||||||
4. (gestion perso)
|
4. (gestion perso)
|
||||||
+ =gestion_classe ne= pour créer l'interro puis
|
+ =gestion_classe ne= pour créer l'interro puis
|
||||||
+ =gestion_classe we= (set barème here)
|
+ =gestion_classe we= (set barème here)
|
||||||
|
|
|
||||||
133
correction.py
133
correction.py
|
|
@ -186,7 +186,7 @@ def call_gemini_with_retries(model_id, contents, config,
|
||||||
tprint(f"\tGemini Pro minute limit hit. Waiting {wait_time:.1f}s...")
|
tprint(f"\tGemini Pro minute limit hit. Waiting {wait_time:.1f}s...")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
continue # Retry same model
|
continue # Retry same model
|
||||||
|
|
||||||
# Immediately fallback to Flash without waiting if it's a Pro quota error
|
# Immediately fallback to Flash without waiting if it's a Pro quota error
|
||||||
if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id:
|
if is_quota_error and model_id == MODEL_ID_pro and fallback_model_id:
|
||||||
tprint(f"\tGemini Pro quota hit ({e}). \n\n\tFalling back to Flash permanently...")
|
tprint(f"\tGemini Pro quota hit ({e}). \n\n\tFalling back to Flash permanently...")
|
||||||
|
|
@ -269,8 +269,9 @@ def handle_label_errors(pid, label, res, pdf_path):
|
||||||
new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}_new.pdf"
|
new_pdf_path = COPIES_DIR / f"Copie{pid}" / f"{new_label}_new.pdf"
|
||||||
|
|
||||||
if base_new_pdf_path.exists() or new_pdf_path.exists():
|
if base_new_pdf_path.exists() or new_pdf_path.exists():
|
||||||
tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists.")
|
tprint(f"\t\tCopie{pid} tried to move wrong {label} to {new_label}, but it already exists. Delaying.")
|
||||||
res["error"] = f"wrg-lbl:{new_label}?exists"
|
# res["error"] = f"wrg-lbl:{new_label}?exists"
|
||||||
|
res["error"] = f"wrg-lbl:{new_label}?delayed"
|
||||||
else:
|
else:
|
||||||
res["error"] = f"wrg-lbl-moved-to:{new_label}"
|
res["error"] = f"wrg-lbl-moved-to:{new_label}"
|
||||||
tprint(f"\t\tCopie{pid} : moving wrong {label} to {new_label}.")
|
tprint(f"\t\tCopie{pid} : moving wrong {label} to {new_label}.")
|
||||||
|
|
@ -323,8 +324,9 @@ def handle_label_errors(pid, label, res, pdf_path):
|
||||||
keep_error = True
|
keep_error = True
|
||||||
else:
|
else:
|
||||||
keep_error = True
|
keep_error = True
|
||||||
error += f"(xx){add_label}"
|
# error += f"(xx){add_label}"
|
||||||
tprint(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}")
|
error += f"(delayed){add_label}"
|
||||||
|
tprint(f"\t\tAlready present (not copied) Copie{pid} : {label} -> {add_label}. Delaying.")
|
||||||
if not keep_error:
|
if not keep_error:
|
||||||
res["error"] = ""
|
res["error"] = ""
|
||||||
else:
|
else:
|
||||||
|
|
@ -487,6 +489,80 @@ def process_single_task(task_tuple, precomputed_response=None):
|
||||||
finally:
|
finally:
|
||||||
flush_thread_log()
|
flush_thread_log()
|
||||||
|
|
||||||
|
def resolve_delayed_moves():
    """Retry the label moves/copies that were postponed because the target existed.

    Walks every entry of the module-level ``results`` mapping and looks at the
    ``error`` string of each result:

    * ``wrg-lbl:<label>?delayed`` -- a wrong-label answer whose move to
      ``<label>`` was postponed;
    * ``al:...(delayed)<label>`` -- an additional answer whose copy to
      ``<label>`` was postponed (several ``(delayed)`` tags may be present).

    For each of them, if neither ``<label>.pdf`` nor ``<label>_new.pdf`` exists
    in the copy's directory any more, the PDF is copied to ``<label>_new.pdf``,
    a new group image is created for it and a task is queued.  For wrong
    labels, the source PDF is additionally renamed to ``<label>_old.pdf`` and
    the result gets ``suffixe = "_old"``.

    When at least one task was created, ``results`` is written back to
    ``output_path`` so that the rewritten error tags are persisted.

    Returns:
        list: task tuples ``(group_jpg_path, label, False)``, one per resolved
        move/copy; empty when nothing could be resolved.
    """
    # Function-scope import: the file's import header is not visible from here,
    # and importing once here avoids re-running the statement inside the loops.
    import re

    wrong_prefix = "wrg-lbl:"
    delayed_add_re = re.compile(r'\(delayed\)([^?()]+)')
    new_tasks = []

    def queue_group(target_label, target_pdf, pid):
        # Build the group image for the freshly copied PDF and queue its task.
        idx = get_next_group_idx(target_label)
        height = grouping.get_pdf_height(str(target_pdf))
        grouping.create_jpg(target_label, idx, [(pid, str(target_pdf), height)], GROUPS_DIR)
        # NOTE(review): the task points at Group_{idx+1}.jpg; presumably
        # create_jpg numbers its output from idx+1 -- confirm against grouping.
        new_tasks.append((str(GROUPS_DIR / target_label / f"Group_{idx+1}.jpg"), target_label, False))

    with io_lock:
        for label, batches in results.items():
            for batch in batches:
                for p in batch:
                    err = p.get("result", {}).get("error", "")
                    if not err or ("?delayed" not in err and "(delayed)" not in err):
                        continue

                    pid = p["id"]
                    copy_dir = COPIES_DIR / f"Copie{pid}"

                    # Locate the source PDF: plain name first, then the
                    # "_new" and "_old" variants.
                    pdf_path = copy_dir / f"{label}.pdf"
                    if not pdf_path.exists():
                        for suffix in ("_new", "_old"):
                            candidate = copy_dir / f"{label}{suffix}.pdf"
                            if candidate.exists():
                                pdf_path = candidate
                                break
                        else:
                            # Nothing to copy from: leave the entry delayed
                            # instead of crashing in shutil.copy below.
                            tprint(f"Cannot resolve delayed move: no PDF found for Copie{pid} {label}")
                            continue

                    # 1. Wrong-label resolution
                    if err.startswith(wrong_prefix) and "?delayed" in err:
                        # Slice on the known prefix/suffix rather than on ':'
                        # and '?', so labels containing those survive intact.
                        new_label = err[len(wrong_prefix):].partition("?delayed")[0]
                        base_new_pdf_path = copy_dir / f"{new_label}.pdf"
                        new_pdf_path = copy_dir / f"{new_label}_new.pdf"

                        # Only act once the slot was freed (the previous
                        # occupant has been moved to "_old").
                        if base_new_pdf_path.exists() or new_pdf_path.exists():
                            continue

                        tprint(f"Resolving delayed move: Copie{pid} {label} -> {new_label}")
                        shutil.copy(str(pdf_path), str(new_pdf_path))
                        old_pdf_path = pdf_path.with_name(f"{label}_old.pdf")
                        if pdf_path != old_pdf_path:
                            shutil.move(str(pdf_path), str(old_pdf_path))

                        # Update the record only after the files were handled,
                        # so a failed copy does not leave a false "moved" tag.
                        p["result"]["error"] = f"wrg-lbl-moved-to:{new_label}"
                        p["result"]["suffixe"] = "_old"  # Very important: lets it be ignored afterwards

                        queue_group(new_label, new_pdf_path, pid)

                    # 2. Additional-answer resolution
                    elif err.startswith("al:") and "(delayed)" in err:
                        new_err = err
                        for add_label in delayed_add_re.findall(err):
                            base_add_pdf_path = copy_dir / f"{add_label}.pdf"
                            add_pdf_path = copy_dir / f"{add_label}_new.pdf"
                            if base_add_pdf_path.exists() or add_pdf_path.exists():
                                continue

                            tprint(f"Resolving delayed additional-answer: Copie{pid} {label} -> {add_label}")
                            shutil.copy(str(pdf_path), str(add_pdf_path))
                            new_err = new_err.replace(f"(delayed){add_label}", f"(->){add_label}")
                            queue_group(add_label, add_pdf_path, pid)

                        if new_err != err:
                            p["result"]["error"] = new_err

    if new_tasks:
        # Persist the rewritten error strings (delayed tags removed).
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)

    return new_tasks
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if args.refaire:
|
if args.refaire:
|
||||||
refaire_path = INPUT_DIR / "refaire.json"
|
refaire_path = INPUT_DIR / "refaire.json"
|
||||||
|
|
@ -666,24 +742,37 @@ if __name__ == "__main__":
|
||||||
else:
|
else:
|
||||||
print(f"Warning: Batch results file {batch_results_path} not found.", file=sys.stderr)
|
print(f"Warning: Batch results file {batch_results_path} not found.", file=sys.stderr)
|
||||||
|
|
||||||
print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...")
|
made_progress = True
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor:
|
while tasks_to_process or made_progress:
|
||||||
futures = {}
|
if tasks_to_process:
|
||||||
for task in tasks_to_process:
|
print(f"Starting processing on {len(tasks_to_process)} tasks with {NB_THREADS} threads...")
|
||||||
file_path = task[0]
|
with concurrent.futures.ThreadPoolExecutor(max_workers=NB_THREADS) as executor:
|
||||||
precomp = batched_responses.get(file_path)
|
futures = {}
|
||||||
futures[executor.submit(process_single_task, task, precomp)] = task
|
for task in tasks_to_process:
|
||||||
|
file_path = task[0]
|
||||||
|
precomp = batched_responses.get(file_path)
|
||||||
|
futures[executor.submit(process_single_task, task, precomp)] = task
|
||||||
|
|
||||||
# Process tasks as they complete, allowing dynamic task addition
|
for future in concurrent.futures.as_completed(futures):
|
||||||
for future in concurrent.futures.as_completed(futures):
|
try:
|
||||||
try:
|
new_generated_tasks = future.result()
|
||||||
new_generated_tasks = future.result()
|
if new_generated_tasks:
|
||||||
if new_generated_tasks:
|
for new_task in new_generated_tasks:
|
||||||
for new_task in new_generated_tasks:
|
futures[executor.submit(process_single_task, new_task)] = new_task
|
||||||
# New tasks from wrong-label/additional-answer will fallback to live API
|
except Exception as e:
|
||||||
futures[executor.submit(process_single_task, new_task)] = new_task
|
print(f"Exception during task execution: {e}", file=sys.stderr)
|
||||||
except Exception as e:
|
|
||||||
print(f"Exception during task execution: {e}", file=sys.stderr)
|
tasks_to_process = [] # Vider la liste une fois traitée
|
||||||
|
|
||||||
|
# Après avoir traité toutes les tâches actuelles (live ou batched),
|
||||||
|
# on tente de débloquer les mouvements qui étaient en attente
|
||||||
|
delayed_tasks = resolve_delayed_moves()
|
||||||
|
if delayed_tasks:
|
||||||
|
print(f"Resolved {len(delayed_tasks)} delayed moves! Running executor for new tasks...")
|
||||||
|
tasks_to_process.extend(delayed_tasks)
|
||||||
|
made_progress = True
|
||||||
|
else:
|
||||||
|
made_progress = False
|
||||||
|
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
print("Time elapsed : ", end_time - start_time)
|
print("Time elapsed : ", end_time - start_time)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,6 @@
|
||||||
|
import shlex
|
||||||
import os
|
import os
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -7,7 +9,9 @@ from typing import List
|
||||||
from google import genai
|
from google import genai
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
|
|
||||||
MODEL_ID = "gemini-3-flash-preview"
|
# Bug : l'output est limité à 8k token…
|
||||||
|
# MODEL_ID = "gemini-3-flash-preview"
|
||||||
|
MODEL_ID = "gemini-3.1-flash-lite"
|
||||||
api_key = os.environ.get("GEMINI_API_KEY")
|
api_key = os.environ.get("GEMINI_API_KEY")
|
||||||
|
|
||||||
class QuestionItem(BaseModel):
|
class QuestionItem(BaseModel):
|
||||||
|
|
@ -84,14 +88,25 @@ def process_exam(folder_path: str):
|
||||||
response_json_schema=ExamExtraction.model_json_schema(),
|
response_json_schema=ExamExtraction.model_json_schema(),
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Sending request to Gemini...")
|
cache_file = folder / "gemini_response.json"
|
||||||
response = client.models.generate_content(
|
|
||||||
model=MODEL_ID,
|
|
||||||
contents=contents,
|
|
||||||
config=config
|
|
||||||
)
|
|
||||||
|
|
||||||
extracted_data = ExamExtraction.model_validate_json(response.text)
|
if cache_file.is_file():
|
||||||
|
print("Loading cached response from gemini_response.json...")
|
||||||
|
response_text = cache_file.read_text(encoding="utf-8")
|
||||||
|
else:
|
||||||
|
print("Sending request to Gemini...")
|
||||||
|
response = client.models.generate_content(
|
||||||
|
model=MODEL_ID,
|
||||||
|
contents=contents,
|
||||||
|
config=config
|
||||||
|
)
|
||||||
|
response_text = response.text
|
||||||
|
|
||||||
|
print("Saving response to cache...")
|
||||||
|
cache_file.write_text(response_text, encoding="utf-8")
|
||||||
|
|
||||||
|
# Validate from the text variable (cached or fresh)
|
||||||
|
extracted_data = ExamExtraction.model_validate_json(response_text)
|
||||||
|
|
||||||
# 2. Setup output directories
|
# 2. Setup output directories
|
||||||
text_dir = folder / "Text"
|
text_dir = folder / "Text"
|
||||||
|
|
@ -101,28 +116,80 @@ def process_exam(folder_path: str):
|
||||||
|
|
||||||
labels_file = folder / "labels"
|
labels_file = folder / "labels"
|
||||||
|
|
||||||
print("Writing files...")
|
# Step 1: Write initial labels
|
||||||
|
print("Writing initial labels file...")
|
||||||
with open(labels_file, "w", encoding="utf-8") as flabels:
|
with open(labels_file, "w", encoding="utf-8") as flabels:
|
||||||
for q in extracted_data.questions:
|
for q in extracted_data.questions:
|
||||||
# Sanitize label for filesystem (prevent directory traversal if label contains '/')
|
flabels.write(f"{q.label}\n")
|
||||||
safe_label = q.label.replace("/", "_")
|
|
||||||
|
|
||||||
flabels.write(f"{safe_label}\n")
|
# Step 2: Open labels file for user editing
|
||||||
|
print("Opening labels file for editing...")
|
||||||
|
editor = os.environ.get("EDITOR")
|
||||||
|
try:
|
||||||
|
if editor:
|
||||||
|
subprocess.run(shlex.split(editor) + [str(labels_file)])
|
||||||
|
else:
|
||||||
|
# Fallbacks if $EDITOR is not set
|
||||||
|
if sys.platform.startswith("linux"):
|
||||||
|
subprocess.Popen(["xdg-open", str(labels_file)])
|
||||||
|
elif sys.platform == "darwin":
|
||||||
|
subprocess.Popen(["open", str(labels_file)])
|
||||||
|
else:
|
||||||
|
os.startfile(str(labels_file))
|
||||||
|
|
||||||
# Fix double-escaped newlines
|
# xdg-open/open usually do not block, so we wait for user confirmation
|
||||||
q_content = q.question_content.replace("\\n", "\n")
|
input("Press ENTER here once you have saved and closed the labels file...")
|
||||||
s_content = q.solution_content.replace("\\n", "\n")
|
except Exception:
|
||||||
|
print("Error running editor, using labels as given.")
|
||||||
|
|
||||||
# Write Text/label
|
# Step 3 & 4: Read the edited file back and create a mapping
|
||||||
with open(text_dir / safe_label, "w", encoding="utf-8") as f:
|
with open(labels_file, "r", encoding="utf-8") as flabels:
|
||||||
f.write(f"{q.label}\n{q.question_content}")
|
edited_lines = [line.strip() for line in flabels if line.strip()]
|
||||||
|
|
||||||
# Write Sol/label
|
mapping = []
|
||||||
with open(sol_dir / safe_label, "w", encoding="utf-8") as f:
|
final_labels = []
|
||||||
f.write(f"{q.label}\n{q.solution_content}")
|
orig_idx = 0
|
||||||
|
|
||||||
print(f"Success! Processed {len(extracted_data.questions)} questions.")
|
for line in edited_lines:
|
||||||
|
if line.startswith("+"):
|
||||||
|
new_label = line[1:].lstrip()
|
||||||
|
final_labels.append(new_label)
|
||||||
|
# New label, no source content
|
||||||
|
mapping.append((new_label, None))
|
||||||
|
else:
|
||||||
|
new_label = line
|
||||||
|
final_labels.append(new_label)
|
||||||
|
# Map to initial order, advancing index only for non-'+' items
|
||||||
|
q_item = extracted_data.questions[orig_idx] if orig_idx < len(extracted_data.questions) else None
|
||||||
|
mapping.append((new_label, q_item))
|
||||||
|
orig_idx += 1
|
||||||
|
|
||||||
|
# Rewrite the labels file cleanly (removing '+' prefixes)
|
||||||
|
with open(labels_file, "w", encoding="utf-8") as flabels:
|
||||||
|
for lbl in final_labels:
|
||||||
|
flabels.write(f"{lbl}\n")
|
||||||
|
|
||||||
|
# Step 5: Write the final question and solution files
|
||||||
|
print("Writing question and solution files...")
|
||||||
|
for new_label, q_item in mapping:
|
||||||
|
safe_label = new_label.replace("/", "_")
|
||||||
|
|
||||||
|
if q_item:
|
||||||
|
q_content = q_item.question_content.replace("\\n", "\n")
|
||||||
|
s_content = q_item.solution_content.replace("\\n", "\n")
|
||||||
|
else:
|
||||||
|
q_content = ""
|
||||||
|
s_content = ""
|
||||||
|
|
||||||
|
# Write Text/label
|
||||||
|
with open(text_dir / safe_label, "w", encoding="utf-8") as f:
|
||||||
|
f.write(f"{new_label}\n{q_content}")
|
||||||
|
|
||||||
|
# Write Sol/label
|
||||||
|
with open(sol_dir / safe_label, "w", encoding="utf-8") as f:
|
||||||
|
f.write(f"{new_label}\n{s_content}")
|
||||||
|
|
||||||
|
print(f"Success! Processed {len(mapping)} labels.")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if not api_key:
|
if not api_key:
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,8 @@ be missing.
|
||||||
|
|
||||||
##labels##
|
##labels##
|
||||||
|
|
||||||
|
##wrong_labels##
|
||||||
|
|
||||||
Here's a list of the names of the students, pick the one that matches
|
Here's a list of the names of the students, pick the one that matches
|
||||||
the best or `\"Unknown\"` if you cannot read the name
|
the best or `\"Unknown\"` if you cannot read the name
|
||||||
|
|
||||||
|
|
@ -116,6 +118,8 @@ be missing.
|
||||||
|
|
||||||
##labels##
|
##labels##
|
||||||
|
|
||||||
|
##wrong_labels##
|
||||||
|
|
||||||
Since this copy isn't the first part of a sequence, simply set the
|
Since this copy isn't the first part of a sequence, simply set the
|
||||||
name to `\"Continued\"`."""
|
name to `\"Continued\"`."""
|
||||||
|
|
||||||
|
|
@ -128,7 +132,7 @@ class AnnotationData(BaseModel):
|
||||||
list: List[BoxItem] = Field(description="List of bounding box items")
|
list: List[BoxItem] = Field(description="List of bounding box items")
|
||||||
|
|
||||||
|
|
||||||
def generate_request(file, labels, names, context_labels):
|
def generate_request(file, labels, names, context_labels, wrong_labels):
|
||||||
"""Generates request for Gemini with context."""
|
"""Generates request for Gemini with context."""
|
||||||
|
|
||||||
image_path = Path(file)
|
image_path = Path(file)
|
||||||
|
|
@ -142,6 +146,11 @@ def generate_request(file, labels, names, context_labels):
|
||||||
else:
|
else:
|
||||||
text = my_prompt2.replace("##labels##", labels)\
|
text = my_prompt2.replace("##labels##", labels)\
|
||||||
.replace("##prev_context##", context_str)
|
.replace("##prev_context##", context_str)
|
||||||
|
if wrong_labels:
|
||||||
|
text= text.replace("##wrong_labels##\n\n", f"On a previous request, you answered with the following wrong labels : {wrong_labels}. These are wrong, since they do not exactly match any of the labels in the previous list.")
|
||||||
|
else:
|
||||||
|
text = text.replace("##wrong_labels##\n\n", "")
|
||||||
|
|
||||||
|
|
||||||
contents = [
|
contents = [
|
||||||
types.Content(
|
types.Content(
|
||||||
|
|
@ -271,12 +280,14 @@ def process_copy_group(group_key, files):
|
||||||
print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...")
|
print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} accumulated labels...")
|
||||||
|
|
||||||
attempt = -1
|
attempt = -1
|
||||||
|
wrong_labels = []
|
||||||
while True:
|
while True:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
if attempt > 0:
|
if attempt > 0:
|
||||||
time.sleep(10 * attempt)
|
time.sleep(10 * attempt)
|
||||||
try:
|
try:
|
||||||
contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels)
|
contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels,
|
||||||
|
wrong_labels)
|
||||||
|
|
||||||
response = client.models.generate_content(
|
response = client.models.generate_content(
|
||||||
model=MODEL_ID,
|
model=MODEL_ID,
|
||||||
|
|
@ -289,6 +300,7 @@ def process_copy_group(group_key, files):
|
||||||
name = annota.name
|
name = annota.name
|
||||||
if unknown:
|
if unknown:
|
||||||
print(f"Error: {image_file.name} contained unknown labels: {unknown}")
|
print(f"Error: {image_file.name} contained unknown labels: {unknown}")
|
||||||
|
wrong_labels.extend(unknown)
|
||||||
print("Retrying request...")
|
print("Retrying request...")
|
||||||
continue # Retry immediately
|
continue # Retry immediately
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,8 @@ from pypdf import PdfReader, PdfWriter
|
||||||
CM_TO_POINTS = (1 / 2.54) * 72
|
CM_TO_POINTS = (1 / 2.54) * 72
|
||||||
|
|
||||||
def list_pdf_files(directory):
    """Return the PDF paths found directly in *directory*, reverse-sorted.

    Files whose own name contains "enonce" (e.g. ``enonce.pdf``) are left out.
    The filter looks at the file name only, not at the whole path, so a
    directory whose path happens to contain "enonce" does not hide every PDF.

    Args:
        directory: path of the directory to scan (not recursive).

    Returns:
        list[str]: matching paths, in descending lexicographic order.
    """
    pdf_paths = sorted(glob.glob(os.path.join(directory, "*.pdf")), reverse=True)
    return [path for path in pdf_paths if "enonce" not in os.path.basename(path)]
|
||||||
|
|
||||||
class PDFPreviewer:
|
class PDFPreviewer:
|
||||||
|
|
||||||
|
|
@ -84,7 +85,10 @@ class PDFPreviewer:
|
||||||
self.num = 0
|
self.num = 0
|
||||||
self.global_rotation = 0 # Rotation appliquée à tous les fichiers
|
self.global_rotation = 0 # Rotation appliquée à tous les fichiers
|
||||||
self.history = []
|
self.history = []
|
||||||
self.setup_next_file()
|
if not self.setup_next_file():
|
||||||
|
print(f"Aucun fichier PDF valide trouvé dans : {path}")
|
||||||
|
master.destroy()
|
||||||
|
return
|
||||||
|
|
||||||
self._resize_job = None # For debouncing resize events
|
self._resize_job = None # For debouncing resize events
|
||||||
|
|
||||||
|
|
@ -462,30 +466,72 @@ class PDFPreviewer:
|
||||||
ri = 0
|
ri = 0
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(ps):
|
while i < len(ps):
|
||||||
# Si c'est une copie double
|
psk = ps[i]['keep']
|
||||||
if (ps[i]['keep'] == "both" or ps[i]['keep'] == "right") \
|
|
||||||
and i < len(ps)-1 and (ps[i+1]['keep'] != "right"):
|
# Si c'est une copie double (on s'assure qu'on a bien 2 pages consécutives modifiables)
|
||||||
shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
|
if psk in ["both", "right", "left", "none"] and i < len(ps)-1 and ps[i+1]['keep'] in ["both", "right", "left", "none"]:
|
||||||
ri += 1
|
|
||||||
if ps[i+1]['keep'] != "none":
|
# 1. Page de garde (Extérieur Droit)
|
||||||
|
if ps[i]['keep'] in ["both", "right"]:
|
||||||
|
shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
|
||||||
|
ri += 1
|
||||||
|
|
||||||
|
# 2. Intérieur Gauche
|
||||||
|
if ps[i+1]['keep'] in ["both", "left"]:
|
||||||
shutil.copy2(self.split_filename_left(i+1), self.reorder_filename(ri))
|
shutil.copy2(self.split_filename_left(i+1), self.reorder_filename(ri))
|
||||||
ri += 1
|
ri += 1
|
||||||
if ps[i+1]['keep'] != "left":
|
|
||||||
shutil.copy2(self.split_filename_right(i+1), self.reorder_filename(ri))
|
# 3. Intérieur Droit
|
||||||
ri += 1
|
if ps[i+1]['keep'] in ["both", "right"]:
|
||||||
if ps[i]['keep'] == "both":
|
shutil.copy2(self.split_filename_right(i+1), self.reorder_filename(ri))
|
||||||
shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
|
ri += 1
|
||||||
ri += 1
|
|
||||||
i += 2
|
# 4. Dos de la copie (Extérieur Gauche)
|
||||||
else:
|
if ps[i]['keep'] in ["both", "left"]:
|
||||||
psk = ps[i]['keep']
|
|
||||||
if psk == "left" or psk == "both" or psk == "as_is":
|
|
||||||
shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
|
shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
|
||||||
ri += 1
|
ri += 1
|
||||||
if psk == "right" or psk == "both":
|
|
||||||
|
i += 2
|
||||||
|
else:
|
||||||
|
# Si c'est une page simple (ou as_is)
|
||||||
|
if psk in ["left", "both", "as_is"]:
|
||||||
|
shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
|
||||||
|
ri += 1
|
||||||
|
if psk in ["right", "both"]:
|
||||||
shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
|
shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
|
||||||
ri += 1
|
ri += 1
|
||||||
i += 1
|
i += 1
|
||||||
|
# def reorder_pdfs(self):
|
||||||
|
# """Reordonne les pages, si ce sont des copies doubles."""
|
||||||
|
# self.clean_up_dir(self.reorder_dir)
|
||||||
|
# ps = self.page_settings
|
||||||
|
# ri = 0
|
||||||
|
# i = 0
|
||||||
|
# while i < len(ps):
|
||||||
|
# # Si c'est une copie double
|
||||||
|
# if (ps[i]['keep'] == "both" or ps[i]['keep'] == "right") \
|
||||||
|
# and i < len(ps)-1 and (ps[i+1]['keep'] != "right"):
|
||||||
|
# shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
|
||||||
|
# ri += 1
|
||||||
|
# if ps[i+1]['keep'] != "none":
|
||||||
|
# shutil.copy2(self.split_filename_left(i+1), self.reorder_filename(ri))
|
||||||
|
# ri += 1
|
||||||
|
# if ps[i+1]['keep'] != "left":
|
||||||
|
# shutil.copy2(self.split_filename_right(i+1), self.reorder_filename(ri))
|
||||||
|
# ri += 1
|
||||||
|
# if ps[i]['keep'] == "both":
|
||||||
|
# shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
|
||||||
|
# ri += 1
|
||||||
|
# i += 2
|
||||||
|
# else:
|
||||||
|
# psk = ps[i]['keep']
|
||||||
|
# if psk == "left" or psk == "both" or psk == "as_is":
|
||||||
|
# shutil.copy2(self.split_filename_left(i), self.reorder_filename(ri))
|
||||||
|
# ri += 1
|
||||||
|
# if psk == "right" or psk == "both":
|
||||||
|
# shutil.copy2(self.split_filename_right(i), self.reorder_filename(ri))
|
||||||
|
# ri += 1
|
||||||
|
# i += 1
|
||||||
|
|
||||||
def concate_files(self):
|
def concate_files(self):
|
||||||
writer = PdfWriter()
|
writer = PdfWriter()
|
||||||
|
|
|
||||||
|
|
@ -157,6 +157,14 @@ class ImageViewer:
|
||||||
self.root = root
|
self.root = root
|
||||||
self.root.resizable(False, False) # If you resize, coordinates will be wrong
|
self.root.resizable(False, False) # If you resize, coordinates will be wrong
|
||||||
|
|
||||||
|
screen_w = root.winfo_screenwidth()
|
||||||
|
screen_h = root.winfo_screenheight()
|
||||||
|
|
||||||
|
x = int(screen_w * 0.1)
|
||||||
|
y = int(screen_h * 0.05)
|
||||||
|
|
||||||
|
root.geometry(f"+{x}+{y}")
|
||||||
|
|
||||||
self.base_dir = base_dir
|
self.base_dir = base_dir
|
||||||
self.root.title("Bounding Box Viewer")
|
self.root.title("Bounding Box Viewer")
|
||||||
self.label = tk.Label(root, text="Waiting for images...")
|
self.label = tk.Label(root, text="Waiting for images...")
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,10 @@ do not score or give feedback to any other question."""
|
||||||
def make_prompt(input_dir,full_label):
|
def make_prompt(input_dir,full_label):
|
||||||
def read_longest_prefix_file(subdir):
|
def read_longest_prefix_file(subdir):
|
||||||
dir_path = input_dir / subdir
|
dir_path = input_dir / subdir
|
||||||
|
if not dir_path.exists():
|
||||||
|
if subdir != "Persp":
|
||||||
|
print("Warning !! Directory doesn't exist : ", dir_path)
|
||||||
|
return ""
|
||||||
matches = [f for f in dir_path.iterdir()
|
matches = [f for f in dir_path.iterdir()
|
||||||
if f.is_file()
|
if f.is_file()
|
||||||
and full_label.startswith(f.name)
|
and full_label.startswith(f.name)
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,11 @@ for file in *.pdf; do
|
||||||
# Handle case where no pdfs exist
|
# Handle case where no pdfs exist
|
||||||
[ -e "$file" ] || continue
|
[ -e "$file" ] || continue
|
||||||
|
|
||||||
|
if [ "$file" = "enonce.pdf" ]; then
|
||||||
|
echo "Skipping: $file"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
# Rename with 0-padding (e.g., Copie01.pdf)
|
# Rename with 0-padding (e.g., Copie01.pdf)
|
||||||
mv -- "$file" "$(printf "Copie%02d.pdf" "$count")"
|
mv -- "$file" "$(printf "Copie%02d.pdf" "$count")"
|
||||||
((count++))
|
((count++))
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,11 @@ cd "$1" || { echo "Error: Cannot access directory '$1'"; exit 1; }
|
||||||
shopt -s nullglob
|
shopt -s nullglob
|
||||||
|
|
||||||
for file in *.pdf; do
|
for file in *.pdf; do
|
||||||
|
if [ "$file" = "enonce.pdf" ]; then
|
||||||
|
echo "Skipping: $file"
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
# Rotate to a temporary file
|
# Rotate to a temporary file
|
||||||
if qpdf --rotate=+180 "$file" "temp_rotated.pdf"; then
|
if qpdf --rotate=+180 "$file" "temp_rotated.pdf"; then
|
||||||
mv "temp_rotated.pdf" "$file"
|
mv "temp_rotated.pdf" "$file"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue