Miscs

2026-04-14 09:16:17 +02:00 · 2026-04-14 09:16:17 +02:00 · 882c9b64ba
parent 822a531679
commit 882c9b64ba
5 changed files with 21106 additions and 39 deletions
--- a/correction.py
+++ b/correction.py
@ -623,25 +623,6 @@ def process_single_task(task_tuple):
                            needs_correction.append(i)
                            break

-                        #
-                        # if ymin < yming-50 or ymax > ymaxg+50:
-                        #     print("Error : Gemini answered box2d too low/up", pid, label, group_name)
-                        #     if ymax < yming or ymin > ymaxg:
-                        #         print("Removing the box.")
-                        #         f["box_2d"] = None
-                        #         continue
-                        #     nymin = max(ymin, yming) * 1000 // total_height
-                        #     nymax = min(ymax, ymaxg) * 1000 // total_height
-                        #     f["box_2d"] = [nymin, xmin, nymax, xmax]
-
-                        # if f["box_2d"] and xmax / 1000 > width_r:
-                        #     print("Error : Gemini answered box2d too right", pid, label, group_name)
-                        #     if xmin / 1000 > width_r:
-                        #         print("Removing the box.")
-                        #         f["box_2d"] = None
-                        #         continue
-                        #     f["box_2d"][3] = int(width_r * 1000)
-
                if needs_correction:
                    tprint(f"\tBox anomalies detected for Copie {pid} {group_name}. \n\tRequesting isolated correction from Gemini Flash...")
                    try:
--- a/gemini_for_labels.py
+++ b/gemini_for_labels.py
@ -211,7 +211,7 @@ for path_str in args.input_paths:
 labels_txt = (INPUT_DIR / "labels").read_text()
 valid_labels_set = set(line.strip() for line in labels_txt.splitlines() if line.strip())
 names_path = (INPUT_DIR / "names")
-if !os.path.exists(names_path):
+if not os.path.exists(names_path):
    names_path = Path("names")
 names_txt = names_path.read_text()

--- a/liste_francais.txt
+++ b/liste_francais.txt
--- a/plotting.py
+++ b/plotting.py
@ -9,7 +9,7 @@ from tkinter import messagebox
 from pathlib import Path
 from PIL import Image, ImageDraw, ImageFont, ImageTk

-print("o to open pdf, O original pdf, e to emacs part, click for coordinates")
+print("o to open pdf, O original pdf, e to emacs part, i to interro, click for coordinates")

 # --- Configuration & Globals ---
 padding = 60
@ -54,7 +54,7 @@ def convert_list(l, group_id, json_schema):
        ll.append(ee)
    return ll

-def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
+def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages, last_label_index):
    im = Image.open(image_path)
    im.load()
    width, height = im.size
@ -62,7 +62,6 @@ def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
    new_im.paste(im, (0, 0))
    draw = ImageDraw.Draw(new_im)
    bounding_boxes.sort(key=lambda b: (page_number(b["box_2d"], nb_pages), b["box_2d"][0]))
-    last_label_index = -1

    for bbox in bounding_boxes:
        raw_y_min = int(bbox["box_2d"][0] * height / 1000)
@ -88,7 +87,7 @@ def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
                draw.text((abs_x_min + 8, abs_y_min - 30), label, fill=color, font=font)
            else:
                draw.text((abs_x_min + 8, abs_y_max + 6), label, fill=color, font=font)
-    return new_im
+    return (new_im, last_label_index)

 # --- Processing Logic (Worker Thread) ---

@ -97,10 +96,15 @@ def worker_thread(base_dir, files_to_process, all_labels):
    Iterates through files, prepares VISUALS only, and puts metadata in queue.
    Does NOT write final JSON files anymore.
    """
+    previous_copie = None
+    last_label_index = None
    for img_path in files_to_process:
        json_path = base_dir / f"{img_path.stem}.json"
        copie_part = int(img_path.stem[-2:])
        copie = img_path.stem[:-3]
+        if copie != previous_copie:
+            last_label_index = -1
+            previous_copie = copie
        json_schema_path = base_dir / 'Cutleft' / f"{copie}_schema.json"

        try:
@ -127,7 +131,8 @@ def worker_thread(base_dir, files_to_process, all_labels):

            try:
                print(f"Buffering {img_path.name}...")
-                pil_image = prepare_image(str(img_path), bb_list, all_labels, nb_pages)
+                (pil_image, last_label_index) = \
+                    prepare_image(str(img_path), bb_list, all_labels, nb_pages, last_label_index)

                metadata = {
                    "copie": copie,
@ -169,6 +174,7 @@ class ImageViewer:
        self.root.bind('<Return>', self.on_enter)
        self.root.bind('e', self.on_edit)
        self.root.bind('o', self.on_open_pdf)
+        self.root.bind('i', self.on_open_interro)
        self.root.bind('O', self.on_open_ori_pdf)
        self.root.bind('<Escape>', lambda e: self.root.quit())
        self.label.bind('<Button-1>', self.on_click)
@ -265,6 +271,12 @@ class ImageViewer:
            subprocess.Popen(['xdg-open', str(pdf_path.absolute())])

    def on_open_ori_pdf(self, event):
+        if self.is_viewing and self.current_json_path:
+            pdf_path = "/home/sebastien/Staging/Interro/" + base_dir + "pdf"
+            print(f"Opening {pdf_path}")
+            subprocess.Popen(['xdg-open', pdf_path])
+
+    def on_open_interro(self, event):
        if self.is_viewing and self.current_json_path:
            new_filename = self.current_json_path.stem.split('_')[0] + ".pdf"
            pdf_path = self.current_json_path.parent / "Copies Originales" / new_filename
--- a/post-correction.py
+++ b/post-correction.py
@ -20,28 +20,28 @@ import ftfy
 import re
 import urllib.request

-# url = "https://raw.githubusercontent.com/hbenbel/French-Dictionary/master/dictionary/dictionary.txt"
-# french_words = urllib.request.urlopen(url).read().decode('utf-8').splitlines()
+with open('liste_francais.txt', 'r', encoding='utf-8') as f:  # decode as UTF-8 like the removed URL fetch did, instead of the locale default
+    french_words = f.read().splitlines()

 # 2. Pre-compute an O(1) lookup dictionary
 # We simulate the corruption by replacing accents with null bytes (\x00)
-# lookup_map = {}
-# for word in french_words:
-#     # Replace all French accents with \x00 to create the "broken" key
-#     broken_key = re.sub(r'[éèêëàâäîïôöùûüçœÉÈÊËÀÂÄÎÏÔÖÙÛÜÇŒ]', '\x00', word)
-#     if '\x00' in broken_key:
-#         lookup_map[broken_key] = word # e.g., "\x00cole" -> "école"
+lookup_map = {}
+for word in french_words:
+    # Replace all French accents with \x00 to create the "broken" key
+    broken_key = re.sub(r'[éèêëàâäîïôöùûüçœÉÈÊËÀÂÄÎÏÔÖÙÛÜÇŒ]', '\x00', word)
+    if '\x00' in broken_key:
+        lookup_map[broken_key] = word # e.g., "\x00cole" -> "école"

 # 3. Fast replace function
 def fast_fix(text):
    # Find words containing regular letters and null bytes
-    # def replacer(match):
-    #     broken_word = match.group(0)
-    #     # Return the fixed word from our map, or leave it if not found
-    #     # (Handles case-insensitivity by falling back to lowercase map)
-    #     return lookup_map.get(broken_word.lower(), broken_word)
+    def replacer(match):
+        broken_word = match.group(0)
+        # Return the fixed word from our map, or leave it if not found
+        # (Handles case-insensitivity by falling back to lowercase map)
+        return lookup_map.get(broken_word.lower(), broken_word)

-    # return re.sub(r'[a-zA-Z\x00]+', replacer, text)
+    text = re.sub(r'[a-zA-Z\x00]+', replacer, text)  # rebind so the existing trailing return is the single, reachable exit
    return text