Miscs
parent
822a531679
commit
882c9b64ba
|
|
@ -623,25 +623,6 @@ def process_single_task(task_tuple):
|
|||
needs_correction.append(i)
|
||||
break
|
||||
|
||||
#
|
||||
# if ymin < yming-50 or ymax > ymaxg+50:
|
||||
# print("Error : Gemini answered box2d too low/up", pid, label, group_name)
|
||||
# if ymax < yming or ymin > ymaxg:
|
||||
# print("Removing the box.")
|
||||
# f["box_2d"] = None
|
||||
# continue
|
||||
# nymin = max(ymin, yming) * 1000 // total_height
|
||||
# nymax = min(ymax, ymaxg) * 1000 // total_height
|
||||
# f["box_2d"] = [nymin, xmin, nymax, xmax]
|
||||
|
||||
# if f["box_2d"] and xmax / 1000 > width_r:
|
||||
# print("Error : Gemini answered box2d too right", pid, label, group_name)
|
||||
# if xmin / 1000 > width_r:
|
||||
# print("Removing the box.")
|
||||
# f["box_2d"] = None
|
||||
# continue
|
||||
# f["box_2d"][3] = int(width_r * 1000)
|
||||
|
||||
if needs_correction:
|
||||
tprint(f"\tBox anomalies detected for Copie {pid} {group_name}. \n\tRequesting isolated correction from Gemini Flash...")
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ for path_str in args.input_paths:
|
|||
labels_txt = (INPUT_DIR / "labels").read_text()
|
||||
valid_labels_set = set(line.strip() for line in labels_txt.splitlines() if line.strip())
|
||||
names_path = (INPUT_DIR / "names")
|
||||
if !os.path.exists(names_path):
|
||||
if not os.path.exists(names_path):
|
||||
names_path = Path("names")
|
||||
names_txt = names_path.read_text()
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
22
plotting.py
22
plotting.py
|
|
@ -9,7 +9,7 @@ from tkinter import messagebox
|
|||
from pathlib import Path
|
||||
from PIL import Image, ImageDraw, ImageFont, ImageTk
|
||||
|
||||
print("o to open pdf, O original pdf, e to emacs part, click for coordinates")
|
||||
print("o to open pdf, O original pdf, e to emacs part, i to interro, click for coordinates")
|
||||
|
||||
# --- Configuration & Globals ---
|
||||
padding = 60
|
||||
|
|
@ -54,7 +54,7 @@ def convert_list(l, group_id, json_schema):
|
|||
ll.append(ee)
|
||||
return ll
|
||||
|
||||
def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
|
||||
def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages, last_label_index):
|
||||
im = Image.open(image_path)
|
||||
im.load()
|
||||
width, height = im.size
|
||||
|
|
@ -62,7 +62,6 @@ def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
|
|||
new_im.paste(im, (0, 0))
|
||||
draw = ImageDraw.Draw(new_im)
|
||||
bounding_boxes.sort(key=lambda b: (page_number(b["box_2d"], nb_pages), b["box_2d"][0]))
|
||||
last_label_index = -1
|
||||
|
||||
for bbox in bounding_boxes:
|
||||
raw_y_min = int(bbox["box_2d"][0] * height / 1000)
|
||||
|
|
@ -88,7 +87,7 @@ def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
|
|||
draw.text((abs_x_min + 8, abs_y_min - 30), label, fill=color, font=font)
|
||||
else:
|
||||
draw.text((abs_x_min + 8, abs_y_max + 6), label, fill=color, font=font)
|
||||
return new_im
|
||||
return (new_im, last_label_index)
|
||||
|
||||
# --- Processing Logic (Worker Thread) ---
|
||||
|
||||
|
|
@ -97,10 +96,15 @@ def worker_thread(base_dir, files_to_process, all_labels):
|
|||
Iterates through files, prepares VISUALS only, and puts metadata in queue.
|
||||
Does NOT write final JSON files anymore.
|
||||
"""
|
||||
previous_copie = None
|
||||
last_label_index = None
|
||||
for img_path in files_to_process:
|
||||
json_path = base_dir / f"{img_path.stem}.json"
|
||||
copie_part = int(img_path.stem[-2:])
|
||||
copie = img_path.stem[:-3]
|
||||
if copie != previous_copie:
|
||||
last_label_index = -1
|
||||
previous_copie = copie
|
||||
json_schema_path = base_dir / 'Cutleft' / f"{copie}_schema.json"
|
||||
|
||||
try:
|
||||
|
|
@ -127,7 +131,8 @@ def worker_thread(base_dir, files_to_process, all_labels):
|
|||
|
||||
try:
|
||||
print(f"Buffering {img_path.name}...")
|
||||
pil_image = prepare_image(str(img_path), bb_list, all_labels, nb_pages)
|
||||
(pil_image, last_label_index) = \
|
||||
prepare_image(str(img_path), bb_list, all_labels, nb_pages, last_label_index)
|
||||
|
||||
metadata = {
|
||||
"copie": copie,
|
||||
|
|
@ -169,6 +174,7 @@ class ImageViewer:
|
|||
self.root.bind('<Return>', self.on_enter)
|
||||
self.root.bind('e', self.on_edit)
|
||||
self.root.bind('o', self.on_open_pdf)
|
||||
self.root.bind('i', self.on_open_interro)
|
||||
self.root.bind('O', self.on_open_ori_pdf)
|
||||
self.root.bind('<Escape>', lambda e: self.root.quit())
|
||||
self.label.bind('<Button-1>', self.on_click)
|
||||
|
|
@ -265,6 +271,12 @@ class ImageViewer:
|
|||
subprocess.Popen(['xdg-open', str(pdf_path.absolute())])
|
||||
|
||||
def on_open_ori_pdf(self, event):
|
||||
if self.is_viewing and self.current_json_path:
|
||||
pdf_path = "/home/sebastien/Staging/Interro/" + base_dir + "pdf"
|
||||
print(f"Opening {pdf_path}")
|
||||
subprocess.Popen(['xdg-open', pdf_path])
|
||||
|
||||
def on_open_interro(self, event):
|
||||
if self.is_viewing and self.current_json_path:
|
||||
new_filename = self.current_json_path.stem.split('_')[0] + ".pdf"
|
||||
pdf_path = self.current_json_path.parent / "Copies Originales" / new_filename
|
||||
|
|
|
|||
|
|
@ -20,28 +20,28 @@ import ftfy
|
|||
import re
|
||||
import urllib.request
|
||||
|
||||
# url = "https://raw.githubusercontent.com/hbenbel/French-Dictionary/master/dictionary/dictionary.txt"
|
||||
# french_words = urllib.request.urlopen(url).read().decode('utf-8').splitlines()
|
||||
with open('liste_francais.txt', 'r') as f:
|
||||
french_words = f.read().splitlines()
|
||||
|
||||
# 2. Pre-compute an O(1) lookup dictionary
|
||||
# We simulate the corruption by replacing accents with null bytes (\x00)
|
||||
# lookup_map = {}
|
||||
# for word in french_words:
|
||||
# # Replace all French accents with \x00 to create the "broken" key
|
||||
# broken_key = re.sub(r'[éèêëàâäîïôöùûüçœÉÈÊËÀÂÄÎÏÔÖÙÛÜÇŒ]', '\x00', word)
|
||||
# if '\x00' in broken_key:
|
||||
# lookup_map[broken_key] = word # e.g., "\x00cole" -> "école"
|
||||
lookup_map = {}
|
||||
for word in french_words:
|
||||
# Replace all French accents with \x00 to create the "broken" key
|
||||
broken_key = re.sub(r'[éèêëàâäîïôöùûüçœÉÈÊËÀÂÄÎÏÔÖÙÛÜÇŒ]', '\x00', word)
|
||||
if '\x00' in broken_key:
|
||||
lookup_map[broken_key] = word # e.g., "\x00cole" -> "école"
|
||||
|
||||
# 3. Fast replace function
|
||||
def fast_fix(text):
|
||||
# Find words containing regular letters and null bytes
|
||||
# def replacer(match):
|
||||
# broken_word = match.group(0)
|
||||
# # Return the fixed word from our map, or leave it if not found
|
||||
# # (Handles case-insensitivity by falling back to lowercase map)
|
||||
# return lookup_map.get(broken_word.lower(), broken_word)
|
||||
def replacer(match):
|
||||
broken_word = match.group(0)
|
||||
# Return the fixed word from our map, or leave it if not found
|
||||
# (Handles case-insensitivity by falling back to lowercase map)
|
||||
return lookup_map.get(broken_word.lower(), broken_word)
|
||||
|
||||
# return re.sub(r'[a-zA-Z\x00]+', replacer, text)
|
||||
return re.sub(r'[a-zA-Z\x00]+', replacer, text)
|
||||
return text
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue