Some files, and things.
parent
56b158969d
commit
8059544e26
173
cutleft.py
173
cutleft.py
|
|
@ -1,6 +1,7 @@
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import json # Added for schema output
|
||||||
import tkinter as tk
|
import tkinter as tk
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from queue import Queue, Empty
|
from queue import Queue, Empty
|
||||||
|
|
@ -35,9 +36,56 @@ if not os.path.exists(OUTPUT_DIR):
|
||||||
|
|
||||||
# --- Processing Logic ---
|
# --- Processing Logic ---
|
||||||
|
|
||||||
|
def distribute_pages(total_pages, max_per_file=5):
    """
    Split ``total_pages`` into balanced chunk sizes of at most ``max_per_file``.

    The smallest possible number of chunks is used, and the pages are spread
    so that chunk sizes differ by at most one, with the larger chunks first.
    Example: 12 pages, max 5 -> [4, 4, 4]; 11 pages, max 5 -> [4, 4, 3]

    Args:
        total_pages: Number of pages (columns) to distribute.
        max_per_file: Upper bound on the number of pages per chunk (>= 1).

    Returns:
        A list of chunk sizes that sums to ``total_pages``. The list is empty
        when ``total_pages`` is zero or negative.

    Raises:
        ValueError: If there are pages to distribute but ``max_per_file`` is
            smaller than 1.
    """
    # Nothing to distribute. Negative counts are treated like zero instead of
    # falling through to a division by zero or a meaningless result.
    if total_pages <= 0:
        return []
    if max_per_file < 1:
        raise ValueError(f"max_per_file must be >= 1, got {max_per_file}")

    # Minimum number of files needed (integer ceiling division)
    num_files = -(-total_pages // max_per_file)

    # Every file gets base_count pages; the first `remainder` files get one more
    base_count, remainder = divmod(total_pages, num_files)
    return [base_count + 1] * remainder + [base_count] * (num_files - remainder)
|
||||||
|
|
||||||
|
def stitch_images(image_list):
    """
    Stitch a list of images horizontally, separated by vertical delimiters.

    The images are pasted left to right, top-aligned, onto a white RGB canvas
    as tall as the tallest image. A ``DELIMITER_WIDTH``-wide bar filled with
    ``DELIMITER_COLOR`` is inserted between consecutive images (none after
    the last one).

    Args:
        image_list: Sequence of images exposing ``width`` and ``height`` and
            accepted by ``Image.paste``.

    Returns:
        The combined image, or ``None`` when ``image_list`` is empty.
    """
    if not image_list:
        return None

    num_images = len(image_list)
    total_width = sum(img.width for img in image_list) + (num_images - 1) * DELIMITER_WIDTH
    max_height = max(img.height for img in image_list)

    combined = Image.new('RGB', (total_width, max_height), color=(255, 255, 255))

    # Every delimiter is identical, so build it once instead of once per gap.
    # It is only needed when there is at least one gap.
    delimiter = None
    if num_images > 1:
        delimiter = Image.new('RGB', (DELIMITER_WIDTH, max_height), color=DELIMITER_COLOR)

    x_offset = 0
    for idx, img in enumerate(image_list):
        combined.paste(img, (x_offset, 0))
        x_offset += img.width
        if idx < num_images - 1:
            combined.paste(delimiter, (x_offset, 0))
            x_offset += DELIMITER_WIDTH

    return combined
|
||||||
|
|
||||||
def process_single_pdf(filename, shift_offset=0):
|
def process_single_pdf(filename, shift_offset=0):
|
||||||
"""
|
"""
|
||||||
Converts PDF to stitched JPG image (PIL object).
|
Converts PDF to stitched images.
|
||||||
|
Returns a tuple: (preview_image_resized, list_of_split_images, schema_dict)
|
||||||
"""
|
"""
|
||||||
pdf_path = os.path.join(INPUT_DIR, filename)
|
pdf_path = os.path.join(INPUT_DIR, filename)
|
||||||
try:
|
try:
|
||||||
|
|
@ -61,35 +109,57 @@ def process_single_pdf(filename, shift_offset=0):
|
||||||
if not cropped_images:
|
if not cropped_images:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Combine
|
# 1. Generate Schema / Distribution
|
||||||
num_images = len(cropped_images)
|
col_distribution = distribute_pages(len(cropped_images), max_per_file=5)
|
||||||
total_width = sum(img.width for img in cropped_images) + (num_images - 1) * DELIMITER_WIDTH
|
|
||||||
max_height = max(img.height for img in cropped_images)
|
|
||||||
|
|
||||||
combined = Image.new('RGB', (total_width, max_height), color=(255, 255, 255))
|
# 2. Generate Split Images (Full Resolution)
|
||||||
|
split_images = []
|
||||||
|
current_idx = 0
|
||||||
|
for count in col_distribution:
|
||||||
|
chunk = cropped_images[current_idx : current_idx + count]
|
||||||
|
stitched_chunk = stitch_images(chunk)
|
||||||
|
split_images.append(stitched_chunk)
|
||||||
|
current_idx += count
|
||||||
|
|
||||||
x_offset = 0
|
# 3. Generate Preview (All stitched together, Resized)
|
||||||
for idx, img in enumerate(cropped_images):
|
full_stitch = stitch_images(cropped_images)
|
||||||
combined.paste(img, (x_offset, 0))
|
preview_resized = full_stitch.resize(OUTPUT_SIZE, Image.LANCZOS)
|
||||||
x_offset += img.width
|
|
||||||
if idx < num_images - 1:
|
|
||||||
delimiter = Image.new('RGB', (DELIMITER_WIDTH, max_height), color=DELIMITER_COLOR)
|
|
||||||
combined.paste(delimiter, (x_offset, 0))
|
|
||||||
x_offset += DELIMITER_WIDTH
|
|
||||||
|
|
||||||
# Resize
|
schema = {
|
||||||
resized = combined.resize(OUTPUT_SIZE, Image.LANCZOS)
|
"original_filename": filename,
|
||||||
return resized
|
"total_pages": len(cropped_images),
|
||||||
|
"number_of_files": len(split_images),
|
||||||
|
"columns_per_file": col_distribution
|
||||||
|
}
|
||||||
|
|
||||||
|
return (preview_resized, split_images, schema)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing {filename}: {e}")
|
print(f"Error processing {filename}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def save_image(pil_img, filename):
|
def save_results(result_tuple, filename):
|
||||||
output_filename = os.path.splitext(filename)[0] + ".jpg"
|
"""
|
||||||
output_path = os.path.join(OUTPUT_DIR, output_filename)
|
Saves the split images and the schema JSON.
|
||||||
pil_img.save(output_path, "JPEG", quality=95)
|
"""
|
||||||
print(f"Saved: {output_filename}")
|
_, splits, schema = result_tuple
|
||||||
|
base_name = os.path.splitext(filename)[0]
|
||||||
|
|
||||||
|
# Save Images
|
||||||
|
for i, img in enumerate(splits):
|
||||||
|
# Suffix _01, _02, etc.
|
||||||
|
suffix = f"_{i+1:02d}"
|
||||||
|
output_filename = f"{base_name}{suffix}.jpg"
|
||||||
|
output_path = os.path.join(OUTPUT_DIR, output_filename)
|
||||||
|
img.save(output_path, "JPEG", quality=95)
|
||||||
|
print(f"Saved: {output_filename}")
|
||||||
|
|
||||||
|
# Save Schema
|
||||||
|
json_filename = f"{base_name}_schema.json"
|
||||||
|
json_path = os.path.join(OUTPUT_DIR, json_filename)
|
||||||
|
with open(json_path, 'w') as f:
|
||||||
|
json.dump(schema, f, indent=4)
|
||||||
|
print(f"Saved schema: {json_filename}")
|
||||||
|
|
||||||
# --- GUI Application ---
|
# --- GUI Application ---
|
||||||
|
|
||||||
|
|
@ -98,10 +168,10 @@ class ImageReviewer:
|
||||||
self.files = file_list
|
self.files = file_list
|
||||||
self.index = 0
|
self.index = 0
|
||||||
self.current_shift = 0
|
self.current_shift = 0
|
||||||
self.current_pil = None
|
self.current_preview = None # Only stores the resized preview for GUI
|
||||||
self.is_processing = False
|
self.is_processing = False
|
||||||
|
|
||||||
# Queue for pre-fetched images (index, image)
|
# Queue for pre-fetched results (index, (preview, splits, schema))
|
||||||
self.prefetch_queue = Queue(maxsize=1)
|
self.prefetch_queue = Queue(maxsize=1)
|
||||||
# Queue for manual re-processing results
|
# Queue for manual re-processing results
|
||||||
self.manual_queue = Queue()
|
self.manual_queue = Queue()
|
||||||
|
|
@ -142,12 +212,11 @@ class ImageReviewer:
|
||||||
if target < len(self.files):
|
if target < len(self.files):
|
||||||
if idx_to_process != target:
|
if idx_to_process != target:
|
||||||
fname = self.files[target]
|
fname = self.files[target]
|
||||||
img = process_single_pdf(fname, shift_offset=0)
|
result = process_single_pdf(fname, shift_offset=0)
|
||||||
if img:
|
if result:
|
||||||
self.prefetch_queue.put((target, img)) # Blocks if full
|
self.prefetch_queue.put((target, result)) # Blocks if full
|
||||||
idx_to_process = target
|
idx_to_process = target
|
||||||
|
|
||||||
# Crucial fix: Sleep briefly to release CPU
|
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
def load_current_image(self, use_prefetch=False):
|
def load_current_image(self, use_prefetch=False):
|
||||||
|
|
@ -159,19 +228,17 @@ class ImageReviewer:
|
||||||
filename = self.files[self.index]
|
filename = self.files[self.index]
|
||||||
self.is_processing = False
|
self.is_processing = False
|
||||||
|
|
||||||
img_found = None
|
result_found = None
|
||||||
|
|
||||||
if use_prefetch and not self.prefetch_queue.empty():
|
if use_prefetch and not self.prefetch_queue.empty():
|
||||||
q_idx, q_img = self.prefetch_queue.queue[0]
|
q_idx, q_result = self.prefetch_queue.queue[0]
|
||||||
if q_idx == self.index:
|
if q_idx == self.index:
|
||||||
_, img_found = self.prefetch_queue.get()
|
_, result_found = self.prefetch_queue.get()
|
||||||
self.current_shift = 0
|
self.current_shift = 0
|
||||||
print(f"Loaded {filename} from prefetch.")
|
print(f"Loaded {filename} from prefetch.")
|
||||||
|
|
||||||
if img_found:
|
if result_found:
|
||||||
self.current_pil = img_found
|
self.handle_processing_result(result_found, filename)
|
||||||
save_image(self.current_pil, filename)
|
|
||||||
self.update_display(filename)
|
|
||||||
else:
|
else:
|
||||||
# Not in queue (first load or queue mismatch), process manually
|
# Not in queue (first load or queue mismatch), process manually
|
||||||
self.trigger_processing(filename, self.current_shift)
|
self.trigger_processing(filename, self.current_shift)
|
||||||
|
|
@ -182,8 +249,8 @@ class ImageReviewer:
|
||||||
self.label_info.configure(text=f"Processing {filename} (Shift {shift})... Please wait.", fg="red")
|
self.label_info.configure(text=f"Processing {filename} (Shift {shift})... Please wait.", fg="red")
|
||||||
|
|
||||||
def worker():
|
def worker():
|
||||||
img = process_single_pdf(filename, shift)
|
res = process_single_pdf(filename, shift)
|
||||||
self.manual_queue.put(img)
|
self.manual_queue.put(res)
|
||||||
|
|
||||||
Thread(target=worker, daemon=True).start()
|
Thread(target=worker, daemon=True).start()
|
||||||
self.check_manual_queue(filename)
|
self.check_manual_queue(filename)
|
||||||
|
|
@ -191,11 +258,9 @@ class ImageReviewer:
|
||||||
def check_manual_queue(self, filename):
|
def check_manual_queue(self, filename):
|
||||||
"""Polls the manual queue for result."""
|
"""Polls the manual queue for result."""
|
||||||
try:
|
try:
|
||||||
img = self.manual_queue.get_nowait()
|
result = self.manual_queue.get_nowait()
|
||||||
self.current_pil = img
|
if result:
|
||||||
if self.current_pil:
|
self.handle_processing_result(result, filename)
|
||||||
save_image(self.current_pil, filename)
|
|
||||||
self.update_display(filename)
|
|
||||||
else:
|
else:
|
||||||
print(f"Failed to process {filename}, skipping.")
|
print(f"Failed to process {filename}, skipping.")
|
||||||
self.index += 1
|
self.index += 1
|
||||||
|
|
@ -205,13 +270,29 @@ class ImageReviewer:
|
||||||
# Check again in 100ms
|
# Check again in 100ms
|
||||||
self.root.after(100, lambda: self.check_manual_queue(filename))
|
self.root.after(100, lambda: self.check_manual_queue(filename))
|
||||||
|
|
||||||
def update_display(self, filename):
|
def handle_processing_result(self, result, filename):
|
||||||
if self.current_pil:
|
"""Unpacks result, saves files, and updates display."""
|
||||||
tk_image = ImageTk.PhotoImage(self.current_pil)
|
preview, splits, schema = result
|
||||||
|
self.current_preview = preview
|
||||||
|
|
||||||
|
# Save immediately upon loading/calculating
|
||||||
|
save_results(result, filename)
|
||||||
|
self.update_display(filename, schema)
|
||||||
|
|
||||||
|
def update_display(self, filename, schema=None):
|
||||||
|
if self.current_preview:
|
||||||
|
tk_image = ImageTk.PhotoImage(self.current_preview)
|
||||||
self.label_img.configure(image=tk_image)
|
self.label_img.configure(image=tk_image)
|
||||||
self.label_img.image = tk_image
|
self.label_img.image = tk_image
|
||||||
|
|
||||||
|
schema_info = ""
|
||||||
|
if schema:
|
||||||
|
cols = str(schema['columns_per_file'])
|
||||||
|
schema_info = f"\nFiles: {schema['number_of_files']} | Cols: {cols}"
|
||||||
|
|
||||||
self.label_info.configure(
|
self.label_info.configure(
|
||||||
text=f"[{self.index+1}/{len(self.files)}] {filename} | Shift: {self.current_shift}px\n"
|
text=f"[{self.index+1}/{len(self.files)}] {filename} | Shift: {self.current_shift}px"
|
||||||
|
f"{schema_info}\n"
|
||||||
f"Enter: Next | n: +50 | N: +100 | t: -50",
|
f"Enter: Next | n: +50 | N: +100 | t: -50",
|
||||||
fg="black"
|
fg="black"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
166
gemini-batch.py
166
gemini-batch.py
|
|
@ -1,166 +0,0 @@
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from google import genai
|
|
||||||
from google.genai import types
|
|
||||||
import base64
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
if len(sys.argv) < 2:
|
|
||||||
sys.exit("Usage: python script.py <directory_path>")
|
|
||||||
|
|
||||||
INPUT_DIR = sys.argv[1]
|
|
||||||
CUTLEFT_DIR = os.path.join(INPUT_DIR, 'Cutleft')
|
|
||||||
|
|
||||||
|
|
||||||
MODEL_ID = "gemini-3-flash-preview"
|
|
||||||
api_key="REMOVED_API_KEY"
|
|
||||||
|
|
||||||
my_prompt = """I'm giving you an image of the left columns of a written exam.
|
|
||||||
Students answer several exercises, which can have several questions.
|
|
||||||
|
|
||||||
The image consists of several columns, separated by vertical black
|
|
||||||
lines. The image should be read top to bottom and then left to right,
|
|
||||||
meaning first column, then second column, etc.
|
|
||||||
|
|
||||||
In their sheet, students delimit exercises and questions using
|
|
||||||
delimiters such as `Ex 1`, or `Exercice 1`, and `1)` or `a)`. You need
|
|
||||||
to give me the bounding boxes of each delimiter.
|
|
||||||
|
|
||||||
When giving the bounding box of the first question of an exercise, the
|
|
||||||
box should be large enough to contain both the exercice label
|
|
||||||
(`Exercice i`) and the question label (`1)`) parts.
|
|
||||||
|
|
||||||
You also need to give me the student name. It should appear on the top
|
|
||||||
left of the image. Disregard any mention of `MPSI 3`, it is their
|
|
||||||
class. A list of possible student names will be given below.
|
|
||||||
|
|
||||||
You will answer with a JSON object, containing a `name` field with the
|
|
||||||
name, and a `list` field, with the list of the bounding boxes and
|
|
||||||
their labels. The box_2d should be [ymin, xmin, ymax, xmax] normalized
|
|
||||||
to 0-1000.
|
|
||||||
|
|
||||||
Here is an example :
|
|
||||||
{\"name\" : \"John Doe\", \"list\" : [{\"box_2d\": (10, 20, 30, 40), \"label\" : \"Ex 1 : 1)\"}]}
|
|
||||||
|
|
||||||
Do not provide a box_2d for the name. Only for the labels.
|
|
||||||
|
|
||||||
You may find the same label present several times, as a student either
|
|
||||||
recall the current label on a new page, or adds content to its answer
|
|
||||||
later on. Give the position of each instance of each label.
|
|
||||||
|
|
||||||
For this exam you should look for the labels given below, separated by
|
|
||||||
newlines. A student need not have answered every question, so some may
|
|
||||||
be missing.
|
|
||||||
|
|
||||||
##labels##
|
|
||||||
|
|
||||||
Here's a list of the names of the students, pick the one that matches
|
|
||||||
the best or `\"Unknown\"` if you cannot read the name
|
|
||||||
|
|
||||||
##names##"""
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
def process_batch(directory):
    """
    Run one Gemini batch job over every ``*.jpg`` found in ``directory``.

    Steps: upload each image through the File API, write one request per
    image to ``batch_input.jsonl`` inside ``INPUT_DIR``, upload that file,
    submit the batch job, poll it every 10 seconds until its state is
    ``SUCCEEDED`` or ``FAILED``, then save each response as
    ``<image file name>.json`` inside ``directory``.

    Args:
        directory: Folder containing the ``.jpg`` images to process; the
            per-image result files are written there as well.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    # Function-scope import: the module header does not import json, yet the
    # request serialization and result parsing below depend on it.
    import json

    client = genai.Client(api_key=api_key)
    image_files = list(Path(directory).glob("*.jpg"))

    if not image_files:
        print("No .jpg files found.")
        return

    # These two request sections are identical for every image, so they are
    # built once. Note: the config is serialized manually for the JSONL body.
    generation_config = {
        "temperature": 1.0,
        "topP": 0.95,
        "maxOutputTokens": 65535,
        "thinkingConfig": {"thinkingBudget": -1}
    }
    safety_settings = [
        {"category": cat, "threshold": "BLOCK_NONE"}
        for cat in ["HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_DANGEROUS_CONTENT",
                    "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_HARASSMENT"]
    ]

    # 1. Upload images to File API (Batch requirement)
    batch_requests = []
    print(f"Uploading {len(image_files)} images to File API...")

    for img_path in tqdm(image_files, unit="img"):
        # Upload file
        file_ref = client.files.upload(path=img_path)

        # Construct Request for JSONL
        # NOTE(review): my_prompt is sent as-is, so its ##labels## and
        # ##names## placeholders are never substituted here - confirm intent.
        req_body = {
            "contents": [
                {"role": "user", "parts": [
                    {"fileData": {"mimeType": file_ref.mime_type, "fileUri": file_ref.uri}},
                    {"text": my_prompt}
                ]}
            ],
            "generationConfig": generation_config,
            "safetySettings": safety_settings
        }

        # Batch Request Entry
        batch_requests.append({
            "custom_id": img_path.name,
            "method": "POST",
            "url": f"/v1beta/models/{MODEL_ID}:generateContent",
            "body": req_body
        })

    # 2. Create and Upload Batch Source File (JSONL)
    batch_file_path = os.path.join(INPUT_DIR, "batch_input.jsonl")
    with open(batch_file_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(req) + "\n" for req in batch_requests)

    batch_input_file = client.files.upload(path=batch_file_path)

    # 3. Submit Batch Job
    print("Submitting batch job...")
    job = client.batches.create(
        model=MODEL_ID,
        src=batch_input_file.name
    )
    print(f"Batch Job ID: {job.name}")

    # 4. Poll for Completion
    # NOTE(review): only "SUCCEEDED" and "FAILED" end the loop; any other
    # terminal state would poll forever - confirm the state names the SDK
    # actually reports.
    pbar = tqdm(desc="Processing Batch", unit="poll")
    try:
        while True:
            job = client.batches.get(name=job.name)
            if job.state == "ACTIVE":
                pbar.set_description("Processing")
            elif job.state in ("SUCCEEDED", "FAILED"):
                break

            pbar.update(1)
            time.sleep(10)  # Poll every 10 seconds
    finally:
        # Close the progress bar even if a status request raises
        pbar.close()

    if job.state == "FAILED":
        print(f"Batch job failed: {job.error}")
        return

    # 5. Retrieve and Save Results
    print("Downloading results...")
    # The output file is a remote URI, we download its content
    output_content = client.files.content(path=job.output_file.name)

    # Parse JSONL output and map back to files
    # Output format: {"custom_id": "...", "response": {...}}
    results_saved = 0
    for line in output_content.decode("utf-8").splitlines():
        # Skip blank (including whitespace-only) lines; json.loads rejects them
        if not line.strip():
            continue
        result = json.loads(line)

        filename = result.get("custom_id")
        if filename:
            output_path = Path(directory) / f"{filename}.json"
            with open(output_path, "w", encoding="utf-8") as f:
                # Save the full response part
                json.dump(result.get("response", {}), f, indent=2)
            results_saved += 1

    print(f"Batch complete. Saved {results_saved} result files.")
|
|
||||||
|
|
||||||
process_batch(CUTLEFT_DIR)
|
|
||||||
119
gemini.py
119
gemini.py
|
|
@ -1,119 +0,0 @@
|
||||||
from google import genai
|
|
||||||
from google.genai import types
|
|
||||||
import base64
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
MODEL_ID = "gemini-3-flash-preview"
|
|
||||||
api_key="REMOVED_API_KEY"
|
|
||||||
|
|
||||||
my_prompt = """I'm giving you an image of the left columns of a written exam.
|
|
||||||
Students answer several exercises, which can have several questions.
|
|
||||||
|
|
||||||
The image consists of several columns, separated by vertical black
|
|
||||||
lines. The image should be read top to bottom and then left to right,
|
|
||||||
meaning first column, then second column, etc.
|
|
||||||
|
|
||||||
In their sheet, students delimit exercises and questions using
|
|
||||||
delimiters such as `Ex 1`, or `Exercice 1`, and `1)` or `a)`. You need
|
|
||||||
to give me the bounding boxes of each delimiter.
|
|
||||||
|
|
||||||
When giving the bounding box of the first question of an exercise, the
|
|
||||||
box should be large enough to contain both the exercice label
|
|
||||||
(`Exercice i`) and the question label (`1)`) parts.
|
|
||||||
|
|
||||||
You also need to give me the student name. It should appear on the top
|
|
||||||
left of the image. Disregard any mention of `MPSI 3`, it is their
|
|
||||||
class. A list of possible student names will be given below.
|
|
||||||
|
|
||||||
You will answer with a JSON object, containing a `name` field with the
|
|
||||||
name, and a `list` field, with the list of the bounding boxes and
|
|
||||||
their labels. The box_2d should be [ymin, xmin, ymax, xmax] normalized
|
|
||||||
to 0-1000.
|
|
||||||
|
|
||||||
Here is an example :
|
|
||||||
{\"name\" : \"John Doe\", \"list\" : [{\"box_2d\": (10, 20, 30, 40), \"label\" : \"Ex 1 : 1)\"}]}
|
|
||||||
|
|
||||||
Do not provide a box_2d for the name. Only for the labels.
|
|
||||||
|
|
||||||
You may find the same label present several times, as a student either
|
|
||||||
recall the current label on a new page, or adds content to its answer
|
|
||||||
later on. Give the position of each instance of each label.
|
|
||||||
|
|
||||||
For this exam you should look for the labels given below, separated by
|
|
||||||
newlines. A student need not have answered every question, so some may
|
|
||||||
be missing.
|
|
||||||
|
|
||||||
##labels##
|
|
||||||
|
|
||||||
Here's a list of the names of the students, pick the one that matches
|
|
||||||
the best or `\"Unknown\"` if you cannot read the name
|
|
||||||
|
|
||||||
##names##"""
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
# One labelled bounding box from the model's structured answer.
# Kept as `#` comments rather than a class docstring so that the JSON schema
# generated from this model is not altered.
class BoxItem(BaseModel):
    # Box coordinates as a list of integers
    box_2d: List[int] = Field(
        description="Bounding box coordinates (e.g., [ymin, xmin, ymax, xmax])",
    )
    # Text label attached to this box
    label: str = Field(
        description="The label associated with the specific box",
    )
|
|
||||||
|
|
||||||
# Top-level shape of the structured answer: a name plus labelled boxes.
# Kept as `#` comments rather than a class docstring so that the JSON schema
# generated from this model is not altered.
class AnnotationData(BaseModel):
    # Name identifier for the answer
    name: str = Field(
        description="The name identifier",
    )
    # The field is called `list` because that is the key used in the JSON
    # answer; it shadows the builtin only inside this class body.
    list: List[BoxItem] = Field(
        description="List of bounding box items",
    )
|
|
||||||
|
|
||||||
|
|
||||||
def generate_request(file, labels):
    """
    Build the Gemini request payload for a single image.

    Args:
        file: Path to the image; its bytes are sent with MIME type
            ``image/jpeg``.
        labels: Text appended to ``my_prompt`` to form the prompt.

    Returns:
        A ``(contents, generate_content_config)`` tuple: ``contents`` is a
        one-element list holding the user turn (image part, then text part),
        and the config requests a JSON answer following the
        ``AnnotationData`` schema.
    """
    image_part = types.Part.from_bytes(
        data=Path(file).read_bytes(),
        mime_type="image/jpeg"
    )
    prompt_part = types.Part.from_text(text=my_prompt + labels)
    user_turn = types.Content(role="user", parts=[image_part, prompt_part])

    # No thinking_config is passed: it is unclear whether it can be combined
    # with a JSON response schema (unverified), so it stays disabled.
    request_config = types.GenerateContentConfig(
        temperature=1.0,
        top_p=0.95,
        seed=0,
        max_output_tokens=65535,
        response_mime_type="application/json",
        response_json_schema=AnnotationData.model_json_schema(),
    )
    return ([user_turn], request_config)
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
|
|
||||||
# Script entry: stream the model's answer for one image to stdout.
# Two positional arguments are required, as the usage string states.
if len(sys.argv) < 3:
    sys.exit("Usage: python script.py Staging/cutleft1000.jpg labels")

INPUT_FILE = sys.argv[1]
# generate_request() needs the labels text as its second argument; calling it
# with the file alone raises TypeError.
# NOTE(review): the argument is passed through verbatim - confirm whether it
# is meant to be the label text itself or a path to a file containing it.
LABELS = sys.argv[2]
contents, config = generate_request(INPUT_FILE, LABELS)

client = genai.Client(api_key=api_key)

# Print each streamed chunk as soon as it arrives
for chunk in client.models.generate_content_stream(
    model=MODEL_ID,
    contents=contents,
    config=config,
):
    if chunk.text:
        print(chunk.text, end="", flush=True)
|
|
||||||
|
|
@ -3,12 +3,15 @@ from google.genai import types
|
||||||
import base64
|
import base64
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import List
|
from typing import List, Dict
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import argparse
|
import argparse
|
||||||
|
import re
|
||||||
|
from collections import defaultdict
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
MODEL_ID = "gemini-3-flash-preview"
|
MODEL_ID = "gemini-3-flash-preview"
|
||||||
api_key="REMOVED_API_KEY"
|
api_key="REMOVED_API_KEY"
|
||||||
|
|
@ -24,6 +27,55 @@ In their sheet, students delimit exercises and questions using
|
||||||
delimiters such as `Ex 1`, or `Exercice 1`, and `1)` or `a)`. You need
|
delimiters such as `Ex 1`, or `Exercice 1`, and `1)` or `a)`. You need
|
||||||
to give me the bounding boxes of each delimiter.
|
to give me the bounding boxes of each delimiter.
|
||||||
|
|
||||||
|
When giving the bounding box of the first question of an exercise, the
|
||||||
|
box should be large enough to contain both the exercice label
|
||||||
|
(`Exercice i`) and the question label (`1)`) parts. If they are
|
||||||
|
horizontally far apart (example : if the `1)` is to the left and the
|
||||||
|
`Exercice i` is either to the right, or in the middle) then give only
|
||||||
|
the bounding box of the question label `1)` part. You should still
|
||||||
|
label it as `Exercice i : 1)` though.
|
||||||
|
|
||||||
|
You also need to give me the student name. It should appear on the top
|
||||||
|
left of the image. Disregard any mention of `MPSI 3`, it is their
|
||||||
|
class. A list of possible student names will be given below.
|
||||||
|
|
||||||
|
You will answer with a JSON object, containing a `name` field with the
|
||||||
|
name, and a `list` field, with the list of the bounding boxes and
|
||||||
|
their labels. The box_2d should be [ymin, xmin, ymax, xmax] normalized
|
||||||
|
to 0-1000.
|
||||||
|
|
||||||
|
Here is an example :
|
||||||
|
{\"name\" : \"John Doe\", \"list\" : [{\"box_2d\": (10, 20, 30, 40), \"label\" : \"Ex 1 : 1)\"}]}
|
||||||
|
|
||||||
|
Do not provide a box_2d for the name. Only for the labels. Order the
|
||||||
|
box_2d by their position in the page, column by column : first column
|
||||||
|
(top to bottom), then second column, etc.
|
||||||
|
|
||||||
|
You may find the same label present several times, as a student either
|
||||||
|
recall the current label on a new page, or adds content to its answer
|
||||||
|
later on. Give the position of each instance of each label.
|
||||||
|
|
||||||
|
For this exam you should look for the labels given below, separated by
|
||||||
|
newlines. A student need not have answered every question, so some may
|
||||||
|
be missing.
|
||||||
|
|
||||||
|
##labels##
|
||||||
|
|
||||||
|
Here's a list of the names of the students, pick the one that matches
|
||||||
|
the best or `\"Unknown\"` if you cannot read the name
|
||||||
|
|
||||||
|
##names##"""
|
||||||
|
my_prompt2 = """I'm giving you an image of the left columns of a written exam.
|
||||||
|
Students answer several exercises, which can have several questions.
|
||||||
|
|
||||||
|
The image consists of several columns, separated by vertical black
|
||||||
|
lines. The image should be read top to bottom and then left to right,
|
||||||
|
meaning first column, then second column, etc.
|
||||||
|
|
||||||
|
In their sheet, students delimit exercises and questions using
|
||||||
|
delimiters such as `Ex 1`, or `Exercice 1`, and `1)` or `a)`. You need
|
||||||
|
to give me the bounding boxes of each delimiter.
|
||||||
|
|
||||||
When giving the bounding box of the first question of an exercise, the
|
When giving the bounding box of the first question of an exercise, the
|
||||||
box should be large enough to contain both the exercice label
|
box should be large enough to contain both the exercice label
|
||||||
(`Exercice i`) and the question label (`1)`) parts.
|
(`Exercice i`) and the question label (`1)`) parts.
|
||||||
|
|
@ -46,16 +98,26 @@ You may find the same label present several times, as a student either
|
||||||
recall the current label on a new page, or adds content to its answer
|
recall the current label on a new page, or adds content to its answer
|
||||||
later on. Give the position of each instance of each label.
|
later on. Give the position of each instance of each label.
|
||||||
|
|
||||||
|
This image is one part of a sequence (e.g., part 2 of 3) for a single
|
||||||
|
student. Here is the list of labels found in the *previous* parts of
|
||||||
|
this copy:
|
||||||
|
|
||||||
|
[
|
||||||
|
##prev_context##
|
||||||
|
]
|
||||||
|
|
||||||
|
If the first column starts with a number like =3)= or =c)=, look at
|
||||||
|
the labels in the list above. If the last relevant label was =Ex 4 :
|
||||||
|
2)=, you should label the new box =Ex 4 : 3)=.
|
||||||
|
|
||||||
For this exam you should look for the labels given below, separated by
|
For this exam you should look for the labels given below, separated by
|
||||||
newlines. A student need not have answered every question, so some may
|
newlines. A student need not have answered every question, so some may
|
||||||
be missing.
|
be missing.
|
||||||
|
|
||||||
##labels##
|
##labels##
|
||||||
|
|
||||||
Here's a list of the names of the students, pick the one that matches
|
Since this copy isn't the first part of a sequence, simply set the
|
||||||
the best or `\"Unknown\"` if you cannot read the name
|
name to `\"Continued\"`."""
|
||||||
|
|
||||||
##names##"""
|
|
||||||
|
|
||||||
class BoxItem(BaseModel):
|
class BoxItem(BaseModel):
|
||||||
box_2d: List[int] = Field(description="Bounding box coordinates (e.g., [ymin, xmin, ymax, xmax])")
|
box_2d: List[int] = Field(description="Bounding box coordinates (e.g., [ymin, xmin, ymax, xmax])")
|
||||||
|
|
@ -66,12 +128,21 @@ class AnnotationData(BaseModel):
|
||||||
list: List[BoxItem] = Field(description="List of bounding box items")
|
list: List[BoxItem] = Field(description="List of bounding box items")
|
||||||
|
|
||||||
|
|
||||||
def generate_request(file, labels, names):
|
def generate_request(file, labels, names, context_labels):
|
||||||
"""Generates request for Gemini."""
|
"""Generates request for Gemini with context."""
|
||||||
|
|
||||||
image_path = Path(file)
|
image_path = Path(file)
|
||||||
|
|
||||||
text = my_prompt.replace("##labels##",labels).replace("##names##", names)
|
# Format context list as a string
|
||||||
|
context_str = ", ".join([f'"{l}"' for l in context_labels]) if context_labels else "No previous context"
|
||||||
|
|
||||||
|
if context_labels == []:
|
||||||
|
text = my_prompt.replace("##labels##", labels)\
|
||||||
|
.replace("##names##", names)
|
||||||
|
else:
|
||||||
|
text = my_prompt2.replace("##labels##", labels)\
|
||||||
|
.replace("##prev_context##", context_str)
|
||||||
|
|
||||||
contents = [
|
contents = [
|
||||||
types.Content(
|
types.Content(
|
||||||
role="user",
|
role="user",
|
||||||
|
|
@ -97,74 +168,115 @@ def generate_request(file, labels, names):
|
||||||
|
|
||||||
# Argument Parsing
|
# Argument Parsing
|
||||||
parser = argparse.ArgumentParser(description="Process a directory or specific file using Gemini.")
|
parser = argparse.ArgumentParser(description="Process a directory or specific file using Gemini.")
|
||||||
parser.add_argument("input_path", help="The input directory or specific file (e.g., Dir/File.pdf)")
|
parser.add_argument("input_path", help="The input directory or specific file")
|
||||||
parser.add_argument("--overwrite", action="store_true", help="Regenerate output even if it exists")
|
parser.add_argument("--overwrite", action="store_true", help="Regenerate output even if it exists")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
input_arg = Path(args.input_path)
|
input_arg = Path(args.input_path)
|
||||||
image_files = []
|
image_files = []
|
||||||
|
|
||||||
# Logic to handle Directory vs File argument
|
# Setup Paths and Files
|
||||||
if input_arg.is_file():
|
if input_arg.is_file():
|
||||||
# If argument is Dir/Copiedd.pdf
|
|
||||||
INPUT_DIR = input_arg.parent
|
INPUT_DIR = input_arg.parent
|
||||||
CUTLEFT_DIR = INPUT_DIR / 'Cutleft'
|
CUTLEFT_DIR = INPUT_DIR / 'Cutleft'
|
||||||
|
# For a single file, we verify it exists but we might miss context if we don't look for siblings
|
||||||
# Look for matching .jpg in Cutleft (e.g., Copiedd.jpg)
|
# Simplification: We add just this file, context will be empty.
|
||||||
target_image = CUTLEFT_DIR / f"{input_arg.stem}.jpg"
|
target_image = CUTLEFT_DIR / f"{input_arg.stem}.jpg"
|
||||||
|
|
||||||
if target_image.exists():
|
if target_image.exists():
|
||||||
image_files = [target_image]
|
image_files = [target_image]
|
||||||
else:
|
else:
|
||||||
print(f"Error: Corresponding image {target_image} not found.")
|
print(f"Error: {target_image} not found.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
# If argument is just Dir
|
|
||||||
INPUT_DIR = input_arg
|
INPUT_DIR = input_arg
|
||||||
CUTLEFT_DIR = INPUT_DIR / 'Cutleft'
|
CUTLEFT_DIR = INPUT_DIR / 'Cutleft'
|
||||||
image_files = sorted(list(CUTLEFT_DIR.glob("*.jpg")))
|
image_files = sorted(list(CUTLEFT_DIR.glob("*.jpg")))
|
||||||
|
|
||||||
labels = (INPUT_DIR / "labels").read_text()
|
labels_txt = (INPUT_DIR / "labels").read_text()
|
||||||
names = (INPUT_DIR / "names").read_text()
|
names_txt = (INPUT_DIR / "names").read_text()
|
||||||
client = genai.Client(api_key=api_key)
|
client = genai.Client(api_key=api_key)
|
||||||
|
|
||||||
# Target > 3.0s per request to stay under 20 RPM
|
# Group files by Copy ID (e.g. Copie01_01.jpg -> Copie01)
|
||||||
|
# regex: match everything before the last underscore if it ends in digits
|
||||||
|
file_groups = defaultdict(list)
|
||||||
|
for img in image_files:
|
||||||
|
stem = img.stem
|
||||||
|
# match CopieXX_YY -> Group CopieXX
|
||||||
|
match = re.match(r"(.+)_(\d+)$", stem)
|
||||||
|
if match:
|
||||||
|
group_key = match.group(1)
|
||||||
|
file_groups[group_key].append(img)
|
||||||
|
else:
|
||||||
|
# Fallback for files without underscore numbering
|
||||||
|
file_groups[stem].append(img)
|
||||||
|
|
||||||
|
# Sort files within each group to ensure sequential processing
|
||||||
|
for key in file_groups:
|
||||||
|
file_groups[key].sort(key=lambda x: x.name)
|
||||||
|
|
||||||
TARGET_INTERVAL = 3.5
|
TARGET_INTERVAL = 3.5
|
||||||
|
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
def process_copy_group(group_key, files):
|
||||||
|
"""Processes a list of files belonging to one copy sequentially to maintain context."""
|
||||||
|
|
||||||
def process_image(image_file):
|
# Context accumulator for this specific copy
|
||||||
start_time = time.time()
|
accumulated_labels = []
|
||||||
base_name, _ = os.path.splitext(image_file.name)
|
|
||||||
output_json = os.path.join(INPUT_DIR, f"{base_name}.json")
|
|
||||||
|
|
||||||
# Skip if already processed unless overwrite is enabled
|
for image_file in files:
|
||||||
if os.path.exists(output_json) and not args.overwrite:
|
start_time = time.time()
|
||||||
print(f"Skipping {image_file.name}, output exists.")
|
base_name = image_file.stem
|
||||||
return
|
output_json = INPUT_DIR / f"{base_name}.json"
|
||||||
|
|
||||||
print(f"Processing {image_file.name}...")
|
# Check existing
|
||||||
|
if output_json.exists() and not args.overwrite:
|
||||||
|
print(f"[{group_key}] Skipping {image_file.name}, output exists.")
|
||||||
|
# If skipping, we should try to load existing labels to keep context for next parts
|
||||||
|
try:
|
||||||
|
with open(output_json, 'r') as f:
|
||||||
|
data = json.load(f)
|
||||||
|
for item in data.get('list', []):
|
||||||
|
accumulated_labels.append(item['label'])
|
||||||
|
except:
|
||||||
|
pass # If read fails, next part has no context
|
||||||
|
continue
|
||||||
|
|
||||||
try:
|
print(f"[{group_key}] Processing {image_file.name} with {len(accumulated_labels)} ctx items...")
|
||||||
# Prepare and execute request
|
|
||||||
contents, config = generate_request(image_file, labels, names)
|
|
||||||
response = client.models.generate_content(
|
|
||||||
model=MODEL_ID,
|
|
||||||
contents=contents,
|
|
||||||
config=config
|
|
||||||
)
|
|
||||||
annota = AnnotationData.model_validate_json(response.text)
|
|
||||||
# Save result
|
|
||||||
with open(output_json, "w", encoding="utf-8") as f:
|
|
||||||
json.dump(annota.model_dump(), f, indent=2)
|
|
||||||
|
|
||||||
except Exception as e:
|
try:
|
||||||
print(f"Error processing {image_file.name}: {e}")
|
contents, config = generate_request(image_file, labels_txt, names_txt, accumulated_labels)
|
||||||
|
|
||||||
# Rate Limiting (Note: This limits per-thread, not global total)
|
response = client.models.generate_content(
|
||||||
elapsed = time.time() - start_time
|
model=MODEL_ID,
|
||||||
time.sleep(max(0, TARGET_INTERVAL - elapsed))
|
contents=contents,
|
||||||
|
config=config
|
||||||
|
)
|
||||||
|
|
||||||
# Run with 6 threads
|
annota = AnnotationData.model_validate_json(response.text)
|
||||||
|
|
||||||
|
# Save result
|
||||||
|
with open(output_json, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(annota.model_dump(), f, indent=2)
|
||||||
|
|
||||||
|
# Update context for the next part in this group
|
||||||
|
for box in annota.list:
|
||||||
|
accumulated_labels.append(box.label)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error processing {image_file.name}: {e}")
|
||||||
|
|
||||||
|
# Rate Limiting
|
||||||
|
elapsed = time.time() - start_time
|
||||||
|
time.sleep(max(0, TARGET_INTERVAL - elapsed))
|
||||||
|
|
||||||
|
# Run ThreadPool on GROUPS (Copies), not individual files
|
||||||
|
# Each thread handles one student's full exam copy sequentially
|
||||||
with ThreadPoolExecutor(max_workers=6) as executor:
|
with ThreadPoolExecutor(max_workers=6) as executor:
|
||||||
executor.map(process_image, image_files)
|
# Convert dict items to arguments for map
|
||||||
|
# executor.map expects a function and an iterable.
|
||||||
|
# We use a lambda or separate function to unpack the tuple if needed,
|
||||||
|
# but here we'll just submit futures.
|
||||||
|
futures = [executor.submit(process_copy_group, k, v) for k, v in file_groups.items()]
|
||||||
|
|
||||||
|
# Wait for all to complete
|
||||||
|
for future in futures:
|
||||||
|
future.result()
|
||||||
|
|
|
||||||
208
plotting.py
208
plotting.py
|
|
@ -6,44 +6,57 @@ import subprocess
|
||||||
import tkinter as tk
|
import tkinter as tk
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from PIL import Image, ImageDraw, ImageFont, ImageTk
|
from PIL import Image, ImageDraw, ImageFont, ImageTk
|
||||||
from pypdf import PdfReader
|
|
||||||
|
|
||||||
# --- Configuration & Globals ---
|
# --- Configuration & Globals ---
|
||||||
padding = 60 # White margin to the right
|
padding = 60
|
||||||
|
|
||||||
|
# Queue payload: (pil_image, json_path, metadata)
|
||||||
|
# metadata is a dict: {'copie': str, 'part': int, 'schema': dict}
|
||||||
|
image_queue = queue.Queue(maxsize=5)
|
||||||
|
|
||||||
image_queue = queue.Queue(maxsize=5) # Buffer a few images ahead
|
|
||||||
try:
|
try:
|
||||||
font = ImageFont.truetype("DejaVuSans.ttf", size=30)
|
font = ImageFont.truetype("DejaVuSans.ttf", size=30)
|
||||||
except OSError:
|
except OSError:
|
||||||
font = ImageFont.load_default()
|
font = ImageFont.load_default()
|
||||||
|
|
||||||
# --- Processing Logic (Worker Thread) ---
|
# --- Helper Functions (Shared) ---
|
||||||
|
|
||||||
def page_number(b, nb_pages):
|
def page_number(b, nb_pages):
|
||||||
column_width = 1000 // nb_pages
|
column_width = 1000 // nb_pages
|
||||||
center_x = (b[1] + b[3]) // 2
|
center_x = (b[1] + b[3]) // 2
|
||||||
return center_x // column_width
|
return center_x // column_width
|
||||||
|
|
||||||
|
def convert_box2d(b, pn_ori, npn, tot_ori, tot_dest):
|
||||||
|
l = b.copy()
|
||||||
|
l[1] = (l[1] - (1000 // tot_ori) * (pn_ori-1)) * tot_ori // tot_dest\
|
||||||
|
+ (1000 // tot_dest) * (npn - 1)
|
||||||
|
l[3] = (l[3] - (1000 // tot_ori) * (pn_ori-1)) * tot_ori // tot_dest\
|
||||||
|
+ (1000 // tot_dest) * (npn - 1)
|
||||||
|
return l
|
||||||
|
|
||||||
|
def convert_list(l, group_id, json_schema):
|
||||||
|
ll = []
|
||||||
|
nb_pages = json_schema["columns_per_file"][group_id-1]
|
||||||
|
nb_previous_pages = sum([json_schema["columns_per_file"][i] for i in range(group_id-1)])
|
||||||
|
nb_tot_pages = sum([e for e in json_schema["columns_per_file"]])
|
||||||
|
for e in l:
|
||||||
|
ee = e.copy()
|
||||||
|
pn = page_number(e["box_2d"], nb_pages)
|
||||||
|
npn = pn + nb_previous_pages
|
||||||
|
ee["box_2d"] = convert_box2d(ee["box_2d"], pn, npn, nb_pages, nb_tot_pages)
|
||||||
|
ee["part"] = group_id
|
||||||
|
ee["pn"] = npn
|
||||||
|
ll.append(ee)
|
||||||
|
return ll
|
||||||
|
|
||||||
def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
|
def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
|
||||||
"""
|
|
||||||
Draws boxes on the image and returns the PIL Image object.
|
|
||||||
Does NOT display it.
|
|
||||||
"""
|
|
||||||
im = Image.open(image_path)
|
im = Image.open(image_path)
|
||||||
# Ensure image is loaded so we can pass it between threads safely
|
|
||||||
im.load()
|
im.load()
|
||||||
|
|
||||||
width, height = im.size
|
width, height = im.size
|
||||||
|
|
||||||
# Add white padding to the right
|
|
||||||
new_im = Image.new(im.mode, (width + padding, height), "white")
|
new_im = Image.new(im.mode, (width + padding, height), "white")
|
||||||
new_im.paste(im, (0, 0))
|
new_im.paste(im, (0, 0))
|
||||||
|
|
||||||
draw = ImageDraw.Draw(new_im)
|
draw = ImageDraw.Draw(new_im)
|
||||||
|
|
||||||
bounding_boxes.sort(key=lambda b: (page_number(b["box_2d"], nb_pages), b["box_2d"][0]))
|
bounding_boxes.sort(key=lambda b: (page_number(b["box_2d"], nb_pages), b["box_2d"][0]))
|
||||||
|
|
||||||
last_label_index = -1
|
last_label_index = -1
|
||||||
|
|
||||||
for bbox in bounding_boxes:
|
for bbox in bounding_boxes:
|
||||||
|
|
@ -51,7 +64,6 @@ def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
|
||||||
raw_x_min = int(bbox["box_2d"][1] * width / 1000)
|
raw_x_min = int(bbox["box_2d"][1] * width / 1000)
|
||||||
raw_y_max = int(bbox["box_2d"][2] * height / 1000)
|
raw_y_max = int(bbox["box_2d"][2] * height / 1000)
|
||||||
raw_x_max = int(bbox["box_2d"][3] * width / 1000)
|
raw_x_max = int(bbox["box_2d"][3] * width / 1000)
|
||||||
|
|
||||||
abs_y_min = max(0, raw_y_min - 10)
|
abs_y_min = max(0, raw_y_min - 10)
|
||||||
abs_x_min = max(0, raw_x_min - 10)
|
abs_x_min = max(0, raw_x_min - 10)
|
||||||
abs_y_max = min(height, raw_y_max + 10)
|
abs_y_max = min(height, raw_y_max + 10)
|
||||||
|
|
@ -59,60 +71,67 @@ def prepare_image(image_path: str, bounding_boxes, all_labels, nb_pages):
|
||||||
|
|
||||||
color = "black"
|
color = "black"
|
||||||
label = bbox.get("label")
|
label = bbox.get("label")
|
||||||
|
|
||||||
if label and label in all_labels:
|
if label and label in all_labels:
|
||||||
current_index = all_labels.index(label)
|
current_index = all_labels.index(label)
|
||||||
if current_index < last_label_index:
|
if current_index < last_label_index:
|
||||||
color = "red"
|
color = "red"
|
||||||
last_label_index = current_index
|
last_label_index = current_index
|
||||||
|
|
||||||
draw.rectangle(
|
draw.rectangle(((abs_x_min, abs_y_min), (abs_x_max, abs_y_max)), outline=color, width=4)
|
||||||
((abs_x_min, abs_y_min), (abs_x_max, abs_y_max)),
|
|
||||||
outline=color,
|
|
||||||
width=4,
|
|
||||||
)
|
|
||||||
if label:
|
if label:
|
||||||
# draw.text((abs_x_min + 8, abs_y_min + 6), label, fill=color, font=font)
|
|
||||||
if abs_y_min > 80:
|
if abs_y_min > 80:
|
||||||
draw.text((abs_x_min + 8, abs_y_min - 30), label, fill=color, font=font)
|
draw.text((abs_x_min + 8, abs_y_min - 30), label, fill=color, font=font)
|
||||||
else:
|
else:
|
||||||
draw.text((abs_x_min + 8, abs_y_max + 6), label, fill=color, font=font)
|
draw.text((abs_x_min + 8, abs_y_max + 6), label, fill=color, font=font)
|
||||||
|
|
||||||
return new_im
|
return new_im
|
||||||
|
|
||||||
|
# --- Processing Logic (Worker Thread) ---
|
||||||
|
|
||||||
def worker_thread(base_dir, files_to_process, all_labels):
|
def worker_thread(base_dir, files_to_process, all_labels):
|
||||||
"""
|
"""
|
||||||
Iterates through files, processes them, and puts them in the queue.
|
Iterates through files, prepares VISUALS only, and puts metadata in queue.
|
||||||
|
Does NOT write final JSON files anymore.
|
||||||
"""
|
"""
|
||||||
for img_path in files_to_process:
|
for img_path in files_to_process:
|
||||||
json_path = base_dir / f"{img_path.stem}.json"
|
json_path = base_dir / f"{img_path.stem}.json"
|
||||||
pdf_path = base_dir / f"{img_path.stem}.pdf"
|
copie_part = int(img_path.stem[-2:])
|
||||||
|
copie = img_path.stem[:-3]
|
||||||
|
json_schema_path = base_dir / 'Cutleft' / f"{copie}_schema.json"
|
||||||
|
|
||||||
nb_pages = 1
|
try:
|
||||||
if pdf_path.exists():
|
with open(json_schema_path, 'r') as f:
|
||||||
try:
|
json_schema = json.load(f)
|
||||||
nb_pages = len(PdfReader(pdf_path).pages)
|
except:
|
||||||
except Exception:
|
print("No json_schema : ", json_schema_path)
|
||||||
pass
|
continue
|
||||||
|
|
||||||
|
nb_pages = json_schema["columns_per_file"][copie_part-1]
|
||||||
|
|
||||||
if json_path.exists():
|
if json_path.exists():
|
||||||
try:
|
try:
|
||||||
|
# Read strictly for visualization purposes
|
||||||
with open(json_path, 'r') as f:
|
with open(json_path, 'r') as f:
|
||||||
json_result = json.load(f)
|
json_result = json.load(f)
|
||||||
|
|
||||||
bb_list = json_result.get("list", [])
|
bb_list = json_result.get("list", [])
|
||||||
print(f"Processing {img_path.name}...")
|
print(f"Buffering {img_path.name}...")
|
||||||
|
|
||||||
# Draw boxes
|
|
||||||
pil_image = prepare_image(str(img_path), bb_list, all_labels, nb_pages)
|
pil_image = prepare_image(str(img_path), bb_list, all_labels, nb_pages)
|
||||||
|
|
||||||
# Block if queue is full (waiting for user to view)
|
# Package metadata needed for final calculation later
|
||||||
image_queue.put((pil_image, json_path))
|
metadata = {
|
||||||
|
"copie": copie,
|
||||||
|
"part": copie_part,
|
||||||
|
"schema": json_schema,
|
||||||
|
"name": json_result.get("name", "")
|
||||||
|
}
|
||||||
|
|
||||||
|
image_queue.put((pil_image, json_path, metadata))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing {img_path.name}: {e}")
|
print(f"Error processing {img_path.name}: {e}")
|
||||||
|
|
||||||
# Sentinel to indicate finished
|
# Sentinel to indicate finished
|
||||||
image_queue.put((None, None))
|
image_queue.put((None, None, None))
|
||||||
|
|
||||||
# --- GUI Logic (Main Thread) ---
|
# --- GUI Logic (Main Thread) ---
|
||||||
|
|
||||||
|
|
@ -121,57 +140,66 @@ class ImageViewer:
|
||||||
self.root = root
|
self.root = root
|
||||||
self.base_dir = base_dir
|
self.base_dir = base_dir
|
||||||
self.root.title("Bounding Box Viewer")
|
self.root.title("Bounding Box Viewer")
|
||||||
|
|
||||||
# UI Elements
|
|
||||||
self.label = tk.Label(root, text="Waiting for images...")
|
self.label = tk.Label(root, text="Waiting for images...")
|
||||||
self.label.pack(expand=True, fill="both")
|
self.label.pack(expand=True, fill="both")
|
||||||
|
|
||||||
# State
|
# Display State
|
||||||
self.current_image = None
|
self.current_image = None
|
||||||
self.current_json_path = None
|
self.current_json_path = None
|
||||||
|
self.current_meta = None # Stores schema/copie info
|
||||||
self.is_viewing = False
|
self.is_viewing = False
|
||||||
self.scale_factor = 1.0 # To track resizing
|
self.scale_factor = 1.0
|
||||||
self.orig_size = (1, 1) # To track original dimensions
|
self.orig_size = (1, 1)
|
||||||
|
|
||||||
# Input Bindings
|
# Data Aggregation State
|
||||||
|
self.active_copie_name = None
|
||||||
|
self.accumulated_results = None # Dict with "name" and "list"
|
||||||
|
|
||||||
|
# Bindings
|
||||||
self.root.bind('<Return>', self.on_enter)
|
self.root.bind('<Return>', self.on_enter)
|
||||||
self.root.bind('e', self.on_edit)
|
self.root.bind('e', self.on_edit)
|
||||||
self.root.bind('o', self.on_open_pdf) # <--- 3. Add Key Binding
|
self.root.bind('o', self.on_open_pdf)
|
||||||
self.root.bind('<Escape>', lambda e: self.root.quit())
|
self.root.bind('<Escape>', lambda e: self.root.quit())
|
||||||
self.label.bind('<Button-1>', self.on_click) # Bind left mouse click
|
self.label.bind('<Button-1>', self.on_click)
|
||||||
|
|
||||||
# Start polling queue
|
|
||||||
self.poll_queue()
|
self.poll_queue()
|
||||||
|
|
||||||
def poll_queue(self):
|
def poll_queue(self):
|
||||||
if not self.is_viewing:
|
if not self.is_viewing:
|
||||||
try:
|
try:
|
||||||
pil_image, json_path = image_queue.get_nowait()
|
pil_image, json_path, metadata = image_queue.get_nowait()
|
||||||
|
|
||||||
|
# Handle End of Stream
|
||||||
if pil_image is None:
|
if pil_image is None:
|
||||||
|
self.save_current_batch() # Save any remaining data
|
||||||
print("All images processed.")
|
print("All images processed.")
|
||||||
self.root.quit() # Stop the program
|
self.root.quit()
|
||||||
return
|
return
|
||||||
|
|
||||||
self.display_image(pil_image, json_path)
|
# Check if we switched to a new "Copie" group
|
||||||
|
if self.active_copie_name != metadata["copie"]:
|
||||||
|
self.save_current_batch() # Write previous group to disk
|
||||||
|
# Start new batch
|
||||||
|
self.active_copie_name = metadata["copie"]
|
||||||
|
self.accumulated_results = {"name": metadata["name"], "list": []}
|
||||||
|
|
||||||
|
self.display_image(pil_image, json_path, metadata)
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
pass
|
pass
|
||||||
self.root.after(100, self.poll_queue)
|
self.root.after(100, self.poll_queue)
|
||||||
|
|
||||||
def on_open_pdf(self, event):
|
def save_current_batch(self):
|
||||||
if self.is_viewing and self.current_json_path:
|
"""Writes the accumulated data to the main JSON file."""
|
||||||
# Replace .json extension with .pdf
|
if self.active_copie_name and self.accumulated_results:
|
||||||
pdf_path = self.current_json_path.with_suffix(".pdf")
|
main_json_path = self.base_dir / f"{self.active_copie_name}.json"
|
||||||
|
print(f"Writing aggregated result to {main_json_path}")
|
||||||
|
with open(main_json_path, 'w') as f:
|
||||||
|
json.dump(self.accumulated_results, f)
|
||||||
|
self.accumulated_results = None
|
||||||
|
|
||||||
print(f"Opening {pdf_path}")
|
def display_image(self, pil_image, json_path, metadata):
|
||||||
# Use subprocess to run xdg-open without blocking
|
|
||||||
subprocess.Popen(['xdg-open', str(pdf_path)])
|
|
||||||
|
|
||||||
def display_image(self, pil_image, json_path):
|
|
||||||
self.orig_size = pil_image.size
|
self.orig_size = pil_image.size
|
||||||
self.scale_factor = 1.0
|
self.scale_factor = 1.0
|
||||||
|
|
||||||
# Resize if too large for screen
|
|
||||||
screen_h = self.root.winfo_screenheight() - 100
|
screen_h = self.root.winfo_screenheight() - 100
|
||||||
if pil_image.height > screen_h:
|
if pil_image.height > screen_h:
|
||||||
self.scale_factor = screen_h / pil_image.height
|
self.scale_factor = screen_h / pil_image.height
|
||||||
|
|
@ -179,17 +207,47 @@ class ImageViewer:
|
||||||
int(pil_image.height * self.scale_factor)))
|
int(pil_image.height * self.scale_factor)))
|
||||||
|
|
||||||
self.tk_image = ImageTk.PhotoImage(pil_image)
|
self.tk_image = ImageTk.PhotoImage(pil_image)
|
||||||
self.label.config(image=self.tk_image, text="")
|
self.label.config(image=self.tk_image, text=f"Processing: {json_path.name}")
|
||||||
self.current_json_path = json_path
|
self.current_json_path = json_path
|
||||||
|
self.current_meta = metadata
|
||||||
self.is_viewing = True
|
self.is_viewing = True
|
||||||
self.root.lift()
|
self.root.lift()
|
||||||
|
|
||||||
def on_enter(self, event):
|
def on_enter(self, event):
|
||||||
if self.is_viewing:
|
if self.is_viewing:
|
||||||
print("Next...")
|
print(f"Committing data for {self.current_json_path.name}...")
|
||||||
|
|
||||||
|
# --- CRITICAL CHANGE: Re-read JSON here to capture user edits ---
|
||||||
|
try:
|
||||||
|
with open(self.current_json_path, 'r') as f:
|
||||||
|
current_data = json.load(f)
|
||||||
|
|
||||||
|
# Perform the conversion now, post-edit
|
||||||
|
converted_items = convert_list(
|
||||||
|
current_data["list"],
|
||||||
|
self.current_meta["part"],
|
||||||
|
self.current_meta["schema"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add to accumulator
|
||||||
|
if self.accumulated_results:
|
||||||
|
self.accumulated_results["list"].extend(converted_items)
|
||||||
|
# Update name just in case (though usually consistent per group)
|
||||||
|
self.accumulated_results["name"] = current_data.get("name", self.accumulated_results["name"])
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error re-reading/saving {self.current_json_path}: {e}")
|
||||||
|
|
||||||
|
# Advance UI
|
||||||
self.is_viewing = False
|
self.is_viewing = False
|
||||||
self.label.config(image="", text="Loading next...")
|
self.label.config(image="", text="Loading next...")
|
||||||
|
|
||||||
|
def on_open_pdf(self, event):
|
||||||
|
if self.is_viewing and self.current_json_path:
|
||||||
|
pdf_path = self.current_json_path.with_suffix(".pdf")
|
||||||
|
print(f"Opening {pdf_path}")
|
||||||
|
subprocess.Popen(['xdg-open', str(pdf_path)])
|
||||||
|
|
||||||
def on_edit(self, event):
|
def on_edit(self, event):
|
||||||
if self.is_viewing and self.current_json_path:
|
if self.is_viewing and self.current_json_path:
|
||||||
print(f"Opening {self.current_json_path}")
|
print(f"Opening {self.current_json_path}")
|
||||||
|
|
@ -197,63 +255,45 @@ class ImageViewer:
|
||||||
|
|
||||||
def on_click(self, event):
|
def on_click(self, event):
|
||||||
if not self.is_viewing: return
|
if not self.is_viewing: return
|
||||||
|
|
||||||
# Map click to original image coordinates
|
|
||||||
x = int(event.x / self.scale_factor)
|
x = int(event.x / self.scale_factor)
|
||||||
y = int(event.y / self.scale_factor)
|
y = int(event.y / self.scale_factor)
|
||||||
w, h = self.orig_size
|
w, h = self.orig_size
|
||||||
|
|
||||||
# Create 10px box (5px radius)
|
|
||||||
# Coordinate format: [y_min, x_min, y_max, x_max] (0-1000 scale)
|
|
||||||
box = [
|
box = [
|
||||||
int(max(0, y - 5) / h * 1000),
|
int(max(0, y - 5) / h * 1000),
|
||||||
int(max(0, x - 5) / (w- padding) * 1000),
|
int(max(0, x - 5) / (w- padding) * 1000),
|
||||||
int(min(h, y + 5) / h * 1000),
|
int(min(h, y + 5) / h * 1000),
|
||||||
int(min(w, x + 5) / (w - padding) * 1000),
|
int(min(w, x + 5) / (w - padding) * 1000),
|
||||||
]
|
]
|
||||||
|
|
||||||
box_str = "{ \"box_2d\": " + str(box) + ", \"label\": \"\" },"
|
box_str = "{ \"box_2d\": " + str(box) + ", \"label\": \"\" },"
|
||||||
print(f"Copied box at ({x},{y}): {box_str}")
|
print(f"Copied box at ({x},{y}): {box_str}")
|
||||||
|
|
||||||
self.root.clipboard_clear()
|
self.root.clipboard_clear()
|
||||||
self.root.clipboard_append(box_str)
|
self.root.clipboard_append(box_str)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print("Usage: python plotting_gui.py <directory_or_file>")
|
print("Usage: python plotting.py <directory_or_file>")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
input_path = Path(sys.argv[1])
|
input_path = Path(sys.argv[1])
|
||||||
files_to_process = []
|
files_to_process = []
|
||||||
|
|
||||||
if input_path.is_file():
|
if input_path.is_file():
|
||||||
# File mode
|
|
||||||
base_dir = input_path.parent
|
base_dir = input_path.parent
|
||||||
stem = input_path.stem
|
stem = input_path.stem
|
||||||
|
|
||||||
# Try to locate the image in Cutleft directory
|
|
||||||
img_path = base_dir / "Cutleft" / f"{stem}.jpg"
|
img_path = base_dir / "Cutleft" / f"{stem}.jpg"
|
||||||
|
if not img_path.exists() and input_path.parent.name == "Cutleft":
|
||||||
# Fallback: Check if user provided the jpg inside Cutleft directly
|
|
||||||
if not img_path.exists() and input_path.parent.name == "Cutleft" and input_path.suffix.lower() == ".jpg":
|
|
||||||
base_dir = input_path.parent.parent
|
base_dir = input_path.parent.parent
|
||||||
img_path = input_path
|
img_path = input_path
|
||||||
|
|
||||||
if not img_path.exists():
|
if not img_path.exists():
|
||||||
print(f"Error: Could not find image at {img_path}")
|
print(f"Error: Could not find image at {img_path}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
files_to_process = [img_path]
|
files_to_process = [img_path]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Directory mode
|
|
||||||
base_dir = input_path
|
base_dir = input_path
|
||||||
cutleft_dir = base_dir / "Cutleft"
|
cutleft_dir = base_dir / "Cutleft"
|
||||||
|
|
||||||
if not cutleft_dir.exists():
|
if not cutleft_dir.exists():
|
||||||
print(f"Error: {cutleft_dir} does not exist.")
|
print(f"Error: {cutleft_dir} does not exist.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
files_to_process = sorted(cutleft_dir.glob("*.jpg"))
|
files_to_process = sorted(cutleft_dir.glob("*.jpg"))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -261,12 +301,10 @@ if __name__ == "__main__":
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
all_labels = []
|
all_labels = []
|
||||||
|
|
||||||
# Start Processing Thread
|
|
||||||
t = threading.Thread(target=worker_thread, args=(base_dir, files_to_process, all_labels))
|
t = threading.Thread(target=worker_thread, args=(base_dir, files_to_process, all_labels))
|
||||||
t.daemon = True # Kill thread if main app closes
|
t.daemon = True
|
||||||
t.start()
|
t.start()
|
||||||
|
|
||||||
# Start GUI
|
|
||||||
root = tk.Tk()
|
root = tk.Tk()
|
||||||
app = ImageViewer(root, base_dir)
|
app = ImageViewer(root, base_dir)
|
||||||
root.mainloop()
|
root.mainloop()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue