167 lines
5.5 KiB
Python
167 lines
5.5 KiB
Python
import base64
import json
import os
import sys
import time
from pathlib import Path

from google import genai
from google.genai import types
|
|
|
|
# The script takes one required command-line argument: the directory whose
# `Cutleft` sub-folder is handed to process_batch().
if len(sys.argv) < 2:
    sys.exit("Usage: python script.py <directory_path>")

INPUT_DIR = sys.argv[1]
CUTLEFT_DIR = os.path.join(INPUT_DIR, 'Cutleft')

MODEL_ID = "gemini-3-flash-preview"

# Credentials should not live in the source file: take the key from the
# GEMINI_API_KEY environment variable, and only fall back to the in-file
# value when the variable is unset or empty (keeps existing setups working).
api_key = os.environ.get("GEMINI_API_KEY") or "REMOVED_API_KEY"
|
|
|
|
# Prompt sent with every exam image. It asks the model for the student name
# and for bounding boxes of every exercise/question delimiter, returned as a
# JSON object.
# The `##labels##` and `##names##` markers are placeholders for the expected
# labels and the candidate student names.
# NOTE(review): nothing in the visible code substitutes these placeholders
# before the prompt is sent - confirm whether that step is missing.
# The example line must itself be valid JSON (box_2d as a bracketed list,
# matching the `[ymin, xmin, ymax, xmax]` instruction), otherwise the model
# is nudged towards emitting unparseable output.
my_prompt = """I'm giving you an image of the left columns of a written exam.
Students answer several exercises, which can have several questions.

The image consists of several columns, separated by vertical black
lines. The image should be read top to bottom and then left to right,
meaning first column, then second column, etc.

In their sheet, students delimit exercises and questions using
delimiters such as `Ex 1`, or `Exercice 1`, and `1)` or `a)`. You need
to give me the bounding boxes of each delimiter.

When giving the bounding box of the first question of an exercise, the
box should be large enough to contain both the exercice label
(`Exercice i`) and the question label (`1)`) parts.

You also need to give me the student name. It should appear on the top
left of the image. Disregard any mention of `MPSI 3`, it is their
class. A list of possible student names will be given below.

You will answer with a JSON object, containing a `name` field with the
name, and a `list` field, with the list of the bounding boxes and
their labels. The box_2d should be [ymin, xmin, ymax, xmax] normalized
to 0-1000.

Here is an example :
{"name" : "John Doe", "list" : [{"box_2d": [10, 20, 30, 40], "label" : "Ex 1 : 1)"}]}

Do not provide a box_2d for the name. Only for the labels.

You may find the same label present several times, as a student either
recall the current label on a new page, or adds content to its answer
later on. Give the position of each instance of each label.

For this exam you should look for the labels given below, separated by
newlines. A student need not have answered every question, so some may
be missing.

##labels##

Here's a list of the names of the students, pick the one that matches
the best or `"Unknown"` if you cannot read the name

##names##"""
|
|
from tqdm import tqdm
|
|
|
|
# Batch-job states after which polling must stop, written without the
# "JOB_STATE_" prefix used by the Gemini Batch API state names.
_TERMINAL_JOB_STATES = frozenset({"SUCCEEDED", "FAILED", "CANCELLED", "EXPIRED"})
_POLL_INTERVAL_SECONDS = 10
_SAFETY_CATEGORIES = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)


def _job_state_name(state):
    """Return a job state as a bare upper-case name such as ``"SUCCEEDED"``.

    Accepts an enum member (its ``name`` is used) or a plain string, and
    strips a leading ``JOB_STATE_`` so both spellings compare equal.
    """
    name = str(getattr(state, "name", state)).upper()
    prefix = "JOB_STATE_"
    return name[len(prefix):] if name.startswith(prefix) else name


def _build_batch_request(key, file_ref, prompt):
    """Build one JSONL entry: uploaded image + prompt, identified by ``key``."""
    return {
        "key": key,
        "request": {
            "contents": [
                {"role": "user", "parts": [
                    {"fileData": {"mimeType": file_ref.mime_type, "fileUri": file_ref.uri}},
                    {"text": prompt},
                ]},
            ],
            "generationConfig": {
                "temperature": 1.0,
                "topP": 0.95,
                "maxOutputTokens": 65535,
                "thinkingConfig": {"thinkingBudget": -1},
            },
            "safetySettings": [
                {"category": cat, "threshold": "BLOCK_NONE"}
                for cat in _SAFETY_CATEGORIES
            ],
        },
    }


def _save_results(output_content, directory):
    """Write each JSONL result line to ``<key>.json`` and return the count."""
    if isinstance(output_content, (bytes, bytearray)):
        output_content = output_content.decode("utf-8")

    results_saved = 0
    for line in output_content.splitlines():
        if not line.strip():
            continue
        result = json.loads(line)

        # "key" is the identifier written into the request file; "custom_id"
        # is still accepted for result files produced with the older layout.
        filename = result.get("key") or result.get("custom_id")
        if not filename:
            continue

        output_path = Path(directory) / f"{filename}.json"
        with open(output_path, "w", encoding="utf-8") as f:
            # Save the full response part
            json.dump(result.get("response", {}), f, indent=2)
        results_saved += 1
    return results_saved


def process_batch(directory):
    """Run every ``*.jpg`` in *directory* through one Gemini batch job.

    Steps: upload the images to the File API, write and upload a JSONL
    request file (one request per image, keyed by the image file name),
    submit the batch job, poll until it reaches a terminal state, then save
    each response next to the images as ``<image name>.json``.

    Args:
        directory: Folder containing the ``.jpg`` files; result files are
            written into the same folder.

    Returns:
        None. Progress and failures are reported on stdout.

    Relies on the module-level ``api_key``, ``MODEL_ID``, ``my_prompt`` and
    ``INPUT_DIR`` (the JSONL request file is written into ``INPUT_DIR``).
    The request/response layout and the SDK calls follow the Gemini Batch
    API documentation for the google-genai client.
    """
    # Sorted so the request file has a reproducible order.
    image_files = sorted(Path(directory).glob("*.jpg"))
    if not image_files:
        # Checked before building the client: nothing to do needs no API access.
        print("No .jpg files found.")
        return

    client = genai.Client(api_key=api_key)

    # 1. Upload images to File API (Batch requirement)
    print(f"Uploading {len(image_files)} images to File API...")
    batch_requests = []
    for img_path in tqdm(image_files, unit="img"):
        file_ref = client.files.upload(file=img_path)
        batch_requests.append(
            _build_batch_request(img_path.name, file_ref, my_prompt)
        )

    # 2. Create and Upload Batch Source File (JSONL)
    batch_file_path = os.path.join(INPUT_DIR, "batch_input.jsonl")
    with open(batch_file_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(req) + "\n" for req in batch_requests)

    # The MIME type is given explicitly because it cannot be inferred
    # reliably from the ".jsonl" extension.
    batch_input_file = client.files.upload(
        file=batch_file_path,
        config=types.UploadFileConfig(
            display_name="batch_input", mime_type="jsonl"
        ),
    )

    # 3. Submit Batch Job
    print("Submitting batch job...")
    job = client.batches.create(model=MODEL_ID, src=batch_input_file.name)
    print(f"Batch Job ID: {job.name}")

    # 4. Poll for Completion
    # The context manager closes the bar even if a poll raises; the loop
    # stops on every terminal state, not only success/failure, so a
    # cancelled or expired job cannot spin forever.
    with tqdm(desc="Processing Batch", unit="poll") as pbar:
        while True:
            job = client.batches.get(name=job.name)
            state = _job_state_name(job.state)
            if state in _TERMINAL_JOB_STATES:
                break
            pbar.set_description(f"Processing ({state})")
            pbar.update(1)
            time.sleep(_POLL_INTERVAL_SECONDS)

    if state == "FAILED":
        print(f"Batch job failed: {job.error}")
        return
    if state != "SUCCEEDED":
        print(f"Batch job ended in state {state} without results.")
        return

    # 5. Retrieve and Save Results
    print("Downloading results...")
    result_file_name = getattr(getattr(job, "dest", None), "file_name", None)
    if not result_file_name:
        print("Batch job succeeded but reported no output file.")
        return
    output_content = client.files.download(file=result_file_name)

    results_saved = _save_results(output_content, directory)
    print(f"Batch complete. Saved {results_saved} result files.")
|
|
|
|
# Script entry point: run the batch pipeline on the `Cutleft` sub-directory
# of the folder given on the command line.
process_batch(directory=CUTLEFT_DIR)
|