"""Submit a Gemini batch job that locates exercise/question labels on exam scans.

Usage: python script.py <input_dir>

Every ``*.jpg`` found in ``<input_dir>/Cutleft`` is uploaded to the Gemini
File API, one generateContent request per image is written to
``<input_dir>/batch_input.jsonl``, the batch job is submitted and polled, and
each response is saved next to its image as ``<image name>.json``.
"""
import base64
import json
import os
import sys
import time
from pathlib import Path

from google import genai
from google.genai import types
from tqdm import tqdm

if len(sys.argv) < 2:
    sys.exit("Usage: python script.py <input_dir>")

INPUT_DIR = sys.argv[1]
CUTLEFT_DIR = os.path.join(INPUT_DIR, 'Cutleft')
MODEL_ID = "gemini-3-flash-preview"
# Prefer the environment so the credential does not have to live in the
# source; the in-file value is kept only as a backward-compatible fallback.
api_key = os.environ.get("GEMINI_API_KEY", "REMOVED_API_KEY")

# NOTE(review): the `##labels##` and `##names##` placeholders are sent to the
# model verbatim -- nothing in this script substitutes them. Confirm whether a
# replacement step is missing.
my_prompt = (
    "I'm giving you an image of the left columns of a written exam. "
    "Students answer several exercises, which can have several questions. "
    "The image consists of several columns, separated by vertical black lines. "
    "The image should be read top to bottom and then left to right, "
    "meaning first column, then second column, etc. "
    "In their sheet, students delimit exercises and questions using delimiters "
    "such as `Ex 1`, or `Exercice 1`, and `1)` or `a)`. "
    "You need to give me the bounding boxes of each delimiter. "
    "When giving the bounding box of the first question of an exercise, "
    "the box should be large enough to contain both the exercice label "
    "(`Exercice i`) and the question label (`1)`) parts. "
    "You also need to give me the student name. "
    "It should appear on the top left of the image. "
    "Disregard any mention of `MPSI 3`, it is their class. "
    "A list of possible student names will be given below. "
    "You will answer with a JSON object, containing a `name` field with the "
    "name, and a `list` field, with the list of the bounding boxes and their "
    "labels. "
    "The box_2d should be [ymin, xmin, ymax, xmax] normalized to 0-1000. "
    'Here is an example : {"name" : "John Doe", "list" : '
    '[{"box_2d": (10, 20, 30, 40), "label" : "Ex 1 : 1)"}]} '
    "Do not provide a box_2d for the name. Only for the labels. "
    "You may find the same label present several times, as a student either "
    "recall the current label on a new page, or adds content to its answer "
    "later on. "
    "Give the position of each instance of each label. "
    "For this exam you should look for the labels given below, separated by "
    "newlines. "
    "A student need not have answered every question, so some may be missing. "
    "\n"
    "##labels## Here's a list of the names of the students, pick the one that "
    'matches the best or `"Unknown"` if you cannot read the name ##names##'
)

# Batch job states after which polling must stop (google-genai `JobState` names).
_TERMINAL_JOB_STATES = frozenset({
    "JOB_STATE_SUCCEEDED",
    "JOB_STATE_FAILED",
    "JOB_STATE_CANCELLED",
    "JOB_STATE_EXPIRED",
})

_SAFETY_CATEGORIES = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)

# Seconds to wait between two status checks of the batch job.
_POLL_INTERVAL_S = 10


def _build_request(file_ref):
    """Return the generateContent request body for one uploaded image.

    The config is serialized by hand (REST field names) because the batch
    input file is plain JSONL rather than SDK objects.
    """
    return {
        "contents": [
            {
                "role": "user",
                "parts": [
                    {
                        "fileData": {
                            "mimeType": file_ref.mime_type,
                            "fileUri": file_ref.uri,
                        }
                    },
                    {"text": my_prompt},
                ],
            }
        ],
        "generationConfig": {
            "temperature": 1.0,
            "topP": 0.95,
            "maxOutputTokens": 65535,
            "thinkingConfig": {"thinkingBudget": -1},
        },
        "safetySettings": [
            {"category": cat, "threshold": "BLOCK_NONE"}
            for cat in _SAFETY_CATEGORIES
        ],
    }


def process_batch(directory):
    """Run one Gemini batch job over every ``*.jpg`` in *directory*.

    Steps: upload each image to the File API, write and upload the JSONL
    request file (to ``INPUT_DIR/batch_input.jsonl``), submit the batch job,
    poll until it reaches a terminal state, then write each response to
    ``<directory>/<image name>.json``.

    Returns ``None`` in all cases; progress and failures are printed.
    """
    client = genai.Client(api_key=api_key)
    # Sorted so the request order is reproducible between runs.
    image_files = sorted(Path(directory).glob("*.jpg"))

    if not image_files:
        print("No .jpg files found.")
        return

    # 1. Upload images to File API (Batch requirement)
    batch_requests = []
    print(f"Uploading {len(image_files)} images to File API...")
    for img_path in tqdm(image_files, unit="img"):
        file_ref = client.files.upload(file=img_path)
        # The Gemini batch format is {"key": ..., "request": ...}; the key is
        # echoed back in the output so results can be mapped to their image.
        batch_requests.append({
            "key": img_path.name,
            "request": _build_request(file_ref),
        })

    # 2. Create and Upload Batch Source File (JSONL)
    batch_file_path = os.path.join(INPUT_DIR, "batch_input.jsonl")
    with open(batch_file_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(req) + "\n" for req in batch_requests)

    # The MIME type of a .jsonl file cannot be guessed, so it is given explicitly.
    batch_input_file = client.files.upload(
        file=batch_file_path,
        config=types.UploadFileConfig(mime_type="jsonl"),
    )

    # 3. Submit Batch Job
    print("Submitting batch job...")
    job = client.batches.create(model=MODEL_ID, src=batch_input_file.name)
    print(f"Batch Job ID: {job.name}")

    # 4. Poll for Completion
    with tqdm(desc="Processing Batch", unit="poll") as pbar:
        while True:
            job = client.batches.get(name=job.name)
            state = (
                job.state.name if job.state is not None
                else "JOB_STATE_UNSPECIFIED"
            )
            if state in _TERMINAL_JOB_STATES:
                break
            pbar.set_description(state)
            pbar.update(1)
            time.sleep(_POLL_INTERVAL_S)

    if state != "JOB_STATE_SUCCEEDED":
        print(f"Batch job failed ({state}): {job.error}")
        return

    # 5. Retrieve and Save Results
    if not (job.dest and job.dest.file_name):
        print("Batch job succeeded but reported no output file.")
        return

    print("Downloading results...")
    output_content = client.files.download(file=job.dest.file_name)

    # Parse JSONL output and map back to files.
    # Output format: {"key": "...", "response": {...}} (or an error object).
    results_saved = 0
    for line in output_content.decode("utf-8").splitlines():
        if not line.strip():
            continue
        result = json.loads(line)
        filename = result.get("key") or result.get("custom_id")
        if not filename:
            continue
        if "response" not in result:
            print(f"No response for {filename}: {result.get('error')}")
        output_path = Path(directory) / f"{filename}.json"
        with open(output_path, "w", encoding="utf-8") as f:
            # Save the full response part
            json.dump(result.get("response", {}), f, indent=2)
        results_saved += 1

    print(f"Batch complete. Saved {results_saved} result files.")


process_batch(CUTLEFT_DIR)