Working state.
parent
2922692cda
commit
bd1362dff8
|
|
@ -87,7 +87,6 @@ def process_label(root_dir, student_id, label, content):
|
||||||
|
|
||||||
if not os.path.exists(pdf_path):
|
if not os.path.exists(pdf_path):
|
||||||
return None, []
|
return None, []
|
||||||
|
|
||||||
base_img, total_h, max_w = annotating.make_base_image(pdf_path)
|
base_img, total_h, max_w = annotating.make_base_image(pdf_path)
|
||||||
if not base_img:
|
if not base_img:
|
||||||
return None, []
|
return None, []
|
||||||
|
|
@ -239,12 +238,14 @@ def process_student(args):
|
||||||
# Au moins, le drift n'empire pas au fil de la copie
|
# Au moins, le drift n'empire pas au fil de la copie
|
||||||
temp_img_path = os.path.join(output_dir, "Reference.jpg") # Can't use png here
|
temp_img_path = os.path.join(output_dir, "Reference.jpg") # Can't use png here
|
||||||
concat_img.save(temp_img_path, quality=90)
|
concat_img.save(temp_img_path, quality=90)
|
||||||
|
|
||||||
pdf_path = os.path.join(output_dir, "Concat.pdf")
|
pdf_path = os.path.join(output_dir, "Concat.pdf")
|
||||||
w, h = concat_img.size
|
w, h = concat_img.size
|
||||||
c = canvas.Canvas(pdf_path, pagesize=(w, h))
|
c = canvas.Canvas(pdf_path, pagesize=(w, h))
|
||||||
c.drawImage(temp_img_path, 0, 0, width=w, height=h)
|
c.drawImage(temp_img_path, 0, 0, width=w, height=h)
|
||||||
c.save()
|
c.save()
|
||||||
|
|
||||||
|
print("Debug : size", w, h)
|
||||||
# Ancien code, avec du drift
|
# Ancien code, avec du drift
|
||||||
# concat_img.save(os.path.join(output_dir, "Concat.pdf"), "PDF", resolution=DPI)
|
# concat_img.save(os.path.join(output_dir, "Concat.pdf"), "PDF", resolution=DPI)
|
||||||
# concat_img.save(os.path.join(output_dir, "Reference.png"))
|
# concat_img.save(os.path.join(output_dir, "Reference.png"))
|
||||||
|
|
@ -268,4 +269,10 @@ if __name__ == "__main__":
|
||||||
# print(tasks)
|
# print(tasks)
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||||
executor.map(process_student, tasks)
|
results = executor.map(process_student, tasks)
|
||||||
|
try:
|
||||||
|
for _ in results:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
|
|
||||||
|
|
@ -199,14 +199,6 @@ def generate_request(file, full_label):
|
||||||
max_output_tokens=65535,
|
max_output_tokens=65535,
|
||||||
response_mime_type= "application/json",
|
response_mime_type= "application/json",
|
||||||
response_json_schema= TypeAdapter(List[EvaluationEntry]).json_schema()
|
response_json_schema= TypeAdapter(List[EvaluationEntry]).json_schema()
|
||||||
# Thinking config is not compatible with response_json ? Unsure.
|
|
||||||
# thinking_config=types.ThinkingConfig(
|
|
||||||
# thinking_budget=-1,
|
|
||||||
# ),
|
|
||||||
# thinking_config=types.ThinkingConfig(
|
|
||||||
# include_thoughts=True,
|
|
||||||
# thinking_budget=1024, # Optimized for Gemini 3 capabilities
|
|
||||||
# ),
|
|
||||||
)
|
)
|
||||||
return (contents, generate_content_config)
|
return (contents, generate_content_config)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ import os
|
||||||
import json
|
import json
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import shutil
|
import shutil
|
||||||
from PIL import Image, ImageChops
|
from PIL import Image, ImageChops, ImageFilter
|
||||||
Image.MAX_IMAGE_PIXELS = None
|
Image.MAX_IMAGE_PIXELS = None
|
||||||
from pdf2image import convert_from_path
|
from pdf2image import convert_from_path
|
||||||
import annotating # Reuse rendering logic
|
import annotating # Reuse rendering logic
|
||||||
|
|
@ -36,7 +36,10 @@ def detect_checks_and_notes(output_dir):
|
||||||
# Warning: If the PDF is huge, pdf2image might split pages or OOM.
|
# Warning: If the PDF is huge, pdf2image might split pages or OOM.
|
||||||
# Assuming user didn't change page dimensions/order.
|
# Assuming user didn't change page dimensions/order.
|
||||||
try:
|
try:
|
||||||
user_pages = convert_from_path(pdf_path, dpi=DPI)
|
# user_pages = convert_from_path(pdf_path, dpi=DPI)
|
||||||
|
# La version suivante évite les size mismatch
|
||||||
|
# Mais donne plus de bruit
|
||||||
|
user_pages = convert_from_path(pdf_path, dpi=72)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error reading PDF: {e}")
|
print(f"Error reading PDF: {e}")
|
||||||
return [], None
|
return [], None
|
||||||
|
|
@ -97,19 +100,21 @@ def detect_checks_and_notes(output_dir):
|
||||||
# Expand mask slightly to catch sloppy ticks
|
# Expand mask slightly to catch sloppy ticks
|
||||||
mask_draw.rectangle([x1-5, y1-5, x2+5, y2+5], fill=0)
|
mask_draw.rectangle([x1-5, y1-5, x2+5, y2+5], fill=0)
|
||||||
else:
|
else:
|
||||||
# print("A box, not checked !", density)
|
mask_draw.rectangle([x1-2, y1-2, x2+2, y2+2], fill=0)
|
||||||
# Even if not "checked", mask the box area slightly to avoid
|
|
||||||
# artifacts if user hovered over it, though arguably we keep it.
|
if box["type"] == "score" and box["value"] == 0.0:
|
||||||
# Let's strictly mask only if checked to verify detection?
|
# Mask the whole line
|
||||||
# No, prompt says "not extract the part that are just checking".
|
mask_draw.rectangle([0, y1-5, ref_img.width, y2+5], fill=0)
|
||||||
# If user checked it, we mask it.
|
|
||||||
pass
|
|
||||||
|
|
||||||
# --- Extraction Phase ---
|
# --- Extraction Phase ---
|
||||||
# Create the "Manual Notes" layer
|
# Create the "Manual Notes" layer
|
||||||
# Logic: User - Ref. If Diff is dark -> Note.
|
# Logic: User - Ref. If Diff is dark -> Note.
|
||||||
# We want a transparent image with just the pen strokes.
|
# We want a transparent image with just the pen strokes.
|
||||||
|
|
||||||
|
# Try Gaussian Blur, peut-être inutile.
|
||||||
|
ref_blur = ref_img.filter(ImageFilter.GaussianBlur(5))
|
||||||
|
user_blur = user_img.filter(ImageFilter.GaussianBlur(5))
|
||||||
|
|
||||||
# 1. Get difference image
|
# 1. Get difference image
|
||||||
diff_img = ImageChops.difference(ref_img, user_img).convert("L")
|
diff_img = ImageChops.difference(ref_img, user_img).convert("L")
|
||||||
|
|
||||||
|
|
@ -117,7 +122,8 @@ def detect_checks_and_notes(output_dir):
|
||||||
# Pixels that are different enough:
|
# Pixels that are different enough:
|
||||||
diff_data = np.array(diff_img)
|
diff_data = np.array(diff_img)
|
||||||
# Create alpha channel: 0 where no diff, 255 where diff
|
# Create alpha channel: 0 where no diff, 255 where diff
|
||||||
alpha = np.where(diff_data > 20, 255, 0).astype(np.uint8)
|
# Higher threshold is better
|
||||||
|
alpha = np.where(diff_data > 100, 255, 0).astype(np.uint8)
|
||||||
|
|
||||||
# 3. Create output image (Black strokes, variable alpha)
|
# 3. Create output image (Black strokes, variable alpha)
|
||||||
# Or Copy user colors? Better to copy user pixels.
|
# Or Copy user colors? Better to copy user pixels.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue