Working state.

2026-02-10 21:10:56 +01:00 · 2026-02-10 21:10:56 +01:00 · bd1362dff8
parent 2922692cda
commit bd1362dff8
3 changed files with 25 additions and 20 deletions
--- a/annotating_with_checks.py
+++ b/annotating_with_checks.py
@ -87,7 +87,6 @@ def process_label(root_dir, student_id, label, content):

    if not os.path.exists(pdf_path):
        return None, []
-
    base_img, total_h, max_w = annotating.make_base_image(pdf_path)
    if not base_img:
        return None, []
@ -239,12 +238,14 @@ def process_student(args):
    # Au moins, le drift n'empire pas au fil de la copie
    temp_img_path = os.path.join(output_dir, "Reference.jpg") # Can't use png here
    concat_img.save(temp_img_path, quality=90)
+
    pdf_path = os.path.join(output_dir, "Concat.pdf")
    w, h = concat_img.size
    c = canvas.Canvas(pdf_path, pagesize=(w, h))
    c.drawImage(temp_img_path, 0, 0, width=w, height=h)
    c.save()

+    print("Debug : size", w, h)
    # Ancien code, avec du drift
    # concat_img.save(os.path.join(output_dir, "Concat.pdf"), "PDF", resolution=DPI)
    # concat_img.save(os.path.join(output_dir, "Reference.png"))
@ -268,4 +269,10 @@ if __name__ == "__main__":
    # print(tasks)

    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
-        executor.map(process_student, tasks)
+        results = executor.map(process_student, tasks)
+        try:
+            for _ in results:
+                pass
+        except Exception:
+            import traceback
+            traceback.print_exc()
--- a/correction.py
+++ b/correction.py
@ -199,14 +199,6 @@ def generate_request(file, full_label):
        max_output_tokens=65535,
        response_mime_type= "application/json",
        response_json_schema= TypeAdapter(List[EvaluationEntry]).json_schema()
-        # Thinking config is not compatible with response_json ? Unsure.
-        # thinking_config=types.ThinkingConfig(
-          # thinking_budget=-1,
-        # ),
-        # thinking_config=types.ThinkingConfig(
-            # include_thoughts=True,
-            # thinking_budget=1024, # Optimized for Gemini 3 capabilities
-        # ),
    )
    return (contents, generate_content_config)

--- a/reading_annotations.py
+++ b/reading_annotations.py
@ -3,7 +3,7 @@ import os
 import json
 import numpy as np
 import shutil
-from PIL import Image, ImageChops
+from PIL import Image, ImageChops, ImageFilter
 Image.MAX_IMAGE_PIXELS = None
 from pdf2image import convert_from_path
 import annotating # Reuse rendering logic
@ -36,7 +36,10 @@ def detect_checks_and_notes(output_dir):
    # Warning: If the PDF is huge, pdf2image might split pages or OOM.
    # Assuming user didn't change page dimensions/order.
    try:
-        user_pages = convert_from_path(pdf_path, dpi=DPI)
+        # user_pages = convert_from_path(pdf_path, dpi=DPI)
+        # The following version avoids size mismatches,
+        # but produces more noise.
+        user_pages = convert_from_path(pdf_path, dpi=72)
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return [], None
@ -97,19 +100,21 @@ def detect_checks_and_notes(output_dir):
            # Expand mask slightly to catch sloppy ticks
            mask_draw.rectangle([x1-5, y1-5, x2+5, y2+5], fill=0)
        else:
-            # print("A box, not checked !", density)
-            # Even if not "checked", mask the box area slightly to avoid
-            # artifacts if user hovered over it, though arguably we keep it.
-            # Let's strictly mask only if checked to verify detection?
-            # No, prompt says "not extract the part that are just checking".
-            # If user checked it, we mask it.
-            pass
+            mask_draw.rectangle([x1-2, y1-2, x2+2, y2+2], fill=0)
+
+        if box["type"] == "score" and box["value"] == 0.0:
+            # Mask the whole line
+            mask_draw.rectangle([0, y1-5, ref_img.width, y2+5], fill=0)

    # --- Extraction Phase ---
    # Create the "Manual Notes" layer
    # Logic: User - Ref. If Diff is dark -> Note.
    # We want a transparent image with just the pen strokes.

+    # Try a Gaussian blur (possibly unnecessary). NOTE: the difference below still uses the unblurred ref_img/user_img.
+    ref_blur = ref_img.filter(ImageFilter.GaussianBlur(5))
+    user_blur = user_img.filter(ImageFilter.GaussianBlur(5))
+
    # 1. Get difference image
    diff_img = ImageChops.difference(ref_img, user_img).convert("L")

@ -117,7 +122,8 @@ def detect_checks_and_notes(output_dir):
    # Pixels that are different enough:
    diff_data = np.array(diff_img)
    # Create alpha channel: 0 where no diff, 255 where diff
-    alpha = np.where(diff_data > 20, 255, 0).astype(np.uint8)
+    # A higher threshold works better
+    alpha = np.where(diff_data > 100, 255, 0).astype(np.uint8)

    # 3. Create output image (Black strokes, variable alpha)
    # Or Copy user colors? Better to copy user pixels.