diff --git a/annotating_with_checks.py b/annotating_with_checks.py
new file mode 100644
index 0000000..0ad8c77
--- /dev/null
+++ b/annotating_with_checks.py
@@ -0,0 +1,312 @@
+"""Render checkable annotation sheets for graded student copies.
+
+For every student, each graded answer PDF is stacked into one tall image, the
+score and feedback are drawn on it together with empty checkboxes, and the
+pixel coordinates of every checkbox are saved to ``checkboxes.json`` so that
+``reading_annotations.py`` can later detect which boxes were ticked.
+"""
+
+import sys
+import os
+import json
+import re
+import shutil
+import concurrent.futures
+import threading
+
+# Fix for Matplotlib in threads: Set backend to non-interactive 'Agg'
+import matplotlib
+matplotlib.use('Agg')
+
+from PIL import Image, ImageDraw, ImageFont
+import annotating
+from annotating import MARGIN_LEFT, ANNOT_WIDTH
+
+# Global lock for Matplotlib/Latex rendering to prevent race conditions
+LATEX_LOCK = threading.Lock()
+
+BOX_SIZE = 30
+SCORE_BOX_SIZE = 30
+SCORES = [x * 0.5 for x in range(9)]  # 0.0 to 4.0
+
+# Fall back from DejaVu Sans to Arial to Pillow's built-in font.
+try:
+    CHECKBOX_FONT = ImageFont.truetype("DejaVuSans.ttf", 20)
+except IOError:
+    try:
+        CHECKBOX_FONT = ImageFont.truetype("arial.ttf", 20)
+    except IOError:
+        CHECKBOX_FONT = ImageFont.load_default()
+
+
+def draw_checkbox(draw, x, y, size=BOX_SIZE, label=None, fill="white"):
+    """Draw an empty square checkbox and return its [x1, y1, x2, y2] box.
+
+    When ``label`` is given it is written just left of the box.
+    """
+    if label is not None:
+        # Offset by the actual box size so the label follows a custom ``size``.
+        draw.text((x - size - 5, y + 2), str(label), fill="black", font=CHECKBOX_FONT)
+    draw.rectangle([x, y, x + size, y + size], fill=fill, outline="black", width=2)
+
+    return [x, y, x + size, y + size]
+
+
+def safe_render_latex(text, **kwargs):
+    """Thread-safe wrapper for latex rendering."""
+    with LATEX_LOCK:
+        return annotating.render_latex_text(text, **kwargs)
+
+
+def create_base_image(pdf_path):
+    """Converts PDF pages to a single vertically stacked image.
+
+    Returns None, after printing the reason, when the PDF cannot be converted
+    or contains no pages.
+    """
+    try:
+        pages = annotating.convert_from_path(pdf_path)
+        if not pages:
+            print(f"No pages found in {pdf_path}")
+            return None
+        total_h = sum(page.height for page in pages)
+        max_w = max(page.width for page in pages)
+        base_img = Image.new("RGBA", (max_w, total_h), "white")
+        cy = 0
+        for page in pages:
+            base_img.paste(page.convert("RGBA"), (0, cy))
+            cy += page.height
+        return base_img
+    except Exception as exc:
+        # Best effort: one unreadable PDF must not abort the whole student,
+        # but say why it was skipped instead of failing silently.
+        print(f"Could not convert {pdf_path}: {exc}")
+        return None
+
+
+def render_header(label, score, feedbacks, base_width):
+    """Generates the score line and global feedback elements.
+
+    Returns a list of ``(image, boxes)`` pairs in top-to-bottom order; each
+    entry of ``boxes`` describes one checkbox drawn on that image, with
+    ``rel_box`` given in that image's own coordinates.
+    """
+    elements = []
+
+    # Score Line
+    score_text_img = safe_render_latex(f"{label} ; Note : {score}", width_px=base_width // 2, fontsize=18)
+    score_line_h = max(score_text_img.height, SCORE_BOX_SIZE + 10)
+    score_line_img = Image.new("RGBA", (base_width, score_line_h), (255, 255, 255, 0))
+    score_line_img.paste(score_text_img, (0, 0))
+
+    draw_score = ImageDraw.Draw(score_line_img)
+    start_x = score_text_img.width + 20
+    local_boxes = []
+
+    # One checkbox per selectable score value.
+    for val in SCORES:
+        box = draw_checkbox(draw_score, start_x, 5, SCORE_BOX_SIZE, str(val))
+        local_boxes.append({
+            "type": "score", "label": label, "value": val,
+            "rel_box": box, "elem_y": 0
+        })
+        start_x += SCORE_BOX_SIZE + 60
+    elements.append((score_line_img, local_boxes))
+
+    # Global Feedback
+    for i, fb in enumerate(feedbacks):
+        fb_img = safe_render_latex(fb['text'], width_px=base_width)
+        draw_fb = ImageDraw.Draw(fb_img)
+        bx = fb_img.width - BOX_SIZE - 5
+        by = 5
+        box = draw_checkbox(draw_fb, bx, by, BOX_SIZE)
+
+        elements.append((fb_img, [{
+            "type": "del_global", "label": label, "index": i,
+            "text_preview": fb['text'][:20], "rel_box": box, "elem_y": 0
+        }]))
+
+    return elements
+
+
+def process_label(root_dir, student_id, label, content):
+    """Processes a single label (PDF) -> Annotated Image + Checkboxes.
+
+    Returns ``(image, checkbox_map)``, or ``(None, [])`` when the label's PDF
+    is missing or unreadable. Every checkbox entry carries a ``final_box`` in
+    the coordinates of the returned image.
+    """
+    copie_folder = f"Copie{student_id}"
+    pdf_path = os.path.join(root_dir, copie_folder, f"{label}.pdf")
+
+    if not os.path.exists(pdf_path):
+        return None, []
+
+    base_img = create_base_image(pdf_path)
+    if base_img is None:
+        return None, []
+
+    # Extract Data
+    coordinates = content.get('coordinates', (0, 0))
+    hmin = coordinates[0]
+    result = content.get('result', {})
+    score = result.get('score', 0)
+    feedbacks = result.get('feedback', [])
+
+    global_fb = [f for f in feedbacks if not f.get('box_2d')]
+    local_fb = [f for f in feedbacks if f.get('box_2d')]
+    # reading_annotations.py rebuilds this ordering to resolve "index" values.
+    local_fb.sort(key=lambda x: x['box_2d'][0])
+
+    checkbox_map = []
+
+    # 1. Render Header
+    header_elements = render_header(label, score, global_fb, base_img.width)
+    header_height = sum(x[0].height for x in header_elements)
+
+    # 2. Assemble Base + Header
+    total_height = base_img.height + header_height
+    final_img = Image.new("RGB", (base_img.width + MARGIN_LEFT, total_height), "white")
+
+    current_y = 0
+    for img, boxes in header_elements:
+        final_img.paste(img, (0, current_y))
+        for b in boxes:
+            b['final_box'] = [b['rel_box'][0], b['rel_box'][1] + current_y,
+                              b['rel_box'][2], b['rel_box'][3] + current_y]
+            checkbox_map.append(b)
+        current_y += img.height
+
+    image_offset_y = current_y
+    final_img.paste(base_img, (MARGIN_LEFT, image_offset_y))
+
+    # 3. Draw Local Annotations
+    draw = ImageDraw.Draw(final_img, "RGBA")
+    last_text_bottom = 0
+
+    for i, fb in enumerate(local_fb):
+        ymin, xmin, ymax, xmax = fb['box_2d']
+        target_ymin = (ymin - hmin) + image_offset_y
+        target_ymax = (ymax - hmin) + image_offset_y
+        target_xmin = xmin + MARGIN_LEFT
+        target_xmax = xmax + MARGIN_LEFT
+
+        draw.rectangle([target_xmin, target_ymin, target_xmax, target_ymax], outline="red", width=3)
+
+        # First delete-checkbox: top-right corner of the red rectangle.
+        rect_cb_box = draw_checkbox(draw, target_xmax - BOX_SIZE, target_ymin, BOX_SIZE)
+        checkbox_map.append({
+            "type": "del_local", "label": label, "index": i,
+            "text_preview": fb['text'][:20], "final_box": rect_cb_box
+        })
+
+        txt_img_raw = safe_render_latex(fb['text'], width_px=ANNOT_WIDTH, bg_color=(255, 200, 200, 180), max_lines=3)
+        container_h = max(txt_img_raw.height, BOX_SIZE)
+        txt_img = Image.new("RGBA", (ANNOT_WIDTH, container_h), (255, 255, 255, 0))
+        txt_img.paste(txt_img_raw, (0, 0))
+
+        # Second delete-checkbox: top-right corner of the margin text.
+        d_txt = ImageDraw.Draw(txt_img)
+        draw_checkbox(d_txt, ANNOT_WIDTH - BOX_SIZE, 0, BOX_SIZE)
+
+        # Centre the text on the rectangle, but never above the page image
+        # and never overlapping the previous annotation text.
+        center_y = (target_ymin + target_ymax) / 2
+        paste_y = max(center_y - (txt_img.height / 2), image_offset_y)
+        if paste_y < last_text_bottom:
+            paste_y = last_text_bottom + 5
+
+        # Grow the canvas when the text would run past the bottom edge.
+        req_h = int(paste_y + txt_img.height + 20)
+        if req_h > final_img.height:
+            new_final = Image.new("RGB", (final_img.width, req_h), "white")
+            new_final.paste(final_img, (0, 0))
+            final_img = new_final
+            draw = ImageDraw.Draw(final_img, "RGBA")
+
+        final_img.paste(txt_img, (10, int(paste_y)), mask=txt_img)
+        checkbox_map.append({
+            "type": "del_local", "label": label, "index": i,
+            "text_preview": fb['text'][:20],
+            "final_box": [10 + ANNOT_WIDTH - BOX_SIZE, int(paste_y), 10 + ANNOT_WIDTH, int(paste_y) + BOX_SIZE]
+        })
+        last_text_bottom = paste_y + txt_img.height
+
+    return final_img, checkbox_map
+
+
+def natural_key(text):
+    """Sort key ordering embedded digit runs numerically ("Q2" before "Q10")."""
+    return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))]
+
+
+def process_student(args):
+    """Thread worker: Processes one student.
+
+    ``args`` is a ``(root_dir, student_id, labels)`` tuple. The output folder
+    ``Bnot/Copie{student_id}`` under ``root_dir`` is recreated from scratch and
+    receives ``checkboxes.json``, ``Reference.png`` and ``Concat.pdf``.
+    """
+    root_dir, student_id, labels = args
+    print(f"Generating Checkable PDF for: {student_id}")
+
+    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir)
+
+    label_images = []
+    student_checkboxes = []
+
+    for label, content in sorted(labels.items(), key=lambda x: natural_key(x[0])):
+        img, boxes = process_label(root_dir, student_id, label, content)
+        if img is not None:
+            label_images.append(img)
+            student_checkboxes.append(boxes)
+
+    if not label_images:
+        return
+
+    max_w = max(i.width for i in label_images)
+    total_h = sum(i.height for i in label_images)
+    concat_img = Image.new("RGB", (max_w, total_h), "white")
+
+    final_json_map = []
+    current_y = 0
+
+    for img, boxes in zip(label_images, student_checkboxes):
+        concat_img.paste(img, (0, current_y))
+
+        # Shift per-label boxes into the coordinates of the stacked image.
+        for item in boxes:
+            b = item['final_box']
+            item['global_box'] = [b[0], b[1] + current_y, b[2], b[3] + current_y]
+            final_json_map.append(item)
+
+        current_y += img.height
+
+    with open(os.path.join(output_dir, "checkboxes.json"), "w") as f:
+        json.dump(final_json_map, f, indent=2)
+
+    concat_img.save(os.path.join(output_dir, "Reference.png"))
+    concat_img.save(os.path.join(output_dir, "Concat.pdf"), "PDF", resolution=100.0)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python annotating_with_checks.py ROOT_DIR")
+        sys.exit(1)
+
+    root_dir = sys.argv[1]
+    results = annotating.make_dictionary(root_dir)
+
+    tasks = [(root_dir, sid, lbls) for sid, lbls in results.items()]
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        futures = {executor.submit(process_student, task): task[1] for task in tasks}
+        # Collect every result so a worker's exception is reported, not lost.
+        for future in concurrent.futures.as_completed(futures):
+            try:
+                future.result()
+            except Exception as exc:
+                print(f"Failed to process {futures[future]}: {exc}")
diff --git a/reading_annotations.py b/reading_annotations.py
new file mode 100644
index 0000000..7fd7cc9
--- /dev/null
+++ b/reading_annotations.py
@@ -0,0 +1,334 @@
+"""Read back a hand-annotated sheet and regenerate the clean annotated copy.
+
+Compares the user's ``Concat_annotated.pdf`` against ``Reference.png`` to find
+ticked checkboxes (located via ``checkboxes.json``) and free-hand notes,
+applies the ticked actions to the grading data, re-renders the copy without
+checkboxes and overlays the free-hand notes on it.
+"""
+
+import sys
+import os
+import json
+import re
+import numpy as np
+import shutil
+from PIL import Image, ImageChops, ImageDraw
+# Turn off Pillow's pixel-count limit; the stacked sheets can be very large.
+Image.MAX_IMAGE_PIXELS = None
+from pdf2image import convert_from_path
+import annotating  # Reuse rendering logic
+from annotating import MARGIN_LEFT, ANNOT_WIDTH
+
+
+def detect_checks_and_notes(output_dir):
+    """Find ticked checkboxes and free-hand notes in the annotated PDF.
+
+    Returns:
+        actions: List of dicts {type, label, ...} for checked boxes
+        notes_img: RGBA image of manual notes (checks masked out), or None
+            when a required file is missing or the PDF cannot be read
+    """
+    pdf_path = os.path.join(output_dir, "Concat_annotated.pdf")
+    ref_path = os.path.join(output_dir, "Reference.png")
+    json_path = os.path.join(output_dir, "checkboxes.json")
+
+    # checkboxes.json is required too: without it open() below would raise.
+    if not all(os.path.exists(p) for p in (pdf_path, ref_path, json_path)):
+        print(f"Missing files in {output_dir}")
+        return [], None
+
+    # Load Coordinates
+    with open(json_path, 'r') as f:
+        boxes = json.load(f)
+
+    # Load Reference
+    ref_img = Image.open(ref_path).convert("RGB")
+
+    # Load User PDF. Assumes the user didn't change page dimensions/order.
+    try:
+        user_pages = convert_from_path(pdf_path)
+    except Exception as e:
+        print(f"Error reading PDF: {e}")
+        return [], None
+    if not user_pages:
+        print(f"Error reading PDF: no pages in {pdf_path}")
+        return [], None
+    print("Debug : user_pages", len(user_pages))
+
+    # Concatenate PDF pages back to one image if user saved as multiple pages
+    # (Xournal++ might preserve the long format or split it)
+    total_h = sum(p.height for p in user_pages)
+    max_w = max(p.width for p in user_pages)
+    # White background so a narrower page does not leave a black band that
+    # would later be mistaken for pen strokes.
+    user_img = Image.new("RGB", (max_w, total_h), "white")
+    y = 0
+    for p in user_pages:
+        user_img.paste(p, (0, y))
+        y += p.height
+
+    # Resize user_img to match ref_img if slight mismatch (DPI export diffs)
+    if user_img.size != ref_img.size:
+        print("Debug : size mismatch : ", user_img.size, ref_img.size)
+        user_img = user_img.resize(ref_img.size, Image.Resampling.LANCZOS)
+
+    # --- Detection Phase ---
+    actions = []
+
+    # Absolute per-channel difference, averaged to grayscale for thresholding
+    ref_arr = np.array(ref_img)
+    user_arr = np.array(user_img)
+    diff = np.abs(ref_arr.astype(int) - user_arr.astype(int)).astype(np.uint8)
+    diff_gray = np.mean(diff, axis=2)
+
+    # Threshold for "Checked"
+    CHECK_THRESHOLD = 30  # intensity diff
+    DENSITY_THRESHOLD = 0.05  # 5% of pixels changed
+
+    # Mask to hide checkmarks from the "Notes" extraction
+    mask_img = Image.new("L", ref_img.size, 255)  # White = keep, Black = hide
+    mask_draw = ImageDraw.Draw(mask_img)
+
+    for box in boxes:
+        # global_box: [x1, y1, x2, y2]
+        b = box['global_box']
+        x1, y1, x2, y2 = map(int, b)
+
+        # Ensure bounds
+        x1, y1 = max(0, x1), max(0, y1)
+        x2, y2 = min(ref_img.width, x2), min(ref_img.height, y2)
+
+        # Analyze ROI, inset by 5 px from each edge of the box
+        roi = diff_gray[y1+5:y2-5, x1+5:x2-5]
+        if roi.size == 0:
+            continue
+
+        changed_pixels = np.sum(roi > CHECK_THRESHOLD)
+        density = changed_pixels / roi.size
+
+        if density > DENSITY_THRESHOLD:
+            print("A checked box !", density, b)
+            actions.append(box)
+            # It's checked, so we mask this area out for manual notes
+            # Expand mask slightly to catch sloppy ticks
+            mask_draw.rectangle([x1-5, y1-5, x2+5, y2+5], fill=0)
+
+    # --- Extraction Phase ---
+    # Build a transparent layer holding only the user's pen strokes.
+
+    # 1. Get difference image
+    diff_img = ImageChops.difference(ref_img, user_img).convert("L")
+
+    # 2. Threshold away noise: alpha 255 where the pixel differs enough
+    #    from the reference, 0 elsewhere
+    diff_data = np.array(diff_img)
+    alpha = np.where(diff_data > 20, 255, 0).astype(np.uint8)
+
+    # 3. Keep the user's own pixel colors, made visible only where they
+    #    differ from the reference and are outside a ticked checkbox
+    notes = user_img.convert("RGBA")
+    mask_arr = np.array(mask_img)
+    final_alpha = np.minimum(alpha, mask_arr)
+    notes.putalpha(Image.fromarray(final_alpha))
+
+    return actions, notes
+
+
+def _natural_key(text):
+    """Sort key ordering embedded digit runs numerically ("Q2" before "Q10")."""
+    return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', str(text))]
+
+
+def _apply_actions(labels, actions):
+    """Apply ticked-checkbox actions to one student's labels, in place."""
+    # Separate actions by label
+    actions_by_label = {}
+    for a in actions:
+        actions_by_label.setdefault(a['label'], []).append(a)
+
+    for label, acts in actions_by_label.items():
+        if label not in labels:
+            continue
+
+        result = labels[label].get('result')
+        if result is None:
+            continue
+        feedbacks = result.get('feedback', [])
+
+        # Rebuild the index spaces used when the checkboxes were generated:
+        # global feedback in list order, local feedback sorted by ymin.
+        global_fb_indices = [i for i, f in enumerate(feedbacks) if not f.get('box_2d')]
+        local_fb_indices = [i for i, f in enumerate(feedbacks) if f.get('box_2d')]
+        local_fb_sorted_map = sorted(local_fb_indices, key=lambda i: feedbacks[i]['box_2d'][0])
+
+        items_to_remove = set()
+
+        for act in acts:
+            # annotating_with_checks.py tags score boxes "score"; "set_score"
+            # is the name this reader first looked for and is kept as an alias.
+            if act['type'] in ('score', 'set_score'):
+                result['score'] = act['value']
+                print(f" > Updated score for {label} to {act['value']}")
+
+            elif act['type'] == 'del_global':
+                # act['index'] is the index within the global feedback list
+                if act['index'] < len(global_fb_indices):
+                    items_to_remove.add(global_fb_indices[act['index']])
+                    print(f" > Deleted global feedback in {label}")
+
+            elif act['type'] == 'del_local':
+                # act['index'] is the index within the sorted local list
+                if act['index'] < len(local_fb_sorted_map):
+                    items_to_remove.add(local_fb_sorted_map[act['index']])
+                    print(f" > Deleted local feedback in {label}")
+
+        # Remove feedbacks (in reverse to preserve indices)
+        for idx in sorted(items_to_remove, reverse=True):
+            del feedbacks[idx]
+
+
+def _render_clean_label(root_dir, student_id, label, content):
+    """Render one label with its feedback but no checkboxes.
+
+    Returns the RGB image, or None when the label's PDF is missing or empty.
+    """
+    pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")
+    if not os.path.exists(pdf_path):
+        return None
+
+    pages = annotating.convert_from_path(pdf_path)
+    if not pages:
+        return None
+    base_img = Image.new("RGBA", (max(p.width for p in pages), sum(p.height for p in pages)), "white")
+    y = 0
+    for p in pages:
+        base_img.paste(p.convert("RGBA"), (0, y))
+        y += p.height
+
+    result = content.get('result', {})
+    coordinates = content.get('coordinates', (0, 0))
+    hmin = coordinates[0]
+
+    score_text = f"{label} ; Note : {result.get('score', 0)}"
+    if result.get('error') and result.get('error') != "null":
+        score_text += f" | Error: {result.get('error')}"
+
+    # NOTE(review): annotating_with_checks.py renders this line at half the
+    # page width and pads it to the score-box height, so header heights may
+    # differ from Reference.png and shift the notes overlay; confirm.
+    header_imgs = [annotating.render_latex_text(score_text, base_img.width, fontsize=18)]
+
+    feedbacks = result.get('feedback', [])
+    global_fb = [f for f in feedbacks if not f.get('box_2d')]
+    local_fb = [f for f in feedbacks if f.get('box_2d')]
+    local_fb.sort(key=lambda x: x['box_2d'][0])
+
+    for fb in global_fb:
+        header_imgs.append(annotating.render_latex_text(fb['text'], base_img.width))
+
+    total_h = base_img.height + sum(i.height for i in header_imgs)
+    label_img = Image.new("RGB", (base_img.width + MARGIN_LEFT, total_h), "white")
+
+    cy = 0
+    for header in header_imgs:
+        label_img.paste(header, (0, cy))
+        cy += header.height
+    img_offset_y = cy
+    label_img.paste(base_img, (MARGIN_LEFT, img_offset_y))
+
+    draw = ImageDraw.Draw(label_img, "RGBA")
+    last_bot = 0
+    for fb in local_fb:
+        ymin, xmin, ymax, xmax = fb['box_2d']
+        t_ymin = (ymin - hmin) + img_offset_y
+        t_ymax = (ymax - hmin) + img_offset_y
+        draw.rectangle([xmin + MARGIN_LEFT, t_ymin, xmax + MARGIN_LEFT, t_ymax], outline="red", width=3)
+
+        txt = annotating.render_latex_text(fb['text'], ANNOT_WIDTH, (255, 200, 200, 180), max_lines=3)
+        py = max((t_ymin + t_ymax) / 2 - txt.height / 2, img_offset_y)
+        if py < last_bot:
+            py = last_bot + 5
+
+        # Grow the canvas when the text would run past the bottom edge.
+        if py + txt.height + 20 > label_img.height:
+            new_l = Image.new("RGB", (label_img.width, int(py + txt.height + 20)), "white")
+            new_l.paste(label_img, (0, 0))
+            label_img = new_l
+            draw = ImageDraw.Draw(label_img, "RGBA")
+
+        label_img.paste(txt, (10, int(py)), mask=txt)
+        last_bot = py + txt.height
+
+    return label_img
+
+
+def apply_actions_and_regenerate(root_dir, data, student_id, actions, notes_layer):
+    """
+    Modifies data based on actions, re-renders every label without
+    checkboxes, overlays notes, saves Concat.jpg.
+
+    ``data[student_id]`` is mutated in place: scores are updated and the
+    feedback entries whose delete-box was ticked are removed.
+    """
+    labels = data[student_id]
+
+    # 1. Apply Actions to Data
+    _apply_actions(labels, actions)
+
+    # 2. Regenerate Clean Image
+    output_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
+    # Don't delete output_dir, we need it.
+
+    # Same natural label order as annotating_with_checks.py, so each label
+    # lands where it was in Reference.png and the notes overlay lines up.
+    final_concats = []
+    for label, content in sorted(labels.items(), key=lambda x: _natural_key(x[0])):
+        label_img = _render_clean_label(root_dir, student_id, label, content)
+        if label_img is not None:
+            final_concats.append(label_img)
+
+    # Concatenate Labels
+    if not final_concats:
+        return
+
+    mw = max(i.width for i in final_concats)
+    th = sum(i.height for i in final_concats)
+    full_clean_img = Image.new("RGB", (mw, th), "white")
+    y = 0
+    for i in final_concats:
+        full_clean_img.paste(i, (0, y))
+        y += i.height
+
+    # 3. Overlay Manual Notes
+    if notes_layer is not None:
+        # notes_layer has the size of Reference.png; the regenerated image can
+        # be shorter once feedback was deleted. Both are aligned top-left and
+        # paste() clips whatever falls outside.
+        full_clean_img.paste(notes_layer, (0, 0), mask=notes_layer)
+
+    # Save final Concat.jpg
+    final_path = os.path.join(output_dir, "Concat.jpg")
+    full_clean_img.save(final_path)
+    print(f"Saved regenerated: {final_path}")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python reading_annotations.py ROOT_DIR")
+        sys.exit(1)
+
+    root_dir = sys.argv[1]
+
+    # Load original data
+    original_data = annotating.make_dictionary(root_dir)
+
+    # Process each Bnot folder
+    for student_id in original_data:
+        bnot_dir = os.path.join(root_dir, "Bnot", f"Copie{student_id}")
+        if os.path.exists(bnot_dir):
+            print(f"Processing annotations for: {student_id}")
+            actions, notes = detect_checks_and_notes(bnot_dir)
+            if actions or notes is not None:
+                apply_actions_and_regenerate(root_dir, original_data, student_id, actions, notes)
+            else:
+                print(" No changes detected or missing files.")