421 lines
14 KiB
Python
421 lines
14 KiB
Python
import sys
|
|
import os
|
|
import json
|
|
import glob
|
|
from PIL import Image
|
|
|
|
|
|
# Results maps: Copie id -> label -> {pdf_path, result, coordinates}
|
|
# Coordinates are the real coordinates (hmin, hmax) of the image in the Group
|
|
# The box_2d coordinates in the Gemini result are normalized (0-1000) and must be converted to pixels.
|
|
def _index_label_coordinates(label_dir):
    """Index the coordinate entries stored in ``label_dir/*.json``.

    Every JSON file is read as a list of entries whose first three items are
    used here as ``[id, coord_a, coord_b]``. Files that are not valid JSON are
    skipped.

    Returns:
        dict mapping an id to ``((coord_a, coord_b), json_path)``. When an id
        appears several times, the first occurrence (in glob order) is kept.
    """
    index = {}
    for json_path in glob.glob(os.path.join(label_dir, "*.json")):
        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                coord_list = json.load(f)
        except json.JSONDecodeError:
            # A broken file must not prevent matches in the other files.
            continue
        for entry in coord_list:
            index.setdefault(entry[0], ((entry[1], entry[2]), json_path))
    return index


def make_dictionary(root_dir):
    """Build the per-student correction dictionary from ``correction.json``.

    ``root_dir/correction.json`` maps each label to a list of lists of items;
    each item provides an ``'id'`` and a ``'result'`` dict. For every item the
    matching entry is looked up in ``root_dir/<label>/*.json``; the ``.jpg``
    sharing the JSON file's base name gives the pixel size used to convert the
    ``box_2d`` values of ``result['feedback']`` from the 0-1000 scale to
    pixels (``[0]`` and ``[2]`` use the height, ``[1]`` and ``[3]`` the width).
    The ``result`` dicts are modified in place.

    Args:
        root_dir: Directory holding ``correction.json`` and the label folders.

    Returns:
        dict: ``id -> label -> {"pdf_path", "result", "coordinates"}``.
        ``coordinates`` is ``None`` when no entry was found for the id; in
        that case the ``box_2d`` values are left untouched and a warning is
        printed, because there is no image size to scale them with.

    Exits the process with status 1 when ``correction.json`` is missing.
    """
    correction_path = os.path.join(root_dir, "correction.json")

    # Load correction data
    try:
        with open(correction_path, 'r', encoding='utf-8') as f:
            corrections = json.load(f)
    except FileNotFoundError:
        print(f"Error: {correction_path} not found.")
        sys.exit(1)

    # Dictionary: keys are IDs
    result_data = {}

    for label, groups in corrections.items():
        # Parse the label's JSON files once instead of once per item.
        label_index = _index_label_coordinates(os.path.join(root_dir, label))

        # Each label holds a list of lists of items: iterate them flattened.
        for item in (item for group in groups for item in group):
            student_id = item['id']
            result_obj = item['result']

            coordinates = None
            height, width = None, None
            located = label_index.get(student_id)
            if located is not None:
                coordinates, json_path = located
                img_path = os.path.splitext(json_path)[0] + ".jpg"
                with Image.open(img_path) as img:
                    width, height = img.size

            # Construct PDF path: Dir/Copie{id}/{label}.pdf
            pdf_path = os.path.join(root_dir, f"Copie{student_id}", f"{label}.pdf")

            boxed = [el for el in result_obj.get("feedback", []) if el.get("box_2d")]
            if boxed and (height is None or width is None):
                # Without the image size the boxes cannot be converted to
                # pixels; keep them as they are rather than crash.
                print(f"Warning: no image size for Copie {student_id} / {label}; "
                      "box_2d values left normalized.")
            else:
                for el in boxed:
                    box = el["box_2d"]
                    box[0] = (box[0] * height) // 1000
                    box[2] = (box[2] * height) // 1000
                    box[1] = (box[1] * width) // 1000
                    box[3] = (box[3] * width) // 1000

            result_data.setdefault(student_id, {})[label] = {
                "pdf_path": pdf_path,
                "result": result_obj,
                "coordinates": coordinates
            }

    return result_data
|
|
# output the resulting dictionary
|
|
# print(json.dumps(result_data, indent=2, ensure_ascii=False))
|
|
|
|
import io
|
|
import shutil
|
|
from pdf2image import convert_from_path
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
import matplotlib.pyplot as plt
|
|
|
|
# plt.rcParams.update({ "text.usetex": True,
|
|
# "text.latex.preamble": r"\usepackage{bbold}"})
|
|
|
|
import re
|
|
import textwrap
|
|
|
|
def normalize_mathtext(text):
    r"""Rewrite LaTeX that Matplotlib's mathtext parser does not accept.

    Applied in order:
      * ``\le`` -> ``\leq`` and ``\ge`` -> ``\geq``; the negative lookahead
        ``(?![a-zA-Z])`` keeps longer commands such as ``\left`` intact;
      * ``\implies`` -> ``\Rightarrow``;
      * a doubled backslash is collapsed to a single one (over-escaped input);
      * ``\llbracket`` / ``\rrbracket`` -> ``[\![`` / ``]\!]``;
      * a form-feed character becomes the two characters ``\f`` (an under-
        escaped ``\f...`` command in the JSON source);
      * the U+0010 control character is removed.
    """
    # Regex-based rewrites (the replacement strings are re templates).
    regex_rewrites = (
        (r'\\le(?![a-zA-Z])', r'\\leq'),
        (r'\\ge(?![a-zA-Z])', r'\\geq'),
        (r'\\implies', r'\\Rightarrow'),
    )
    for pattern, replacement in regex_rewrites:
        text = re.sub(pattern, replacement, text)

    # Plain substring rewrites; the order matters (backslashes are collapsed
    # before the bracket commands are looked up).
    literal_rewrites = (
        ("\\\\", "\\"),
        ("\\llbracket", "[\\!["),
        ("\\rrbracket", "]\\!]"),
        ('\f', r'\f'),
        ('\u0010', ""),
    )
    for old, new in literal_rewrites:
        text = text.replace(old, new)

    return text
|
|
|
|
import re
|
|
def wrap_latex_text(text, width_chars):
    """Wrap ``text`` to ``width_chars`` columns, keeping ``$...$`` blocks whole.

    Plain text is split on whitespace; every ``$...$`` math block is a single
    token that is never broken, even when it contains spaces. Tokens are
    joined with single spaces and a line is closed as soon as the next token
    would make it longer than ``width_chars``. A token longer than the width
    is placed alone on its own line.

    Args:
        text: Text possibly containing inline ``$...$`` math.
        width_chars: Maximum number of characters per line.

    Returns:
        The wrapped text, lines separated by ``"\\n"`` (``""`` for empty or
        whitespace-only input).
    """
    # 1. Split into alternating plain-text / math chunks ($...$, no nested $).
    parts = re.split(r'(\$[^\$]+\$)', text)

    # 2. Tokenize: math blocks stay whole, plain text is split on whitespace.
    tokens = []
    for part in parts:
        if part.startswith('$') and part.endswith('$'):
            tokens.append(part)
        else:
            tokens.extend(part.split())

    # 3. Greedy line filling.
    lines = []
    current_line = []
    current_length = 0

    for token in tokens:
        token_len = len(token)
        # A separating space is only needed when the line already has content.
        projected = current_length + token_len + (1 if current_line else 0)

        # Never close an empty line: that would emit a blank line in front of
        # a token that is wider than the limit.
        if current_line and projected > width_chars:
            lines.append(" ".join(current_line))
            current_line = [token]
            current_length = token_len
        else:
            current_line.append(token)
            current_length = projected

    if current_line:
        lines.append(" ".join(current_line))

    return "\n".join(lines)
|
|
|
|
def render_latex_text(text, width_px, bg_color=(255, 255, 255, 255), max_lines=None,
                      fontsize=14):
    """Render ``text`` (with inline mathtext) to an RGBA PIL image.

    The text is normalized with ``normalize_mathtext``, wrapped with
    ``wrap_latex_text`` and drawn with Matplotlib on a figure ``width_px``
    pixels wide at 100 dpi. The figure is saved with a tight bounding box, so
    the size of the returned image follows the rendered text rather than
    ``width_px`` exactly.

    Args:
        text: Text to render; ``$...$`` parts are typeset by mathtext.
        width_px: Target width in pixels, used for the figure width and for
            the wrapping width (about 10 characters per inch).
        bg_color: RGBA background placed under the transparent rendering.
        max_lines: Accepted for compatibility; currently not enforced, the
            image simply grows with the number of wrapped lines.
        fontsize: Matplotlib font size of the text.

    Returns:
        A ``PIL.Image.Image`` in RGBA mode.
    """
    # 1. Fix unsupported symbols
    text = normalize_mathtext(text)

    dpi = 100
    fig_width = width_px / dpi

    # Heuristic wrapping width: ~10 characters per inch. It does not depend
    # on ``fontsize``.
    chars_per_line = int(fig_width * 10)

    # Pre-wrap the text respecting LaTeX boundaries
    wrapped_text = wrap_latex_text(text, chars_per_line)

    # Height follows the number of lines: 0.3 inch per line + 0.2 inch buffer.
    num_lines = wrapped_text.count('\n') + 1
    fig_height = num_lines * 0.3 + 0.2

    fig = plt.figure(figsize=(fig_width, fig_height), dpi=dpi)
    buf = io.BytesIO()
    try:
        # NOTE: wrap=False because the text is already wrapped above.
        plt.text(0.01, 0.95, wrapped_text, fontsize=fontsize,
                 verticalalignment='top', horizontalalignment='left',
                 wrap=False)
        plt.axis('off')
        plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0.1, transparent=True)
    finally:
        # Always release the figure, even when rendering fails (e.g. on a
        # mathtext parse error); otherwise figures pile up in pyplot.
        plt.close(fig)
    buf.seek(0)

    img = Image.open(buf).convert("RGBA")

    # Put the transparent rendering on the requested background.
    final_img = Image.new("RGBA", img.size, bg_color)
    final_img.alpha_composite(img)
    return final_img
|
|
|
|
|
|
def process_correction(root_dir, data, all_labels):
    """Create the annotated images of every student copy.

    For each student id in ``data`` the directory ``root_dir/Anot_Copie<id>``
    is (re)created, unless it already contains ``Concat.jpg``, in which case
    the student is skipped. For each label of the student,
    ``root_dir/Copie<id>/<label>.pdf`` is converted to one tall image, a
    header (score, optional error, feedback without box) is added on top, and
    every feedback with a ``box_2d`` is drawn as a red rectangle with its text
    pasted in the left margin. The result is saved as ``<label>.jpg``.
    Finally ``score.json`` (label -> score as a string, ``""`` for labels
    without result) is written and ``concat_display_image`` is called on the
    directory.

    Args:
        root_dir: Root directory of the copies.
        data: ``id -> label -> {"result", "coordinates", ...}``, as built by
            ``make_dictionary``.
        all_labels: Every label, used to initialise ``score.json``.
    """
    margin_left = 200

    for student_id, labels in data.items():

        # Prepare output directory: Dir/Anot_CopieID
        output_dir = os.path.join(root_dir, f"Anot_Copie{student_id}")

        # Check if already processed (Concat.jpg exists)
        concat_path = os.path.join(output_dir, "Concat.jpg")
        if os.path.exists(concat_path):
            print(f"Skipping Copie {student_id} (Concat.jpg exists)")
            continue

        print("Processing :", student_id)

        # Clean folder if re-processing
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        d_notes = dict.fromkeys(all_labels, "")

        for label, content in labels.items():
            # 1. Find PDF path
            copie_folder = f"Copie{student_id}"
            pdf_rel_path = os.path.join(copie_folder, f"{label}.pdf")
            pdf_full_path = os.path.join(root_dir, pdf_rel_path)

            if not os.path.exists(pdf_full_path):
                print(f"File not found: {pdf_full_path}")
                continue

            # 2. Convert PDF to Image
            try:
                pages = convert_from_path(pdf_full_path)

                # Calculate total dimensions
                total_h = sum(page.height for page in pages)
                max_w = max(page.width for page in pages)

                # Create concatenated base image
                base_img = Image.new("RGBA", (max_w, total_h), "white")

                current_y = 0
                for page in pages:
                    base_img.paste(page.convert("RGBA"), (0, current_y))
                    current_y += page.height
            except Exception as e:
                print(f"Error converting {pdf_full_path}: {e}")
                continue

            # 'coordinates' may be present but None (no entry found for this
            # copy); ``or`` also covers that case, unlike a .get() default.
            coordinates = content.get('coordinates') or (0, 0)  # (hmin, hmax)
            hmin = coordinates[0]
            result = content.get('result') or {}
            score = result.get('score', 0)
            error = result.get('error', "")
            feedbacks = result.get('feedback') or []

            # Organize feedbacks
            global_fb = [f for f in feedbacks if not f.get('box_2d')]
            local_fb = [f for f in feedbacks if f.get('box_2d')]
            # Sort local feedback by Y position
            local_fb.sort(key=lambda x: x['box_2d'][0])

            # --- PREPARE HEADERS ---
            header_elements = []
            score_text = f"{label} ; Note : {score}"
            d_notes[label] = str(score)
            if error and error != "null":
                score_text += f" | Error: {error}"

            # Render Row 1
            row1_img = render_latex_text(score_text, base_img.width, fontsize=18)
            header_elements.append(row1_img)

            # Render Global Feedbacks (Rows 2+)
            for fb in global_fb:
                # A feedback without text is rendered as an empty row.
                fb_img = render_latex_text(fb.get('text') or "", base_img.width)
                header_elements.append(fb_img)

            # Calculate total new height
            header_height = sum(img.height for img in header_elements)
            total_height = base_img.height + header_height

            # Create Canvas
            final_img = Image.new("RGB", (base_img.width + margin_left, total_height), "white")

            # Paste Headers
            current_y = 0
            for elem in header_elements:
                final_img.paste(elem, (0, current_y))
                current_y += elem.height

            # Paste Original Image
            # Note: current_y is now the offset for the actual image content
            image_offset_y = current_y
            final_img.paste(base_img, (margin_left, image_offset_y))

            # --- DRAW LOCAL ANNOTATIONS ---
            draw = ImageDraw.Draw(final_img, "RGBA")

            last_text_bottom = 0

            for fb in local_fb:
                box = fb.get('box_2d')
                if not box or len(box) < 4:
                    continue

                ymin, xmin, ymax, xmax = box[0], box[1], box[2], box[3]

                # Box values are shifted by hmin vertically, then moved below
                # the header and right of the margin.
                target_ymin = (ymin - hmin) + image_offset_y
                target_ymax = (ymax - hmin) + image_offset_y
                target_xmin = xmin + margin_left
                target_xmax = xmax + margin_left

                # Draw Rectangle
                draw.rectangle([target_xmin, target_ymin, target_xmax, target_ymax], outline="red", width=3)

                # Render Text with a semi-transparent light red background
                txt_img = render_latex_text(
                    fb.get('text') or "",
                    width_px=500,
                    bg_color=(255, 200, 200, 180),  # Light Red semi-transparent
                    max_lines=3
                )

                # Calculate placement: vertically centred on the box, but
                # never above the page content.
                txt_h = txt_img.height
                center_y = (target_ymin + target_ymax) / 2
                paste_y = center_y - (txt_h / 2)

                paste_y = max(paste_y, image_offset_y)

                # Prevent overlap with previous text
                if paste_y < last_text_bottom:
                    paste_y = last_text_bottom + 5  # Move down + padding

                # Check for overflow and resize if necessary
                required_height = int(paste_y + txt_h + 20)  # +20 for bottom padding
                if required_height > final_img.height:
                    # Create a new taller image and copy the current content
                    new_final = Image.new("RGB", (final_img.width, required_height), "white")
                    new_final.paste(final_img, (0, 0))
                    final_img = new_final
                    # The draw object is bound to the old image: recreate it
                    # so the next rectangles land on the new one.
                    draw = ImageDraw.Draw(final_img, "RGBA")

                # Paste in the left margin
                final_img.paste(txt_img, (10, int(paste_y)), mask=txt_img)
                last_text_bottom = paste_y + txt_h

            # 7. Save Image
            save_path = os.path.join(output_dir, f"{label}.jpg")
            final_img.save(save_path)

        json_path = os.path.join(output_dir, "score.json")
        with open(json_path, "w") as f:
            json.dump(d_notes, f, indent=4)
        concat_display_image(output_dir)
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
import subprocess
|
|
|
|
def concat_display_image(subdir):
    """Stack every ``*.jpg`` of ``subdir`` vertically into ``Concat.jpg``.

    Images are ordered by the integers found in their path (natural order,
    e.g. ``2.jpg`` before ``10.jpg``); an existing ``Concat.jpg`` is never
    used as input. The output is as wide as the widest image and narrower
    images are left-aligned on a white background. Nothing is written when
    the directory contains no other ``.jpg`` file.

    Args:
        subdir: Directory (``str`` or ``Path``) holding the images.
    """
    subdir = Path(subdir)
    # Find valid images, excluding previous concatenations
    images = sorted(f for f in subdir.glob("*.jpg") if f.name != "Concat.jpg")

    if not images:
        return
    # Stable sort: the alphabetical order above breaks ties between paths
    # holding the same numbers.
    images.sort(key=lambda f: [int(n) for n in re.findall(r'\d+', str(f))])

    opened_imgs = []
    try:
        for path in images:
            opened_imgs.append(Image.open(path))

        # Calculate dimensions (max width, sum of heights)
        max_w = max(i.width for i in opened_imgs)
        total_h = sum(i.height for i in opened_imgs)

        # White canvas, so the area beside narrower images is not black.
        canvas = Image.new('RGB', (max_w, total_h), "white")
        current_y = 0
        for img in opened_imgs:
            canvas.paste(img, (0, current_y))
            current_y += img.height
    finally:
        # Release the file handles held by the source images.
        for img in opened_imgs:
            img.close()

    # Save
    save_path = subdir / "Concat.jpg"
    canvas.save(save_path)
    print(f"Saved: {save_path}")
|
|
# subprocess.call(('xdg-open', save_path))
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: ``python script.py <Dir>``.

    Reads the labels (one per line, empty lines ignored) from ``<Dir>/labels``,
    builds the correction dictionary with ``make_dictionary`` and creates the
    annotated images with ``process_correction``. Exits with status 1 when
    the directory argument or the labels file is missing.
    """
    if len(sys.argv) < 2:
        print("Usage: python script.py <Dir>")
        sys.exit(1)

    root_dir = sys.argv[1]
    labels_path = Path(root_dir) / "labels"
    try:
        # Same explicit encoding as the one used to read correction.json.
        raw_labels = labels_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        print(f"Error: {labels_path} not found.")
        sys.exit(1)
    labels = [line for line in raw_labels.splitlines() if line]

    # results is: Copie id -> label -> {pdf_path, result, coordinates}
    results = make_dictionary(root_dir)
    process_correction(root_dir, results, labels)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
# concat_anot_images(root_dir)
|