Cut Text/Sol/Persp into smaller parts, and use them
parent
3673bd6fe1
commit
b6a0f5d83f
|
|
@ -117,7 +117,8 @@ list \"feedback\", and possibly an \"error\". Like this example :
|
|||
}
|
||||
]
|
||||
|
||||
Here is the text of the exercice of the exam :
|
||||
Here is the text of the exercice (or the relevant part of the problem)
|
||||
of the exam :
|
||||
|
||||
```
|
||||
<<text>>
|
||||
|
|
@ -127,25 +128,31 @@ Here is a possible correct answer :
|
|||
|
||||
```
|
||||
<<corr>>
|
||||
```
|
||||
|
||||
Here is some additional scoring instructions :
|
||||
|
||||
```
|
||||
<<persp>>
|
||||
```
|
||||
|
||||
You are asked to score the question or exercice labeled `<<label>>`,
|
||||
do not score or give feedback to any other question."""
|
||||
|
||||
def make_prompt(full_label):
    """Build the scoring prompt for the question labeled ``full_label``.

    For each of the ``Text``, ``Sol`` and ``Persp`` sub-directories of
    ``INPUT_DIR``, the file whose name is the longest prefix of
    ``full_label`` is used, so a file saved for a specific part of an
    exercise wins over the file of the whole exercise.  The contents are
    substituted for the ``<<text>>``, ``<<corr>>`` and ``<<persp>>``
    placeholders of ``my_prompt``; ``<<label>>`` is replaced by
    ``full_label`` itself.

    Args:
        full_label: Label of the question to score.

    Returns:
        The prompt text with the four placeholders substituted.

    Raises:
        FileNotFoundError: If one of the three sub-directories is missing.
    """

    def read_longest_prefix_file(subdir):
        """Return the content of the best-matching file of ``subdir``, or ""."""
        dir_path = Path(INPUT_DIR) / subdir
        matches = [
            f for f in dir_path.iterdir()
            if f.is_file() and full_label.startswith(f.name)
        ]
        if not matches:
            return ""
        # Decode explicitly instead of relying on the locale default.
        # NOTE(review): presumably these are the files written as UTF-8 by
        # the extraction script - confirm.
        return max(matches, key=lambda f: len(f.name)).read_text(encoding="utf-8")

    text = read_longest_prefix_file("Text")
    corr = read_longest_prefix_file("Sol")
    persp = read_longest_prefix_file("Persp")

    # The whole "additional instructions" section is omitted when there is
    # nothing to say, so the header is only added around non-empty content.
    if persp != "":
        persp = "\n\nHere are additional scoring instructions : \n\n```\n" + persp +"\n```\n"

    return (
        my_prompt
        .replace("<<text>>", text)
        .replace("<<corr>>", corr)
        .replace("<<persp>>", persp)
        .replace("<<label>>", full_label)
    )
|
||||
|
||||
from google import genai
|
||||
|
|
|
|||
205
enonce_info.py
205
enonce_info.py
|
|
@ -3,39 +3,90 @@ import os
|
|||
import glob
|
||||
import json
|
||||
import urllib.request
|
||||
|
||||
import re
|
||||
|
||||
ROMANS_CAP = ["", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"]
|
||||
ROMANS_LOW = ["", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x"]
|
||||
|
||||
def replace_dots(text):
    """Turn a leading ``x.`` item label into ``x)`` on every line.

    Only a single ASCII letter or digit, optionally preceded by spaces or
    tabs, is treated as a label.  Other line starts such as ``...`` are
    left untouched; the other label helpers only recognise alphanumeric
    labels anyway.

    Args:
        text: Text to rewrite; may span several lines.

    Returns:
        The text with the dot after each leading label replaced by ``)``.
    """
    # (?m) enables multiline mode so ^ matches start of each line
    return re.sub(r"(?m)^([ \t]*[A-Za-z0-9])\.", r"\1)", text)
|
||||
|
||||
def replace_problem_labels(text):
    """Replaces labels according to spaces depth when problem=True.

    A label is an alphanumeric token followed by ``)`` at the start of a
    line, after some spaces or tabs.  The number of leading whitespace
    characters selects the rewrite:

    * 1  -> capital roman numeral (``1)`` becomes ``I)``)
    * 4  -> number (``a)`` becomes ``1)``)
    * 7  -> lowercase letter (``1)`` becomes ``a)``)
    * 10 -> lowercase roman numeral (``1)`` becomes ``i)``)

    A label that cannot be converted (other depth, wrong kind of label,
    value out of range) is left unchanged.

    Args:
        text: Text whose labels are rewritten.

    Returns:
        The rewritten text.
    """
    def repl(m):
        spaces, label = m.group(1), m.group(2)
        depth = len(spaces)
        if depth == 4:
            # ord() needs exactly one character, so multi-letter labels
            # are left alone instead of raising TypeError.
            if len(label) == 1 and label.isalpha():
                return f"{spaces}{ord(label.lower()) - 96})"
        elif label.isdigit():
            n = int(label)
            # Index 0 of the roman tables is an empty placeholder and
            # chr(96) is not a letter, so every range starts at 1.
            if depth == 1 and 1 <= n < len(ROMANS_CAP):
                return f"{spaces}{ROMANS_CAP[n]})"
            if depth == 7 and 1 <= n <= 26:
                return f"{spaces}{chr(96 + n)})"
            if depth == 10 and 1 <= n < len(ROMANS_LOW):
                return f"{spaces}{ROMANS_LOW[n]})"
        return m.group(0)

    # Matches start of line, spaces, alphanumeric label, and closing parenthesis
    return re.sub(r"(?m)^([ \t]+)([a-zA-Z0-9]+)\)", repl, text)
|
||||
|
||||
def format_indices(indices, problem=False):
    """Convert a list of nesting indices into a label such as ``2)a)``.

    Each level is rendered in its own numbering style and followed by
    ``)``.  Without ``problem`` the styles are, in order: number,
    lowercase letter, lowercase roman numeral (``[2, 1]`` -> ``2)a)``).
    With ``problem`` a capital roman numeral level comes first
    (``[1, 2, 1]`` -> ``I)2)a)``).  Levels beyond the last style are
    ignored.

    Args:
        indices: Sequence of integer indices, outermost level first.
        problem: Whether to use the four-level problem numbering.

    Returns:
        The formatted label, or ``""`` when ``indices`` is empty.

    Raises:
        IndexError: If a roman-numeral level is outside the
            ``ROMANS_CAP`` / ``ROMANS_LOW`` tables.
    """
    if not indices:
        return ""

    def arabic(n):
        return str(n)

    def letter(n):
        return chr(96 + n)

    def roman_low(n):
        return ROMANS_LOW[n]

    def roman_cap(n):
        return ROMANS_CAP[n]

    if problem:
        styles = (roman_cap, arabic, letter, roman_low)
    else:
        styles = (arabic, letter, roman_low)

    # zip() stops at the shorter sequence, which drops surplus levels.
    return "".join(f"{style(n)})" for style, n in zip(styles, indices))
|
||||
def save_split_content(text, path, base_fname, problem):
    """Write ``text`` to ``path`` as a whole and as one file per labeled part.

    The complete text is always written to ``base_fname``.  It is then cut
    at every label (alphanumeric token followed by ``)`` at the start of a
    line, after spaces or tabs) of the splitting depth: 1 leading
    whitespace character normally, 4 when ``problem`` is true.  Each part
    is written to ``"<base_fname> : <label>"``.  In problem mode the label
    is prefixed with the label of the enclosing 1-space section heading,
    and a part ends at the next section heading as well as at the next
    part, so it never contains the heading of the following section.

    Args:
        text: Content to save.
        path: Directory receiving the files.
        base_fname: Name of the aggregated file and prefix of part files.
        problem: Whether ``text`` uses the problem layout.
    """
    # Always save the main aggregated file
    with open(os.path.join(path, base_fname), 'w', encoding='utf-8') as f:
        f.write(text)

    pattern = re.compile(r"(?m)^([ \t]+)([a-zA-Z0-9]+)\)")
    split_depth = 4 if problem else 1

    section = None   # label of the latest 1-space heading (problem mode)
    current = None   # (start offset, label) of the part being collected
    chunks = []      # finished parts as (label, start, end)

    for m in pattern.finditer(text):
        depth = len(m.group(1))
        is_split = depth == split_depth
        is_section = problem and depth == 1
        if not (is_split or is_section):
            continue

        # Any boundary closes the part that is currently open.
        if current is not None:
            chunks.append((current[1], current[0], m.start()))
            current = None

        if is_section:
            section = m.group(2)
        else:
            label = m.group(2) + ")"
            if problem and section is not None:
                label = f"{section}){label}"
            current = (m.start(), label)

    if current is not None:
        chunks.append((current[1], current[0], len(text)))

    for label, start, end in chunks:
        sub_fname = f"{base_fname} : {label}"
        with open(os.path.join(path, sub_fname), 'w', encoding='utf-8') as f:
            f.write(text[start:end].strip("\n"))
|
||||
|
||||
def process_directory(directory):
|
||||
# Find the first .tex file in the directory
|
||||
tex_files = glob.glob(os.path.join(directory, "*.tex"))
|
||||
if not tex_files:
|
||||
print(f"No .tex file found in {directory}. Looking in /Staging/Interro/")
|
||||
if directory[-1] == "/":
|
||||
int_name = directory[:-1]
|
||||
else:
|
||||
int_name = directory
|
||||
tex_path = os.path.join("~/Prépa/Staging/Interro/", int_name, ".tex")
|
||||
int_name = directory[:-1] if directory.endswith("/") else directory
|
||||
tex_path = os.path.join(os.path.expanduser("~"), "Prépa/Staging/Interro/", int_name, ".tex")
|
||||
if os.path.exists(tex_path):
|
||||
tex_file = tex_path
|
||||
else:
|
||||
|
|
@ -56,69 +107,95 @@ def process_directory(directory):
|
|||
labels_file = os.path.join(directory, "labels")
|
||||
current_ex_num = 1
|
||||
|
||||
# Read entirely to allow chunking
|
||||
with open(tex_file, 'r', encoding='utf-8') as f_in:
|
||||
content = f_in.read()
|
||||
|
||||
with open(tex_file, 'r', encoding='utf-8') as f_in, \
|
||||
open(labels_file, 'w', encoding='utf-8') as f_labels:
|
||||
for line in f_in:
|
||||
if line.startswith("%%SHEETINFO :"):
|
||||
try:
|
||||
json_str = line.split(":", 1)[1].strip()
|
||||
data = json.loads(json_str)
|
||||
# Split by the specific SHEETINFO tag
|
||||
blocks = content.split("%%SHEETINFO :")
|
||||
|
||||
# 2. Handle Labels
|
||||
indexes = data.get('indexes', [])
|
||||
if not indexes:
|
||||
f_labels.write(f"Ex {current_ex_num}\n")
|
||||
else:
|
||||
for item in indexes:
|
||||
suffix = format_indices(item['indices'])
|
||||
if suffix != "":
|
||||
f_labels.write(f"Ex {current_ex_num} : {suffix}\n")
|
||||
else:
|
||||
f_labels.write(f"Ex {current_ex_num}\n")
|
||||
with open(labels_file, 'w', encoding='utf-8') as f_labels:
|
||||
# Skip blocks[0] (content before first SHEETINFO)
|
||||
for block in blocks[1:]:
|
||||
parts_line = block.split("\n", 1)
|
||||
json_str = parts_line[0].strip()
|
||||
block_content = parts_line[1] if len(parts_line) > 1 else ""
|
||||
|
||||
# Construct 'ids' parameter
|
||||
ex_id = str(data['id'])
|
||||
selection = data.get('select')
|
||||
# Check if text until next SHEETINFO block contains \Roman
|
||||
problem = r"\Roman" in block_content
|
||||
|
||||
if selection is not None:
|
||||
# Format: "ID.sel1,sel2"
|
||||
sel_s = [i+1 for i in selection]
|
||||
ids = f"{ex_id}.{','.join(map(str, sel_s))}"
|
||||
else:
|
||||
ids = ex_id
|
||||
if not json_str:
|
||||
continue
|
||||
|
||||
# Construct URL
|
||||
url = f"http://localhost:8080/exercices/emacs/{ids}?pretty=true&all=true&persp=true"
|
||||
try:
|
||||
data = json.loads(json_str)
|
||||
|
||||
# Perform GET request
|
||||
with urllib.request.urlopen(url) as response:
|
||||
content = response.read().decode('utf-8')
|
||||
# 2. Handle Labels
|
||||
indexes = data.get('indexes', [])
|
||||
if not indexes:
|
||||
f_labels.write(f"Ex {current_ex_num}\n")
|
||||
else:
|
||||
for item in indexes:
|
||||
suffix = format_indices(item['indices'], problem)
|
||||
if suffix != "":
|
||||
f_labels.write(f"Ex {current_ex_num} : {suffix}\n")
|
||||
else:
|
||||
f_labels.write(f"Ex {current_ex_num}\n")
|
||||
|
||||
# 4. Split and Save content
|
||||
parts = content.split('###')
|
||||
# Construct 'ids' parameter
|
||||
ex_id = str(data['id'])
|
||||
selection = data.get('select')
|
||||
|
||||
# Ensure we have at least 3 parts, pad if necessary to avoid crashes
|
||||
while len(parts) < 3:
|
||||
parts.append("")
|
||||
if selection is not None:
|
||||
sel_s = [i+1 for i in selection]
|
||||
ids = f"{ex_id}.{','.join(map(str, sel_s))}"
|
||||
else:
|
||||
ids = ex_id
|
||||
|
||||
base_filename = f"Ex {current_ex_num}"
|
||||
# Construct URL (append pb=true if \Roman matched)
|
||||
url = f"http://localhost:8080/exercices/emacs/{ids}?pretty=true&all=true&persp=true"
|
||||
if problem:
|
||||
url += "&pb=true"
|
||||
|
||||
# Perform GET request
|
||||
with urllib.request.urlopen(url) as response:
|
||||
res_content = response.read().decode('utf-8')
|
||||
|
||||
# 4. Split and Save content
|
||||
parts = res_content.split('###')
|
||||
|
||||
# Ensure we have at least 3 parts
|
||||
while len(parts) < 3:
|
||||
parts.append("")
|
||||
|
||||
t_text = replace_dots(parts[0].strip("\n"))
|
||||
s_text = replace_dots(parts[1].strip("\n"))
|
||||
p_text = replace_dots(parts[2].strip("\n"))
|
||||
|
||||
# Apply hierarchy depth replace if problem context
|
||||
if problem:
|
||||
t_text = replace_problem_labels(t_text)
|
||||
s_text = replace_problem_labels(s_text)
|
||||
p_text = replace_problem_labels(p_text)
|
||||
|
||||
base_filename = f"Ex {current_ex_num}"
|
||||
|
||||
if problem:
|
||||
save_split_content(t_text, paths['Text'], base_filename, False)
|
||||
else:
|
||||
with open(os.path.join(paths['Text'], base_filename), 'w', encoding='utf-8') as f:
|
||||
f.write(replace_dots(parts[0].strip("\n")))
|
||||
f.write(t_text)
|
||||
|
||||
with open(os.path.join(paths['Sol'], base_filename), 'w', encoding='utf-8') as f:
|
||||
f.write(replace_dots(parts[1].strip("\n")))
|
||||
|
||||
with open(os.path.join(paths['Persp'], base_filename), 'w', encoding='utf-8') as f:
|
||||
f.write(replace_dots(parts[2].strip("\n")))
|
||||
save_split_content(s_text, paths['Sol'], base_filename, problem)
|
||||
save_split_content(p_text, paths['Persp'], base_filename, problem)
|
||||
|
||||
current_ex_num += 1
|
||||
current_ex_num += 1
|
||||
|
||||
except json.JSONDecodeError:
|
||||
print(f"Error decoding JSON in line: {line.strip()}")
|
||||
except Exception as e:
|
||||
print(f"Error processing {ids}: {e}")
|
||||
except json.JSONDecodeError:
|
||||
print(f"Error decoding JSON in block: {json_str}")
|
||||
except Exception as e:
|
||||
print(f"Error processing block {ex_id if 'ex_id' in locals() else 'unknown'}: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 2:
|
||||
|
|
|
|||
|
|
@ -272,7 +272,7 @@ class ImageViewer:
|
|||
|
||||
def on_open_ori_pdf(self, event):
    """Open the PDF whose path is derived from ``base_dir`` with ``xdg-open``.

    Nothing happens unless the viewer is in viewing mode and a JSON path
    is currently set.

    Args:
        event: Event that triggered the handler; not used.
    """
    if self.is_viewing and self.current_json_path:
        # str() keeps the concatenation valid when base_dir is not a plain
        # string (e.g. a pathlib.Path).
        # NOTE(review): no "." is inserted before "pdf" - presumably
        # base_dir already ends with one; confirm.
        pdf_path = "/home/sebastien/Staging/Interro/" + str(base_dir) + "pdf"
        print(f"Opening {pdf_path}")
        subprocess.Popen(['xdg-open', pdf_path])
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue