Cut Text/Sol/Persp into smaller parts, and use them

2026-04-21 14:13:04 +02:00 · 2026-04-21 14:13:04 +02:00 · b6a0f5d83f
parent 3673bd6fe1
commit b6a0f5d83f
3 changed files with 162 additions and 78 deletions
--- a/correction.py
+++ b/correction.py
@ -117,7 +117,8 @@ list \"feedback\", and possibly an \"error\". Like this example :
 }
 ]

-Here is the text of the exercice of the exam :
+Here is the text of the exercice (or the relevant part of the problem)
+of the exam :

 ```
 <<text>>
@ -127,25 +128,31 @@ Here is a possible correct answer :

 ```
 <<corr>>
-```
-
-Here is some additional scoring instructions :
-
 ```
 <<persp>>
-```

 You are asked to score the question or exercice labeled `<<label>>`,
 do not score or give feedback to any other question."""

 def make_prompt(full_label):
-    l = full_label.split(" ")
-    ex_label = l[0] + " " + l[1]
-    text = (Path(INPUT_DIR) / "Text" / ex_label).read_text()
-    corr = (Path(INPUT_DIR) / "Sol" / ex_label).read_text()
-    persp = (Path(INPUT_DIR) / "Persp" / ex_label).read_text()
-    if persp == "":
-        perps = "There is no additional scoring instructions."
+    # Build the scoring prompt for `full_label`: the Text / Sol / Persp parts
+    # are each taken from the most specific file available for that label.
+    def read_longest_prefix_file(subdir):
+        """Return the content of the best-matching file of INPUT_DIR/subdir.
+
+        A file matches when its name is a prefix of `full_label`; among the
+        matches the longest name (i.e. the most specific part) wins.
+        Returns "" when no file name matches.
+        """
+        dir_path = Path(INPUT_DIR) / subdir
+        matches = [f for f in dir_path.iterdir() if f.is_file() and full_label.startswith(f.name)]
+        if not matches:
+            return ""
+        # Decode explicitly as UTF-8 instead of relying on the locale default
+        # (presumably these are the files enonce_info.py writes as UTF-8).
+        return max(matches, key=lambda f: len(f.name)).read_text(encoding="utf-8")
+
+    text = read_longest_prefix_file("Text")
+    corr = read_longest_prefix_file("Sol")
+    persp = read_longest_prefix_file("Persp")
+
+    # The instructions section is only added to the prompt when there is
+    # something to put in it; otherwise <<persp>> is replaced by nothing.
+    if persp != "":
+        persp = "\n\nHere are additional scoring instructions : \n\n```\n" + persp +"\n```\n"
    return my_prompt.replace("<<text>>", text).replace("<<corr>>", corr).replace("<<persp>>", persp).replace("<<label>>", full_label)

 from google import genai
--- a/enonce_info.py
+++ b/enonce_info.py
@ -3,39 +3,90 @@ import os
 import glob
 import json
 import urllib.request
-
 import re

+# Roman numerals indexed by their value: entry 0 is an empty placeholder so
+# that ROMANS_*[n] is the numeral for n. Only the values 1 to 10 are covered.
+ROMANS_CAP = ["", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"]
+ROMANS_LOW = ["", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x"]
+
 def replace_dots(text):
    # (?m) enables multiline mode so ^ matches start of each line
    return re.sub(r"(?m)^(\s*.)\.", r"\1)", text)

+def replace_problem_labels(text):
+    """Renumber the `label)` markers of a problem according to their indentation.
+
+    A marker is a run of spaces/tabs at the start of a line followed by an
+    ASCII alphanumeric label and a closing parenthesis. Depending on the
+    number of leading whitespace characters it is rewritten as follows:
+
+    *  1 space : number -> upper-case roman numeral   (1) -> I))
+    *  4 spaces: single letter -> number              (a) -> 1))
+    *  7 spaces: number -> lower-case letter          (1) -> a))
+    * 10 spaces: number -> lower-case roman numeral   (1) -> i))
+
+    Any marker that does not fit (other depth, multi-letter label, number
+    that is 0 or outside the supported range) is left unchanged.
+    """
+    def repl(m):
+        spaces = m.group(1)
+        label = m.group(2)
+        n = len(spaces)
+        if n == 1 and label.isdigit():     # 1 space: 1) -> I)
+            value = int(label)
+            # Index 0 is only a placeholder: never emit an empty numeral.
+            if 1 <= value < len(ROMANS_CAP):
+                return f"{spaces}{ROMANS_CAP[value]})"
+        elif n == 4 and len(label) == 1 and label.isalpha():   # 4 spaces: a) -> 1)
+            # ord() only accepts a single character, hence the length check.
+            return f"{spaces}{ord(label.lower()) - 96})"
+        elif n == 7 and label.isdigit():   # 7 spaces: 1) -> a)
+            value = int(label)
+            # Only 1..26 map onto the letters a..z.
+            if 1 <= value <= 26:
+                return f"{spaces}{chr(96 + value)})"
+        elif n == 10 and label.isdigit():  # 10 spaces: 1) -> i)
+            value = int(label)
+            if 1 <= value < len(ROMANS_LOW):
+                return f"{spaces}{ROMANS_LOW[value]})"
+        return m.group(0)

-def format_indices(indices):
-    """Converts [2, 1] to '2)a)' based on requirements."""
-    if not indices:
-        return ""
+    # Matches start of line, spaces, alphanumeric label, and closing parenthesis
+    return re.sub(r"(?m)^([ \t]+)([a-zA-Z0-9]+)\)", repl, text)

-    # First level: numeric (1 -> 1))
-    res = f"{indices[0]})"
+def format_indices(indices, problem=False):
+    """Build the label suffix for a list of nested question indices.
+
+    For a plain exercise the levels are rendered as number, lower-case
+    letter, lower-case roman numeral (e.g. [2, 1] -> "2)a)"). With
+    problem=True an upper-case roman level comes first
+    (e.g. [2, 1, 3] -> "II)1)c)"). Indices beyond the known levels are
+    ignored and an empty list gives "".
+    """
+    if not indices:
+        return ""
+
+    def arabic(i):
+        return f"{i})"
+
+    def letter(i):
+        return f"{chr(96 + i)})"
+
+    def roman_low(i):
+        return f"{ROMANS_LOW[i]})"
+
+    def roman_cap(i):
+        return f"{ROMANS_CAP[i]})"
+
+    # One renderer per nesting level, outermost first.
+    levels = [arabic, letter, roman_low]
+    if problem:
+        levels.insert(0, roman_cap)
+
+    # zip() stops at the shorter of the two, so extra indices are dropped.
+    return "".join(render(i) for render, i in zip(levels, indices))

-    # Second level: alpha (1 -> a))
-    if len(indices) > 1:
-        res += f"{chr(96 + indices[1])})"

-    return res
+def save_split_content(text, path, base_fname, problem):
+    """Write `text` to path/base_fname, plus one file per labelled part.
+
+    Parts start at every `label)` marker found at the target depth: 1 leading
+    space normally, 4 when `problem` is true. Each part is written to
+    "<base_fname> : <label>)". In problem mode the label is prefixed with the
+    label of the enclosing 1-space section when there is one, and a part
+    stops at the next section marker so that it never contains the header of
+    the following section.
+    """
+    # Always save the main aggregated file
+    with open(os.path.join(path, base_fname), 'w', encoding='utf-8') as f:
+        f.write(text)

+    pattern = re.compile(r"(?m)^([ \t]+)([a-zA-Z0-9]+)\)")
+    target_spaces = 4 if problem else 1
+
+    # Single pass over the markers, remembering the current section instead
+    # of searching backwards for it for every part.
+    section = None
+    starts = []      # (offset, file label) of every part to write
+    boundaries = []  # offsets at which a part has to stop, in increasing order
+    for m in pattern.finditer(text):
+        depth = len(m.group(1))
+        if problem and depth == 1:
+            section = m.group(2)
+            boundaries.append(m.start())
+        if depth == target_spaces:
+            label = m.group(2) + ")"
+            if section is not None:
+                label = f"{section}){label}"
+            starts.append((m.start(), label))
+            boundaries.append(m.start())
+    boundaries.append(len(text))
+
+    for start_idx, label in starts:
+        # First boundary after the start; len(text) guarantees there is one.
+        end_idx = next(b for b in boundaries if b > start_idx)
+        chunk = text[start_idx:end_idx].strip("\n")
+
+        sub_fname = f"{base_fname} : {label}"
+
+        with open(os.path.join(path, sub_fname), 'w', encoding='utf-8') as f:
+            f.write(chunk)

 def process_directory(directory):
+    # Overview: locate the .tex file of `directory` (falling back to
+    # ~/Prépa/Staging/Interro/<directory>.tex), split it at every
+    # "%%SHEETINFO :" tag, write the `labels` file, then for each tagged
+    # block fetch the exercise from the local server and save its
+    # Text / Sol / Persp parts. A block that fails is reported and skipped.
    # Find the first .tex file in the directory
    tex_files = glob.glob(os.path.join(directory, "*.tex"))
    if not tex_files:
        print(f"No .tex file found in {directory}. Looking in /Staging/Interro/")
-        if directory[-1] == "/":
-            int_name =  directory[:-1]
-        else:
-            int_name = directory
-        tex_path = os.path.join("~/Prépa/Staging/Interro/", int_name, ".tex")
+        int_name = directory[:-1] if directory.endswith("/") else directory
+        # The extension is appended to the name: passing ".tex" as its own
+        # path component would look for a file called ".tex" inside int_name.
+        tex_path = os.path.join(os.path.expanduser("~"), "Prépa/Staging/Interro/", int_name + ".tex")
        if os.path.exists(tex_path):
            tex_file = tex_path
        else:
@ -56,69 +107,95 @@ def process_directory(directory):
    labels_file = os.path.join(directory, "labels")
    current_ex_num = 1

+    # Read entirely to allow chunking
+    with open(tex_file, 'r', encoding='utf-8') as f_in:
+        content = f_in.read()

-    with open(tex_file, 'r', encoding='utf-8') as f_in, \
-         open(labels_file, 'w', encoding='utf-8') as f_labels:
-        for line in f_in:
-            if line.startswith("%%SHEETINFO :"):
-                try:
-                    json_str = line.split(":", 1)[1].strip()
-                    data = json.loads(json_str)
+    # Split by the specific SHEETINFO tag
+    blocks = content.split("%%SHEETINFO :")

-                    # 2. Handle Labels
-                    indexes = data.get('indexes', [])
-                    if not indexes:
-                        f_labels.write(f"Ex {current_ex_num}\n")
-                    else:
-                        for item in indexes:
-                            suffix = format_indices(item['indices'])
-                            if suffix != "":
-                                f_labels.write(f"Ex {current_ex_num} : {suffix}\n")
-                            else:
-                                f_labels.write(f"Ex {current_ex_num}\n")
+    with open(labels_file, 'w', encoding='utf-8') as f_labels:
+        # Skip blocks[0] (content before first SHEETINFO)
+        for block in blocks[1:]:
+            parts_line = block.split("\n", 1)
+            json_str = parts_line[0].strip()
+            block_content = parts_line[1] if len(parts_line) > 1 else ""

-                    # Construct 'ids' parameter
-                    ex_id = str(data['id'])
-                    selection = data.get('select')
+            # Check if text until next SHEETINFO block contains \Roman
+            problem = r"\Roman" in block_content

-                    if selection is not None:
-                        # Format: "ID.sel1,sel2"
-                        sel_s = [i+1 for i in selection]
-                        ids = f"{ex_id}.{','.join(map(str, sel_s))}"
-                    else:
-                        ids = ex_id
+            if not json_str:
+                continue

-                    # Construct URL
-                    url = f"http://localhost:8080/exercices/emacs/{ids}?pretty=true&all=true&persp=true"
+            # Reset for every block so that the error message below can never
+            # report the id of a previous block.
+            ex_id = "unknown"
+            try:
+                data = json.loads(json_str)

-                    # Perform GET request
-                    with urllib.request.urlopen(url) as response:
-                        content = response.read().decode('utf-8')
+                # 2. Handle Labels
+                indexes = data.get('indexes', [])
+                if not indexes:
+                    f_labels.write(f"Ex {current_ex_num}\n")
+                else:
+                    for item in indexes:
+                        suffix = format_indices(item['indices'], problem)
+                        if suffix != "":
+                            f_labels.write(f"Ex {current_ex_num} : {suffix}\n")
+                        else:
+                            f_labels.write(f"Ex {current_ex_num}\n")

-                    # 4. Split and Save content
-                    parts = content.split('###')
+                # Construct 'ids' parameter
+                ex_id = str(data['id'])
+                selection = data.get('select')

-                    # Ensure we have at least 3 parts, pad if necessary to avoid crashes
-                    while len(parts) < 3:
-                        parts.append("")
+                if selection is not None:
+                    sel_s = [i+1 for i in selection]
+                    ids = f"{ex_id}.{','.join(map(str, sel_s))}"
+                else:
+                    ids = ex_id

-                    base_filename = f"Ex {current_ex_num}"
+                # Construct URL (append pb=true if \Roman matched)
+                url = f"http://localhost:8080/exercices/emacs/{ids}?pretty=true&all=true&persp=true"
+                if problem:
+                    url += "&pb=true"

+                # Perform GET request
+                with urllib.request.urlopen(url) as response:
+                    res_content = response.read().decode('utf-8')
+
+                # 4. Split and Save content
+                parts = res_content.split('###')
+
+                # Ensure we have at least 3 parts
+                while len(parts) < 3:
+                    parts.append("")
+
+                t_text = replace_dots(parts[0].strip("\n"))
+                s_text = replace_dots(parts[1].strip("\n"))
+                p_text = replace_dots(parts[2].strip("\n"))
+
+                # Apply hierarchy depth replace if problem context
+                if problem:
+                    t_text = replace_problem_labels(t_text)
+                    s_text = replace_problem_labels(s_text)
+                    p_text = replace_problem_labels(p_text)
+
+                base_filename = f"Ex {current_ex_num}"
+
+                if problem:
+                    save_split_content(t_text, paths['Text'], base_filename, False)
+                else:
                    with open(os.path.join(paths['Text'], base_filename), 'w', encoding='utf-8') as f:
-                        f.write(replace_dots(parts[0].strip("\n")))
+                        f.write(t_text)

-                    with open(os.path.join(paths['Sol'], base_filename), 'w', encoding='utf-8') as f:
-                        f.write(replace_dots(parts[1].strip("\n")))

-                    with open(os.path.join(paths['Persp'], base_filename), 'w', encoding='utf-8') as f:
-                        f.write(replace_dots(parts[2].strip("\n")))
+                save_split_content(s_text, paths['Sol'], base_filename, problem)
+                save_split_content(p_text, paths['Persp'], base_filename, problem)

-                    current_ex_num += 1
+                current_ex_num += 1

-                except json.JSONDecodeError:
-                    print(f"Error decoding JSON in line: {line.strip()}")
-                except Exception as e:
-                    print(f"Error processing {ids}: {e}")
+            except json.JSONDecodeError:
+                print(f"Error decoding JSON in block: {json_str}")
+            except Exception as e:
+                print(f"Error processing block {ex_id}: {e}")

 if __name__ == "__main__":
    if len(sys.argv) < 2:
--- a/plotting.py
+++ b/plotting.py
@ -272,7 +272,7 @@ class ImageViewer:

    def on_open_ori_pdf(self, event):
        if self.is_viewing and self.current_json_path:
-            pdf_path = "/home/sebastien/Staging/Interro/" + base_dir + "pdf"
+            pdf_path = "/home/sebastien/Staging/Interro/" + str(base_dir) + "pdf"
            print(f"Opening {pdf_path}")
            subprocess.Popen(['xdg-open', pdf_path])