# Copies/enonce_info.py

import sys
import os
import glob
import json
import urllib.request
import re
import subprocess
import tempfile
import shutil

def compile_to_pdf(text, output_pdf_path):  # width = 21 cm + 3.8 cm (size of the left margin)
    """Render a LaTeX fragment as a standalone PDF using pdflatex.

    The fragment is embedded in a ``standalone`` document (24.8 cm wide
    minipage), written to ``text.tex`` inside a temporary directory and
    compiled there. If a ``text.pdf`` results, it is moved to
    ``output_pdf_path``; otherwise nothing is written.

    pdflatex output is discarded and its exit status is ignored. Any
    exception raised while compiling or moving the PDF is printed rather
    than propagated.
    """
    document = f"""\\documentclass[varwidth=24.8cm,margin=0.4cm]{{standalone}}
\\usepackage[utf8]{{inputenc}}
\\usepackage[T1]{{fontenc}}
\\usepackage{{lmodern}}
\\usepackage{{amsmath, amssymb}}
\\usepackage{{commands}}
\\usepackage{{minted}}
\\usepackage{{graphicx}}
\\usepackage{{enumitem}}
\\begin{{document}}
    \\begin{{minipage}}{{24.8cm}}
{text}
    \\end{{minipage}}
\\end{{document}}
"""
    with tempfile.TemporaryDirectory() as work_dir:

        def run_pdflatex():
            # One non-interactive pass inside the scratch directory; the
            # exit status is deliberately not checked.
            subprocess.run(
                ['pdflatex', '-interaction=nonstopmode', 'text.tex'],
                cwd=work_dir,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=False,
            )

        with open(os.path.join(work_dir, 'text.tex'), 'w', encoding='utf-8') as tex_out:
            tex_out.write(document)

        # NOTE: pdflatex runs with the scratch directory as cwd and no
        # TEXINPUTS override, so commands.sty has to be resolvable through
        # the regular TeX search path.
        try:
            run_pdflatex()
            # Sources mentioning minted get a second pass.
            if "minted" in text:
                run_pdflatex()

            built_pdf = os.path.join(work_dir, 'text.pdf')
            if os.path.exists(built_pdf):
                shutil.move(built_pdf, output_pdf_path)
        except Exception as e:
            print(f"Compilation error for {output_pdf_path}: {e}")

def fetch_and_save_sub_text(ex_id, indices, label, text_path):
    """Download the text of one (sub-)question and store it under ``text_path``.

    The ``exo_q_text`` endpoint of the local server is queried for ``ex_id``,
    with ``indices`` appended as a comma-separated path segment when the list
    is non-empty. The response is passed through ``replace_dots``, written to
    ``{label}.tex`` and compiled to ``{label}.pdf``. Any failure along the
    way is printed instead of raised.
    """
    index_segment = ",".join(str(i) for i in indices)
    url = (
        f"http://localhost:8080/exercices/exo_q_text/{ex_id}/{index_segment}"
        if index_segment
        else f"http://localhost:8080/exercices/exo_q_text/{ex_id}"
    )
    try:
        with urllib.request.urlopen(url) as response:
            body = replace_dots(response.read().decode('utf-8').strip("\n"))

            tex_target = os.path.join(text_path, f"{label}.tex")
            with open(tex_target, 'w', encoding='utf-8') as tex_out:
                tex_out.write(body)

            # Produce the PDF next to the .tex file.
            compile_to_pdf(body, os.path.join(text_path, f"{label}.pdf"))
    except Exception as e:
        print(f"Error fetching sub-text from {url}: {e}")

def fetch_and_save_sub_sol(ex_id, indices, label, sol_path):
    """Fetch the solution of one (sub-)question and save it under ``sol_path``.

    Queries the ``exo_q_sol`` endpoint of the local server for ``ex_id``;
    when ``indices`` is non-empty it is appended to the URL as a
    comma-separated path segment. The response is passed through
    ``replace_dots``, written to ``{label}.tex`` in ``sol_path`` and compiled
    to ``{label}.pdf`` in the same directory.

    Errors (network, decoding, file writing) are reported on stdout and not
    raised, so one failing question does not abort the caller's loop.
    """
    qinds = ",".join(map(str, indices))
    if qinds:
        url = f"http://localhost:8080/exercices/exo_q_sol/{ex_id}/{qinds}"
    else:
        url = f"http://localhost:8080/exercices/exo_q_sol/{ex_id}"
    try:
        with urllib.request.urlopen(url) as response:
            content = response.read().decode('utf-8')
            content = replace_dots(content.strip("\n"))
            with open(os.path.join(sol_path, f"{label}.tex"), 'w', encoding='utf-8') as f:
                f.write(content)
            # Compile the PDF next to the .tex file
            pdf_file = os.path.join(sol_path, f"{label}.pdf")
            compile_to_pdf(content, pdf_file)
    except Exception as e:
        # The message names the solution endpoint so it can be told apart
        # from failures of the text variant.
        print(f"Error fetching sub-solution from {url}: {e}")


# Roman numerals for 1 through 10, indexed by value; index 0 holds an empty
# string so that ROMANS_*[n] can be used directly with 1-based numbers.
ROMANS_CAP = ["", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"]
ROMANS_LOW = ["", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x"]

def replace_dots(text):
    """Turn a leading ``X.`` label into ``X)`` on every line of ``text``.

    A line is rewritten when, after optional leading whitespace, it has
    exactly one character immediately followed by a dot; that dot becomes a
    closing parenthesis. Everything else is returned untouched.
    """
    # The inline (?m) flag makes ^ anchor at the start of every line.
    leading_label = re.compile(r"(?m)^(\s*.)\.")
    return leading_label.sub(lambda hit: hit.group(1) + ")", text)

def replace_problem_labels(text):
    """Renumber question labels by indentation depth for problem-style sheets.

    Every line that starts with whitespace followed by an alphanumeric label
    and a closing parenthesis is inspected. Depending on the number of
    leading whitespace characters, the label is converted:

    * 1  -> number to upper-case Roman numeral   (``1)`` becomes ``I)``)
    * 4  -> single letter to number              (``a)`` becomes ``1)``)
    * 7  -> number to lower-case letter          (``1)`` becomes ``a)``)
    * 10 -> number to lower-case Roman numeral   (``1)`` becomes ``i)``)

    A label that cannot be converted (other depth, wrong kind of label,
    multi-letter label, or a number outside the supported range) is left
    exactly as it was.
    """
    def repl(m):
        spaces = m.group(1)
        label = m.group(2)
        n = len(spaces)

        converted = None
        if n == 1 and label.isdigit():
            value = int(label)
            # Index 0 is an empty placeholder, so only 1..10 are usable.
            if 1 <= value < len(ROMANS_CAP):
                converted = ROMANS_CAP[value]
        elif n == 4 and len(label) == 1 and label.isalpha():
            # Restricting to one letter avoids ord() raising TypeError on
            # words such as "ab)".
            converted = str(ord(label.lower()) - 96)
        elif n == 7 and label.isdigit():
            value = int(label)
            # Only 1..26 map onto 'a'..'z'.
            if 1 <= value <= 26:
                converted = chr(96 + value)
        elif n == 10 and label.isdigit():
            value = int(label)
            if 1 <= value < len(ROMANS_LOW):
                converted = ROMANS_LOW[value]

        if converted is None:
            return m.group(0)
        return f"{spaces}{converted})"

    # Matches start of line, spaces/tabs, alphanumeric label, and closing parenthesis
    return re.sub(r"(?m)^([ \t]+)([a-zA-Z0-9]+)\)", repl, text)

def format_indices(indices, problem=False):
    """Build a label suffix such as ``1)b)ii)`` from a list of numeric indices.

    Without ``problem`` the successive levels are rendered as number,
    lower-case letter, lower-case Roman numeral. With ``problem`` an
    upper-case Roman numeral level is put in front of those three. Indices
    beyond the supported depth are ignored; an empty ``indices`` gives an
    empty string.
    """
    if not indices:
        return ""

    def as_letter(k):
        return chr(96 + k)

    def as_low_roman(k):
        return ROMANS_LOW[k]

    def as_cap_roman(k):
        return ROMANS_CAP[k]

    # One renderer per nesting level, outermost first.
    if problem:
        renderers = (as_cap_roman, str, as_letter, as_low_roman)
    else:
        renderers = (str, as_letter, as_low_roman)

    return "".join(f"{render(k)})" for render, k in zip(renderers, indices))


def save_split_content(text, path, base_fname, problem):
    """Write ``text`` to ``path`` as one file plus one file per labelled part.

    The complete text always goes to ``base_fname``. The text is then cut at
    every line whose label (``<whitespace><alphanumerics>)``) sits at the
    split depth: 1 leading whitespace character normally, 4 when ``problem``
    is true. Each piece runs up to the next label of that same depth (or the
    end of the text) and is saved as ``"{base_fname} : {label}"``. In problem
    mode the label is prefixed with the closest preceding depth-1 label, when
    there is one.
    """
    def dump(fname, payload):
        with open(os.path.join(path, fname), 'w', encoding='utf-8') as out:
            out.write(payload)

    # The aggregated file is written unconditionally.
    dump(base_fname, text)

    split_depth = 4 if problem else 1
    label_re = re.compile(r"(?m)^([ \t]+)([a-zA-Z0-9]+)\)")

    current_section = None  # label of the latest depth-1 match seen so far
    cuts = []               # (start offset, file label) for every split point
    for hit in label_re.finditer(text):
        depth = len(hit.group(1))
        if depth == split_depth:
            label = hit.group(2) + ")"
            if problem and current_section is not None:
                label = f"{current_section}){label}"
            cuts.append((hit.start(), label))
        if depth == 1:
            current_section = hit.group(2)

    # Each piece ends where the next one starts; the last one ends with the text.
    stops = [start for start, _ in cuts[1:]]
    stops.append(len(text))

    for (start, label), stop in zip(cuts, stops):
        dump(f"{base_fname} : {label}", text[start:stop].strip("\n"))


def process_directory(directory):
    """Generate the label list and the Text/Sol/Persp files for one sheet.

    Steps, as implemented below:

    1. Pick a .tex source: the first ``*.tex`` returned by ``glob`` in
       ``directory``, otherwise ``~/Prépa/Staging/Interro/<directory>.tex``.
       If neither exists the function prints a message and returns.
    2. Create the ``Text``, ``Sol`` and ``Persp`` sub-directories.
    3. Split the source on ``%%SHEETINFO :`` markers. The rest of each marker
       line is parsed as JSON; the keys read are ``id`` (required),
       ``select`` and ``indexes`` (both optional).
    4. For every block, write its label(s) to ``<directory>/labels`` and
       fetch/compile the per-question text and solution.
    5. Fetch the aggregated exercise from the ``emacs`` endpoint, split the
       response on ``###`` into text / solution / perspective, normalise the
       labels and save the three parts.

    Errors inside a block are printed and the loop moves on to the next
    block; in that case the exercise counter is not advanced.
    """
    # Find the first .tex file in the directory
    tex_files = glob.glob(os.path.join(directory, "*.tex"))
    if not tex_files:
        print(f"No .tex file found in {directory}. Looking in /Staging/Interro/")
        # Drop a single trailing slash so the directory name can serve as a file stem
        int_name = directory[:-1] if directory.endswith("/") else directory
        tex_path = os.path.join(os.path.expanduser("~"), "Prépa/Staging/Interro", f"{int_name}.tex")
        if os.path.exists(tex_path):
            tex_file = tex_path
        else:
            print("Not found in ", tex_path)
            return
    else:
        tex_file = tex_files[0]

    # Prepare output directories
    paths = {
        'Text': os.path.join(directory, "Text"),
        'Sol': os.path.join(directory, "Sol"),
        'Persp': os.path.join(directory, "Persp")
    }
    for p in paths.values():
        os.makedirs(p, exist_ok=True)

    labels_file = os.path.join(directory, "labels")
    # Running exercise number used in labels and file names ("Ex 1", "Ex 2", ...)
    current_ex_num = 1

    # Read entirely to allow chunking
    with open(tex_file, 'r', encoding='utf-8') as f_in:
        content = f_in.read()

    # Split by the specific SHEETINFO tag
    blocks = content.split("%%SHEETINFO :")

    with open(labels_file, 'w', encoding='utf-8') as f_labels:
        # Skip blocks[0] (content before first SHEETINFO)
        for block in blocks[1:]:
            # First line of the block is the JSON payload; the remainder is
            # the LaTeX that follows it, up to the next marker.
            parts_line = block.split("\n", 1)
            json_str = parts_line[0].strip()
            block_content = parts_line[1] if len(parts_line) > 1 else ""

            # Check if text until next SHEETINFO block contains \Roman;
            # such a block is treated as a "problem" below.
            problem = r"\Roman" in block_content

            # A marker with nothing after it on its line is ignored
            if not json_str: continue

            try:
                data = json.loads(json_str)
                # Construct 'ids' parameter
                ex_id = str(data['id'])
                selection = data.get('select')

                if selection is not None:
                    # Each selected index is shifted by one before being sent
                    # (presumably 0-based in the file, 1-based for the server
                    # -- TODO confirm).
                    sel_s = [i+1 for i in selection]
                    ids = f"{ex_id}.{','.join(map(str, sel_s))}"
                else:
                    ids = ex_id


                # Handle labels: one label per entry of 'indexes', or a
                # single "Ex N" label when there are none.
                indexes = data.get('indexes', [])
                if not indexes:
                    label = f"Ex {current_ex_num}"
                    f_labels.write(f"{label}\n")
                    fetch_and_save_sub_text(ids, [], label, paths['Text'])
                    fetch_and_save_sub_sol(ids, [], label, paths['Sol'])
                else:
                    for item in indexes:
                        suffix = format_indices(item['indices'], problem)
                        label = f"Ex {current_ex_num}" + (f" : {suffix}" if suffix else "")
                        f_labels.write(f"{label}\n")
                        fetch_and_save_sub_text(ids, item['indices'], label, paths['Text'])
                        fetch_and_save_sub_sol(ids, item['indices'], label, paths['Sol'])


                # Construct the URL of the aggregated exercise. Appending
                # pb=true when \Roman matched is currently disabled:
                url = f"http://localhost:8080/exercices/emacs/{ids}?pretty=true&all=true&persp=true"
                # if problem:
                    # url += "&pb=true"

                # Perform GET request
                with urllib.request.urlopen(url) as response:
                    res_content = response.read().decode('utf-8')

                # Split and save content: the response is expected to hold
                # text, solution and perspective separated by '###'.
                parts = res_content.split('###')

                # Ensure we have at least 3 parts
                while len(parts) < 3:
                    parts.append("")

                t_text = replace_dots(parts[0].strip("\n"))
                s_text = replace_dots(parts[1].strip("\n"))
                p_text = replace_dots(parts[2].strip("\n"))

                # Apply hierarchy depth replace if problem context
                if problem:
                    t_text = replace_problem_labels(t_text)
                    s_text = replace_problem_labels(s_text)
                    p_text = replace_problem_labels(p_text)

                base_filename = f"Ex {current_ex_num}"

                if problem:
                    # NOTE(review): the text of a problem is split with
                    # problem=False (i.e. at depth 1), unlike Sol and Persp
                    # below -- confirm this is intentional.
                    save_split_content(t_text, paths['Text'], base_filename, False)
                else:
                    # Non-problem text is saved as a single file, without splitting
                    with open(os.path.join(paths['Text'], base_filename), 'w', encoding='utf-8') as f:
                        f.write(t_text)


                save_split_content(s_text, paths['Sol'], base_filename, problem)
                save_split_content(p_text, paths['Persp'], base_filename, problem)

                # Only reached when the whole block succeeded
                current_ex_num += 1

            except json.JSONDecodeError:
                print(f"Error decoding JSON in block: {json_str}")
            except Exception as e:
                # NOTE(review): ex_id is never reset between iterations, so a
                # failure before it is assigned reports the previous block's id.
                print(f"Error processing block {ex_id if 'ex_id' in locals() else 'unknown'}: {e}")

if __name__ == "__main__":
    # The sheet directory is the first positional argument; any further
    # arguments are ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python script.py <Dir>")
        sys.exit(1)

    process_directory(cli_args[0])