"""Export the exercises referenced by a .tex sheet into per-exercise text files.

The sheet is scanned for ``%%SHEETINFO :`` tags carrying a JSON payload; each
exercise is fetched from a local HTTP service and written, whole and split by
label, under the ``Text``, ``Sol`` and ``Persp`` sub-directories.
"""
import sys
import os
import glob
import json
import urllib.request
import re

ROMANS_CAP = ["", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"]
ROMANS_LOW = ["", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x"]

# (?m) enables multiline mode so ^ matches at the start of each line.
_DOT_LABEL_RE = re.compile(r"(?m)^(\s*.)\.")
# Start of line, indentation, alphanumeric label, closing parenthesis.
_LABEL_RE = re.compile(r"(?m)^([ \t]+)([a-zA-Z0-9]+)\)")


def replace_dots(text):
    """Rewrite a line-leading ``x.`` label as ``x)``.

    On every line, an optional run of whitespace followed by exactly one
    character and a dot has that dot replaced by a closing parenthesis.
    """
    return _DOT_LABEL_RE.sub(r"\1)", text)


def _letter(number):
    """Return the lowercase letter for a 1-based *number* (1 -> 'a')."""
    return chr(96 + number)


def _convert_problem_label(depth, label):
    """Return the problem-style label for *label* at *depth*, or None.

    None means "leave the original text alone": the depth is not one of the
    known levels, the label has the wrong kind for that level, or its value
    falls outside what the target numbering can express.
    """
    if depth == 4:
        # 4 spaces: a) -> 1). Only a single letter maps to a number; longer
        # words such as "foo)" used to crash ord() with a TypeError.
        if len(label) == 1 and label.isalpha():
            return str(ord(label.lower()) - 96)
        return None

    if not label.isdigit():
        return None
    number = int(label)

    if depth == 1 and 1 <= number < len(ROMANS_CAP):
        return ROMANS_CAP[number]  # 1 space: 1) -> I)
    if depth == 7 and 1 <= number <= 26:
        return _letter(number)  # 7 spaces: 1) -> a)
    if depth == 10 and 1 <= number < len(ROMANS_LOW):
        return ROMANS_LOW[number]  # 10 spaces: 1) -> i)
    return None


def replace_problem_labels(text):
    """Renumber labels according to their indentation depth (problem layout).

    Labels indented by 1, 4, 7 and 10 characters become respectively upper
    roman, arabic, lowercase letter and lower roman. Any label that cannot be
    converted is returned unchanged.
    """
    def repl(m):
        spaces, label = m.group(1), m.group(2)
        converted = _convert_problem_label(len(spaces), label)
        if converted is None:
            return m.group(0)
        return f"{spaces}{converted})"

    return _LABEL_RE.sub(repl, text)


def format_indices(indices, problem=False):
    """Format a list of 1-based indices as a chain of ``label)`` fragments.

    Without *problem* the levels are arabic, letter, lower roman; with
    *problem* they are upper roman, arabic, letter, lower roman. Indices beyond
    the last known level are ignored. An index that does not fit in the roman
    tables raises IndexError.
    """
    if not indices:
        return ""
    if problem:
        formatters = (ROMANS_CAP.__getitem__, str, _letter,
                      ROMANS_LOW.__getitem__)
    else:
        formatters = (str, _letter, ROMANS_LOW.__getitem__)
    # zip() stops at the shorter of the two, which both caps the depth and
    # skips levels that were not supplied.
    return "".join(f"{fmt(index)})" for fmt, index in zip(formatters, indices))


def save_split_content(text, path, base_fname, problem):
    """Write *text* to ``path/base_fname`` plus one file per labelled chunk.

    Chunks start at labels indented by 4 characters when *problem* is true and
    by 1 character otherwise. Each chunk is saved as
    ``"<base_fname> : <label>"``; in problem mode the label is prefixed with
    the enclosing 1-space section label when there is one.
    """
    # Always save the main aggregated file.
    with open(os.path.join(path, base_fname), 'w', encoding='utf-8') as f:
        f.write(text)

    target_spaces = 4 if problem else 1
    # Labels at or above the split level delimit chunks. Deeper labels belong
    # to the chunk they appear in. Including the shallower ones keeps the last
    # question of a section from swallowing the next section header.
    boundaries = [m for m in _LABEL_RE.finditer(text)
                  if len(m.group(1)) <= target_spaces]

    section = None
    for i, match in enumerate(boundaries):
        depth = len(match.group(1))
        if depth != target_spaces:
            if depth == 1:
                section = match.group(2)
            continue

        if i + 1 < len(boundaries):
            end_idx = boundaries[i + 1].start()
        else:
            end_idx = len(text)
        chunk = text[match.start():end_idx].strip("\n")

        label = match.group(2) + ")"
        if problem and section is not None:
            label = f"{section}){label}"

        sub_fname = f"{base_fname} : {label}"
        with open(os.path.join(path, sub_fname), 'w', encoding='utf-8') as f:
            f.write(chunk)


def _find_tex_file(directory):
    """Return the .tex file to process for *directory*, or None if missing."""
    # Sorted so that the choice is stable when several .tex files exist.
    tex_files = sorted(glob.glob(os.path.join(directory, "*.tex")))
    if tex_files:
        return tex_files[0]

    print(f"No .tex file found in {directory}. Looking in /Staging/Interro/")
    int_name = directory[:-1] if directory.endswith("/") else directory
    # The extension is appended to the name; passing ".tex" as its own
    # os.path.join component would look for a file literally called ".tex"
    # inside a directory named after the sheet.
    # NOTE(review): an absolute *directory* makes os.path.join drop the
    # staging prefix - confirm whether only the last component should be used.
    tex_path = os.path.join(os.path.expanduser("~"), "Prépa/Staging/Interro/",
                            int_name + ".tex")
    if os.path.exists(tex_path):
        return tex_path
    print("Not found.")
    return None


def _label_lines(data, ex_num, problem):
    """Build the lines of the labels file describing exercise *ex_num*."""
    indexes = data.get('indexes', [])
    if not indexes:
        return [f"Ex {ex_num}\n"]
    lines = []
    for item in indexes:
        suffix = format_indices(item['indices'], problem)
        if suffix != "":
            lines.append(f"Ex {ex_num} : {suffix}\n")
        else:
            lines.append(f"Ex {ex_num}\n")
    return lines


def _build_url(ex_id, selection, problem):
    """Build the service URL for exercise *ex_id* and its optional selection."""
    if selection is not None:
        # The payload holds 0-based positions; the service expects 1-based.
        picked = ",".join(str(i + 1) for i in selection)
        ids = f"{ex_id}.{picked}"
    else:
        ids = ex_id
    url = f"http://localhost:8080/exercices/emacs/{ids}?pretty=true&all=true&persp=true"
    if problem:
        url += "&pb=true"
    return url


def _fetch_parts(url):
    """GET *url* and return its first three '###'-separated parts, relabelled."""
    with urllib.request.urlopen(url) as response:
        res_content = response.read().decode('utf-8')
    parts = res_content.split('###')
    # Ensure we have at least 3 parts (a negative repeat count adds nothing).
    parts += [""] * (3 - len(parts))
    return [replace_dots(part.strip("\n")) for part in parts[:3]]


def process_directory(directory):
    """Export every exercise referenced by the .tex sheet of *directory*.

    The sheet is split on ``%%SHEETINFO :`` tags. The rest of each tag line is
    parsed as JSON (keys read: ``id``, optional ``select`` and ``indexes``).
    A block whose text contains ``\\Roman`` is treated as a problem. For each
    exercise the statement, solution and perspective texts are fetched and
    written under ``Text``, ``Sol`` and ``Persp``, and its labels are appended
    to the ``labels`` file. A failing block is reported on stdout and skipped;
    it consumes no exercise number and leaves no label lines behind.
    """
    tex_file = _find_tex_file(directory)
    if tex_file is None:
        return

    # Prepare output directories.
    paths = {
        'Text': os.path.join(directory, "Text"),
        'Sol': os.path.join(directory, "Sol"),
        'Persp': os.path.join(directory, "Persp"),
    }
    for p in paths.values():
        os.makedirs(p, exist_ok=True)

    labels_file = os.path.join(directory, "labels")
    current_ex_num = 1

    # Read entirely to allow chunking.
    with open(tex_file, 'r', encoding='utf-8') as f_in:
        content = f_in.read()

    # Split by the specific SHEETINFO tag.
    blocks = content.split("%%SHEETINFO :")

    with open(labels_file, 'w', encoding='utf-8') as f_labels:
        # Skip blocks[0] (content before the first SHEETINFO).
        for block in blocks[1:]:
            json_str, _, block_content = block.partition("\n")
            json_str = json_str.strip()
            if not json_str:
                continue

            # A \Roman anywhere before the next SHEETINFO marks a problem.
            problem = r"\Roman" in block_content

            try:
                data = json.loads(json_str)
            except json.JSONDecodeError:
                print(f"Error decoding JSON in block: {json_str}")
                continue

            # Reset per block so a failure never reports the previous id.
            ex_id = None
            try:
                ex_id = str(data['id'])
                label_lines = _label_lines(data, current_ex_num, problem)
                url = _build_url(ex_id, data.get('select'), problem)
                t_text, s_text, p_text = _fetch_parts(url)

                # Apply hierarchy depth replace if problem context.
                if problem:
                    t_text = replace_problem_labels(t_text)
                    s_text = replace_problem_labels(s_text)
                    p_text = replace_problem_labels(p_text)

                base_filename = f"Ex {current_ex_num}"

                if problem:
                    # The statement of a problem is split per 1-space section.
                    save_split_content(t_text, paths['Text'], base_filename,
                                       False)
                else:
                    text_path = os.path.join(paths['Text'], base_filename)
                    with open(text_path, 'w', encoding='utf-8') as f:
                        f.write(t_text)

                save_split_content(s_text, paths['Sol'], base_filename, problem)
                save_split_content(p_text, paths['Persp'], base_filename,
                                   problem)

                # Labels are recorded only once the exercise is saved, so the
                # labels file stays in step with the numbered output files.
                f_labels.writelines(label_lines)
                current_ex_num += 1
            except Exception as e:
                shown_id = ex_id if ex_id is not None else 'unknown'
                print(f"Error processing block {shown_id}: {e}")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python script.py <directory>")
        sys.exit(1)

    process_directory(sys.argv[1])