Copies/utils.py

30 lines
934 B
Python

import re
from pathlib import Path
def natural_key(text):
    """Return a sort key that orders embedded numbers by numeric value.

    ``text`` is converted with ``str()`` and split into alternating
    non-digit and digit runs. Digit runs become ``int`` values; every other
    run is lower-cased, so comparisons are case-insensitive. For example,
    ``"File10.txt"`` yields ``['file', 10, '.txt']``.
    """
    # The split pattern only isolates Unicode *decimal* digits, which is
    # exactly what str.isdecimal() tests and what int() accepts.
    # str.isdigit() is broader (it accepts e.g. superscript digits such as
    # '²'), which made inputs like "10²" raise ValueError in int().
    return [
        int(chunk) if chunk.isdecimal() else chunk.lower()
        for chunk in re.split(r'(\d+)', str(text))
    ]
def read_all_labels(base_dir):
    """Return the non-empty lines of the ``labels`` file inside ``base_dir``.

    The file is decoded as UTF-8, matching how the other readers in this
    module decode text, so the result does not depend on the platform's
    locale encoding. Empty lines are dropped; lines containing only
    whitespace are kept as-is.

    Raises whatever ``Path.read_text`` raises (e.g. ``FileNotFoundError``
    when the ``labels`` file is missing).
    """
    labels_file = Path(base_dir) / "labels"
    lines = labels_file.read_text(encoding='utf-8').splitlines()
    # NOTE: labels are returned in file order. A variant that sorted them
    # with ``natural_key`` was previously present here but disabled.
    return [line for line in lines if line]
def enonce_total(base_dir):
    """Concatenate the text files found in ``base_dir/Text`` into one string.

    Regular files whose suffix is ``.tex`` or ``.pdf`` (case-insensitive)
    are skipped. The remaining files are ordered by ``natural_key`` of
    their file name, read as UTF-8, and each one contributes its name on a
    line, then its content, then three newline characters.

    Returns an empty string when ``base_dir/Text`` is not a directory.
    """
    source_dir = Path(base_dir) / 'Text'
    if not source_dir.is_dir():
        return ""

    skipped_suffixes = ('.tex', '.pdf')
    candidates = (
        entry
        for entry in source_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() not in skipped_suffixes
    )
    ordered = sorted(candidates, key=lambda entry: natural_key(entry.name))

    return "".join(
        f"{path.name}\n{path.read_text(encoding='utf-8')}\n\n\n"
        for path in ordered
    )