Copies/utils.py

26 lines
751 B
Python

import re
from pathlib import Path
def natural_key(text):
    """Return a sort key that orders embedded numbers by numeric value.

    ``str(text)`` is split into alternating non-digit and digit runs; digit
    runs become ``int`` and the remaining runs are lower-cased, so that
    ``"a2"`` sorts before ``"a10"`` and case is ignored.

    Args:
        text: Any object; it is converted with ``str()`` before splitting.

    Returns:
        A list alternating ``str`` and ``int`` items, always starting and
        ending with a (possibly empty) ``str``.
    """
    pieces = re.split(r'(\d+)', str(text))
    # With a capturing group, re.split puts the matched digit runs at the odd
    # indices. Deciding by position instead of str.isdigit() avoids calling
    # int() on characters such as superscript digits, which isdigit() accepts
    # but \d does not match and int() rejects with ValueError.
    return [
        int(piece) if index % 2 else piece.lower()
        for index, piece in enumerate(pieces)
    ]
def read_all_labels(base_dir):
    """Return the non-empty lines of ``<base_dir>/labels`` in natural order.

    Args:
        base_dir: Directory (``str`` or path-like) containing a file named
            ``labels``.

    Returns:
        A list of the file's non-empty lines, sorted with ``natural_key``.

    Raises:
        OSError: If the ``labels`` file cannot be read (e.g. it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    labels_file = Path(base_dir) / "labels"
    # Decode explicitly as UTF-8 (the same encoding enonce_total uses) so the
    # result does not depend on the platform's locale encoding.
    lines = labels_file.read_text(encoding="utf-8").splitlines()
    # filter(None, ...) drops empty strings only; whitespace-only lines stay.
    return sorted(filter(None, lines), key=natural_key)
def enonce_total(base_dir):
    """Concatenate every file found directly inside ``<base_dir>/Text``.

    Regular files are taken in natural order of their names (see
    ``natural_key``). Each one contributes its file name, a newline, its
    UTF-8 decoded content, and three trailing newlines.

    Args:
        base_dir: Directory (``str`` or path-like) expected to contain a
            ``Text`` sub-directory.

    Returns:
        The concatenated text, or ``""`` when ``Text`` is not a directory.
    """
    folder = Path(base_dir) / 'Text'
    if folder.is_dir():
        # Sub-directories and other non-file entries are skipped.
        ordered = sorted(
            (entry for entry in folder.iterdir() if entry.is_file()),
            key=lambda entry: natural_key(entry.name),
        )
        return "".join(
            f"{entry.name}\n{entry.read_text(encoding='utf-8')}\n\n\n"
            for entry in ordered
        )
    return ""