"""Clean the strings inside ``correction.json`` and save a fixed copy.

Usage: python script.py <input_dir>

Reads ``<input_dir>/correction.json``, removes NUL characters and repairs
text-encoding problems (via ``ftfy``) in every string *value* of the JSON
document, then writes the result to ``<input_dir>/fixed_correction.json``.
"""

import argparse
import json
import os
import sys
import time
from pathlib import Path

import ftfy

if len(sys.argv) < 2:
    sys.exit("Usage: python script.py <input_dir>")

arg_path = Path(sys.argv[1])

# NOTE(review): `tasks` and `results` are not referenced anywhere in the
# visible script; kept in case other code relies on these module-level names.
tasks = []  # List of tuples: (filepath_str, label_str)
results = {}

INPUT_DIR = str(arg_path)

# is_dir() rather than exists(): a path that points at a regular file would
# otherwise pass this check and fail later when opening the JSON inside it.
if not arg_path.is_dir():
    sys.exit(f"Directory {INPUT_DIR} not found.")

INPUT_FILE = Path(INPUT_DIR) / "correction.json"
# Was misspelled `OUPUT_FILE`, so the later uses of OUTPUT_FILE raised
# NameError and the cleaned JSON was never written.
OUTPUT_FILE = Path(INPUT_DIR) / "fixed_correction.json"


def clean_string(s: str) -> str:
    """Return *s* with NUL characters removed and encoding issues fixed.

    Args:
        s: The text to clean.

    Returns:
        The cleaned text, after stripping ``"\\x00"`` and applying
        ``ftfy.fix_text``.
    """
    # remove null bytes
    s = s.replace("\x00", "")
    # fix encoding issues
    return ftfy.fix_text(s)


def clean_obj(obj):
    """Recursively clean every string found in a decoded JSON value.

    Strings are passed through ``clean_string``; lists and dicts are rebuilt
    with their elements / values cleaned. Dict keys are left untouched, and
    any other value (numbers, booleans, ``None``) is returned unchanged.

    Args:
        obj: A value as produced by ``json.load``.

    Returns:
        A cleaned copy of *obj* for str/list/dict inputs, otherwise *obj*
        itself.
    """
    if isinstance(obj, str):
        return clean_string(obj)
    if isinstance(obj, list):
        return [clean_obj(x) for x in obj]
    if isinstance(obj, dict):
        return {k: clean_obj(v) for k, v in obj.items()}
    return obj


with open(INPUT_FILE, "r", encoding="utf-8") as f:
    data = json.load(f)

data = clean_obj(data)

# ensure_ascii=False keeps non-ASCII characters readable in the output file
# instead of escaping them as \uXXXX sequences.
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

print("Fixed JSON saved to", OUTPUT_FILE)