#!/usr/bin/env python3 import os import re import json SOURCE_DIR = os.path.join("overlay", "src", "ui", "gui") OUTPUT_FILE = os.path.join("overlay", "lang", "en.json") IGNORED_PREFIXES = ( "/", "sysclk", "hocclk", "horizonoc" ) UNTRANSLATABLE = { # Developers "Souldbminer", "Lightos_", # Contributors "Blaise25", # Testers "Samybigio2011", "Delta", "Miki1305", "Happy", "Flopsider", "Winnerboi77", "WE1ZARD", "Alvise", "agjeococh", "Xenshen", "Frost", # Special Thanks "ScriesM - Atmosphere CFW", "KazushiMe - Switch OC Suite", "hanai3bi - Switch OC Suite & EOS", "NaGaa95 - L4T-OC-Kernel", "B3711 - EOS", "RetroNX - sys-clk", "b0rd2death - Ultrahand", "MasaGratoR - Status Monitor", # RAM modules "HB-MGCH 4GB", "HM-MGCH 6GB", "HM-MGXX 8GB", "AM-MGCJ 4GB", "AM-MGCJ 8GB", "AA-MGCL 4GB", "AA-MGCL 8GB", "AB-MGCL 4GB", "x267 4GB", "NLE 4GB", "NEE 4GB", "NME 4GB", "WT:C 4GB", "WT:E 4GB", "WT:F 4GB", "WT:B 4GB", # Technical labels that must not be translated "NV Service", "Governor", "Speedo:", "%u.%u%u mV", "1333 RL", "1600 RL", "1866 RL", "2133 RL", "VDD2 + VDDQ", "VDD2 + Usage", "VDDQ + Usage", "SoC DVB Shift", "PCV Hijack", "Horizon OC Zeus", # Timing labels "t1 tRCD", "t2 tRP", "t3 tRAS", "t4 tRRD", "t5 tRFC", "t6 tRTW", "t7 tWTR", "t8 tREFI", "tREFI", "JEDEC", "App ID", "Profile", "USB Charger", "Handheld", "PD Charger", "Docked", # MHz warning strings with mixed technical terms "1581MHz Tbreak", "1683MHz Tbreak", } def extract_strings_from_file(filepath: str) -> list[str]: with open(filepath, "r", encoding="utf-8", errors="ignore") as f: content = f.read() pattern = r'"((?:[^"\\]|\\.)*)"' return re.findall(pattern, content) def should_include(s: str) -> bool: if not s or s.isspace(): return False stripped = s.strip() # --- Skip strings 5 characters or less --- if len(stripped) <= 5: return False # --- Prefix filters --- for prefix in IGNORED_PREFIXES: if s.startswith(prefix): return False # Skip raw unicode escape sequences if re.match(r"^\\u[0-9a-fA-F]", s): return False # --- File paths / includes --- if re.fullmatch(r"[a-zA-Z0-9_./\\-]+\.(h|hpp|cpp|c)", stripped): return False # --- Format specifiers --- if re.fullmatch(r"[%\d.*\-+lfdsuxXpLh ]*", stripped) and "%" in stripped: return False # --- Whitespace / escape sequences only --- if re.fullmatch(r"[\\nt ]*", stripped): return False # --- Pure numeric values with units --- if re.fullmatch(r"[+\- ]*\d+\.?\d*\s*(MHz|mV|mA|mW|Hz|ms|°C|%%|p)?", stripped): return False # --- Mixed format/unit junk --- if re.fullmatch(r"[%\d./*+\-ufdsxXlLhp ,°CM:HzWmVA\\n]+", stripped): return False # --- IPC function names --- if re.match(r"^hocClkIpc", stripped): return False # --- Escape sequences only --- if re.fullmatch(r"(\\[nt])+", stripped): return False # --- Unicode escape sequences only --- if re.fullmatch(r"[\s]*(\\u[0-9a-fA-F]{4}[\s]*)+", stripped): return False # --- Untranslatable set --- if stripped in UNTRANSLATABLE: return False return True def main(): seen: set[str] = set() strings: list[str] = [] if not os.path.isdir(SOURCE_DIR): print(f"Error: directory '{SOURCE_DIR}' not found.") return for filename in sorted(os.listdir(SOURCE_DIR)): if not filename.endswith((".cpp", ".h")): continue filepath = os.path.join(SOURCE_DIR, filename) for s in extract_strings_from_file(filepath): if s not in seen and should_include(s): seen.add(s) strings.append(s) translations = {s: s for s in strings} os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True) with open(OUTPUT_FILE, "w", encoding="utf-8") as f: f.write("{\n") items = list(translations.items()) for i, (key, val) in enumerate(items): k = json.dumps(key, ensure_ascii=False) v = json.dumps(val, ensure_ascii=False) comma = "," if i < len(items) - 1 else "" f.write(f" {k}: {v}{comma}\n") f.write("}\n") print(f"Extracted {len(translations)} unique strings from {SOURCE_DIR}") print(f"Written to {OUTPUT_FILE}") if __name__ == "__main__": main()