Files
Horizon-OC/Source/sys-clk/template_gen.py
souldbminersmwc 6bd3ba7b3d final changes
2026-03-22 19:25:47 -04:00

200 lines
4.6 KiB
Python

#!/usr/bin/env python3
import os
import re
import json
# Directory whose .cpp/.h files are scanned for string literals.
SOURCE_DIR = os.path.join("overlay", "src", "ui", "gui")

# Path of the generated English translation template.
OUTPUT_FILE = os.path.join("overlay", "lang", "en.json")

# A literal that starts with any of these prefixes is never emitted.
IGNORED_PREFIXES = ("/", "sysclk", "hocclk", "horizonoc")
# Literals that are dropped from the template when they match exactly
# (after surrounding whitespace is stripped): names, hardware identifiers
# and fixed technical labels.
UNTRANSLATABLE = {
    # --- Credits: developers ---
    "Souldbminer",
    "Lightos_",
    # --- Credits: contributors ---
    "Blaise25",
    # --- Credits: testers ---
    "Samybigio2011",
    "Delta",
    "Miki1305",
    "Happy",
    "Flopsider",
    "Winnerboi77",
    "WE1ZARD",
    "Alvise",
    "agjeococh",
    "Xenshen",
    "Frost",
    # --- Credits: special thanks ---
    "ScriesM - Atmosphere CFW",
    "KazushiMe - Switch OC Suite",
    "hanai3bi - Switch OC Suite & EOS",
    "NaGaa95 - L4T-OC-Kernel",
    "B3711 - EOS",
    "RetroNX - sys-clk",
    "b0rd2death - Ultrahand",
    "MasaGratoR - Status Monitor",
    # --- RAM module identifiers ---
    "HB-MGCH 4GB",
    "HM-MGCH 6GB",
    "HM-MGXX 8GB",
    "AM-MGCJ 4GB",
    "AM-MGCJ 8GB",
    "AA-MGCL 4GB",
    "AA-MGCL 8GB",
    "AB-MGCL 4GB",
    "x267 4GB",
    "NLE 4GB",
    "NEE 4GB",
    "NME 4GB",
    "WT:C 4GB",
    "WT:E 4GB",
    "WT:F 4GB",
    "WT:B 4GB",
    # --- Technical labels that must stay verbatim ---
    "NV Service",
    "Governor",
    "Speedo:",
    "%u.%u%u mV",
    "1333 RL",
    "1600 RL",
    "1866 RL",
    "2133 RL",
    "VDD2 + VDDQ",
    "VDD2 + Usage",
    "VDDQ + Usage",
    "SoC DVB Shift",
    "PCV Hijack",
    "Horizon OC Zeus",
    # --- Memory timing labels ---
    "t1 tRCD",
    "t2 tRP",
    "t3 tRAS",
    "t4 tRRD",
    "t5 tRFC",
    "t6 tRTW",
    "t7 tWTR",
    "t8 tREFI",
    "tREFI",
    # --- Other labels kept verbatim ---
    "JEDEC",
    "App ID",
    "Profile",
    "USB Charger",
    "Handheld",
    "PD Charger",
    "Docked",
    # --- MHz warning strings with mixed technical terms ---
    "1581MHz Tbreak",
    "1683MHz Tbreak",
}
def extract_strings_from_file(filepath: str) -> list[str]:
    """Return the body of every double-quoted literal found in *filepath*.

    The file is decoded as UTF-8 and undecodable bytes are dropped. Each
    result is the raw text between a pair of double quotes; backslash
    escapes are kept exactly as written, not decoded. Results are in file
    order and may contain duplicates and empty strings.
    """
    # A literal body is any run of non-quote/non-backslash characters or
    # backslash-escaped characters.
    literal_re = re.compile(r'"((?:[^"\\]|\\.)*)"')
    with open(filepath, "r", encoding="utf-8", errors="ignore") as source:
        text = source.read()
    return literal_re.findall(text)
# Strings made up solely of printf-style format characters; only treated as
# a format specifier when a "%" is actually present.
_FORMAT_ONLY_RE = re.compile(r"[%\d.*\-+lfdsuxXpLh ]*")

# A literal beginning with a backslash, "u" and a hex digit.
_UNICODE_ESCAPE_START_RE = re.compile(r"^\\u[0-9a-fA-F]")

# Patterns that, when they match the whole stripped literal, mark it as
# non-translatable.
_JUNK_PATTERNS = (
    # File paths / includes
    re.compile(r"[a-zA-Z0-9_./\\-]+\.(h|hpp|cpp|c)"),
    # Only backslashes, "n", "t" and spaces
    re.compile(r"[\\nt ]*"),
    # Pure numeric values with an optional unit
    re.compile(r"[+\- ]*\d+\.?\d*\s*(MHz|mV|mA|mW|Hz|ms|°C|%%|p)?"),
    # Mixed format/unit junk
    re.compile(r"[%\d./*+\-ufdsxXlLhp ,°CM:HzWmVA\\n]+"),
    # \n / \t escape sequences only
    re.compile(r"(\\[nt])+"),
    # \uXXXX escape sequences only
    re.compile(r"[\s]*(\\u[0-9a-fA-F]{4}[\s]*)+"),
)


def should_include(s: str) -> bool:
    """Return True if the extracted literal *s* belongs in the template.

    A literal is rejected when, after surrounding whitespace is stripped, it

    * is empty or at most five characters long;
    * starts with one of ``IGNORED_PREFIXES`` or with ``hocClkIpc``;
    * starts with a backslash-u escape (backslash, ``u``, hex digit);
    * looks like a C/C++ source or header file name;
    * consists only of printf format characters and contains a ``%``;
    * is a bare number with an optional unit, or is made up only of
      format, unit and escape characters;
    * is listed in ``UNTRANSLATABLE``.

    Every check uses the stripped text, so leading whitespace cannot hide
    an ignored prefix or a leading escape sequence.
    """
    if not s or s.isspace():
        return False
    stripped = s.strip()

    # Very short strings are not worth translating.
    if len(stripped) <= 5:
        return False

    # Prefix filters. tuple() keeps startswith() working even if the
    # constant is ever turned into a list.
    if stripped.startswith(tuple(IGNORED_PREFIXES)):
        return False
    # IPC function names.
    if stripped.startswith("hocClkIpc"):
        return False
    # Literals that begin with a raw unicode escape sequence.
    if _UNICODE_ESCAPE_START_RE.match(stripped):
        return False

    # Format specifiers only.
    if "%" in stripped and _FORMAT_ONLY_RE.fullmatch(stripped):
        return False

    # Paths, numbers, unit/format junk and escape-only strings.
    if any(pattern.fullmatch(stripped) for pattern in _JUNK_PATTERNS):
        return False

    # Names and fixed labels that must be left as they are.
    return stripped not in UNTRANSLATABLE
def main():
    """Generate the English translation template.

    Reads the ``.cpp`` and ``.h`` files directly inside ``SOURCE_DIR`` in
    sorted name order, keeps the first occurrence of every literal accepted
    by ``should_include``, and writes them to ``OUTPUT_FILE`` as a JSON
    object that maps each string to itself. If ``SOURCE_DIR`` is not a
    directory, an error is printed and nothing is written.
    """
    if not os.path.isdir(SOURCE_DIR):
        print(f"Error: directory '{SOURCE_DIR}' not found.")
        return

    # Dicts keep insertion order, so this doubles as the "seen" set and the
    # ordered result list.
    collected: dict[str, str] = {}
    source_names = [
        name
        for name in sorted(os.listdir(SOURCE_DIR))
        if name.endswith((".cpp", ".h"))
    ]
    for name in source_names:
        for literal in extract_strings_from_file(os.path.join(SOURCE_DIR, name)):
            if literal not in collected and should_include(literal):
                collected[literal] = literal

    # One "key": "value" entry per line, each indented by a single space.
    entries = [
        " "
        + json.dumps(key, ensure_ascii=False)
        + ": "
        + json.dumps(value, ensure_ascii=False)
        for key, value in collected.items()
    ]

    os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        out.write("{\n")
        if entries:
            # Commas go between entries only; the last line has none.
            out.write(",\n".join(entries) + "\n")
        out.write("}\n")

    print(f"Extracted {len(collected)} unique strings from {SOURCE_DIR}")
    print(f"Written to {OUTPUT_FILE}")


# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()