Files
Alchemist/scripts/cucholix/titleid_patcher2.py
2025-08-19 20:17:51 -07:00

409 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
import os
import json
import re
import unicodedata
import sys
from pathlib import Path
from difflib import get_close_matches
def load_title_database(db_path="US.en.json"):
    """Read and parse the title database JSON file.

    Args:
        db_path: Path of the JSON file to read (UTF-8).

    Returns:
        The parsed JSON document.

    Exits the process with status 1, after printing a message, when the
    file does not exist or does not contain valid JSON.
    """
    try:
        with open(db_path, "r", encoding="utf-8") as handle:
            raw_text = handle.read()
        database = json.loads(raw_text)
    except FileNotFoundError:
        print(f"❌ Title database not found: {db_path}")
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"❌ Invalid JSON in title database: {db_path}")
        sys.exit(1)
    return database
def load_cnmts_database(cnmts_path="cnmts.json"):
    """Read and parse the CNMTS database JSON file.

    Args:
        cnmts_path: Path of the JSON file to read (UTF-8).

    Returns:
        The parsed JSON document.

    Exits the process with status 1, after printing a message, when the
    file does not exist or does not contain valid JSON.
    """
    try:
        with open(cnmts_path, "r", encoding="utf-8") as handle:
            raw_text = handle.read()
        database = json.loads(raw_text)
    except FileNotFoundError:
        print(f"❌ CNMTS database not found: {cnmts_path}")
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"❌ Invalid JSON in CNMTS database: {cnmts_path}")
        sys.exit(1)
    return database
def sanitize_name(name):
    """Return an ASCII-only, quote-free, whitespace-normalized copy of *name*.

    Steps, in order:
      1. NFKD-decompose and drop every non-ASCII code point (this strips
         accents and also removes typographic quotes and dashes).
      2. Delete apostrophes, backticks and double quotes.
      3. Replace each ' - ' separator with a single space.
      4. Collapse runs of whitespace and trim both ends.

    Args:
        name: Arbitrary text, e.g. a folder name.

    Returns:
        The cleaned string; may be empty.
    """
    ascii_only = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
    # One translate pass deletes all three quote characters at once.
    without_quotes = ascii_only.translate(str.maketrans('', '', "'`\""))
    without_separator = without_quotes.replace(" - ", " ")
    # split()/join already trims the ends, so no extra strip() is needed.
    return ' '.join(without_separator.split())
def capitalize_hyphenated(word):
    """Capitalize every hyphen-separated segment of *word*.

    The first character of each segment is upper-cased and the rest is
    lower-cased; empty segments (from leading, trailing or doubled hyphens)
    are kept so the hyphens survive unchanged.
    """
    return '-'.join(
        segment[:1].upper() + segment[1:].lower()
        for segment in word.split('-')
    )
# Matches a whole string that is a Roman numeral (case-insensitive).
# Every component after the anchor is optional, so without the leading
# lookahead the pattern would also accept the empty string; the lookahead
# demands at least one numeral symbol.
ROMAN_NUMERAL_PATTERN = re.compile(
    r"^(?=[MDCLXVI])M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$",
    re.IGNORECASE
)
# Tokens that are written fully upper-case when title-casing. Lookups are
# done with the upper-cased word, so entries must be upper-case.
ACRONYMS = {
    "HD", "2D", "3D", "4K", "VR", "AI", "API", "USB", "CPU", "GPU", "DVD", "CD",
    "RPG", "FPS", "MMO", "MMORPG", "LAN", "GUI", "NPC",
    "FFVII", "FFVIII", "FFIX", "FFX", "FFXII",
    "FX", "2K", "5K", "8K", "V1", "V2", "V3", "V4", "DOF"
}
def is_roman_numeral(word):
    """Report whether *word* as a whole matches ROMAN_NUMERAL_PATTERN."""
    return ROMAN_NUMERAL_PATTERN.match(word) is not None
def title_case_preserve_numbers(name):
    """Title-case *name*, keeping acronyms and Roman numerals upper-case.

    The name is split on whitespace and each word is further split around
    the subtitle markers ':', '~', '-', en dash and em dash. For every
    non-marker piece:

      * pieces of the first and last word are always capitalized;
      * pieces that follow a marker (in the same word, or in the next
        word) are always capitalized, since they start a subtitle;
      * otherwise short articles/conjunctions/prepositions listed in
        ``lowercase_exceptions`` are lower-cased;
      * everything else is capitalized.

    "Capitalized" means: upper-case if the piece is in ACRONYMS or is a
    Roman numeral (also for numerals joined by '&', '+' or '|'), else
    first letter upper-case and the rest lower-case.

    NOTE(review): any word that happens to match the Roman-numeral pattern
    (e.g. "mix") is upper-cased as well.

    Args:
        name: The text to format.

    Returns:
        The formatted words joined by single spaces ('' for blank input).
    """
    lowercase_exceptions = {
        "a", "an", "and", "as", "at", "but", "by", "for", "from",
        "in", "nor", "of", "on", "or", "so", "the", "to", "with", "yet"
    }
    # Must hold exactly the characters captured by the split pattern below.
    # An empty string must never be a member: '"" in word' is true for every
    # word, which would keep forced capitalization switched on forever.
    subtitle_markers = {":", "~", "-", "–", "—"}

    def capitalize_special(w):
        # Acronyms and Roman numerals are emitted fully upper-case.
        if w.upper() in ACRONYMS:
            return w.upper()
        if is_roman_numeral(w):
            return w.upper()
        # Numerals joined by a separator, e.g. "i&ii" -> "I&II".
        for sep in ['&', '+', '|']:
            if sep in w:
                parts = w.split(sep)
                if all(is_roman_numeral(p) for p in parts):
                    return sep.join(p.upper() for p in parts)
        return capitalize_hyphenated(w)

    words = name.split()
    last_index = len(words) - 1
    result = []
    force_capitalize_mode = False
    for idx, word in enumerate(words):
        contains_marker = False
        capitalized_parts = []
        # The capture group keeps the markers in the split output.
        for part in re.split(r'([:~\-–—])', word):
            if part in subtitle_markers:
                capitalized_parts.append(part)
                contains_marker = True
                force_capitalize_mode = True
                continue
            if not part:
                # Empty piece produced by a leading/trailing/doubled marker.
                continue
            lower_part = part.lower()
            # First and last words always take this branch, so they need no
            # separate fix-up pass afterwards (re-capitalizing the joined
            # word would lower-case the text after ':' or '&').
            if (force_capitalize_mode or idx == 0 or idx == last_index
                    or lower_part not in lowercase_exceptions):
                capitalized_parts.append(capitalize_special(part))
            else:
                capitalized_parts.append(lower_part)
        result.append(''.join(capitalized_parts))
        # Forced mode set by a marker in this word carries over to the next
        # word only; a marker-free word ends it.
        if not contains_marker:
            force_capitalize_mode = False
    return ' '.join(result)
def normalize_title(text):
    """Reduce a title to a lower-case ASCII matching key.

    Accents are stripped via NFKD decomposition, every character other than
    ASCII letters, digits and spaces is deleted (not replaced), and runs of
    spaces are collapsed to one.
    """
    ascii_text = (
        unicodedata.normalize('NFKD', text)
        .encode('ascii', 'ignore')
        .decode('ascii')
    )
    letters_digits_spaces = re.sub(r'[^a-zA-Z0-9 ]', '', ascii_text)
    tokens = letters_digits_spaces.lower().split()
    return ' '.join(tokens)
def create_title_mapping(title_db):
    """Build name -> title ID lookup tables from the title database.

    Args:
        title_db: Mapping whose values are dicts read with ``.get("name")``
            and ``.get("id")``; entries missing either field are skipped.

    Returns:
        A tuple ``(title_map, normalized_title_map)``: the first is keyed by
        the stripped original name, the second by ``normalize_title(name)``.
        When several entries produce the same key, the last one wins.
    """
    title_map = {}
    normalized_title_map = {}
    for entry in title_db.values():
        name, tid = entry.get("name"), entry.get("id")
        if not (name and tid):
            continue
        clean_id = tid.strip()
        title_map[name.strip()] = clean_id
        normalized_title_map[normalize_title(name)] = clean_id
    return title_map, normalized_title_map
def find_title_id_by_build_id(build_id, cnmts_db, debug=False):
    """Look up the title whose content has a build ID starting with *build_id*.

    Args:
        build_id: Build ID (or leading part of one); compared
            case-insensitively.
        cnmts_db: Nested mapping ``title_id -> version -> version_data``
            where ``version_data["contentEntries"]`` is a list of dicts with
            ``"buildId"`` and ``"type"`` keys.
        debug: When true, the matched full build ID is returned as well.

    Returns:
        ``(title_id_upper, full_build_id)`` for the first match, with
        ``full_build_id`` replaced by ``None`` unless *debug* is set;
        ``(None, None)`` when *build_id* is empty or nothing matches.
    """
    if not build_id:
        return None, None
    wanted_prefix = build_id.upper()
    for title_id, versions in cnmts_db.items():
        for version_data in versions.values():
            for entry in version_data.get("contentEntries", []):
                candidate = entry.get("buildId")
                # Only entries of type 1 carrying a build ID are considered
                # (presumably the program content -- TODO confirm).
                if not candidate or entry.get("type") != 1:
                    continue
                if candidate.upper().startswith(wanted_prefix):
                    return title_id.upper(), (candidate if debug else None)
    return None, None
def get_title_id_from_name(game_name, normalized_title_map):
    """Resolve a game name to a title ID using normalized names.

    Matching is attempted in this order:
      1. exact match of the normalized name;
      2. first key (in map iteration order) that contains the normalized
         name as a substring;
      3. closest key according to ``difflib.get_close_matches`` with a
         cutoff of 0.5.

    Args:
        game_name: Human-readable game name.
        normalized_title_map: Mapping of normalized title -> title ID.

    Returns:
        The matching title ID, or ``None`` when *game_name* is empty,
        normalizes to an empty string, or nothing matches.
    """
    if not game_name:
        return None
    norm_query = normalize_title(game_name)
    # A name made only of punctuation / non-ASCII characters normalizes to
    # "". An empty string is a substring of every key, so without this guard
    # the substring step would return the first title in the database.
    if not norm_query:
        return None
    # Exact match
    if norm_query in normalized_title_map:
        return normalized_title_map[norm_query]
    # Substring match
    containing_key = next(
        (title_key for title_key in normalized_title_map if norm_query in title_key),
        None,
    )
    if containing_key is not None:
        return normalized_title_map[containing_key]
    # Fuzzy fallback
    matches = get_close_matches(norm_query, normalized_title_map.keys(), n=1, cutoff=0.5)
    if matches:
        return normalized_title_map[matches[0]]
    return None
def extract_game_name_from_folder(folder_name):
    """Derive a display-ready game name from a folder name.

    The folder name is sanitized, a trailing "Graphics" (case-sensitive) is
    cut off together with the whitespace before it, and the remainder is
    title-cased.
    """
    suffix = "Graphics"
    cleaned = sanitize_name(folder_name)
    if cleaned.endswith(suffix):
        cleaned = cleaned[:len(cleaned) - len(suffix)].strip()
    return title_case_preserve_numbers(cleaned)
def extract_nsobid_from_pchtxt(file_path):
    """Return the upper-cased build ID from the first ``@nsobid-`` line.

    Lines are compared after stripping surrounding whitespace; everything
    following the ``@nsobid-`` prefix on that line is returned.

    Returns:
        The build ID, or ``None`` when the file has no such line or cannot
        be read (an error message is printed in the latter case).
    """
    marker = "@nsobid-"
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            stripped_lines = (raw_line.strip() for raw_line in handle)
            # Lazily stop at the first line carrying the marker.
            hit = next(
                (text for text in stripped_lines if text.startswith(marker)),
                None,
            )
        if hit is not None:
            return hit[len(marker):].upper()
    except Exception as e:
        print(f"❌ Error reading {file_path}: {e}")
    return None
def extract_region_from_filename(filename):
    """Extract the region code from a name like '1.0.3_US.pchtxt' -> 'US'.

    The ``.pchtxt`` extension is removed case-insensitively so that files
    such as '1.0.3_US.PCHTXT' are handled too. The region is the run of two
    or three upper-case ASCII letters after the last underscore at the very
    end of the remaining name.

    Args:
        filename: File name (not a full path).

    Returns:
        The region code, or ``None`` when the name does not end in one.
    """
    # Only strip a trailing extension, whatever its letter case.
    name_without_ext = re.sub(r'\.pchtxt$', '', filename, flags=re.IGNORECASE)
    region_match = re.search(r'_([A-Z]{2,3})$', name_without_ext)
    if region_match:
        return region_match.group(1)
    return None
def patch_pchtxt_file(file_path, game_name, title_id):
    """Write the title ID into the header comment of one .pchtxt file.

    The first line starting with '#' is treated as the header:
      * if it contains both '[' and ']', it is replaced by
        ``# {game_name} [{title_id}]``;
      * otherwise `` [{title_id}]`` is appended to it.
    When the file has no '#' line, a new header is inserted right after the
    first ``@nsobid-`` line (after the blank line following it, if there is
    one), or at the very top when no ``@nsobid-`` line exists.

    Returns:
        True when the file was rewritten, False when reading or writing
        failed (an error message is printed).
    """
    try:
        tagged_header = f"# {game_name} [{title_id}]\n"
        with open(file_path, "r", encoding="utf-8") as src:
            lines = src.readlines()

        header_at = next(
            (pos for pos, text in enumerate(lines) if text.startswith("#")),
            None,
        )
        if header_at is not None:
            current = lines[header_at]
            if "[" in current and "]" in current:
                # Existing bracketed tag: rewrite the whole header.
                lines[header_at] = tagged_header
            else:
                # Keep the existing header text and append the tag.
                lines[header_at] = current.strip() + f" [{title_id}]\n"
        else:
            nsobid_at = next(
                (pos for pos, text in enumerate(lines) if text.startswith("@nsobid-")),
                None,
            )
            if nsobid_at is None:
                lines.insert(0, tagged_header)
            else:
                target = nsobid_at + 1
                # Step over one blank line directly below the @nsobid line.
                if target < len(lines) and lines[target].strip() == "":
                    target += 1
                lines.insert(target, tagged_header)

        with open(file_path, "w", encoding="utf-8") as dst:
            dst.writelines(lines)
        return True
    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        return False
def patch_original_files_with_buildid(root_folder, title_db_path="US.en.json", cnmts_db_path="cnmts.json"):
    """
    Walk *root_folder* and stamp every .pchtxt file with its title ID.

    For each file the title ID is resolved from the file's ``@nsobid-``
    build ID via the CNMTS database; when that fails, the game name derived
    from the containing folder is matched against the title database.
    Progress is printed per folder/file and a summary is printed at the end.

    Args:
        root_folder: Directory tree to scan. Sub-directories named exactly
            'formatted' are not descended into.
        title_db_path: Path of the title database JSON.
        cnmts_db_path: Path of the CNMTS database JSON.

    Returns:
        None. The database loaders terminate the process if a database
        file is missing or invalid.
    """
    # Load databases
    title_db = load_title_database(title_db_path)
    cnmts_db = load_cnmts_database(cnmts_db_path)
    # Only the normalized map is used for lookups here.
    _, normalized_title_map = create_title_mapping(title_db)
    print(f"Loaded {len(normalized_title_map)} titles from title database")
    print(f"Loaded CNMTS database with {len(cnmts_db)} title entries")
    print(f"Scanning folder: {root_folder}\n")
    stats = {
        'processed': 0,
        'patched_by_buildid': 0,
        'patched_by_name': 0,
        'no_buildid': 0,
        'no_title_id': 0,
        'errors': 0,
        'regional_variants': 0
    }
    for current_root, dirs, files in os.walk(root_folder):
        # Skip the formatted directory if it exists. Pruning by exact name
        # (instead of a substring test on the whole path) avoids skipping
        # unrelated folders and stops os.walk from descending into it.
        dirs[:] = [d for d in dirs if d != 'formatted']
        pchtxt_files = [f for f in files if f.lower().endswith('.pchtxt')]
        if not pchtxt_files:
            continue
        # Extract game name from folder; normpath keeps the basename
        # non-empty when root_folder was given with a trailing separator.
        folder_name = os.path.basename(os.path.normpath(current_root))
        game_name = extract_game_name_from_folder(folder_name)
        print(f"📁 {folder_name}")
        print(f" Game: {game_name}")
        # Resolve each file's region once; reused for the overview below
        # and for the per-file messages.
        regions = {file: extract_region_from_filename(file) for file in pchtxt_files}
        detected_regions = [region for region in regions.values() if region]
        stats['regional_variants'] += len(detected_regions)
        if detected_regions:
            # dict.fromkeys drops duplicate regions while keeping order.
            print(f" 🌍 Regional variants detected: {', '.join(dict.fromkeys(detected_regions))}")
        # Process each .pchtxt file in this folder
        for file in pchtxt_files:
            file_path = os.path.join(current_root, file)
            stats['processed'] += 1
            region = regions[file]
            region_suffix = f" ({region})" if region else ""
            # First, try to get title ID from build ID
            build_id = extract_nsobid_from_pchtxt(file_path)
            title_id = None
            method = None
            if build_id:
                title_id, full_build_id = find_title_id_by_build_id(build_id, cnmts_db, debug=True)
                if title_id:
                    method = "build_id"
                    print(f" 🔍 Build ID match: {build_id} -> {full_build_id}")
                else:
                    print(f" ⚠️ Build ID {build_id} not found in CNMTS database")
            else:
                stats['no_buildid'] += 1
                print(f" ⚠️ No build ID found in {file}")
            # If build ID lookup failed, try name-based lookup
            if not title_id:
                title_id = get_title_id_from_name(game_name, normalized_title_map)
                if title_id:
                    method = "name"
            if not title_id:
                print(f" ❌ No title ID found for: {file}{region_suffix}")
                stats['no_title_id'] += 1
                continue
            if patch_pchtxt_file(file_path, game_name, title_id):
                # Count a file as patched only once the write succeeded, so
                # a failed write is not reported as both patched and error.
                counter = 'patched_by_buildid' if method == "build_id" else 'patched_by_name'
                stats[counter] += 1
                print(f" ✅ Patched: {file}{region_suffix} with [{title_id}] (via {method})")
            else:
                stats['errors'] += 1
        print()  # Empty line for readability
    # Print summary
    print("=" * 60)
    print("SUMMARY:")
    print(f"Files processed: {stats['processed']}")
    print(f"Files patched via build ID: {stats['patched_by_buildid']}")
    print(f"Files patched via name matching: {stats['patched_by_name']}")
    print(f"Regional variant files detected: {stats['regional_variants']}")
    print(f"Files with no build ID: {stats['no_buildid']}")
    print(f"Files with no title ID found: {stats['no_title_id']}")
    print(f"Errors: {stats['errors']}")
    print("=" * 60)
def main():
    """Command-line entry point.

    Usage: ``<script> ROOT_FOLDER [TITLE_DB_JSON] [CNMTS_JSON]``. The two
    database paths default to 'US.en.json' and 'cnmts.json' in the current
    directory. Exits with status 1 on a wrong argument count or when
    ROOT_FOLDER is not an existing directory.
    """
    if len(sys.argv) not in [2, 3, 4]:
        # Show the name the script was actually started with instead of a
        # hard-coded (and possibly stale) file name.
        script_name = os.path.basename(sys.argv[0]) if sys.argv else ""
        script_name = script_name or "titleid_patcher2.py"
        print(f"Usage: python {script_name} /path/to/root/folder [path/to/US.en.json] [path/to/cnmts.json]")
        print("If database paths are not provided, it will look for 'US.en.json' and 'cnmts.json' in the current directory.")
        sys.exit(1)
    root_folder = sys.argv[1]
    title_db_path = sys.argv[2] if len(sys.argv) >= 3 else "US.en.json"
    cnmts_db_path = sys.argv[3] if len(sys.argv) == 4 else "cnmts.json"
    # isdir rather than exists: a regular file would pass an existence check
    # but yields nothing to walk, producing a misleading all-zero summary.
    if not os.path.isdir(root_folder):
        print(f"❌ Root folder not found: {root_folder}")
        sys.exit(1)
    patch_original_files_with_buildid(root_folder, title_db_path, cnmts_db_path)


if __name__ == "__main__":
    main()