Files
Alchemist/scripts/cucholix/format_repo5.py
2025-05-31 06:39:55 -07:00

192 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
import os
import shutil
import unicodedata
import sys
import re
def sanitize_name(name):
    """
    Return an ASCII-only, whitespace-normalised version of a folder name.

    Steps, in order:
      1. NFKD-decompose the text and drop every non-ASCII code point, which
         strips accents ("Pokémon" becomes "Pokemon") and also removes
         typographic quotes and dashes.
      2. Delete the ASCII quote characters ', ` and ".
      3. Replace the separator " - " with a single space.
      4. Collapse runs of whitespace and trim both ends.
    """
    ascii_only = (
        unicodedata.normalize('NFKD', name)
        .encode('ascii', 'ignore')
        .decode('ascii')
    )
    # One translate() pass removes all three ASCII quote characters. Curly
    # quotes need no handling here: the ASCII encode above already drops them.
    without_quotes = ascii_only.translate(str.maketrans('', '', "'`\""))
    without_separator = without_quotes.replace(" - ", " ")
    # split() with no argument discards leading/trailing whitespace, so the
    # joined result needs no further strip().
    return ' '.join(without_separator.split())
def capitalize_hyphenated(word):
    """
    Capitalize every hyphen-separated part of a word.

    The first character of each part is uppercased and the remainder is
    lowercased, e.g. "yooka-laylee" -> "Yooka-Laylee" and
    "SPIDER-man" -> "Spider-Man". Empty parts (leading, trailing or doubled
    hyphens) are kept, so the hyphens themselves are never lost.
    """
    # Slicing with [:1] is safe on empty and one-character parts, so no
    # special cases are needed for them.
    return '-'.join(
        part[:1].upper() + part[1:].lower() for part in word.split('-')
    )
# Regex for Roman numerals, matched case-insensitively: I, II, III, IV, V, VI,
# VII, VIII, IX, X, XI, etc. M{0,4} allows up to four leading thousands, so
# values from 1 to 4999 are accepted.
# Every numeral group is optional, so the leading lookahead demands at least
# one numeral letter; without it the empty string would match. \Z (rather
# than $) anchors at the true end of the string, so "IV\n" is rejected.
ROMAN_NUMERAL_PATTERN = re.compile(
    r"^(?=[MDCLXVI])M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\Z",
    re.IGNORECASE,
)
# Words that are always written fully uppercase in a formatted title.
# Entries are stored in uppercase (listed alphabetically for easy scanning).
ACRONYMS = {
    "2D", "3D", "4K",
    "AI", "API",
    "CD", "CPU",
    "DVD",
    "FFIX", "FFVII", "FFVIII", "FFX", "FFXII", "FPS", "FX",
    "GPU", "GUI",
    "HD",
    "LAN",
    "MMO", "MMORPG",
    "NPC",
    "RPG",
    "USB",
    "VR",
}
def is_roman_numeral(word):
    """
    Return True if the word is a valid Roman numeral (case-insensitive).

    The empty string is never a numeral. Because matching ignores case,
    ordinary words spelled only with numeral letters in a valid order
    (for example "mix" or "di") are also reported as numerals.
    """
    # Explicit emptiness guard: every group in ROMAN_NUMERAL_PATTERN is
    # optional, so the pattern alone may accept "". fullmatch() additionally
    # insists that the whole word is consumed, trailing newline included.
    return bool(word) and ROMAN_NUMERAL_PATTERN.fullmatch(word) is not None
# Splits a word on subtitle markers while keeping the markers in the result.
# The en dash and em dash are written as escapes so they cannot be lost or
# confused with look-alike characters when the file is copied around.
_SUBTITLE_SPLIT_RE = re.compile(r'([:~\-\u2013\u2014])')


def _capitalize_token(token):
    """
    Capitalize one marker-free token: known acronyms and Roman numerals
    (alone, or joined by '&', '+' or '|') become fully uppercase; anything
    else is capitalized by capitalize_hyphenated().
    """
    upper = token.upper()
    if upper in ACRONYMS or is_roman_numeral(token):
        return upper
    # Compound numerals such as "i&ii": uppercase only if every side is a numeral.
    for sep in ('&', '+', '|'):
        if sep in token and all(is_roman_numeral(p) for p in token.split(sep)):
            return upper
    return capitalize_hyphenated(token)


def title_case_preserve_numbers(name):
    """
    Title-case a game name with these rules:
      - Known acronyms are written fully uppercase (hd -> HD, ffvii -> FFVII).
      - Roman numerals become fully uppercase (iIi -> III, xI -> XI).
      - Roman numerals joined by '&', '+' or '|' become fully uppercase
        (i&ii -> I&II).
      - Every other word gets an uppercase first letter and a lowercase rest.
      - Small filler words (a, an, and, the, of, in, etc.) are lowercased when
        they are in the middle of the title, unless they directly follow a
        subtitle marker.
      - The first and last words are always capitalized.
      - Subtitle markers are ':', '~', '-', the en dash and the em dash. The
        text after a marker inside the same word, and the whole next word,
        are always capitalized. Because '-' is a marker, both halves of a
        hyphenated word are capitalized (yooka-laylee -> Yooka-Laylee).

    Examples:
      "the legend of zelda: a link to the past"
          -> "The Legend of Zelda: A Link to the Past"
      "final fantasy i&ii hd" -> "Final Fantasy I&II HD"

    Returns the words re-joined with single spaces; an empty or
    whitespace-only name yields "".
    """
    lowercase_exceptions = {
        "a", "an", "and", "as", "at", "but", "by", "for", "from",
        "in", "nor", "of", "on", "or", "so", "the", "to", "with", "yet"
    }
    subtitle_markers = {":", "~", "-", "\u2013", "\u2014"}

    words = name.split()
    last_idx = len(words) - 1
    result = []
    # Set by a subtitle marker; cleared after the first following word that
    # contains no marker of its own.
    force_capitalize_mode = False

    for idx, word in enumerate(words):
        is_edge_word = idx == 0 or idx == last_idx
        contains_marker = False
        pieces = []
        for part in _SUBTITLE_SPLIT_RE.split(word):
            if not part:
                # re.split yields empty strings next to markers; nothing to format.
                continue
            if part in subtitle_markers:
                pieces.append(part)
                force_capitalize_mode = True
                contains_marker = True
                continue
            lower_part = part.lower()
            if (force_capitalize_mode or is_edge_word
                    or lower_part not in lowercase_exceptions):
                pieces.append(_capitalize_token(part))
            else:
                # In-the-middle filler word: keep lowercase.
                pieces.append(lower_part)
        result.append(''.join(pieces))
        if not contains_marker:
            force_capitalize_mode = False

    # No extra pass over the first/last word is needed: is_edge_word already
    # routes them through _capitalize_token, and re-capitalizing a finished
    # word would undo compound numerals ("I&II") and acronyms next to a
    # marker ("FFVII:").
    return ' '.join(result)
def create_formatted_structure(root_folder):
    """
    Collect every .pchtxt file under root_folder into a normalised tree.

    For each directory that contains .pchtxt files (extension matched
    case-insensitively):
      1. Take the directory's own name as the raw game name.
      2. sanitize_name() -> ASCII-only, quotes and " - " removed.
      3. Remove a trailing "Graphics" if present (case-sensitive).
      4. title_case_preserve_numbers() to get the final Game Name.
      5. Create folder: <root_folder>/formatted/<Game Name> - Graphics Mods/
      6. Copy each .pchtxt into that folder as <version>.pchtxt, where
         <version> is the original file name without its extension.

    The formatted/ output folder itself is never scanned, so running the
    script again does not re-import its own copies. Files that map to the
    same game name and version overwrite each other.
    """
    formatted_path = os.path.join(root_folder, 'formatted')
    os.makedirs(formatted_path, exist_ok=True)
    print(f"Creating formatted structure at: {formatted_path}\n")

    extension = '.pchtxt'
    mod_name = "Graphics Mods"
    output_key = os.path.normcase(os.path.abspath(formatted_path))

    for current_root, dirs, files in os.walk(root_folder):
        # Prune the output folder in place (os.walk is top-down by default)
        # so copies made by this or an earlier run are not treated as input.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(current_root, d))) != output_key
        ]

        patch_files = [f for f in files if f.lower().endswith(extension)]
        if not patch_files:
            continue

        # abspath() gives a usable folder name even when root_folder was
        # passed as "." or with a trailing path separator.
        parent_dir = os.path.basename(os.path.abspath(current_root))
        game_name = sanitize_name(parent_dir)
        # Remove trailing "Graphics" if it exists (exact match at end)
        if game_name.endswith("Graphics"):
            game_name = game_name[:-len("Graphics")].strip()
        # Title-case with acronyms, roman-numeral, and compound-numeral logic
        game_name = title_case_preserve_numbers(game_name)
        if not game_name:
            # Nothing survived sanitising (e.g. a fully non-ASCII folder
            # name): fall back to the raw name so unrelated games are not
            # merged into a single " - Graphics Mods" folder.
            game_name = parent_dir

        target_dir = os.path.join(formatted_path, f"{game_name} - {mod_name}")
        os.makedirs(target_dir, exist_ok=True)

        for file in patch_files:
            version = file[:-len(extension)].strip()
            source_path = os.path.join(current_root, file)
            dest_path = os.path.join(target_dir, f"{version}.pchtxt")
            shutil.copy2(source_path, dest_path)
            print(f"Copied {source_path} -> {dest_path}")

    print("\nDone!")
def main():
    """
    Command-line entry point.

    Expects exactly one argument, the root folder to scan, and passes it to
    create_formatted_structure(). Exits with status 1 when the argument
    count is wrong or the path is not an existing directory.
    """
    if len(sys.argv) != 2:
        # Report the name the script was actually invoked with.
        script_name = os.path.basename(sys.argv[0])
        print(f"Usage: python {script_name} /path/to/root/folder")
        sys.exit(1)
    folder_path = sys.argv[1]
    if not os.path.isdir(folder_path):
        # Without this check a mistyped path would be silently created by
        # os.makedirs() in create_formatted_structure().
        sys.exit(f"Error: '{folder_path}' is not an existing directory")
    create_formatted_structure(folder_path)


if __name__ == "__main__":
    main()