Files
Alchemist/scripts/StevensND/format_repo2.py
2025-05-31 06:57:14 -07:00

250 lines
8.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
import os
import shutil
import re
import sys
import unicodedata
# ----- Normalization & TitleCasing Helpers (same as other repos) -----
def sanitize_name(name):
    """
    Reduce `name` to plain ASCII and tidy its punctuation and spacing.

    Steps, in order:
      1. NFKD-decompose and drop every non-ASCII code point, so accented
         letters lose their accent ("e" with acute -> "e") and characters
         without an ASCII decomposition (curly quotes, dashes, ...) vanish.
      2. Delete apostrophes, backticks and double quotes.
      3. Replace ' - ' with a single space.
      4. Collapse runs of whitespace and trim both ends.

    Returns the cleaned string (possibly empty).
    """
    ascii_only = (
        unicodedata.normalize('NFKD', name)
        .encode('ascii', 'ignore')
        .decode('ascii')
    )
    # One translate() pass deletes all three quote characters. Non-ASCII
    # quotes need no entry here: the ASCII encode above already removed them.
    unquoted = ascii_only.translate(str.maketrans('', '', "'`\""))
    unquoted = unquoted.replace(" - ", " ")
    # split()/join collapses internal whitespace and trims the ends in one go.
    return ' '.join(unquoted.split())
def capitalize_hyphenated(word):
    """
    Title-case every hyphen-separated piece of `word`: first character upper,
    the remainder lower. E.g. "yooka-laylee" -> "Yooka-Laylee".

    Empty pieces (from leading, trailing or doubled hyphens) stay empty, so
    the hyphens themselves are always preserved.
    """
    return '-'.join(
        piece[:1].upper() + piece[1:].lower()
        for piece in word.split('-')
    )
# Regex for Roman numerals, matched case-insensitively. M{0,4} admits up to
# four leading thousands, so values up to 4999 (MMMMCMXCIX) are accepted.
# Every numeral group is optional, so the leading lookahead is what demands at
# least one numeral letter; without it the empty string would also match.
ROMAN_NUMERAL_PATTERN = re.compile(
    r"^(?=[MDCLXVI])M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$",
    re.IGNORECASE
)
# Known acronyms to force to upper case exactly as spelled here.
ACRONYMS = {
    # Display / media terms
    "HD", "2D", "3D", "4K", "VR", "FX", "DVD", "CD",
    # Hardware / software terms
    "AI", "API", "USB", "CPU", "GPU", "LAN", "GUI",
    # Game genre and gameplay terms
    "RPG", "FPS", "MMO", "MMORPG", "NPC",
    # "FF"-prefixed abbreviations ending in a Roman numeral
    "FFVII", "FFVIII", "FFIX", "FFX", "FFXII",
}
def is_roman_numeral(word):
    """
    Report whether `word`, taken as a whole, matches ROMAN_NUMERAL_PATTERN.
    The pattern is compiled case-insensitively, so "xiv" and "XIV" both count.
    """
    return ROMAN_NUMERAL_PATTERN.match(word) is not None
def title_case_preserve_numbers(name):
    """
    Title-case `name` with these rules:
    • Known acronyms (members of ACRONYMS) are forced to upper case
      (e.g. 'hd' -> 'HD', 'ffvii' -> 'FFVII').
    • Roman numerals become fully uppercase (e.g. 'iii' -> 'III', 'xI' -> 'XI').
    • Compound Roman numerals joined by &, + or | become fully uppercase
      (e.g. "I&ii" -> "I&II").
    • Small filler words (a, an, and, the, of, in, etc.) become lowercase
      unless they are the first or last word, or directly follow a
      subtitle marker.
    • A subtitle marker (":", "~", "-", en dash or em dash) forces
      capitalization of whatever follows it: the rest of the word that
      contains the marker, plus the next word. Hyphenated words therefore
      come out capitalized on both sides ('Yooka-Laylee').
    • Every other word gets its first letter upper-cased and the rest
      lower-cased.

    `name` is split on whitespace and re-joined with single spaces.
    Returns the title-cased string ("" for empty or all-whitespace input).
    """
    lowercase_exceptions = {
        "a", "an", "and", "as", "at", "but", "by", "for", "from",
        "in", "nor", "of", "on", "or", "so", "the", "to", "with", "yet"
    }
    # En dash (U+2013) and em dash (U+2014) are written as escapes so they
    # cannot be lost or mistaken for "-". An empty string in this set would be
    # "contained" in every word and would keep forced capitalization switched
    # on for the rest of the title.
    subtitle_markers = {":", "~", "-", "\u2013", "\u2014"}

    def capitalize_special(subword):
        """Capitalize one marker-free fragment: acronym, numeral or plain word."""
        if subword.upper() in ACRONYMS:
            return subword.upper()
        if is_roman_numeral(subword):
            return subword.upper()
        # Compound Roman numerals joined by &, + or |
        for sep in ('&', '+', '|'):
            if sep in subword:
                pieces = subword.split(sep)
                if all(is_roman_numeral(p) for p in pieces):
                    return sep.join(p.upper() for p in pieces)
        return capitalize_hyphenated(subword)

    words = name.split()
    last_idx = len(words) - 1
    result = []
    force_capitalize_mode = False
    for idx, raw_word in enumerate(words):
        # First and last words are always capitalized, filler word or not.
        is_edge_word = idx == 0 or idx == last_idx
        contains_marker = False
        rebuilt_parts = []
        # The capturing group keeps the markers in the split output so the
        # word can be reassembled exactly around them.
        for part in re.split(r'([:~\-\u2013\u2014])', raw_word):
            if part in subtitle_markers:
                # Keep the marker and force the following fragments to capitalize.
                rebuilt_parts.append(part)
                contains_marker = True
                force_capitalize_mode = True
                continue
            lower_part = part.lower()
            if force_capitalize_mode or is_edge_word or lower_part not in lowercase_exceptions:
                rebuilt_parts.append(capitalize_special(part))
            else:
                rebuilt_parts.append(lower_part)
        result.append(''.join(rebuilt_parts))
        # Forced capitalization survives a marker-bearing word and covers the
        # single marker-free word after it, then switches off.
        if not contains_marker:
            force_capitalize_mode = False
    # No separate first/last-word pass: those words are already capitalized in
    # the loop via is_edge_word, and re-capitalizing the assembled word as a
    # whole would lower-case text after ':' or '&'
    # (e.g. 'Re:Zero' -> 'Re:zero', 'I&II' -> 'I&ii').
    return ' '.join(result)
def clean_title(name):
    """
    Normalize `name` with sanitize_name(), then title-case that result with
    title_case_preserve_numbers(), and return it.
    """
    sanitized = sanitize_name(name)
    return title_case_preserve_numbers(sanitized)
# ----- Revised get_game_name_and_mod_name & Structure Logic -----
def get_game_name_and_mod_name(path, root_dir):
    """
    Derive (game_name, mod_name) from `path`, the folder holding a .pchtxt
    file, interpreted relative to `root_dir`.

    • game_name: the first relative path component with "[...]" tags removed,
      a trailing-article form "X, The" rewritten as "The X", and " - "
      replaced by a space. If a deeper component carries a "[...]" tag, the
      text of that component outside the brackets is appended in parentheses.
      The result is passed through clean_title().
    • mod_name: "Aspect Ratio <last folder of path>" when the relative path
      contains "Aspect Ratio"; otherwise all components after the first
      (tags removed) joined by spaces. A trailing " v<digits/dots>" version
      suffix is dropped, then clean_title() is applied. Empty string when
      nothing is left.

    Returns:
        tuple (game_name, mod_name) of strings.
    """
    tag_pattern = re.compile(r'\[.*?\]')
    relative_path = os.path.relpath(path, root_dir)
    parts = relative_path.split(os.sep)

    # 1) Derive raw game folder (strip [tags], handle ", The")
    raw_game = tag_pattern.sub('', parts[0]).strip()
    # Only the first ", The" is moved to the front, and it must be the whole
    # word "The": a plain substring split would also fire on ", Theory" and
    # would discard text when ", The" occurs more than once.
    without_article, moved = re.subn(r', The\b', '', raw_game, count=1)
    if moved:
        raw_game = f"The {without_article}"
    raw_game = raw_game.replace(" - ", " ")

    # 2) Look for the first sub-folder that carries a "[...]" tag.
    # NOTE(review): this keeps the text *outside* the brackets, so a folder
    # named exactly "[USA]" yields an empty string and nothing is appended -
    # confirm that this is the intended handling of country folders.
    country = None
    for folder in parts[1:]:
        if tag_pattern.search(folder):
            country = tag_pattern.sub('', folder).strip()
            break
    if country:
        raw_game = f"{raw_game} ({country})"

    # 3) Clean/title-case the game name
    game_name = clean_title(raw_game)

    # 4) Derive raw mod name
    if 'Aspect Ratio' in relative_path:
        # a) Aspect-ratio mods are named after the innermost folder only
        raw_mod = f"Aspect Ratio {os.path.basename(path)}"
    else:
        # b) Otherwise every folder after the game folder is a mod component
        #    (joining an empty sequence gives "")
        raw_mod = " ".join(tag_pattern.sub('', folder).strip() for folder in parts[1:])
    # c) Drop a trailing " v<digits/dots>" version suffix from the mod name
    version_suffix = re.search(r'(.*) v[0-9.]+$', raw_mod)
    if version_suffix:
        raw_mod = version_suffix.group(1)
    raw_mod = raw_mod.strip()

    # 5) Clean/title-case the mod name
    mod_name = clean_title(raw_mod) if raw_mod else ""
    return game_name, mod_name
def create_formatted_structure(folder_path):
    """
    Walk `folder_path` for all `.pchtxt` files (extension matched
    case-insensitively). For each directory that contains some:
    1) Derive (game_name, mod_name) via get_game_name_and_mod_name.
    2) Copy every such file to
       formatted/"<Game Name> - <Mod Name>"/"<version>.pchtxt",
       where <version> is the source file name without its extension.

    The output directory `folder_path`/formatted is created if missing and is
    itself excluded from the walk. Progress is printed to stdout.
    """
    formatted_path = os.path.join(folder_path, 'formatted')
    os.makedirs(formatted_path, exist_ok=True)
    print(f"Creating formatted structure at: {formatted_path}\n")
    for root, dirs, files in os.walk(folder_path):
        # Exclude only the output tree. Pruning it from `dirs` in place while
        # at the top level stops os.walk from descending into it at all, and
        # a "formatted" component elsewhere in the path (for instance in
        # `folder_path` itself) no longer causes everything to be skipped.
        if os.path.relpath(root, folder_path) == os.curdir:
            dirs[:] = [d for d in dirs if d != 'formatted']
        patch_files = [f for f in files if f.lower().endswith('.pchtxt')]
        if not patch_files:
            continue
        # Names depend only on the directory, so derive them once per directory.
        game_name, mod_name = get_game_name_and_mod_name(root, folder_path)
        # If mod_name ended up empty, the directory is still named "Game Name -"
        combined_dir_name = f"{game_name} - {mod_name}".rstrip()
        new_dir = os.path.join(formatted_path, combined_dir_name)
        os.makedirs(new_dir, exist_ok=True)
        for file in patch_files:
            version = file[:-len('.pchtxt')].strip()
            src = os.path.join(root, file)
            dst = os.path.join(new_dir, f"{version}.pchtxt")
            shutil.copy(src, dst)
            # Separator between the two paths; without it they run together.
            print(f"Copied {src} -> {dst}")
    print("\nDone!\n")
def main(folder_path):
    """
    Script entry point: run create_formatted_structure() on `folder_path`,
    then print a completion message.
    """
    create_formatted_structure(folder_path)
    print("All files have been organized successfully.")
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the folder to organize.
    if len(sys.argv) != 2:
        # Take the script name from argv[0] so the usage line always matches
        # the name the script was invoked under; usage errors go to stderr.
        script_name = os.path.basename(sys.argv[0])
        print(f"Usage: python {script_name} /path/to/folder/", file=sys.stderr)
        sys.exit(1)
    folder_path = sys.argv[1]
    main(folder_path)