Update format_repo2.py

This commit is contained in:
ppkantorski
2025-05-31 07:17:11 -07:00
committed by GitHub
parent 4991ffafb6
commit 91e5b6178e

View File

@@ -5,13 +5,9 @@ import re
import sys
import unicodedata
# ----- Normalization & TitleCasing Helpers (same as other repos) -----
# ----- Normalization & TitleCasing Helpers -----
def sanitize_name(name):
"""
Remove accents and unwanted characters, replace ' - ' with a single space,
and collapse multiple spaces.
"""
normalized = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
cleaned = normalized.replace("'", "").replace("", "").replace("`", "").replace('"', "")
cleaned = cleaned.replace(" - ", " ")
@@ -19,9 +15,6 @@ def sanitize_name(name):
return cleaned.strip()
def capitalize_hyphenated(word):
"""
Capitalize both parts of a hyphenated word. E.g. "yooka-laylee" → "Yooka-Laylee".
"""
parts = word.split('-')
capitalized_parts = []
for part in parts:
@@ -31,44 +24,27 @@ def capitalize_hyphenated(word):
capitalized_parts.append('')
return '-'.join(capitalized_parts)
# Regex for Roman numerals. M{0,4} allows up to four thousands, so values
# through 4999 (e.g. "MMMMCMXCIX") are accepted.
# The leading lookahead demands at least one numeral character: every group
# below is optional, so without it the pattern would also match "".
ROMAN_NUMERAL_PATTERN = re.compile(
    r"^(?=[MDCLXVI])M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$",
    re.IGNORECASE,
)
# Known uppercase acronyms we want to preserve exactly.
# Keep a comma after every entry (including the last one on each line):
# adjacent string literals are silently concatenated by Python, which would
# turn e.g. "V4" followed by "RPG" into a single bogus "V4RPG" entry.
ACRONYMS = {
    "HD", "2D", "3D", "4K", "VR", "AI", "API", "USB", "CPU", "GPU", "DVD", "CD",
    "RPG", "FPS", "MMO", "MMORPG", "LAN", "GUI", "NPC",
    "FFVII", "FFVIII", "FFIX", "FFX", "FFXII",
    "FX", "2K", "V1", "V2", "V3", "V4",
}
def is_roman_numeral(word):
    """
    Report whether `word`, taken as a whole, matches ROMAN_NUMERAL_PATTERN
    (case-insensitive).
    """
    matched = ROMAN_NUMERAL_PATTERN.match(word)
    return matched is not None
def title_case_preserve_numbers(name):
"""
Title-case with these rules:
• Fully uppercase acronyms remain unchanged (e.g. HD, 2D, 3D, FFVII, etc.).
• Roman numerals become fully uppercase (e.g. 'iii' → 'III', 'xI' → 'XI').
• Hyphenated words are capitalized on both sides (→ 'Yooka-Laylee').
• Small filler words (a, an, and, the, of, in, etc.) become lowercase
only if they appear in the middle and are not immediately after a subtitle marker,
except the first and last words (always capitalized).
• After a subtitle marker (":", "~", "–", "—", or "-"), force capitalization
on all subsequent words until the next subtitle marker or end.
• Compound Roman numerals joined by &, +, or | become fully uppercase
(e.g. "I&ii" → "I&II").
"""
lowercase_exceptions = {
"a", "an", "and", "as", "at", "but", "by", "for", "from",
"in", "nor", "of", "on", "or", "so", "the", "to", "with", "yet"
}
subtitle_markers = {":", "~", "-", "", ""}
words = name.split()
result = []
force_capitalize_mode = False
@@ -80,7 +56,6 @@ def title_case_preserve_numbers(name):
for part in split_parts:
if part in subtitle_markers:
# Keep marker, force next segments to capitalize
rebuilt_parts.append(part)
force_capitalize_mode = True
continue
@@ -94,7 +69,6 @@ def title_case_preserve_numbers(name):
return subword.upper()
if is_roman_numeral(subword):
return subword.upper()
# Check for compound Roman numerals joined by &, +, |
for sep in ('&', '+', '|'):
if sep in subword:
pieces = subword.split(sep)
@@ -104,7 +78,7 @@ def title_case_preserve_numbers(name):
if force_capitalize_mode or is_first or is_last or (lower_part not in lowercase_exceptions):
sub_parts = part.split('-')
rebuilt_parts.append('-'.join(capitalize_special(p) for p in sub_parts))
rebuilt_parts.append('-'.join(capitalize_special(s) for s in sub_parts))
else:
rebuilt_parts.append(lower_part)
@@ -112,58 +86,41 @@ def title_case_preserve_numbers(name):
if not contains_marker:
force_capitalize_mode = False
# Always capitalize first and last words fully:
if result:
# First word
first_split = result[0].split('-')
new_first = []
for p in first_split:
if p.upper() in ACRONYMS or is_roman_numeral(p):
new_first.append(p.upper())
else:
new_first.append(capitalize_hyphenated(p))
result[0] = '-'.join(new_first)
result[0] = "-".join(capitalize_hyphenated(p) if not is_roman_numeral(p) else p.upper() for p in first_split)
# Last word
last_split = result[-1].split('-')
new_last = []
for p in last_split:
if p.upper() in ACRONYMS or is_roman_numeral(p):
new_last.append(p.upper())
else:
new_last.append(capitalize_hyphenated(p))
result[-1] = '-'.join(new_last)
result[-1] = "-".join(capitalize_hyphenated(p) if not is_roman_numeral(p) else p.upper() for p in last_split)
return ' '.join(result)
return " ".join(result)
def clean_title(name):
    """
    Sanitize `name` with sanitize_name(), then title-case the result with
    title_case_preserve_numbers(), and return it.
    """
    sanitized = sanitize_name(name)
    return title_case_preserve_numbers(sanitized)
# ----- Revised get_game_name_and_mod_name & Structure Logic -----
# ----- Game & Mod Name Logic -----
def strip_versions(text):
    """
    Remove any substrings that look like version numbers, e.g.:
    - 1.0, 1.2.3
    - v1.0, v2.3.4
    and tidy up the whitespace they leave behind.

    A token only counts as a version if it contains at least one dot, so
    plain numbers such as "60" or a bare "v2" are left untouched.

    Returns `text` without the version tokens, with runs of whitespace
    collapsed to single spaces and no leading/trailing whitespace.
    """
    without_versions = re.sub(
        r'\b(v?\d+(?:\.\d+){1,2})\b', '', text, flags=re.IGNORECASE
    )
    # Deleting a token from the middle of the string leaves the spaces on
    # either side of it adjacent ("Mod v1.0 Fix" -> "Mod  Fix"); split/join
    # collapses those and trims both ends in one pass.
    return ' '.join(without_versions.split())
def get_game_name_and_mod_name(path, root_dir):
"""
Given a folder path where a .pchtxt lives, derive:
• game_name (honoring "[Country]" folders, ", The", etc.), then clean_title().
• mod_name (join of all subfolders except game, handling Aspect Ratio,
version suffix " vX", etc.), then clean_title().
"""
relative_path = os.path.relpath(path, root_dir)
parts = relative_path.split(os.sep)
# 1) Derive raw game folder (strip [tags], handle ", The")
raw_game = parts[0]
raw_game = re.sub(r'\[.*?\]', '', raw_game).strip()
if ', The' in raw_game:
p = raw_game.split(', The')
if ", The" in raw_game:
p = raw_game.split(", The")
raw_game = f"The {p[0]}{p[1]}"
raw_game = raw_game.replace(" - ", " ")
# 2) Check for country folder like "[USA]" etc.
country = None
for p in parts[1:]:
if re.search(r'\[.*?\]', p):
@@ -171,64 +128,66 @@ def get_game_name_and_mod_name(path, root_dir):
break
if country:
raw_game = f"{raw_game} ({country})"
# 3) Clean/title-case the game name:
game_name = clean_title(raw_game)
# 4) Derive raw mod_name
# a) If path contains "Aspect Ratio" segment, use that
if 'Aspect Ratio' in relative_path:
sub_folders = [ re.sub(r'\[.*?\]', '', p).strip() for p in parts[1:] ]
sub_folders = [sf for sf in sub_folders if sf.lower() != "pchtxt"]
if "Aspect Ratio" in relative_path:
aspect_folder = os.path.basename(path)
raw_mod = f"Aspect Ratio {aspect_folder}"
else:
# b) Otherwise join all folder parts after the first as mod components
mod_parts = [re.sub(r'\[.*?\]', '', p).strip() for p in parts[1:]]
if mod_parts:
raw_mod = " ".join(mod_parts)
if sub_folders:
m = re.match(r'^([0-9]+(?:\.[0-9]+)*)\s*(.*)$', sub_folders[0])
if m:
trailing = m.group(2).strip()
if trailing:
sub_folders[0] = trailing
else:
sub_folders = sub_folders[1:]
if country and sub_folders:
prefix = country.lower()
candidate = sub_folders[0].lower()
if candidate.startswith(prefix):
sub_folders[0] = sub_folders[0][len(country):].lstrip()
if sub_folders:
raw_mod = " ".join(sub_folders).strip()
else:
raw_mod = ""
# c) If raw_mod ends in " v<digits>" → preserve version suffix as part of mod_name
m = re.search(r'(.*) v[0-9.]+$', raw_mod)
if m:
raw_mod = m.group(1)
raw_mod = strip_versions(raw_mod)
m2 = re.match(r'^(.*)\s+v[0-9.]+$', raw_mod, re.IGNORECASE)
if m2:
raw_mod = m2.group(1).strip()
raw_mod = raw_mod.strip()
# 5) Clean/title-case the mod_name:
mod_name = clean_title(raw_mod) if raw_mod else ""
return game_name, mod_name
# ----- File Structure Logic -----
def create_formatted_structure(folder_path):
"""
Walk `folder_path` for all `.pchtxt` files. For each:
1) Derive (game_name, mod_name) via get_game_name_and_mod_name.
2) Place the file under formatted/"<Game Name> - <Mod Name>"/"<version>.pchtxt".
"""
formatted_path = os.path.join(folder_path, 'formatted')
formatted_path = os.path.join(folder_path, "formatted")
os.makedirs(formatted_path, exist_ok=True)
print(f"Creating formatted structure at: {formatted_path}\n")
for root, dirs, files in os.walk(folder_path):
# Skip anything already under "formatted"
if 'formatted' in root.split(os.sep):
if "formatted" in root.split(os.sep):
continue
for file in files:
if not file.lower().endswith('.pchtxt'):
for filename in files:
if not filename.lower().endswith(".pchtxt"):
continue
game_name, mod_name = get_game_name_and_mod_name(root, folder_path)
version = file[:-len('.pchtxt')].strip()
# If mod_name ended up empty, we still create "Game Name - "
combined_dir_name = f"{game_name} - {mod_name}".rstrip()
new_dir = os.path.join(formatted_path, combined_dir_name)
version = filename[:-len(".pchtxt")].strip()
combined_dir = f"{game_name} - {mod_name}".rstrip()
new_dir = os.path.join(formatted_path, combined_dir)
os.makedirs(new_dir, exist_ok=True)
src = os.path.join(root, file)
src = os.path.join(root, filename)
dst = os.path.join(new_dir, f"{version}.pchtxt")
shutil.copy(src, dst)
print(f"Copied {src}{dst}")
@@ -240,6 +199,7 @@ def main(folder_path):
create_formatted_structure(folder_path)
print("All files have been organized successfully.")
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python format_repo_2.py /path/to/folder/")