Files
Alchemist/scripts/StevensND/format_repo2.py
2025-05-31 07:19:49 -07:00

211 lines
6.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
import os
import shutil
import re
import sys
import unicodedata
# ----- Normalization & TitleCasing Helpers -----
def sanitize_name(name):
    """Reduce *name* to plain ASCII with quote characters removed.

    Steps, in order: NFKD-decompose the text and drop every code point that
    cannot be encoded as ASCII, delete apostrophes, backticks and double
    quotes, turn each " - " separator into a single space, and finally
    collapse whitespace runs into single spaces.
    """
    ascii_only = (
        unicodedata.normalize('NFKD', name)
        .encode('ascii', 'ignore')
        .decode('ascii')
    )
    # One translate() pass deletes all three quote characters.
    unquoted = ascii_only.translate(str.maketrans('', '', "'`\""))
    without_separators = unquoted.replace(" - ", " ")
    # split() + join() collapses whitespace runs and trims both ends.
    return ' '.join(without_separators.split())
def capitalize_hyphenated(word):
    """Capitalize every hyphen-separated segment of *word*.

    Each segment gets an upper-cased first character and a lower-cased
    remainder.  Empty segments (from leading, trailing or doubled hyphens)
    are kept as-is, so every hyphen of the input appears in the output.
    """
    return '-'.join(
        # Slicing keeps empty and single-character segments safe.
        segment[:1].upper() + segment[1:].lower()
        for segment in word.split('-')
    )
# Matches a complete Roman numeral, ignoring letter case.  Every group is
# optional, so the pattern by itself also matches the empty string.
ROMAN_NUMERAL_PATTERN = re.compile(
    r"^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$",
    re.IGNORECASE
)
# Known acronyms to force uppercase exactly
ACRONYMS = {
    "HD", "2D", "3D", "4K", "VR", "AI", "API", "USB", "CPU", "GPU", "DVD", "CD",
    "RPG", "FPS", "MMO", "MMORPG", "LAN", "GUI", "NPC",
    "FFVII", "FFVIII", "FFIX", "FFX", "FFXII",
    "FX", "2K", "5K", "8K", "V1", "V2", "V3", "V4", "DOF"
}


def is_roman_numeral(word):
    """Return True when *word* is a non-empty Roman numeral in any letter case."""
    # Reject "" explicitly: all parts of the pattern are optional, so the
    # regex alone would report the empty string as a numeral.
    return bool(word) and ROMAN_NUMERAL_PATTERN.match(word) is not None
def title_case_preserve_numbers(name):
    """Title-case *name* word by word and return the words joined by one space.

    Rules applied to each whitespace-separated word:

    * The word is split on the subtitle markers ``:``, ``~``, ``-``, en dash
      and em dash; the markers themselves are kept unchanged.
    * A piece is written in lower case only when it is one of the small
      words listed in ``lowercase_exceptions``, the word is neither the first
      nor the last one, and no subtitle marker is forcing capitalization.
    * Every other piece is capitalized: entries of ``ACRONYMS`` and Roman
      numerals become fully upper case (also Roman numerals joined by ``&``,
      ``+`` or ``|``); anything else gets an upper-case first character and a
      lower-case remainder.
    * Seeing a marker forces capitalization for the rest of that word and
      for the following word; the forcing stops after the first word that
      contains no marker.
    """
    lowercase_exceptions = {
        "a", "an", "and", "as", "at", "but", "by", "for", "from",
        "in", "nor", "of", "on", "or", "so", "the", "to", "with", "yet"
    }
    # Must list exactly the characters captured by the split pattern below.
    # An empty string must never be in this set: '"" in word' is always true,
    # which would keep forced capitalization switched on forever.
    subtitle_markers = {":", "~", "-", "\u2013", "\u2014"}
    marker_split = re.compile(r'([:~\-\u2013\u2014])')

    def capitalize_special(subword):
        # Acronyms and Roman numerals are upper-cased as a whole.
        if subword.upper() in ACRONYMS:
            return subword.upper()
        if is_roman_numeral(subword):
            return subword.upper()
        # Roman numerals joined by a separator, e.g. "i&ii" -> "I&II".
        for sep in ('&', '+', '|'):
            if sep in subword:
                pieces = subword.split(sep)
                if all(is_roman_numeral(p) for p in pieces):
                    return sep.join(p.upper() for p in pieces)
        return capitalize_hyphenated(subword)

    words = name.split()
    last_idx = len(words) - 1
    result = []
    force_capitalize_mode = False
    for idx, raw_word in enumerate(words):
        is_edge_word = idx == 0 or idx == last_idx
        contains_marker = any(marker in raw_word for marker in subtitle_markers)
        rebuilt_parts = []
        for part in marker_split.split(raw_word):
            if part in subtitle_markers:
                rebuilt_parts.append(part)
                force_capitalize_mode = True
                continue
            lower_part = part.lower()
            if force_capitalize_mode or is_edge_word or lower_part not in lowercase_exceptions:
                # Hyphens were already split off as markers, so `part` is a
                # single hyphen-free piece here.
                rebuilt_parts.append(capitalize_special(part))
            else:
                rebuilt_parts.append(lower_part)
        result.append(''.join(rebuilt_parts))
        if not contains_marker:
            force_capitalize_mode = False
    # No extra pass over the first/last word: the loop already capitalizes
    # them through capitalize_special, and re-capitalizing the finished word
    # would turn acronyms such as "HD" into "Hd".
    return " ".join(result)
def clean_title(name):
    """Sanitize *name* with sanitize_name, then title-case the result."""
    sanitized = sanitize_name(name)
    return title_case_preserve_numbers(sanitized)
# ----- Game & Mod Name Logic -----
def strip_versions(text):
    """
    Delete version-number tokens from *text* and trim the result.

    A token is two or three dot-separated digit groups with an optional
    leading "v" (either case), delimited by word boundaries, e.g.:
    - 1.0, 1.2.3
    - v1.0, V2.3.4
    Only the token itself is removed, so whitespace that surrounded a token
    in the middle of the text stays in place.
    """
    version_token = re.compile(r'\b(v?\d+(?:\.\d+){1,2})\b', flags=re.IGNORECASE)
    without_versions = version_token.sub('', text)
    return without_versions.strip()
def get_game_name_and_mod_name(path, root_dir):
    """Derive ``(game_name, mod_name)`` from where *path* sits below *root_dir*.

    The first component of the path relative to *root_dir* is the game
    folder; the remaining components are treated as sub-folders.

    Game name: bracketed ``[...]`` tags are removed, the first ", The" is
    moved to the front as a leading "The ", and " - " becomes a space.  If a
    sub-folder carries a bracketed tag, the first such folder's text outside
    the brackets (called ``country`` here) is appended in parentheses.

    Mod name: when the relative path contains "Aspect Ratio" it is
    "Aspect Ratio <last path component>".  Otherwise the sub-folder names
    (tags removed, folders named "pchtxt" skipped) are joined with spaces
    after dropping a leading dotted number from the first one and a leading
    ``country`` prefix.  Version numbers are then stripped.

    Both names are passed through clean_title; the mod name is "" when
    nothing is left.
    """
    bracket_tag = re.compile(r'\[.*?\]')
    relative_path = os.path.relpath(path, root_dir)
    parts = relative_path.split(os.sep)

    raw_game = bracket_tag.sub('', parts[0]).strip()
    # "Title, The ..." -> "The Title ...".  partition() cuts at the first
    # ", The" only, so text after a second occurrence is kept, not dropped.
    head, marker, tail = raw_game.partition(", The")
    if marker:
        raw_game = f"The {head}{tail}"
    raw_game = raw_game.replace(" - ", " ")

    # The first sub-folder with a [...] tag supplies the qualifier.
    country = None
    for folder in parts[1:]:
        if bracket_tag.search(folder):
            country = bracket_tag.sub('', folder).strip()
            break
    if country:
        raw_game = f"{raw_game} ({country})"
    game_name = clean_title(raw_game)

    sub_folders = [bracket_tag.sub('', folder).strip() for folder in parts[1:]]
    sub_folders = [sf for sf in sub_folders if sf.lower() != "pchtxt"]

    if "Aspect Ratio" in relative_path:
        aspect_folder = os.path.basename(path)
        raw_mod = f"Aspect Ratio {aspect_folder}"
    else:
        if sub_folders:
            # Drop a leading dotted number ("1.0.0", "2 Something") from the
            # first sub-folder; drop the folder entirely if nothing remains.
            m = re.match(r'^([0-9]+(?:\.[0-9]+)*)\s*(.*)$', sub_folders[0])
            if m:
                trailing = m.group(2).strip()
                if trailing:
                    sub_folders[0] = trailing
                else:
                    sub_folders = sub_folders[1:]
        if country and sub_folders:
            # The qualifier is already part of the game name; do not repeat
            # it at the start of the mod name (case-insensitive match).
            prefix = country.lower()
            candidate = sub_folders[0].lower()
            if candidate.startswith(prefix):
                sub_folders[0] = sub_folders[0][len(country):].lstrip()
        if sub_folders:
            raw_mod = " ".join(sub_folders).strip()
        else:
            raw_mod = ""

    # NOTE(review): version stripping is applied to both branches above,
    # including "Aspect Ratio" names - confirm this nesting is intended.
    raw_mod = strip_versions(raw_mod)
    # Remove a trailing " v<digits/dots>" suffix that strip_versions leaves
    # behind (e.g. "v2", which has no dotted part).
    m2 = re.match(r'^(.*)\s+v[0-9.]+$', raw_mod, re.IGNORECASE)
    if m2:
        raw_mod = m2.group(1).strip()
    mod_name = clean_title(raw_mod) if raw_mod else ""
    return game_name, mod_name
# ----- File Structure Logic -----
def create_formatted_structure(folder_path):
    """Copy every ``.pchtxt`` file under *folder_path* into ``<folder_path>/formatted``.

    Each file lands in ``formatted/<game> - <mod>/<original stem>.pchtxt``
    (or ``formatted/<game>/`` when no mod name could be derived), with the
    names coming from get_game_name_and_mod_name.  Source files are copied,
    not moved.  Progress is printed to stdout.
    """
    formatted_path = os.path.join(folder_path, "formatted")
    os.makedirs(formatted_path, exist_ok=True)
    print(f"Creating formatted structure at: {formatted_path}\n")
    output_root = os.path.abspath(formatted_path)
    suffix = ".pchtxt"
    for root, dirs, files in os.walk(folder_path):
        # Prune only the output directory itself (in place, so os.walk does
        # not descend into it).  Matching any path component named
        # "formatted" would skip the whole tree whenever the input folder
        # happens to live below a directory with that name.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_root
        ]
        for filename in files:
            if not filename.lower().endswith(suffix):
                continue
            game_name, mod_name = get_game_name_and_mod_name(root, folder_path)
            # File stem, kept as the output file name.
            version = filename[:-len(suffix)].strip()
            # Without a mod name use the game name alone instead of "Game -".
            combined_dir = f"{game_name} - {mod_name}" if mod_name else game_name
            new_dir = os.path.join(formatted_path, combined_dir)
            os.makedirs(new_dir, exist_ok=True)
            src = os.path.join(root, filename)
            dst = os.path.join(new_dir, f"{version}.pchtxt")
            shutil.copy(src, dst)
            print(f"Copied {src} -> {dst}")
    print("\nDone!\n")
def main(folder_path):
    """Build the formatted tree for *folder_path* and print a final confirmation."""
    create_formatted_structure(folder_path)
    print("All files have been organized successfully.")
if __name__ == "__main__":
    # Exactly one argument is expected: the folder to reorganize.
    if len(sys.argv) != 2:
        # Show the name the script was invoked with rather than a hard-coded
        # file name that can drift from the real one.
        print(f"Usage: python {os.path.basename(sys.argv[0])} /path/to/folder/")
        sys.exit(1)
    folder_path = sys.argv[1]
    main(folder_path)