#!/usr/bin/env python3
"""Reorganize a tree of ``.pchtxt`` mods into ``formatted/<Game> - <Mod>/``.

Usage: ``python format_repo4.py /path/to/folder/``

The script extracts top-level ``release_*.rar`` archives, walks the folder for
``.pchtxt`` files, derives a cleaned game name and mod name from each file's
directory path, and copies the file to
``<folder>/formatted/<Game> - <Mod>/<version>.pchtxt``.
"""
# Reconstructed from the post-image of patch 911c4646 ("Update format_repo4.py").
import os
import re
import shutil
import sys
import unicodedata

# ----- Normalization & title-casing helpers -----

# ASCII quote characters deleted by sanitize_name(). Typographic quotes need no
# entry: the ASCII encode step has already discarded them.
_QUOTE_DELETIONS = str.maketrans("", "", "'`\"")


def sanitize_name(name: str) -> str:
    """Return *name* reduced to plain ASCII with tidy spacing.

    Steps, in order: NFKD-decompose and drop every non-ASCII code point
    (so accents disappear), delete ``'``, `````, and ``"``, replace ``" - "``
    with a single space, and collapse runs of whitespace.
    """
    ascii_only = (
        unicodedata.normalize("NFKD", name)
        .encode("ascii", "ignore")
        .decode("ascii")
    )
    without_quotes = ascii_only.translate(_QUOTE_DELETIONS)
    # split()/join() both collapses inner whitespace and trims the ends.
    return " ".join(without_quotes.replace(" - ", " ").split())


def capitalize_hyphenated(word: str) -> str:
    """Capitalize each hyphen-separated piece: ``"yooka-laylee"`` -> ``"Yooka-Laylee"``.

    Every piece gets its first character upper-cased and the rest lower-cased;
    empty pieces (from leading, trailing, or doubled hyphens) are kept empty.
    """
    return "-".join(
        piece[:1].upper() + piece[1:].lower() for piece in word.split("-")
    )


# Regex for Roman numerals. ``M{0,4}`` means values up to 4999 are accepted.
ROMAN_NUMERAL_PATTERN = re.compile(
    r"^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$",
    re.IGNORECASE
)

# Known uppercase acronyms we want to preserve exactly
ACRONYMS = {
    "HD", "2D", "3D", "4K", "VR", "AI", "API", "USB", "CPU", "GPU", "DVD", "CD",
    "RPG", "FPS", "MMO", "MMORPG", "LAN", "GUI", "NPC", "FFVII", "FFVIII", "FX",
    "FFIX", "FFX", "FFXII", "2K"
}

# Filler words that stay lowercase in the middle of a title.
_LOWERCASE_WORDS = frozenset({
    "a", "an", "and", "as", "at", "but", "by", "for", "from",
    "in", "nor", "of", "on", "or", "so", "the", "to", "with", "yet",
})

# Characters treated as subtitle markers, and the splitter that keeps them.
_SUBTITLE_MARKERS = frozenset({":", "~", "-", "–", "—"})
_MARKER_SPLIT = re.compile(r'([:~\-–—])')

# Splits a token into (leading punctuation, core, trailing punctuation).
_TOKEN_SHELL = re.compile(r"^(\W*)(.*?)(\W*)$", re.DOTALL)

# Separators allowed between the members of a compound Roman numeral.
_COMPOUND_SEPARATORS = ("&", "+", "|")


def is_roman_numeral(word: str) -> bool:
    """Return True if *word* is a non-empty Roman numeral (case-insensitive).

    NOTE: the check is purely lexical, so ordinary words that happen to be
    spelled like a numeral (e.g. ``"mix"``, ``"di"``) also match.
    """
    # The pattern alone matches "", because every group in it is optional.
    return bool(word) and ROMAN_NUMERAL_PATTERN.match(word) is not None


def _capitalize_token(token: str) -> str:
    """Capitalize one marker-free token, preserving acronyms and Roman numerals.

    Punctuation wrapped around the token (for example the parentheses in
    ``"(europe)"``) is set aside first so that it neither hides an acronym or
    numeral nor prevents the first letter from being upper-cased.
    """
    lead, core, trail = _TOKEN_SHELL.match(token).groups()
    if not core:
        # Nothing but punctuation: there is no letter to change.
        return token

    upper = core.upper()
    if upper in ACRONYMS or is_roman_numeral(core):
        return f"{lead}{upper}{trail}"

    # Compound Roman numerals such as "i&ii" -> "I&II".
    for sep in _COMPOUND_SEPARATORS:
        if sep in core and all(is_roman_numeral(p) for p in core.split(sep)):
            return f"{lead}{upper}{trail}"

    return f"{lead}{capitalize_hyphenated(core)}{trail}"


def title_case_preserve_numbers(name: str) -> str:
    """Title-case *name* word by word.

    Rules:
      * Tokens listed in ``ACRONYMS`` are upper-cased (``hd`` -> ``HD``).
      * Roman numerals, alone or joined by ``&``, ``+`` or ``|``, are
        upper-cased (``iii`` -> ``III``, ``i&ii`` -> ``I&II``).
      * Words are split on the subtitle markers ``:``, ``~``, ``-``, ``–``
        and ``—``; each side is capitalized separately, so hyphenated words
        come out as ``Yooka-Laylee``.
      * Filler words (a, an, and, the, of, in, ...) are lower-cased unless
        they are the first or last word or fall under forced capitalization.
      * Forced capitalization starts at a subtitle marker and covers the rest
        of that word plus the word that follows it.

    NOTE: because ``-`` counts as a marker, a filler word directly after a
    hyphenated word is capitalized too (``"Yooka-Laylee And the ..."``).

    Every other token has its first character upper-cased and the remainder
    lower-cased.
    """
    words = name.split()
    last_index = len(words) - 1
    titled = []
    force_capitalize = False

    for index, word in enumerate(words):
        at_edge = index == 0 or index == last_index
        saw_marker = False
        pieces = []

        # The capturing group makes split() return the markers themselves.
        for part in _MARKER_SPLIT.split(word):
            if part in _SUBTITLE_MARKERS:
                pieces.append(part)
                force_capitalize = True
                saw_marker = True
                continue

            lowered = part.lower()
            if force_capitalize or at_edge or lowered not in _LOWERCASE_WORDS:
                pieces.append(_capitalize_token(part))
            else:
                # Filler word in the middle of the title stays lowercase.
                pieces.append(lowered)

        titled.append("".join(pieces))
        # A marker keeps forcing capitals through the next word only.
        if not saw_marker:
            force_capitalize = False

    # No separate "fix up first/last word" pass: edge words are already forced
    # above, and re-capitalizing whole words would undo results like "I&II".
    return " ".join(titled)


def clean_title(name: str) -> str:
    """Apply ``sanitize_name`` and then ``title_case_preserve_numbers``."""
    return title_case_preserve_numbers(sanitize_name(name))


# ", The" as a whole word, so that ", Theatre" or ", Them" is not matched.
_TRAILING_ARTICLE = re.compile(r", The\b")


def transform_game_name_raw(raw_game: str) -> str:
    """Move a trailing-style ``", The"`` to the front and drop separators.

    ``"Skyrim, The"`` becomes ``"The Skyrim"``. Only the first ``", The"`` is
    moved; any text after it is kept. All colons are then removed and each
    ``" - "`` is replaced by a single space.
    """
    match = _TRAILING_ARTICLE.search(raw_game)
    if match:
        raw_game = f"The {raw_game[:match.start()]}{raw_game[match.end():]}"
    raw_game = raw_game.replace(":", "")      # strip out colons
    raw_game = raw_game.replace(" - ", " ")   # remove literal " - "
    return raw_game


def extract_rar_files(folder_path: str) -> None:
    """Extract every top-level ``release_*.rar`` archive into *folder_path*.

    Only immediate children of *folder_path* are considered; archives inside
    subfolders are left alone. The name match is case-insensitive. A failed
    extraction is reported on stdout and does not stop the remaining archives.
    """
    archives = []
    for item in os.listdir(folder_path):
        full = os.path.join(folder_path, item)
        if not os.path.isfile(full):
            continue
        lowered = item.lower()
        if lowered.startswith("release_") and lowered.endswith(".rar"):
            archives.append(full)

    if not archives:
        return

    # Third-party dependency, imported only when there is something to
    # extract so the naming helpers stay usable without it installed.
    import rarfile

    for full in archives:
        try:
            with rarfile.RarFile(full) as rf:
                rf.extractall(folder_path)
            print(f"Extracted top‐level archive: {full}")
        except rarfile.Error as e:
            print(f"❌ Failed to extract {full}: {e}")


def get_game_name_and_mod_name(path: str, root_dir: str) -> tuple:
    """Derive ``(game_name, mod_name)`` for the folder *path* under *root_dir*.

    Game name: the first path component below *root_dir*, with ``[...]`` tags
    removed and ``transform_game_name_raw`` applied. If a deeper component
    contains a ``[...]`` tag, the first such component (minus the tag) is
    appended as ``" (<component>)"``. The result goes through ``clean_title``.

    Mod name, before cleaning:
      * ``"Aspect Ratio <last folder>"`` when the relative path contains
        ``"Aspect Ratio"``;
      * ``"<parent folder> <last folder>"`` when the last folder contains
        ``" v<digits>"`` (just the last folder if it has no parent below
        *root_dir*);
      * otherwise the parent of the last folder, or ``""`` if there is none.

    The mod name then has ``'`` and ````` replaced by ``.``, ``21-9`` replaced
    by ``21.9``, and colons removed. A name that is exactly ``"Trailblazers"``
    becomes ``"4K"``. A non-empty result goes through ``clean_title``.
    """
    relative = os.path.relpath(path, root_dir)
    parts = relative.split(os.sep)
    bracket_tag = r'\[.*?\]'

    # --- game name ---
    raw_game = re.sub(bracket_tag, '', parts[0]).strip()
    raw_game = transform_game_name_raw(raw_game)

    # The first bracket-tagged folder deeper in the path supplies the suffix.
    country = next(
        (re.sub(bracket_tag, '', p).strip()
         for p in parts[1:] if re.search(bracket_tag, p)),
        None,
    )
    if country:
        raw_game = f"{raw_game} ({country})"

    game_name = clean_title(raw_game)

    # --- mod name ---
    last_folder = parts[-1]
    has_parent = len(parts) > 1
    if "Aspect Ratio" in relative:
        raw_mod = f"Aspect Ratio {os.path.basename(path)}"
    elif re.search(r' v\d+', last_folder):
        # Guarded: a versioned folder directly under root_dir has no parent.
        raw_mod = f"{parts[-2]} {last_folder}" if has_parent else last_folder
    else:
        raw_mod = parts[-2] if has_parent else ""

    raw_mod = raw_mod.strip()
    raw_mod = raw_mod.replace("'", ".").replace("`", ".")
    raw_mod = raw_mod.replace("21-9", "21.9")
    raw_mod = raw_mod.replace(":", "")
    if raw_mod == "Trailblazers":
        raw_mod = "4K"

    mod_name = clean_title(raw_mod) if raw_mod else ""
    return game_name, mod_name


def create_formatted_structure(folder_path: str) -> None:
    """Copy every ``.pchtxt`` under *folder_path* into the ``formatted`` tree.

    1. Extract top-level release archives with ``extract_rar_files``.
    2. Walk *folder_path*, skipping the ``formatted`` output directory.
    3. For each ``.pchtxt`` file (case-insensitive suffix), compute the game
       and mod name with ``get_game_name_and_mod_name``.
    4. Copy it to ``formatted/<game> - <mod>/<version>.pchtxt``, where
       ``<version>`` is the file name without its extension.
    """
    extract_rar_files(folder_path)

    formatted_path = os.path.join(folder_path, "formatted")
    os.makedirs(formatted_path, exist_ok=True)
    output_root = os.path.abspath(formatted_path)

    for root, dirs, files in os.walk(folder_path):
        # Prune only the real output directory. Matching on the bare name
        # "formatted" would skip the whole tree whenever folder_path itself
        # lives below a directory with that name.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_root
        ]

        for f in files:
            if not f.lower().endswith(".pchtxt"):
                continue

            game_name, mod_name = get_game_name_and_mod_name(root, folder_path)
            version = f[:-len(".pchtxt")].strip()

            combined = f"{game_name} - {mod_name}".strip()
            target_dir = os.path.join(formatted_path, combined)
            os.makedirs(target_dir, exist_ok=True)

            src = os.path.join(root, f)
            dst = os.path.join(target_dir, f"{version}.pchtxt")
            shutil.copy(src, dst)
            print(f"Copied {src} → {dst}")

    print("\nAll files have been organized successfully.")


def main(folder_path: str) -> None:
    """Run the full reorganization for *folder_path*."""
    create_formatted_structure(folder_path)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python format_repo4.py /path/to/folder/")
        sys.exit(1)

    folder_path = sys.argv[1]
    main(folder_path)