# linux-scripts/win_fh_rename.py

#!/usr/bin/python3

# Solution inspired by: https://gist.github.com/kmorcinek/2710267

# This script parses the date between the parentheses in each file name and finds the newest version
# It deletes the older versions, renames the newest one without the date, and searches all subdirectories
# This definitely isn't the most efficient, but can process tens of thousands of files in less than a second

import re
from os.path import join
import os.path
from os import walk, rename, remove
import os
import sys

from datetime import datetime

FOLDER_PATH = os.getcwd()

# Version stamp embedded in a file name, e.g. "report (2023_01_31 12_00_00 UTC).txt".
# The leading space belongs to the stamp so that stripping it leaves "report.txt".
STAMP_PATTERN = re.compile(r' \((\d{4}_\d{2}_\d{2} \d{2}_\d{2}_\d{2} UTC)\)')
STAMP_FORMAT = "%Y_%m_%d %H_%M_%S %Z"


def _split_stamp(name):
    """Split a stamped file name into ``(base_name, timestamp)``.

    Only the stamp itself is removed from the name, so other parenthesised
    parts such as "(draft)" are preserved in ``base_name``.

    Returns ``None`` when *name* carries no stamp, or when the stamp matches
    the digit pattern but is not a real date (e.g. month 13).
    """
    match = STAMP_PATTERN.search(name)
    if match is None:
        return None
    try:
        stamp = datetime.strptime(match.group(1), STAMP_FORMAT)
    except ValueError:
        return None
    return name[:match.start()] + name[match.end():], stamp


def _keep_newest(path, base_name, versions, processed):
    """Rename the newest stamped version to *base_name* and delete the rest.

    *versions* is a list of ``(timestamp, file_name)`` pairs that all share
    *base_name* inside directory *path*. Every version's full path is added
    to *processed*. If something already exists at the target name, the whole
    group is left untouched so that nothing is overwritten.
    """
    processed.update(join(path, name) for _, name in versions)

    target_path = join(path, base_name)
    if os.path.lexists(target_path):
        # rename() may silently replace an existing target; refuse to clobber it.
        print(f"Skipping {target_path}: a file without a date already exists")
        return

    newest_file = max(versions, key=lambda version: version[0])[1]
    print("Newest file found:", newest_file)

    # Rename first: if it fails, no older version has been deleted yet.
    newest_path = join(path, newest_file)
    rename(newest_path, target_path)
    print(f"Renamed: {newest_path} to {target_path}")

    for _, name in versions:
        if name == newest_file:
            continue
        older_path = join(path, name)
        try:
            remove(older_path)
        except FileNotFoundError:
            # Already gone (e.g. removed by someone else meanwhile).
            continue
        print(f"Removed older duplicate: {older_path}")


def dedupe_tree(root):
    """Collapse stamped file versions in *root* and all its subdirectories.

    Within each directory, files whose names differ only by their
    "(YYYY_MM_DD HH_MM_SS UTC)" stamp form one group. The newest version of
    each group is renamed to the stamp-free name and the older versions are
    deleted. Files without a valid stamp are reported and left alone.

    Returns the set of full paths of all stamped files that were handled.
    """
    processed = set()

    for path, _subdirs, files in walk(root):
        # Group the stamped files of this directory by their stamp-free name.
        base_of = {}
        groups = {}
        for name in files:
            parsed = _split_stamp(name)
            if parsed is not None:
                base_name, stamp = parsed
                base_of[name] = base_name
                groups.setdefault(base_name, []).append((stamp, name))

        for i, name in enumerate(files):
            full_path = join(path, name)
            # Members of an already-handled group must not be handled again.
            if full_path in processed:
                continue

            print(f"{i + 1} / {len(files)}")

            base_name = base_of.get(name)
            if base_name is None:
                print(f"Skipping {full_path}")
                continue

            _keep_newest(path, base_name, groups[base_name], processed)

    return processed


def main():
    """Process the current working directory and print a summary."""
    processed = dedupe_tree(FOLDER_PATH)
    print(f"Processed {len(processed)} files.")


if __name__ == "__main__":
    main()