68 lines
2.5 KiB
Python
Executable File
68 lines
2.5 KiB
Python
Executable File
#!/usr/bin/python3
|
|
|
|
# Solution inspired by: https://gist.github.com/kmorcinek/2710267
|
|
|
|
# This script parses the date between the parentheses in each file name and keeps the copy with the newest date
|
|
# It will delete the older files and search all sub directories
|
|
# This definitely isn't the most efficient, but can process tens of thousands of files in less than a second
|
|
|
|
import re
|
|
from os.path import join
|
|
import os.path
|
|
from os import walk, rename, remove
|
|
import os
|
|
import sys
|
|
|
|
from datetime import datetime
|
|
|
|
FOLDER_PATH = os.getcwd()

# Text of the "(YYYY_MM_DD HH_MM_SS UTC)" tag embedded in the file names this
# script handles.
_TAG_BODY = r'\d{4}_\d{2}_\d{2} \d{2}_\d{2}_\d{2} UTC'
# Locates the tag and captures the timestamp text inside the parentheses.
_TAG_RE = re.compile(r'\((' + _TAG_BODY + r')\)')
# Matches the tag together with its leading space so it can be stripped from a
# name. Only the date tag is removed: other parenthesised text such as "(1)"
# or "(copy)" stays part of the base name, so unrelated files are never
# grouped together.
_TAG_SUFFIX_RE = re.compile(r' \(' + _TAG_BODY + r'\)')
_TAG_FORMAT = "%Y_%m_%d %H_%M_%S %Z"

# Full paths of every dated file handled so far, across all directories.
processed = []


def _tag_date(name):
    """Return the datetime encoded in *name*'s date tag, or None.

    None is returned both when the name carries no tag and when the tag has
    the right shape but is not a real date (e.g. month 13).
    """
    match = _TAG_RE.search(name)
    if match is None:
        return None
    try:
        return datetime.strptime(match.group(1), _TAG_FORMAT)
    except ValueError:
        return None


def dedupe_directory(path, files):
    """Keep only the newest dated copy of each file in one directory.

    Files whose names carry a "(YYYY_MM_DD HH_MM_SS UTC)" tag are grouped by
    the name that remains once the tag is stripped. In every group the copy
    with the latest timestamp is renamed to the stripped name and all other
    copies are deleted. Files without a valid tag are reported and left alone.

    Args:
        path: Directory that contains *files*.
        files: File names (not paths) found in *path*.

    Returns:
        The full paths of all dated files that were handled, in group order.
    """
    total = len(files)

    # Pass 1: bucket the dated files by base name so that each group is
    # handled exactly once, no matter how many copies it contains.
    groups = {}
    for index, name in enumerate(files):
        print('{} / {}'.format(index + 1, total))
        stamp = _tag_date(name)
        if stamp is None:
            print(f"Skipping {join(path, name)}")
            continue
        base_name = _TAG_SUFFIX_RE.sub('', name)
        groups.setdefault(base_name, []).append((stamp, name))

    # Pass 2: delete the older copies and rename the newest one.
    handled = []
    for new_name, versions in groups.items():
        newest_file = max(versions, key=lambda version: version[0])[1]
        print("Newest file found:", newest_file)
        target_path = join(path, new_name)

        for _stamp, duplicate in versions:
            duplicate_path = join(path, duplicate)
            # The file may have vanished since the directory was listed.
            if os.path.isfile(duplicate_path):
                if duplicate != newest_file:
                    remove(duplicate_path)
                    print(f"Removed older duplicate: {duplicate_path}")
                elif target_path != duplicate_path and os.path.lexists(target_path):
                    # An undated file already owns the target name; renaming
                    # would overwrite it on POSIX and raise on Windows.
                    print(f"Not renaming {duplicate_path}: {target_path} already exists")
                else:
                    rename(duplicate_path, target_path)
                    print(f"Renamed: {duplicate_path} to {target_path}")
            handled.append(duplicate_path)

    return handled


def main():
    """Deduplicate every directory below FOLDER_PATH and print a summary."""
    # os.walk lists each directory's files before this loop body runs, so
    # renaming or removing files here does not disturb the traversal.
    for path, _subdirs, files in walk(FOLDER_PATH):
        processed.extend(dedupe_directory(path, files))

    print(f"Processed {len(processed)} files.")


if __name__ == "__main__":
    main()
|