La respuesta de LPChip es la mejor.
Pero debido a que comencé a aprender Python, pensé: "Diablos, ¿por qué no escribir un script de Python como respuesta a esta pregunta?"
Instalar Python y Send2Trash
Debe instalar Python antes de poder ejecutar el script desde la línea de comandos.
Luego instale Send2Trash para que los archivos eliminados no desaparezcan irremediablemente, sino que terminen en la papelera del sistema operativo:
pip install Send2Trash
Crear el script
Cree un nuevo archivo con, por ejemplo, el nombre DeleteDuplicateInFolderA.py
Copie el siguiente script en el archivo.
#!/usr/bin/python
import sys
import os
from send2trash import send2trash
class DeleteDuplicateInFolderA(object):
    """Delete files below path A whose file name also occurs below path B.

    Both folders are walked recursively with ``os.walk``. Files are matched
    by their bare file name only; neither content nor the location inside
    the folder tree is compared.

    If the "dry run" flag is true, the duplicates are only displayed and
    nothing is deleted. Otherwise every duplicate found below path A is
    handed to ``send2trash``.
    """

    def __init__(self, path_A, path_B, is_dry_run=True):
        """Store the two folder paths and the dry-run flag."""
        self._path_A = path_A
        self._path_B = path_B
        self._is_dry_run = is_dry_run

    def get_filenames_in_folder(self, folder_path):
        """Return the bare names of all files found below *folder_path*."""
        only_files = []
        for _dirpath, _dirnames, filenames in os.walk(folder_path):
            only_files.extend(filenames)
        return only_files

    def _get_filepaths_in_folder(self, folder_path):
        """Return the path of every file found below *folder_path*.

        Each path is the directory reported by ``os.walk`` joined with the
        file name, so files in subfolders keep their real location.
        """
        filepaths = []
        for dirpath, _dirnames, filenames in os.walk(folder_path):
            filepaths.extend(os.path.join(dirpath, name) for name in filenames)
        return filepaths

    def print_files(self, heading, files):
        """Print *heading* followed by one line per entry of *files*."""
        print(heading)
        if not files:
            print(" none")
            return
        for name in files:
            print(" {}".format(name))

    def delete_duplicates_in_folder_A(self):
        """List both folders, then report or trash the duplicates in A."""
        filepaths_A = self._get_filepaths_in_folder(self._path_A)
        only_files_A = [os.path.basename(path) for path in filepaths_A]
        only_files_B = self.get_filenames_in_folder(self._path_B)

        # A set makes each membership test O(1) instead of scanning the
        # whole list of B for every file of A.
        names_in_B = set(only_files_B)

        # Keep the real path of each duplicate. Rebuilding it as
        # path_A + file name would point at the wrong place for files that
        # live in a subfolder of A.
        duplicate_paths = [
            path for path in filepaths_A
            if os.path.basename(path) in names_in_B
        ]

        self.print_files("Files in {}".format(self._path_A), only_files_A)
        self.print_files("Files in {}".format(self._path_B), only_files_B)

        if self._is_dry_run:
            self.print_files("These files would be deleted: ", duplicate_paths)
            return

        print("Deleting files:")
        for filepath in duplicate_paths:
            print(" {}".format(filepath))
            # os.remove(filepath)  # Use this line instead of the next if Send2Trash is not installed
            send2trash(filepath)
def run_cli(argv):
    """Run the duplicate deletion from a command-line argument list.

    *argv* has the shape of ``sys.argv``: the script name, path A, path B
    and optionally the literal flag ``--dryrun``.

    Returns 0 after the application has run and 2 when the arguments are
    invalid. Nothing is deleted unless the arguments are valid.
    """
    if len(argv) == 3:
        is_dry_run = False
    elif len(argv) == 4:
        if argv[3] != "--dryrun":
            sys.stderr.write("The 3rd argument must be '--dryrun' or nothing.\n")
            return 2
        is_dry_run = True
    else:
        # Refuse anything else: a stray extra argument must never fall
        # through into real-deletion mode.
        sys.stderr.write(
            "Usage: python DeleteDuplicateInFolderA.py <path_A> <path_B> [--dryrun]\n"
        )
        return 2

    app = DeleteDuplicateInFolderA(argv[1], argv[2], is_dry_run=is_dry_run)
    app.delete_duplicates_in_folder_A()
    return 0


if __name__ == "__main__":
    sys.exit(run_cli(sys.argv))
Uso
Modo de ejecución en seco, que muestra qué archivos se eliminarían sin eliminar realmente ningún archivo:
c:\temp> python .\DeleteDuplicateInFolderA.py c:\temp\test\A c:\temp\test\B --dryrun
Modo de eliminación de archivos, que de hecho elimina archivos, así que tenga cuidado:
c:\temp> python .\DeleteDuplicateInFolderA.py c:\temp\test\A c:\temp\test\B
Salida del modo de ejecución en seco
Files in C:\temp\A
1.txt
2.txt
Files in C:\temp\B
2.txt
3.txt
These files would be deleted:
C:\temp\A\2.txt
Salida del modo de eliminación de archivos
Files in C:\temp\A
1.txt
2.txt
Files in C:\temp\B
2.txt
3.txt
Deleting files:
C:\temp\A\2.txt
Pruebas unitarias
Si desea probar la aplicación anterior, cree un archivo llamado DeleteDuplicateInFolderATest.py
y pegue en él estas pruebas unitarias:
import os
import shutil
import tempfile
import unittest

from DeleteDuplicateInFolderA import DeleteDuplicateInFolderA
class DeleteDuplicateInFolderATest(unittest.TestCase):
    """Tests for DeleteDuplicateInFolderA using throw-away folders.

    Every test gets a fresh temporary base directory containing folder A
    (1.txt, 2.txt) and folder B (2.txt, 3.txt). Only that temporary
    directory is removed afterwards, so no existing user folder is touched.
    """

    def create_folder_and_create_some_files(self, path, filename_list):
        """(Re)create *path* and put an empty file in it for each name."""
        if os.path.exists(path):
            shutil.rmtree(path)
        os.makedirs(path)
        for filename in filename_list:
            with open(os.path.join(path, filename), "w+"):
                pass

    def setUp(self):
        # Create folders and files for testing inside a unique temp folder
        # instead of a fixed location that may hold real data.
        self._base_directory = tempfile.mkdtemp(prefix="DeleteDuplicateInFolderATest_")
        self._path_A = os.path.join(self._base_directory, "A")
        self._path_B = os.path.join(self._base_directory, "B")
        self.create_folder_and_create_some_files(self._path_A, ["1.txt", "2.txt"])
        self.create_folder_and_create_some_files(self._path_B, ["2.txt", "3.txt"])

    def tearDown(self):
        # Removing the base directory also removes folders A and B.
        shutil.rmtree(self._base_directory, ignore_errors=True)

    def test_duplicate_file_gets_deleted(self):
        # Arrange
        app = DeleteDuplicateInFolderA(self._path_A, self._path_B, is_dry_run=False)
        # Act
        app.delete_duplicates_in_folder_A()
        # Assert
        self.assertFalse(
            os.path.isfile(os.path.join(self._path_A, "2.txt")),
            "File 2.txt has not been deleted.",
        )

    def test_duplicate_file_gets_not_deleted_in_mode_dryrun(self):
        # Arrange
        app = DeleteDuplicateInFolderA(self._path_A, self._path_B, is_dry_run=True)
        # Act
        app.delete_duplicates_in_folder_A()
        # Assert
        self.assertTrue(
            os.path.isfile(os.path.join(self._path_A, "2.txt")),
            "File 2.txt should not have been deleted in mode '--dryrun'",
        )
def main():
    """Hand control to the unittest command-line runner for this module."""
    unittest.main()


# Run the tests only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()