From 73aa74c7f9691b45962439544eb417ad0b5102fc Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sat, 25 Nov 2017 19:02:08 +0100 Subject: [PATCH 1/7] Extend jpgSorter.py to allow sorting into years *and* months. --- jpgSorter.py | 53 ++++++++++++++++++++++++++++++---------------------- recovery.py | 2 +- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/jpgSorter.py b/jpgSorter.py index 467086f..b18f437 100644 --- a/jpgSorter.py +++ b/jpgSorter.py @@ -5,7 +5,7 @@ from time import localtime, strftime, strptime, mktime import shutil minEventDelta = 60 * 60 * 24 * 4 # 4 days in seconds -unknownDateFolderName = "Datum unbekannt" +unknownDateFolderName = "date-unknown" def getMinimumCreationTime(exif_data): creationTime = None @@ -51,25 +51,29 @@ def postprocessImage(images, imageDirectory, fileName): images.append((mktime(creationTime), imagePath)) image.close() +# Creates the requested path recursively. +def createPath(newPath): + if not os.path.exists(newPath): + os.makedirs(newPath) -def createNewFolder(destinationRoot, year, eventNumber): - yearPath = os.path.join(destinationRoot, year) - if not os.path.exists(yearPath): - os.mkdir(yearPath) - eventPath = os.path.join(yearPath, str(eventNumber)) - if not os.path.exists(eventPath): - os.mkdir(eventPath) +# Pass None for month to create 'year/eventNumber' directories instead of 'year/month/eventNumber'. 
+def createNewFolder(destinationRoot, year, month, eventNumber): + if month is not None: + newPath = os.path.join(destinationRoot, year, month, str(eventNumber)) + else: + newPath = os.path.join(destinationRoot, year, str(eventNumber)) + + createPath(newPath) def createUnknownDateFolder(destinationRoot): path = os.path.join(destinationRoot, unknownDateFolderName) - if not os.path.exists(path): - os.mkdir(path) + createPath(path) - -def writeImages(images, destinationRoot): +def writeImages(images, destinationRoot, splitByMonth=False): sortedImages = sorted(images) previousTime = None eventNumber = 0 + previousDestination = None today = strftime("%d/%m/%Y") for imageTuple in sortedImages: @@ -77,6 +81,7 @@ def writeImages(images, destinationRoot): destinationFilePath = "" t = localtime(imageTuple[0]) year = strftime("%Y", t) + month = splitByMonth and strftime("%m", t) or None creationDate = strftime("%d/%m/%Y", t) fileName = ntpath.basename(imageTuple[1]) @@ -87,18 +92,22 @@ def writeImages(images, destinationRoot): else: if (previousTime == None) or ((previousTime + minEventDelta) < imageTuple[0]): - previousTime = imageTuple[0] eventNumber = eventNumber + 1 - createNewFolder(destinationRoot, year, eventNumber) - + createNewFolder(destinationRoot, year, month, eventNumber) + previousTime = imageTuple[0] - destination = os.path.join(destinationRoot, year, str(eventNumber)) - # it may be possible that an event covers 2 years. - # in such a case put all the images to the even in the old year - if not (os.path.exists(destination)): - destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber)) + destComponents = [destinationRoot, year, month, str(eventNumber)] + destComponents = [v for v in destComponents if v is not None] + destination = os.path.join(*destComponents) + # it may be possible that an event covers 2 years. 
+ # in such a case put all the images to the event in the old year + if not (os.path.exists(destination)): + destination = previousDestination + # destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber)) + + previousDestination = destination destinationFilePath = os.path.join(destination, fileName) if not (os.path.exists(destinationFilePath)): @@ -108,10 +117,10 @@ def writeImages(images, destinationRoot): os.remove(imageTuple[1]) -def postprocessImages(imageDirectory): +def postprocessImages(imageDirectory, splitByMonth): images = [] for root, dirs, files in os.walk(imageDirectory): for file in files: postprocessImage(images, imageDirectory, file) - writeImages(images, imageDirectory) + writeImages(images, imageDirectory, splitByMonth) diff --git a/recovery.py b/recovery.py index 5064964..2f2944a 100644 --- a/recovery.py +++ b/recovery.py @@ -91,7 +91,7 @@ for root, dirs, files in os.walk(source, topdown=False): log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment") -jpgSorter.postprocessImages(os.path.join(destination, "JPG")) +jpgSorter.postprocessImages(os.path.join(destination, "JPG"), False) log("assure max file per folder number") numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder) From db3970d4a5475f0ad22cda35e18579ec44d4aa0e Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sat, 25 Nov 2017 19:13:18 +0100 Subject: [PATCH 2/7] Add .gitignore to ignore *.pyc. --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc From a1266c6aa2721dd8bf6f022b947d032d9889cc16 Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sat, 25 Nov 2017 19:21:49 +0100 Subject: [PATCH 3/7] Cleanup whitespace. 
--- jpgSorter.py | 6 +++--- numberOfFilesPerFolderLimiter.py | 4 ++-- recovery.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/jpgSorter.py b/jpgSorter.py index b18f437..8957cf4 100644 --- a/jpgSorter.py +++ b/jpgSorter.py @@ -18,7 +18,7 @@ def getMinimumCreationTime(exif_data): if (dateTimeOriginal is None): # case 1/9: dateTime, dateTimeOriginal, and dateTimeDigitized = None # case 2/9: dateTime and dateTimeOriginal = None, then use dateTimeDigitized - creationTime = dateTimeDigitized + creationTime = dateTimeDigitized else: # case 3/9: dateTime and dateTimeDigitized = None, then use dateTimeOriginal # case 4/9: dateTime = None, prefere dateTimeOriginal over dateTimeDigitized @@ -33,7 +33,7 @@ def postprocessImage(images, imageDirectory, fileName): imagePath = os.path.join(imageDirectory, fileName) image = open(imagePath, 'rb') creationTime = None - try: + try: exifTags = exifread.process_file(image, details=False) creationTime = getMinimumCreationTime(exifTags) except: @@ -89,7 +89,7 @@ def writeImages(images, destinationRoot, splitByMonth=False): createUnknownDateFolder(destinationRoot) destination = os.path.join(destinationRoot, unknownDateFolderName) destinationFilePath = os.path.join(destination, fileName) - + else: if (previousTime == None) or ((previousTime + minEventDelta) < imageTuple[0]): eventNumber = eventNumber + 1 diff --git a/numberOfFilesPerFolderLimiter.py b/numberOfFilesPerFolderLimiter.py index fdfd644..f226bf0 100644 --- a/numberOfFilesPerFolderLimiter.py +++ b/numberOfFilesPerFolderLimiter.py @@ -16,12 +16,12 @@ def limitFilesPerFolder(folder, maxNumberOfFilesPerFolder): if not os.path.exists(subFolderPath): os.mkdir(subFolderPath) fileCounter = 1 - for file in os.listdir(dirPath): + for file in os.listdir(dirPath): source = os.path.join(dirPath, file) if os.path.isfile(source): destDir = str(((fileCounter - 1) // maxNumberOfFilesPerFolder) + 1) destination = os.path.join(dirPath, destDir, file) 
shutil.move(source, destination) fileCounter += 1 - + diff --git a/recovery.py b/recovery.py index 2f2944a..c926933 100644 --- a/recovery.py +++ b/recovery.py @@ -30,12 +30,12 @@ def log(logString): def moveFile(file, destination): extension = os.path.splitext(file)[1][1:].upper() sourcePath = os.path.join(root, file) - + destinationDirectory = os.path.join(destination, extension) if not os.path.exists(destinationDirectory): os.mkdir(destinationDirectory) - + fileName = str(fileCounter) + "." + extension.lower() destinationFile = os.path.join(destinationDirectory, fileName) if not os.path.exists(destinationFile): @@ -75,12 +75,12 @@ for root, dirs, files in os.walk(source, topdown=False): for file in files: extension = os.path.splitext(file)[1][1:].upper() sourcePath = os.path.join(root, file) - + destinationDirectory = os.path.join(destination, extension) if not os.path.exists(destinationDirectory): os.mkdir(destinationDirectory) - + fileName = str(fileCounter) + "." + extension.lower() destinationFile = os.path.join(destinationDirectory, fileName) if not os.path.exists(destinationFile): From dc9b46234ce2df11aa9b4976305e1ebefb57328d Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sat, 25 Nov 2017 19:24:30 +0100 Subject: [PATCH 4/7] Make indentation style consistent (all spaces now). 
--- numberOfFilesPerFolderLimiter.py | 36 ++++++++--------- recovery.py | 66 ++++++++++++++++---------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/numberOfFilesPerFolderLimiter.py b/numberOfFilesPerFolderLimiter.py index f226bf0..fc60d2a 100644 --- a/numberOfFilesPerFolderLimiter.py +++ b/numberOfFilesPerFolderLimiter.py @@ -5,23 +5,23 @@ import shutil def limitFilesPerFolder(folder, maxNumberOfFilesPerFolder): - for root, dirs, files in os.walk(folder, topdown=False): - for dir in dirs: - dirPath = os.path.join(root, dir) - filesInFolder = len(os.listdir(dirPath)) - if(filesInFolder > maxNumberOfFilesPerFolder): - numberOfSubfolders = ((filesInFolder - 1) // maxNumberOfFilesPerFolder) + 1 - for subFolderNumber in range(1, numberOfSubfolders+1): - subFolderPath = os.path.join(dirPath, str(subFolderNumber)) - if not os.path.exists(subFolderPath): - os.mkdir(subFolderPath) - fileCounter = 1 - for file in os.listdir(dirPath): - source = os.path.join(dirPath, file) - if os.path.isfile(source): - destDir = str(((fileCounter - 1) // maxNumberOfFilesPerFolder) + 1) - destination = os.path.join(dirPath, destDir, file) - shutil.move(source, destination) - fileCounter += 1 + for root, dirs, files in os.walk(folder, topdown=False): + for dir in dirs: + dirPath = os.path.join(root, dir) + filesInFolder = len(os.listdir(dirPath)) + if(filesInFolder > maxNumberOfFilesPerFolder): + numberOfSubfolders = ((filesInFolder - 1) // maxNumberOfFilesPerFolder) + 1 + for subFolderNumber in range(1, numberOfSubfolders+1): + subFolderPath = os.path.join(dirPath, str(subFolderNumber)) + if not os.path.exists(subFolderPath): + os.mkdir(subFolderPath) + fileCounter = 1 + for file in os.listdir(dirPath): + source = os.path.join(dirPath, file) + if os.path.isfile(source): + destDir = str(((fileCounter - 1) // maxNumberOfFilesPerFolder) + 1) + destination = os.path.join(dirPath, destDir, file) + shutil.move(source, destination) + fileCounter += 1 diff --git 
a/recovery.py b/recovery.py index c926933..b283203 100644 --- a/recovery.py +++ b/recovery.py @@ -15,31 +15,31 @@ def getNumberOfFilesInFolderRecursively(start_path = '.'): for f in filenames: fp = os.path.join(dirpath, f) if(os.path.isfile(fp)): - numberOfFiles += 1 + numberOfFiles += 1 return numberOfFiles def getNumberOfFilesInFolder(path): - return len(os.listdir(path)) + return len(os.listdir(path)) def log(logString): - print(strftime("%H:%M:%S", localtime()) + ": " + logString) + print(strftime("%H:%M:%S", localtime()) + ": " + logString) def moveFile(file, destination): - extension = os.path.splitext(file)[1][1:].upper() - sourcePath = os.path.join(root, file) + extension = os.path.splitext(file)[1][1:].upper() + sourcePath = os.path.join(root, file) - destinationDirectory = os.path.join(destination, extension) + destinationDirectory = os.path.join(destination, extension) - if not os.path.exists(destinationDirectory): - os.mkdir(destinationDirectory) + if not os.path.exists(destinationDirectory): + os.mkdir(destinationDirectory) - fileName = str(fileCounter) + "." + extension.lower() - destinationFile = os.path.join(destinationDirectory, fileName) - if not os.path.exists(destinationFile): - shutil.copy(sourcePath, destinationFile) + fileName = str(fileCounter) + "." 
+ extension.lower() + destinationFile = os.path.join(destinationDirectory, fileName) + if not os.path.exists(destinationFile): + shutil.copy(sourcePath, destinationFile) @@ -48,20 +48,20 @@ source = None destination = None if(len(sys.argv) < 3): - print("Enter source and destination: python sort.py source/path destination/path") + print("Enter source and destination: python sort.py source/path destination/path") else: - source = sys.argv[1] - print("Source directory: " + source) - destination = sys.argv[2] - print("Destination directory: " + destination) + source = sys.argv[1] + print("Source directory: " + source) + destination = sys.argv[2] + print("Destination directory: " + destination) if(len(sys.argv) > 3): - maxNumberOfFilesPerFolder = int(sys.argv[3]) + maxNumberOfFilesPerFolder = int(sys.argv[3]) while ((source is None) or (not os.path.exists(source))): - source = input('Enter a valid source directory\n') + source = input('Enter a valid source directory\n') while ((destination is None) or (not os.path.exists(destination))): - destination = input('Enter a valid destination directory\n') + destination = input('Enter a valid destination directory\n') fileNumber = getNumberOfFilesInFolderRecursively(source) onePercentFiles = int(fileNumber/100) @@ -72,23 +72,23 @@ print("Files to copy: " + totalAmountToCopy) fileCounter = 0 for root, dirs, files in os.walk(source, topdown=False): - for file in files: - extension = os.path.splitext(file)[1][1:].upper() - sourcePath = os.path.join(root, file) + for file in files: + extension = os.path.splitext(file)[1][1:].upper() + sourcePath = os.path.join(root, file) - destinationDirectory = os.path.join(destination, extension) + destinationDirectory = os.path.join(destination, extension) - if not os.path.exists(destinationDirectory): - os.mkdir(destinationDirectory) + if not os.path.exists(destinationDirectory): + os.mkdir(destinationDirectory) - fileName = str(fileCounter) + "." 
+ extension.lower() - destinationFile = os.path.join(destinationDirectory, fileName) - if not os.path.exists(destinationFile): - shutil.copy2(sourcePath, destinationFile) + fileName = str(fileCounter) + "." + extension.lower() + destinationFile = os.path.join(destinationDirectory, fileName) + if not os.path.exists(destinationFile): + shutil.copy2(sourcePath, destinationFile) - fileCounter += 1 - if((fileCounter % onePercentFiles) is 0): - log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") + fileCounter += 1 + if((fileCounter % onePercentFiles) is 0): + log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment") jpgSorter.postprocessImages(os.path.join(destination, "JPG"), False) From a69f5585cadf0777a59ed9e07de4b021e02e574b Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sun, 26 Nov 2017 12:23:12 +0100 Subject: [PATCH 5/7] Use argparse to process commandline arguments and add argument to split JPG files by month. --- recovery.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/recovery.py b/recovery.py index b283203..68ef428 100644 --- a/recovery.py +++ b/recovery.py @@ -42,21 +42,40 @@ def moveFile(file, destination): shutil.copy(sourcePath, destinationFile) +def get_args(): + import argparse + + description = ( + "Sort files recoverd by Photorec.\n" + "The input files are first copied to the destination, sorted by file type.\n" + "Then JPG files are sorted based on creation year (and optionally month).\n" + "Finally any directories containing more than a maximum number of files are accordingly split into separate directories." 
+ ) + + parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('source', metavar='src', type=str, help='source directory with files recovered by Photorec') + parser.add_argument('destination', metavar='dest', type=str, help='destination directory to write sorted files to') + parser.add_argument('-n', '--max-per-dir', type=int, default=500, required=False, help='maximum number of files per directory') + parser.add_argument('-m', '--split-months', action='store_true', required=False, help='split JPEG files not only by year but by month as well') + + return parser.parse_args() + + maxNumberOfFilesPerFolder = 500 +splitMonths = False source = None destination = None -if(len(sys.argv) < 3): - print("Enter source and destination: python sort.py source/path destination/path") -else: - source = sys.argv[1] - print("Source directory: " + source) - destination = sys.argv[2] - print("Destination directory: " + destination) -if(len(sys.argv) > 3): - maxNumberOfFilesPerFolder = int(sys.argv[3]) +args = get_args() +source = args.source +destination = args.destination +maxNumberOfFilesPerFolder = args.max_per_dir +splitMonths = args.split_months + +print("Reading from source '%s', writing to destination '%s' (max %i files per directory, splitting by year %s)." 
% + (source, destination, maxNumberOfFilesPerFolder, splitMonths and "and month" or "only")) while ((source is None) or (not os.path.exists(source))): source = input('Enter a valid source directory\n') @@ -91,7 +110,7 @@ for root, dirs, files in os.walk(source, topdown=False): log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment") -jpgSorter.postprocessImages(os.path.join(destination, "JPG"), False) +jpgSorter.postprocessImages(os.path.join(destination, "JPG"), splitMonths) log("assure max file per folder number") numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder) From 3e636e39f9a9239fce1bcf79c20db6844d6d893b Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sun, 26 Nov 2017 12:26:47 +0100 Subject: [PATCH 6/7] Fix hashbang. --- recovery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 recovery.py diff --git a/recovery.py b/recovery.py old mode 100644 new mode 100755 index 68ef428..be6a363 --- a/recovery.py +++ b/recovery.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python import os import os.path import sys From f09b88f35c813884e095c6a1520aed46aa3c8896 Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sun, 26 Nov 2017 12:33:57 +0100 Subject: [PATCH 7/7] Update readme.md. --- readme.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/readme.md b/readme.md index cabe074..1639082 100644 --- a/readme.md +++ b/readme.md @@ -1,8 +1,8 @@ # Sort files recoverd by Photorec -Photorec does a great job when recovering deleted files. But the result is a huge, unsorted, unnamed amount of files. Especially for external hard drives serving as backup of all the personal data, sortig them is an endless job. +Photorec does a great job when recovering deleted files. But the result is a huge, unsorted, unnamed amount of files. 
Especially for external hard drives serving as backup of all the personal data, sorting them is an endless job. -This program helps you sorting your files. First of all, the **files are copied to own folders for each file type**. Second, **jpgs are distinguished by the year** when they have been taken **and by the event**. We thereby define an event as a time span during them photos are taken. It has a delta of 4 days without a photo to another event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually. +This program helps you sort your files. First of all, the **files are copied to own folders for each file type**. Second, **jpgs are distinguished by the year, and optionally by month as well** when they have been taken **and by the event**. We thereby define an event as a time span during which photos are taken. It has a delta of 4 days without a photo to another event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually. ## Usage @@ -15,9 +15,11 @@ The first output of the programm is the number of files to copy. To count them m All directories contain maximum 500 files. If one contains more, numbered subdirectories are created. If you want another file-limit, e.g. 1000, just put that number as third parameter to the execution of the programm: -```python recovery.py 1000``` +```python recovery.py -n1000``` + +For an overview of all arguments, run with the `-h` option: ```python recovery.py -h```. ## Adjust event distance -For the case you want to reduce or increase the timespan between events, simply adjust the variable ```minEventDelta``` in ```jpgHelper.py```. This variable contains the delta between events in seconds. +For the case you want to reduce or increase the timespan between events, simply adjust the variable ```minEventDelta``` in ```jpgSorter.py```. This variable contains the delta between events in seconds.