From 73aa74c7f9691b45962439544eb417ad0b5102fc Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sat, 25 Nov 2017 19:02:08 +0100 Subject: [PATCH 01/14] Extend jpgSorter.py to allow sorting into years *and* months. --- jpgSorter.py | 53 ++++++++++++++++++++++++++++++---------------------- recovery.py | 2 +- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/jpgSorter.py b/jpgSorter.py index 467086f..b18f437 100644 --- a/jpgSorter.py +++ b/jpgSorter.py @@ -5,7 +5,7 @@ from time import localtime, strftime, strptime, mktime import shutil minEventDelta = 60 * 60 * 24 * 4 # 4 days in seconds -unknownDateFolderName = "Datum unbekannt" +unknownDateFolderName = "date-unknown" def getMinimumCreationTime(exif_data): creationTime = None @@ -51,25 +51,29 @@ def postprocessImage(images, imageDirectory, fileName): images.append((mktime(creationTime), imagePath)) image.close() +# Creates the requested path recursively. +def createPath(newPath): + if not os.path.exists(newPath): + os.makedirs(newPath) -def createNewFolder(destinationRoot, year, eventNumber): - yearPath = os.path.join(destinationRoot, year) - if not os.path.exists(yearPath): - os.mkdir(yearPath) - eventPath = os.path.join(yearPath, str(eventNumber)) - if not os.path.exists(eventPath): - os.mkdir(eventPath) +# Pass None for month to create 'year/eventNumber' directories instead of 'year/month/eventNumber'. +def createNewFolder(destinationRoot, year, month, eventNumber): + if month is not None: + newPath = os.path.join(destinationRoot, year, month, str(eventNumber)) + else: + newPath = os.path.join(destinationRoot, year, str(eventNumber)) + + createPath(newPath) def createUnknownDateFolder(destinationRoot): path = os.path.join(destinationRoot, unknownDateFolderName) - if not os.path.exists(path): - os.mkdir(path) + createPath(path) - -def writeImages(images, destinationRoot): +def writeImages(images, destinationRoot, splitByMonth=False): sortedImages = sorted(images) previousTime = None eventNumber = 0 + previousDestination = None today = strftime("%d/%m/%Y") for imageTuple in sortedImages: @@ -77,6 +81,7 @@ def writeImages(images, destinationRoot): destinationFilePath = "" t = localtime(imageTuple[0]) year = strftime("%Y", t) + month = splitByMonth and strftime("%m", t) or None creationDate = strftime("%d/%m/%Y", t) fileName = ntpath.basename(imageTuple[1]) @@ -87,18 +92,22 @@ def writeImages(images, destinationRoot): else: if (previousTime == None) or ((previousTime + minEventDelta) < imageTuple[0]): - previousTime = imageTuple[0] eventNumber = eventNumber + 1 - createNewFolder(destinationRoot, year, eventNumber) - + createNewFolder(destinationRoot, year, month, eventNumber) + previousTime = imageTuple[0] - destination = os.path.join(destinationRoot, year, str(eventNumber)) - # it may be possible that an event covers 2 years. - # in such a case put all the images to the even in the old year - if not (os.path.exists(destination)): - destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber)) + destComponents = [destinationRoot, year, month, str(eventNumber)] + destComponents = [v for v in destComponents if v is not None] + destination = os.path.join(*destComponents) + # it may be possible that an event covers 2 years. + # in such a case put all the images to the event in the old year + if not (os.path.exists(destination)): + destination = previousDestination + # destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber)) + + previousDestination = destination destinationFilePath = os.path.join(destination, fileName) if not (os.path.exists(destinationFilePath)): @@ -108,10 +117,10 @@ def writeImages(images, destinationRoot): os.remove(imageTuple[1]) -def postprocessImages(imageDirectory): +def postprocessImages(imageDirectory, splitByMonth): images = [] for root, dirs, files in os.walk(imageDirectory): for file in files: postprocessImage(images, imageDirectory, file) - writeImages(images, imageDirectory) + writeImages(images, imageDirectory, splitByMonth) diff --git a/recovery.py b/recovery.py index 5064964..2f2944a 100644 --- a/recovery.py +++ b/recovery.py @@ -91,7 +91,7 @@ for root, dirs, files in os.walk(source, topdown=False): log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment") -jpgSorter.postprocessImages(os.path.join(destination, "JPG")) +jpgSorter.postprocessImages(os.path.join(destination, "JPG"), False) log("assure max file per folder number") numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder) From db3970d4a5475f0ad22cda35e18579ec44d4aa0e Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sat, 25 Nov 2017 19:13:18 +0100 Subject: [PATCH 02/14] Add .gitignore to ignore *.pyc. --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc From a1266c6aa2721dd8bf6f022b947d032d9889cc16 Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sat, 25 Nov 2017 19:21:49 +0100 Subject: [PATCH 03/14] Cleanup whitespace. --- jpgSorter.py | 6 +++--- numberOfFilesPerFolderLimiter.py | 4 ++-- recovery.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/jpgSorter.py b/jpgSorter.py index b18f437..8957cf4 100644 --- a/jpgSorter.py +++ b/jpgSorter.py @@ -18,7 +18,7 @@ def getMinimumCreationTime(exif_data): if (dateTimeOriginal is None): # case 1/9: dateTime, dateTimeOriginal, and dateTimeDigitized = None # case 2/9: dateTime and dateTimeOriginal = None, then use dateTimeDigitized - creationTime = dateTimeDigitized + creationTime = dateTimeDigitized else: # case 3/9: dateTime and dateTimeDigitized = None, then use dateTimeOriginal # case 4/9: dateTime = None, prefere dateTimeOriginal over dateTimeDigitized @@ -33,7 +33,7 @@ def postprocessImage(images, imageDirectory, fileName): imagePath = os.path.join(imageDirectory, fileName) image = open(imagePath, 'rb') creationTime = None - try: + try: exifTags = exifread.process_file(image, details=False) creationTime = getMinimumCreationTime(exifTags) except: @@ -89,7 +89,7 @@ def writeImages(images, destinationRoot, splitByMonth=False): createUnknownDateFolder(destinationRoot) destination = os.path.join(destinationRoot, unknownDateFolderName) destinationFilePath = os.path.join(destination, fileName) - + else: if (previousTime == None) or ((previousTime + minEventDelta) < imageTuple[0]): eventNumber = eventNumber + 1 diff --git a/numberOfFilesPerFolderLimiter.py b/numberOfFilesPerFolderLimiter.py index fdfd644..f226bf0 100644 --- a/numberOfFilesPerFolderLimiter.py +++ b/numberOfFilesPerFolderLimiter.py @@ -16,12 +16,12 @@ def limitFilesPerFolder(folder, maxNumberOfFilesPerFolder): if not os.path.exists(subFolderPath): os.mkdir(subFolderPath) fileCounter = 1 - for file in os.listdir(dirPath): + for file in os.listdir(dirPath): source = os.path.join(dirPath, file) if os.path.isfile(source): destDir = str(((fileCounter - 1) // maxNumberOfFilesPerFolder) + 1) destination = os.path.join(dirPath, destDir, file) shutil.move(source, destination) fileCounter += 1 - + diff --git a/recovery.py b/recovery.py index 2f2944a..c926933 100644 --- a/recovery.py +++ b/recovery.py @@ -30,12 +30,12 @@ def log(logString): def moveFile(file, destination): extension = os.path.splitext(file)[1][1:].upper() sourcePath = os.path.join(root, file) - + destinationDirectory = os.path.join(destination, extension) if not os.path.exists(destinationDirectory): os.mkdir(destinationDirectory) - + fileName = str(fileCounter) + "." + extension.lower() destinationFile = os.path.join(destinationDirectory, fileName) if not os.path.exists(destinationFile): @@ -75,12 +75,12 @@ for root, dirs, files in os.walk(source, topdown=False): for file in files: extension = os.path.splitext(file)[1][1:].upper() sourcePath = os.path.join(root, file) - + destinationDirectory = os.path.join(destination, extension) if not os.path.exists(destinationDirectory): os.mkdir(destinationDirectory) - + fileName = str(fileCounter) + "." + extension.lower() destinationFile = os.path.join(destinationDirectory, fileName) if not os.path.exists(destinationFile): From dc9b46234ce2df11aa9b4976305e1ebefb57328d Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sat, 25 Nov 2017 19:24:30 +0100 Subject: [PATCH 04/14] Make indentation style consistent (all spaces now). --- numberOfFilesPerFolderLimiter.py | 36 ++++++++--------- recovery.py | 66 ++++++++++++++++---------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/numberOfFilesPerFolderLimiter.py b/numberOfFilesPerFolderLimiter.py index f226bf0..fc60d2a 100644 --- a/numberOfFilesPerFolderLimiter.py +++ b/numberOfFilesPerFolderLimiter.py @@ -5,23 +5,23 @@ import shutil def limitFilesPerFolder(folder, maxNumberOfFilesPerFolder): - for root, dirs, files in os.walk(folder, topdown=False): - for dir in dirs: - dirPath = os.path.join(root, dir) - filesInFolder = len(os.listdir(dirPath)) - if(filesInFolder > maxNumberOfFilesPerFolder): - numberOfSubfolders = ((filesInFolder - 1) // maxNumberOfFilesPerFolder) + 1 - for subFolderNumber in range(1, numberOfSubfolders+1): - subFolderPath = os.path.join(dirPath, str(subFolderNumber)) - if not os.path.exists(subFolderPath): - os.mkdir(subFolderPath) - fileCounter = 1 - for file in os.listdir(dirPath): - source = os.path.join(dirPath, file) - if os.path.isfile(source): - destDir = str(((fileCounter - 1) // maxNumberOfFilesPerFolder) + 1) - destination = os.path.join(dirPath, destDir, file) - shutil.move(source, destination) - fileCounter += 1 + for root, dirs, files in os.walk(folder, topdown=False): + for dir in dirs: + dirPath = os.path.join(root, dir) + filesInFolder = len(os.listdir(dirPath)) + if(filesInFolder > maxNumberOfFilesPerFolder): + numberOfSubfolders = ((filesInFolder - 1) // maxNumberOfFilesPerFolder) + 1 + for subFolderNumber in range(1, numberOfSubfolders+1): + subFolderPath = os.path.join(dirPath, str(subFolderNumber)) + if not os.path.exists(subFolderPath): + os.mkdir(subFolderPath) + fileCounter = 1 + for file in os.listdir(dirPath): + source = os.path.join(dirPath, file) + if os.path.isfile(source): + destDir = str(((fileCounter - 1) // maxNumberOfFilesPerFolder) + 1) + destination = os.path.join(dirPath, destDir, file) + shutil.move(source, destination) + fileCounter += 1 diff --git a/recovery.py b/recovery.py index c926933..b283203 100644 --- a/recovery.py +++ b/recovery.py @@ -15,31 +15,31 @@ def getNumberOfFilesInFolderRecursively(start_path = '.'): for f in filenames: fp = os.path.join(dirpath, f) if(os.path.isfile(fp)): - numberOfFiles += 1 + numberOfFiles += 1 return numberOfFiles def getNumberOfFilesInFolder(path): - return len(os.listdir(path)) + return len(os.listdir(path)) def log(logString): - print(strftime("%H:%M:%S", localtime()) + ": " + logString) + print(strftime("%H:%M:%S", localtime()) + ": " + logString) def moveFile(file, destination): - extension = os.path.splitext(file)[1][1:].upper() - sourcePath = os.path.join(root, file) + extension = os.path.splitext(file)[1][1:].upper() + sourcePath = os.path.join(root, file) - destinationDirectory = os.path.join(destination, extension) + destinationDirectory = os.path.join(destination, extension) - if not os.path.exists(destinationDirectory): - os.mkdir(destinationDirectory) + if not os.path.exists(destinationDirectory): + os.mkdir(destinationDirectory) - fileName = str(fileCounter) + "." + extension.lower() - destinationFile = os.path.join(destinationDirectory, fileName) - if not os.path.exists(destinationFile): - shutil.copy(sourcePath, destinationFile) + fileName = str(fileCounter) + "." + extension.lower() + destinationFile = os.path.join(destinationDirectory, fileName) + if not os.path.exists(destinationFile): + shutil.copy(sourcePath, destinationFile) @@ -48,20 +48,20 @@ source = None destination = None if(len(sys.argv) < 3): - print("Enter source and destination: python sort.py source/path destination/path") + print("Enter source and destination: python sort.py source/path destination/path") else: - source = sys.argv[1] - print("Source directory: " + source) - destination = sys.argv[2] - print("Destination directory: " + destination) + source = sys.argv[1] + print("Source directory: " + source) + destination = sys.argv[2] + print("Destination directory: " + destination) if(len(sys.argv) > 3): - maxNumberOfFilesPerFolder = int(sys.argv[3]) + maxNumberOfFilesPerFolder = int(sys.argv[3]) while ((source is None) or (not os.path.exists(source))): - source = input('Enter a valid source directory\n') + source = input('Enter a valid source directory\n') while ((destination is None) or (not os.path.exists(destination))): - destination = input('Enter a valid destination directory\n') + destination = input('Enter a valid destination directory\n') fileNumber = getNumberOfFilesInFolderRecursively(source) onePercentFiles = int(fileNumber/100) @@ -72,23 +72,23 @@ print("Files to copy: " + totalAmountToCopy) fileCounter = 0 for root, dirs, files in os.walk(source, topdown=False): - for file in files: - extension = os.path.splitext(file)[1][1:].upper() - sourcePath = os.path.join(root, file) + for file in files: + extension = os.path.splitext(file)[1][1:].upper() + sourcePath = os.path.join(root, file) - destinationDirectory = os.path.join(destination, extension) + destinationDirectory = os.path.join(destination, extension) - if not os.path.exists(destinationDirectory): - os.mkdir(destinationDirectory) + if not os.path.exists(destinationDirectory): + os.mkdir(destinationDirectory) - fileName = str(fileCounter) + "." + extension.lower() - destinationFile = os.path.join(destinationDirectory, fileName) - if not os.path.exists(destinationFile): - shutil.copy2(sourcePath, destinationFile) + fileName = str(fileCounter) + "." + extension.lower() + destinationFile = os.path.join(destinationDirectory, fileName) + if not os.path.exists(destinationFile): + shutil.copy2(sourcePath, destinationFile) - fileCounter += 1 - if((fileCounter % onePercentFiles) is 0): - log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") + fileCounter += 1 + if((fileCounter % onePercentFiles) is 0): + log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment") jpgSorter.postprocessImages(os.path.join(destination, "JPG"), False) From a69f5585cadf0777a59ed9e07de4b021e02e574b Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sun, 26 Nov 2017 12:23:12 +0100 Subject: [PATCH 05/14] Use argparse to process commandline arguments and add argument to split JPG files by month. --- recovery.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/recovery.py b/recovery.py index b283203..68ef428 100644 --- a/recovery.py +++ b/recovery.py @@ -42,21 +42,40 @@ def moveFile(file, destination): shutil.copy(sourcePath, destinationFile) +def get_args(): + import argparse + + description = ( + "Sort files recoverd by Photorec.\n" + "The input files are first copied to the destination, sorted by file type.\n" + "Then JPG files are sorted based on creation year (and optionally month).\n" + "Finally any directories containing more than a maximum number of files are accordingly split into separate directories." + ) + + parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('source', metavar='src', type=str, help='source directory with files recovered by Photorec') + parser.add_argument('destination', metavar='dest', type=str, help='destination directory to write sorted files to') + parser.add_argument('-n', '--max-per-dir', type=int, default=500, required=False, help='maximum number of files per directory') + parser.add_argument('-m', '--split-months', action='store_true', required=False, help='split JPEG files not only by year but by month as well') + + return parser.parse_args() + + maxNumberOfFilesPerFolder = 500 +splitMonths = False source = None destination = None -if(len(sys.argv) < 3): - print("Enter source and destination: python sort.py source/path destination/path") -else: - source = sys.argv[1] - print("Source directory: " + source) - destination = sys.argv[2] - print("Destination directory: " + destination) -if(len(sys.argv) > 3): - maxNumberOfFilesPerFolder = int(sys.argv[3]) +args = get_args() +source = args.source +destination = args.destination +maxNumberOfFilesPerFolder = args.max_per_dir +splitMonths = args.split_months + +print("Reading from source '%s', writing to destination '%s' (max %i files per directory, splitting by year %s)." % + (source, destination, maxNumberOfFilesPerFolder, splitMonths and "and month" or "only")) while ((source is None) or (not os.path.exists(source))): source = input('Enter a valid source directory\n') @@ -91,7 +110,7 @@ for root, dirs, files in os.walk(source, topdown=False): log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment") -jpgSorter.postprocessImages(os.path.join(destination, "JPG"), False) +jpgSorter.postprocessImages(os.path.join(destination, "JPG"), splitMonths) log("assure max file per folder number") numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder) From 3e636e39f9a9239fce1bcf79c20db6844d6d893b Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sun, 26 Nov 2017 12:26:47 +0100 Subject: [PATCH 06/14] Fix hashbang. --- recovery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 recovery.py diff --git a/recovery.py b/recovery.py old mode 100644 new mode 100755 index 68ef428..be6a363 --- a/recovery.py +++ b/recovery.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python import os import os.path import sys From f09b88f35c813884e095c6a1520aed46aa3c8896 Mon Sep 17 00:00:00 2001 From: Wouter R Date: Sun, 26 Nov 2017 12:33:57 +0100 Subject: [PATCH 07/14] Update readme.md. --- readme.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/readme.md b/readme.md index cabe074..1639082 100644 --- a/readme.md +++ b/readme.md @@ -1,8 +1,8 @@ # Sort files recoverd by Photorec -Photorec does a great job when recovering deleted files. But the result is a huge, unsorted, unnamed amount of files. Especially for external hard drives serving as backup of all the personal data, sortig them is an endless job. +Photorec does a great job when recovering deleted files. But the result is a huge, unsorted, unnamed amount of files. Especially for external hard drives serving as backup of all the personal data, sorting them is an endless job. -This program helps you sorting your files. First of all, the **files are copied to own folders for each file type**. Second, **jpgs are distinguished by the year** when they have been taken **and by the event**. We thereby define an event as a time span during them photos are taken. It has a delta of 4 days without a photo to another event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually. +This program helps you sorting your files. First of all, the **files are copied to own folders for each file type**. Second, **jpgs are distinguished by the year, and optionally by month as well** when they have been taken **and by the event**. We thereby define an event as a time span during them photos are taken. It has a delta of 4 days without a photo to another event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually. ## Usage @@ -15,9 +15,11 @@ The first output of the programm is the number of files to copy. To count them m All directories contain maximum 500 files. If one contains more, numbered subdirectories are created. If you want another file-limit, e.g. 1000, just put that number as third parameter to the execution of the programm: -```python recovery.py 1000``` +```python recovery.py -n1000``` + +For an overview of all arguments, run with the `-h` option: ```python recovery.py -h```. ## Adjust event distance -For the case you want to reduce or increase the timespan between events, simply adjust the variable ```minEventDelta``` in ```jpgHelper.py```. This variable contains the delta between events in seconds. +For the case you want to reduce or increase the timespan between events, simply adjust the variable ```minEventDelta``` in ```jpgHelper.py```. This variable contains the delta between events in seconds. From 5f5668236c9ff8ec9d28e4e2812edc61bffb8c0b Mon Sep 17 00:00:00 2001 From: Lukas Hahmann Date: Thu, 30 Nov 2017 21:43:14 +0100 Subject: [PATCH 08/14] Added info to install exifread --- readme.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/readme.md b/readme.md index 1639082..8176944 100644 --- a/readme.md +++ b/readme.md @@ -7,6 +7,12 @@ This program helps you sorting your files. First of all, the **files are copied ## Usage +First install the package [exifread](https://pypi.python.org/pypi/ExifRead): + +```pip install exifread``` + +Then run the sorter: + ```python recovery.py ``` This copies the recovered file to their file type folder in the destination directory. The recovered files are not modified. If a file already exists in the destination directory, it is skipped. Hence you can interrupt the process with Ctrl+C and continue afterwards. From 284d32900b2e2743727cd2dc03b96f545146f318 Mon Sep 17 00:00:00 2001 From: Lukas Date: Thu, 30 Nov 2017 22:20:36 +0100 Subject: [PATCH 09/14] added parameter "keep filename" --- .gitignore | 1 + recovery.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0d20b64..6f44278 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.pyc +.vscode \ No newline at end of file diff --git a/recovery.py b/recovery.py index be6a363..8bd7182 100755 --- a/recovery.py +++ b/recovery.py @@ -57,6 +57,7 @@ def get_args(): parser.add_argument('destination', metavar='dest', type=str, help='destination directory to write sorted files to') parser.add_argument('-n', '--max-per-dir', type=int, default=500, required=False, help='maximum number of files per directory') parser.add_argument('-m', '--split-months', action='store_true', required=False, help='split JPEG files not only by year but by month as well') + parser.add_argument('-k', '--keep_filename', action='store_true', required=False, help='keeps the original filenames when copying') return parser.parse_args() @@ -66,6 +67,7 @@ maxNumberOfFilesPerFolder = 500 splitMonths = False source = None destination = None +keepFilename = False args = get_args() @@ -73,9 +75,14 @@ source = args.source destination = args.destination maxNumberOfFilesPerFolder = args.max_per_dir splitMonths = args.split_months +keepFilename = args.keep_filename print("Reading from source '%s', writing to destination '%s' (max %i files per directory, splitting by year %s)." % (source, destination, maxNumberOfFilesPerFolder, splitMonths and "and month" or "only")) +if keepFilename: + print("I will keep you filenames as they are") +else: + print("I will rename your files like '1.jpg'") while ((source is None) or (not os.path.exists(source))): source = input('Enter a valid source directory\n') @@ -99,8 +106,11 @@ for root, dirs, files in os.walk(source, topdown=False): if not os.path.exists(destinationDirectory): os.mkdir(destinationDirectory) + if keepFilename: + fileName = file + else: + fileName = str(fileCounter) + "." + extension.lower() - fileName = str(fileCounter) + "." + extension.lower() destinationFile = os.path.join(destinationDirectory, fileName) if not os.path.exists(destinationFile): shutil.copy2(sourcePath, destinationFile) From 6ee334b0e8540d4f8651ce0495f85ce81c7fdeae Mon Sep 17 00:00:00 2001 From: Lukas Date: Thu, 30 Nov 2017 22:37:56 +0100 Subject: [PATCH 10/14] ordered imports --- jpgSorter.py | 2 +- recovery.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/jpgSorter.py b/jpgSorter.py index 8957cf4..3ef84d2 100644 --- a/jpgSorter.py +++ b/jpgSorter.py @@ -1,8 +1,8 @@ import os.path import ntpath -import exifread from time import localtime, strftime, strptime, mktime import shutil +import exifread minEventDelta = 60 * 60 * 24 * 4 # 4 days in seconds unknownDateFolderName = "date-unknown" diff --git a/recovery.py b/recovery.py index 8bd7182..293fade 100755 --- a/recovery.py +++ b/recovery.py @@ -1,14 +1,15 @@ #!/usr/bin/env python import os import os.path -import sys -import jpgSorter, numberOfFilesPerFolderLimiter import shutil -from time import localtime, strftime import math -import multiprocessing as mp +import multiprocessing as mpfrom time +import localtime, strftime +import jpgSorter +import numberOfFilesPerFolderLimiter + def getNumberOfFilesInFolderRecursively(start_path = '.'): numberOfFiles = 0 for dirpath, dirnames, filenames in os.walk(start_path): From 26c334f654860b2e00de8dec32d1ee1aecf23be3 Mon Sep 17 00:00:00 2001 From: Lukas Date: Thu, 30 Nov 2017 22:39:17 +0100 Subject: [PATCH 11/14] ordered imports #2 --- recovery.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/recovery.py b/recovery.py index 293fade..d4672af 100755 --- a/recovery.py +++ b/recovery.py @@ -1,15 +1,12 @@ #!/usr/bin/env python import os import os.path +from time import localtime, strftime import shutil -import math -import multiprocessing as mpfrom time -import localtime, strftime - - import jpgSorter import numberOfFilesPerFolderLimiter + def getNumberOfFilesInFolderRecursively(start_path = '.'): numberOfFiles = 0 for dirpath, dirnames, filenames in os.walk(start_path): From caf9b84a2697e40821842496b234dec4ec1ef567 Mon Sep 17 00:00:00 2001 From: Lukas Date: Thu, 30 Nov 2017 22:48:57 +0100 Subject: [PATCH 12/14] min event delta is configurable via parameter now =) --- jpgSorter.py | 8 ++++---- recovery.py | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/jpgSorter.py b/jpgSorter.py index 3ef84d2..679dd05 100644 --- a/jpgSorter.py +++ b/jpgSorter.py @@ -4,7 +4,6 @@ from time import localtime, strftime, strptime, mktime import shutil import exifread -minEventDelta = 60 * 60 * 24 * 4 # 4 days in seconds unknownDateFolderName = "date-unknown" def getMinimumCreationTime(exif_data): @@ -69,7 +68,8 @@ def createUnknownDateFolder(destinationRoot): path = os.path.join(destinationRoot, unknownDateFolderName) createPath(path) -def writeImages(images, destinationRoot, splitByMonth=False): +def writeImages(images, destinationRoot, minEventDeltaDays, splitByMonth=False): + minEventDelta = minEventDeltaDays * 60 * 60 * 24 # convert in seconds sortedImages = sorted(images) previousTime = None eventNumber = 0 @@ -117,10 +117,10 @@ def writeImages(images, destinationRoot, splitByMonth=False): os.remove(imageTuple[1]) -def postprocessImages(imageDirectory, splitByMonth): +def postprocessImages(imageDirectory, minEventDeltaDays, splitByMonth): images = [] for root, dirs, files in os.walk(imageDirectory): for file in files: postprocessImage(images, imageDirectory, file) - writeImages(images, imageDirectory, splitByMonth) + writeImages(images, imageDirectory, minEventDeltaDays, splitByMonth) diff --git a/recovery.py b/recovery.py index d4672af..bf88598 100755 --- a/recovery.py +++ b/recovery.py @@ -56,6 +56,7 @@ def get_args(): parser.add_argument('-n', '--max-per-dir', type=int, default=500, required=False, help='maximum number of files per directory') parser.add_argument('-m', '--split-months', action='store_true', required=False, help='split JPEG files not only by year but by month as well') parser.add_argument('-k', '--keep_filename', action='store_true', required=False, help='keeps the original filenames when copying') + parser.add_argument('-d', '--min-event-delta', type=int, default=4, required=False, help='minimum delta in days between two days') return parser.parse_args() @@ -74,6 +75,7 @@ destination = args.destination maxNumberOfFilesPerFolder = args.max_per_dir splitMonths = args.split_months keepFilename = args.keep_filename +minEventDeltaDays = args.min_event_delta print("Reading from source '%s', writing to destination '%s' (max %i files per directory, splitting by year %s)." % (source, destination, maxNumberOfFilesPerFolder, splitMonths and "and month" or "only")) @@ -118,7 +120,7 @@ for root, dirs, files in os.walk(source, topdown=False): log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment") -jpgSorter.postprocessImages(os.path.join(destination, "JPG"), splitMonths) +jpgSorter.postprocessImages(os.path.join(destination, "JPG"), minEventDeltaDays, splitMonths) log("assure max file per folder number") numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder) From 4f214f84ada90ddc5c38cf1e7e311e7f35899391 Mon Sep 17 00:00:00 2001 From: Lukas Hahmann Date: Thu, 30 Nov 2017 22:49:59 +0100 Subject: [PATCH 13/14] Update readme.md --- readme.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/readme.md b/readme.md index 8176944..871728a 100644 --- a/readme.md +++ b/readme.md @@ -2,30 +2,77 @@ Photorec does a great job when recovering deleted files. But the result is a huge, unsorted, unnamed amount of files. Especially for external hard drives serving as backup of all the personal data, sorting them is an endless job. -This program helps you sorting your files. First of all, the **files are copied to own folders for each file type**. Second, **jpgs are distinguished by the year, and optionally by month as well** when they have been taken **and by the event**. We thereby define an event as a time span during them photos are taken. It has a delta of 4 days without a photo to another event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually. +This program sPRF helps you sorting your files. First of all, the **files are copied to own folders for each file type**. Second, **jpgs are distinguished by the year, and optionally by month as well** when they have been taken **and by the event**. We thereby define an event as a time span during them photos are taken. It has a delta of 4 days without a photo to another event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually. -## Usage +## Installation First install the package [exifread](https://pypi.python.org/pypi/ExifRead): ```pip install exifread``` +## Run the sorter + Then run the sorter: ```python recovery.py ``` This copies the recovered file to their file type folder in the destination directory. The recovered files are not modified. If a file already exists in the destination directory, it is skipped. Hence you can interrupt the process with Ctrl+C and continue afterwards. -The first output of the programm is the number of files to copy. To count them might take some minutes depending on the amount of recovered files. Afterwareds you get some feedback every ~2000 processed files. +The first output of the programm is the number of files to copy. To count them might take some minutes depending on the amount of recovered files. Afterwareds you get some feedback on the processed files. + +### Parameters + +For an overview of all arguments, run with the `-h` option: ```python recovery.py -h```. + +#### Max numbers of files per folder All directories contain maximum 500 files. If one contains more, numbered subdirectories are created. If you want another file-limit, e.g. 1000, just put that number as third parameter to the execution of the programm: ```python recovery.py -n1000``` -For an overview of all arguments, run with the `-h` option: ```python recovery.py -h```. +#### Folder for each month + +sPRF usually sorts your photos by year: + +``` +destination +|- 2015 + |- 1.jpg + |- 2.jpg + |- ... +|- 2016 + |- ... +``` + +Sometimes you might want to sort each year by month: + +```python recovery.py -m``` + +Now you get: + +``` +destination +|- 2015 + |- 1 + |- 1.jpg + |- 2.jpg + |- 2 + |- 3.jpg + |- 4.jpg + |- ... +|- 2016 + |- ... +``` + +#### Keep original filenames + +Use the -k parameter to keep the original filenames: + +```python recovery.py -k``` -## Adjust event distance +#### Adjust event distance -For the case you want to reduce or increase the timespan between events, simply adjust the variable ```minEventDelta``` in ```jpgHelper.py```. This variable contains the delta between events in seconds. +For the case you want to reduce or increase the timespan between events, simply use the parameter -d. The default is 4: +```python recovery.py -d10``` From 033e5ac4a0dda5a9e7c91efc46c412efb55a22e0 Mon Sep 17 00:00:00 2001 From: Anson Date: Sat, 26 Jun 2021 20:00:38 +0800 Subject: [PATCH 14/14] Fix SyntaxWarning in recovery.py Fixes: recovery.py:119: SyntaxWarning: "is" with a literal. Did you mean "=="? if((fileCounter % onePercentFiles) is 0): --- recovery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recovery.py b/recovery.py index bf88598..459e663 100755 --- a/recovery.py +++ b/recovery.py @@ -116,7 +116,7 @@ for root, dirs, files in os.walk(source, topdown=False): shutil.copy2(sourcePath, destinationFile) fileCounter += 1 - if((fileCounter % onePercentFiles) is 0): + if((fileCounter % onePercentFiles) == 0): log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment")