diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6f44278 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +.vscode \ No newline at end of file diff --git a/jpgSorter.py b/jpgSorter.py index 467086f..679dd05 100644 --- a/jpgSorter.py +++ b/jpgSorter.py @@ -1,11 +1,10 @@ import os.path import ntpath -import exifread from time import localtime, strftime, strptime, mktime import shutil +import exifread -minEventDelta = 60 * 60 * 24 * 4 # 4 days in seconds -unknownDateFolderName = "Datum unbekannt" +unknownDateFolderName = "date-unknown" def getMinimumCreationTime(exif_data): creationTime = None @@ -18,7 +17,7 @@ def getMinimumCreationTime(exif_data): if (dateTimeOriginal is None): # case 1/9: dateTime, dateTimeOriginal, and dateTimeDigitized = None # case 2/9: dateTime and dateTimeOriginal = None, then use dateTimeDigitized - creationTime = dateTimeDigitized + creationTime = dateTimeDigitized else: # case 3/9: dateTime and dateTimeDigitized = None, then use dateTimeOriginal # case 4/9: dateTime = None, prefere dateTimeOriginal over dateTimeDigitized @@ -33,7 +32,7 @@ def postprocessImage(images, imageDirectory, fileName): imagePath = os.path.join(imageDirectory, fileName) image = open(imagePath, 'rb') creationTime = None - try: + try: exifTags = exifread.process_file(image, details=False) creationTime = getMinimumCreationTime(exifTags) except: @@ -51,25 +50,30 @@ def postprocessImage(images, imageDirectory, fileName): images.append((mktime(creationTime), imagePath)) image.close() +# Creates the requested path recursively. 
def createPath(newPath):
    """Create the directory ``newPath`` together with any missing parents.

    Calling it for a directory that already exists is a no-op.

    Args:
        newPath: Directory path to create.

    Raises:
        FileExistsError: If ``newPath`` exists but is not a directory.
    """
    # exist_ok avoids the race between an exists() check and makedirs(), and
    # still fails loudly when a regular file occupies the requested path
    # instead of letting a later copy into that "directory" blow up.
    os.makedirs(newPath, exist_ok=True)


def createNewFolder(destinationRoot, year, month, eventNumber):
    """Create the folder that receives the images of one event.

    Args:
        destinationRoot: Root directory of the sorted images.
        year: Year of the event as a string, e.g. ``"2015"``.
        month: Month of the event as a string, or ``None`` to create
            ``year/eventNumber`` instead of ``year/month/eventNumber``.
        eventNumber: Number of the event; converted with ``str()`` to form
            the innermost directory name.
    """
    components = [destinationRoot, year]
    if month is not None:
        components.append(month)
    components.append(str(eventNumber))
    createPath(os.path.join(*components))


def createUnknownDateFolder(destinationRoot):
    """Create the folder for images without a usable creation date.

    The folder name comes from the module-level ``unknownDateFolderName``.

    Args:
        destinationRoot: Root directory of the sorted images.
    """
    createPath(os.path.join(destinationRoot, unknownDateFolderName))
def limitFilesPerFolder(folder, maxNumberOfFilesPerFolder):
    """Split over-full directories below ``folder`` into numbered subfolders.

    Every directory found beneath ``folder`` (``folder`` itself is not
    examined) that directly contains more than ``maxNumberOfFilesPerFolder``
    regular files gets subfolders named ``1``, ``2``, ... and its files are
    moved into them, at most ``maxNumberOfFilesPerFolder`` files each.

    Only regular files count towards the limit. Sub-directories are neither
    counted nor moved, so a directory is never split just because it has
    many child directories, and no empty numbered folders are created.
    Files are distributed in sorted name order so repeated runs on the same
    input give the same layout.

    Args:
        folder: Root directory whose sub-directories are checked.
        maxNumberOfFilesPerFolder: Maximum number of files allowed directly
            inside one directory; must be at least 1.

    Raises:
        ValueError: If ``maxNumberOfFilesPerFolder`` is smaller than 1.
    """
    if maxNumberOfFilesPerFolder < 1:
        raise ValueError("maxNumberOfFilesPerFolder must be at least 1")

    # Bottom-up walk: the numbered subfolders created in a directory are
    # never visited themselves, because that directory was already walked.
    for root, dirs, _files in os.walk(folder, topdown=False):
        for dirName in dirs:
            dirPath = os.path.join(root, dirName)
            fileNames = sorted(
                name for name in os.listdir(dirPath)
                if os.path.isfile(os.path.join(dirPath, name))
            )
            if len(fileNames) <= maxNumberOfFilesPerFolder:
                continue

            for index, name in enumerate(fileNames):
                subFolderPath = os.path.join(
                    dirPath, str(index // maxNumberOfFilesPerFolder + 1))
                if index % maxNumberOfFilesPerFolder == 0:
                    # First file of a new chunk: make sure its folder exists.
                    os.makedirs(subFolderPath, exist_ok=True)
                shutil.move(os.path.join(dirPath, name),
                            os.path.join(subFolderPath, name))
Second, **JPGs** are then further sorted **by the year they were taken** and **by the event as part of which they were taken**. An event is defined as a 4-day time span in which photos were taken, though this can be changed - see *"Adjust event distance"*. If no date from the past can be detected, these JPGs are put into one folder to be sorted manually. +This program, sPRF, helps you sort your files. First of all, the **files are copied into a separate folder for each file type**. Second, **jpgs are sorted by the year, and optionally by the month,** in which they were taken **and by the event**. An event is a time span during which photos were taken; two events are separated by at least 4 days (by default) without a photo. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually. ## Origin and Credits This is a fork of tfrdidi's [much-improved fork](https://github.com/tfrdidi/sort-PhotorecRecoveredFiles) of Chris Masterson's [sort-PhotorecRecoveredFiles](https://github.com/ChrisMagnuson/sort-PhotorecRecoveredFiles). The code of this version is unchanged from tifrdidi's version, and was created purely for the purpose of enabling issue tracking and logging the issues I've come across during usage in the hope that someone more adept than me at Python either fix them or contribute to them. The bulk of this readme was also written by tfrdidi, and I improved the English slightly for the purpose of making it clearer. -## Usage +## Installation -```python recovery.py ``` +First install the package [exifread](https://pypi.python.org/pypi/ExifRead): + +`pip install exifread` + +## Run the sorter + +Then run the sorter: + +`python recovery.py source/path destination/path` This copies the recovered files to their file type folder in the destination directory. The recovered files are not modified. If a file already exists in the destination directory, it is skipped. 
This means that the program can be interrupted with Ctrl+C and then continued at a later point by running it again. -The first output of the programm is the number of files to copy. Counting them may take anything from a few minutes to a few hours depending on the amount of recovered files. After that, the program will output feedback every ~2000 processed files. +The first output of the program is the number of files to copy. Counting them might take some minutes depending on the amount of recovered files. Afterwards you get some feedback on the processed files. + +### Parameters + +For an overview of all arguments, run with the `-h` option: `python recovery.py -h`. + +#### Max number of files per folder All directories contain a maximum of 500 files by default. If there are more for a file type, numbered subdirectories are created. If you want another file-limit, e.g. 1000, pass that number as the third parameter when running the program: -```python recovery.py 1000``` +`python recovery.py source/path destination/path -n1000` -## Adjust event distance +#### Folder for each month -By default, an event is defined as a 4-day time span in which photos have been taken. If you want to reduce or increase this, simply adjust the variable ```minEventDelta``` in ```jpgHelper.py```. This variable contains the delta between events in seconds. +sPRF usually sorts your photos by year: + +``` +destination +|- 2015 + |- 1.jpg + |- 2.jpg + |- ... +|- 2016 + |- ... +``` + +Sometimes you might want to sort each year by month: + +`python recovery.py source/path destination/path -m` + +Now you get: + +``` +destination +|- 2015 + |- 1 + |- 1.jpg + |- 2.jpg + |- 2 + |- 3.jpg + |- 4.jpg + |- ... +|- 2016 + |- ... +``` + +#### Keep original filenames + +Use the -k parameter to keep the original filenames: + +`python recovery.py source/path destination/path -k` + +#### Adjust event distance + +If you want to reduce or increase the timespan between events, simply use the parameter -d. 
The default is 4: +`python recovery.py -d10` diff --git a/recovery.py b/recovery.py old mode 100644 new mode 100755 index 5064964..459e663 --- a/recovery.py +++ b/recovery.py @@ -1,12 +1,10 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python import os import os.path -import sys -import jpgSorter, numberOfFilesPerFolderLimiter -import shutil from time import localtime, strftime -import math -import multiprocessing as mp +import shutil +import jpgSorter +import numberOfFilesPerFolderLimiter def getNumberOfFilesInFolderRecursively(start_path = '.'): @@ -15,53 +13,81 @@ def getNumberOfFilesInFolderRecursively(start_path = '.'): for f in filenames: fp = os.path.join(dirpath, f) if(os.path.isfile(fp)): - numberOfFiles += 1 + numberOfFiles += 1 return numberOfFiles def getNumberOfFilesInFolder(path): - return len(os.listdir(path)) + return len(os.listdir(path)) def log(logString): - print(strftime("%H:%M:%S", localtime()) + ": " + logString) + print(strftime("%H:%M:%S", localtime()) + ": " + logString) def moveFile(file, destination): - extension = os.path.splitext(file)[1][1:].upper() - sourcePath = os.path.join(root, file) - - destinationDirectory = os.path.join(destination, extension) + extension = os.path.splitext(file)[1][1:].upper() + sourcePath = os.path.join(root, file) - if not os.path.exists(destinationDirectory): - os.mkdir(destinationDirectory) - - fileName = str(fileCounter) + "." + extension.lower() - destinationFile = os.path.join(destinationDirectory, fileName) - if not os.path.exists(destinationFile): - shutil.copy(sourcePath, destinationFile) + destinationDirectory = os.path.join(destination, extension) + + if not os.path.exists(destinationDirectory): + os.mkdir(destinationDirectory) + + fileName = str(fileCounter) + "." 
def get_args(argv=None):
    """Parse the command line of the Photorec sorter.

    Args:
        argv: Optional list of argument strings, without the program name.
            ``None`` (the default) lets argparse read ``sys.argv[1:]``, so
            existing ``get_args()`` calls behave as before.

    Returns:
        argparse.Namespace with the attributes ``source``, ``destination``,
        ``max_per_dir`` (int, default 500), ``split_months`` (bool),
        ``keep_filename`` (bool) and ``min_event_delta`` (int days,
        default 4).

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``-h``.
    """
    import argparse

    def positive_int(value):
        # A limit below 1 cannot be honoured when splitting directories, so
        # reject it here with a usage error instead of failing mid-run.
        number = int(value)
        if number < 1:
            raise argparse.ArgumentTypeError(
                "must be a positive integer, got %r" % (value,))
        return number

    description = (
        "Sort files recovered by Photorec.\n"
        "The input files are first copied to the destination, sorted by file type.\n"
        "Then JPG files are sorted based on creation year (and optionally month).\n"
        "Finally any directories containing more than a maximum number of files are accordingly split into separate directories."
    )

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('source', metavar='src', type=str,
                        help='source directory with files recovered by Photorec')
    parser.add_argument('destination', metavar='dest', type=str,
                        help='destination directory to write sorted files to')
    parser.add_argument('-n', '--max-per-dir', type=positive_int, default=500,
                        required=False,
                        help='maximum number of files per directory')
    parser.add_argument('-m', '--split-months', action='store_true',
                        required=False,
                        help='split JPEG files not only by year but by month as well')
    # '--keep_filename' stays for backward compatibility; the hyphenated
    # spelling matches the style of the other long options.
    parser.add_argument('-k', '--keep_filename', '--keep-filename',
                        dest='keep_filename', action='store_true',
                        required=False,
                        help='keeps the original filenames when copying')
    parser.add_argument('-d', '--min-event-delta', type=int, default=4,
                        required=False,
                        help='minimum delta in days between two events')

    return parser.parse_args(argv)
files per directory, splitting by year %s)." % + (source, destination, maxNumberOfFilesPerFolder, splitMonths and "and month" or "only")) +if keepFilename: + print("I will keep you filenames as they are") else: - source = sys.argv[1] - print("Source directory: " + source) - destination = sys.argv[2] - print("Destination directory: " + destination) - -if(len(sys.argv) > 3): - maxNumberOfFilesPerFolder = int(sys.argv[3]) + print("I will rename your files like '1.jpg'") while ((source is None) or (not os.path.exists(source))): - source = input('Enter a valid source directory\n') + source = input('Enter a valid source directory\n') while ((destination is None) or (not os.path.exists(destination))): - destination = input('Enter a valid destination directory\n') + destination = input('Enter a valid destination directory\n') fileNumber = getNumberOfFilesInFolderRecursively(source) onePercentFiles = int(fileNumber/100) @@ -72,26 +98,29 @@ print("Files to copy: " + totalAmountToCopy) fileCounter = 0 for root, dirs, files in os.walk(source, topdown=False): - for file in files: - extension = os.path.splitext(file)[1][1:].upper() - sourcePath = os.path.join(root, file) - - destinationDirectory = os.path.join(destination, extension) + for file in files: + extension = os.path.splitext(file)[1][1:].upper() + sourcePath = os.path.join(root, file) - if not os.path.exists(destinationDirectory): - os.mkdir(destinationDirectory) - - fileName = str(fileCounter) + "." + extension.lower() - destinationFile = os.path.join(destinationDirectory, fileName) - if not os.path.exists(destinationFile): - shutil.copy2(sourcePath, destinationFile) + destinationDirectory = os.path.join(destination, extension) - fileCounter += 1 - if((fileCounter % onePercentFiles) is 0): - log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") + if not os.path.exists(destinationDirectory): + os.mkdir(destinationDirectory) + if keepFilename: + fileName = file + else: + fileName = str(fileCounter) + "." 
+ extension.lower() + + destinationFile = os.path.join(destinationDirectory, fileName) + if not os.path.exists(destinationFile): + shutil.copy2(sourcePath, destinationFile) + + fileCounter += 1 + if((fileCounter % onePercentFiles) == 0): + log(str(fileCounter) + " / " + totalAmountToCopy + " processed.") log("start special file treatment") -jpgSorter.postprocessImages(os.path.join(destination, "JPG")) +jpgSorter.postprocessImages(os.path.join(destination, "JPG"), minEventDeltaDays, splitMonths) log("assure max file per folder number") numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder)