Merge branch 'master' of https://github.com/tfrdidi/sort-PhotorecRecoveredFiles into tfrdidi-master

This commit is contained in:
Hashim 2022-04-04 21:28:44 +01:00
commit 442640b43b
5 changed files with 196 additions and 101 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*.pyc
.vscode

View File

@ -1,11 +1,10 @@
import os.path
import ntpath
import exifread
from time import localtime, strftime, strptime, mktime
import shutil
import exifread
minEventDelta = 60 * 60 * 24 * 4 # 4 days in seconds
unknownDateFolderName = "Datum unbekannt"
unknownDateFolderName = "date-unknown"
def getMinimumCreationTime(exif_data):
creationTime = None
@ -18,7 +17,7 @@ def getMinimumCreationTime(exif_data):
if (dateTimeOriginal is None):
# case 1/9: dateTime, dateTimeOriginal, and dateTimeDigitized = None
# case 2/9: dateTime and dateTimeOriginal = None, then use dateTimeDigitized
creationTime = dateTimeDigitized
creationTime = dateTimeDigitized
else:
# case 3/9: dateTime and dateTimeDigitized = None, then use dateTimeOriginal
# case 4/9: dateTime = None, prefere dateTimeOriginal over dateTimeDigitized
@ -33,7 +32,7 @@ def postprocessImage(images, imageDirectory, fileName):
imagePath = os.path.join(imageDirectory, fileName)
image = open(imagePath, 'rb')
creationTime = None
try:
try:
exifTags = exifread.process_file(image, details=False)
creationTime = getMinimumCreationTime(exifTags)
except:
@ -51,25 +50,30 @@ def postprocessImage(images, imageDirectory, fileName):
images.append((mktime(creationTime), imagePath))
image.close()
# Creates the requested path recursively.
def createPath(newPath):
    """Create the directory newPath, including any missing parents.

    Nothing happens when something already exists at newPath.
    """
    if os.path.exists(newPath):
        return
    os.makedirs(newPath)
def createNewFolder(destinationRoot, year, eventNumber):
    """Ensure the directory 'destinationRoot/year/eventNumber' exists.

    The year level and the event level are created one after the other with
    os.mkdir, each only when it is not already present. destinationRoot
    itself is not created here.
    """
    currentPath = destinationRoot
    for level in (year, str(eventNumber)):
        currentPath = os.path.join(currentPath, level)
        if not os.path.exists(currentPath):
            os.mkdir(currentPath)
# Pass None for month to create 'year/eventNumber' directories instead of 'year/month/eventNumber'.
def createNewFolder(destinationRoot, year, month, eventNumber):
    """Create the event directory below destinationRoot.

    The month level is only part of the path when month is not None; the
    resulting path is handed to createPath.
    """
    pathParts = [destinationRoot, year]
    if month is not None:
        pathParts.append(month)
    pathParts.append(str(eventNumber))
    createPath(os.path.join(*pathParts))
def createUnknownDateFolder(destinationRoot):
    """Ensure the folder for images without a usable date exists.

    The folder is named by the module-level unknownDateFolderName and lives
    directly below destinationRoot.
    """
    # createPath already skips existing directories and creates missing
    # parents, so a separate exists()/os.mkdir() step beforehand is redundant
    # (and os.mkdir would raise if destinationRoot itself were missing).
    createPath(os.path.join(destinationRoot, unknownDateFolderName))
def writeImages(images, destinationRoot):
def writeImages(images, destinationRoot, minEventDeltaDays, splitByMonth=False):
minEventDelta = minEventDeltaDays * 60 * 60 * 24 # convert in seconds
sortedImages = sorted(images)
previousTime = None
eventNumber = 0
previousDestination = None
today = strftime("%d/%m/%Y")
for imageTuple in sortedImages:
@ -77,6 +81,7 @@ def writeImages(images, destinationRoot):
destinationFilePath = ""
t = localtime(imageTuple[0])
year = strftime("%Y", t)
month = splitByMonth and strftime("%m", t) or None
creationDate = strftime("%d/%m/%Y", t)
fileName = ntpath.basename(imageTuple[1])
@ -84,21 +89,25 @@ def writeImages(images, destinationRoot):
createUnknownDateFolder(destinationRoot)
destination = os.path.join(destinationRoot, unknownDateFolderName)
destinationFilePath = os.path.join(destination, fileName)
else:
if (previousTime == None) or ((previousTime + minEventDelta) < imageTuple[0]):
previousTime = imageTuple[0]
eventNumber = eventNumber + 1
createNewFolder(destinationRoot, year, eventNumber)
createNewFolder(destinationRoot, year, month, eventNumber)
previousTime = imageTuple[0]
destination = os.path.join(destinationRoot, year, str(eventNumber))
# it may be possible that an event covers 2 years.
# in such a case put all the images to the even in the old year
if not (os.path.exists(destination)):
destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber))
destComponents = [destinationRoot, year, month, str(eventNumber)]
destComponents = [v for v in destComponents if v is not None]
destination = os.path.join(*destComponents)
# it may be possible that an event covers 2 years.
# in such a case put all the images to the event in the old year
if not (os.path.exists(destination)):
destination = previousDestination
# destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber))
previousDestination = destination
destinationFilePath = os.path.join(destination, fileName)
if not (os.path.exists(destinationFilePath)):
@ -108,10 +117,10 @@ def writeImages(images, destinationRoot):
os.remove(imageTuple[1])
def postprocessImages(imageDirectory, minEventDeltaDays=4, splitByMonth=False):
    """Collect the images below imageDirectory and sort them into folders.

    imageDirectory: directory that is walked for image files and that also
        serves as the destination root handed to writeImages.
    minEventDeltaDays: gap in days that separates two events; defaults to 4
        so that callers passing only imageDirectory keep working.
    splitByMonth: forwarded to writeImages; defaults to False.
    """
    images = []
    for _root, _dirs, files in os.walk(imageDirectory):
        for fileName in files:
            # NOTE(review): the file name is joined with imageDirectory rather
            # than with the directory os.walk is currently visiting, so files
            # found in subdirectories resolve to a path that may not exist.
            # Left unchanged on purpose - confirm the intended behavior for
            # re-runs before switching to the walked directory.
            postprocessImage(images, imageDirectory, fileName)
    writeImages(images, imageDirectory, minEventDeltaDays, splitByMonth)

View File

@ -5,23 +5,23 @@ import shutil
def limitFilesPerFolder(folder, maxNumberOfFilesPerFolder):
    """Split over-full subdirectories of folder into numbered subfolders.

    The tree is walked bottom-up. Every subdirectory whose entry count (as
    reported by os.listdir, so directories count too) exceeds
    maxNumberOfFilesPerFolder gets subfolders named '1', '2', ...; its regular
    files are then moved into them in os.listdir order,
    maxNumberOfFilesPerFolder files per subfolder. folder itself is not split.
    """
    for parent, subDirNames, _ in os.walk(folder, topdown=False):
        for subDirName in subDirNames:
            currentDir = os.path.join(parent, subDirName)
            entryCount = len(os.listdir(currentDir))
            if entryCount <= maxNumberOfFilesPerFolder:
                continue

            # Ceiling division: number of buckets needed for all entries.
            bucketCount = ((entryCount - 1) // maxNumberOfFilesPerFolder) + 1
            for bucketNumber in range(1, bucketCount + 1):
                bucketPath = os.path.join(currentDir, str(bucketNumber))
                if not os.path.exists(bucketPath):
                    os.mkdir(bucketPath)

            movedSoFar = 0
            # Listed again after creating the buckets; the isfile() check
            # skips the freshly created bucket directories.
            for entry in os.listdir(currentDir):
                entryPath = os.path.join(currentDir, entry)
                if not os.path.isfile(entryPath):
                    continue
                bucketName = str((movedSoFar // maxNumberOfFilesPerFolder) + 1)
                shutil.move(entryPath, os.path.join(currentDir, bucketName, entry))
                movedSoFar += 1

View File

@ -1,25 +1,80 @@
# Sort files recovered by Photorec
Photorec does a great job when recovering deleted files, but the result is a huge, unsorted, unnamed amount of files. Particularly for external hard drives that serve as a backup of all your personal data, sorting them is a tedious job.
Photorec does a great job of recovering deleted files, but the result is a huge, unsorted, unnamed pile of files. Especially for external hard drives that serve as a backup of all your personal data, sorting them is an endless job.
This program helps you sort the files that Photorec recovers. First, the **files are sorted into folders by their file extensions**. Second, **JPGs** are then further sorted **by the year they were taken** and **by the event as part of which they were taken**. An event is defined as a 4-day time span in which photos were taken, though this can be changed - see *"Adjust event distance"*. If no date from the past can be detected, these JPGs are put into one folder to be sorted manually.
This program, sPRF, helps you sort your files. First of all, the **files are copied to a separate folder for each file type**. Second, **JPGs are grouped by the year, and optionally by the month as well,** in which they were taken, **and by event**. An event is defined as a time span during which photos were taken; a gap of 4 days without a photo starts a new event. If no date from the past can be detected, these JPGs are put into one folder to be sorted manually.
## Origin and Credits
This is a fork of tfrdidi's [much-improved fork](https://github.com/tfrdidi/sort-PhotorecRecoveredFiles) of Chris Magnuson's [sort-PhotorecRecoveredFiles](https://github.com/ChrisMagnuson/sort-PhotorecRecoveredFiles). The code of this version is unchanged from tfrdidi's version, and the fork was created purely to enable issue tracking and to log the issues I've come across during usage, in the hope that someone more adept at Python than me will either fix them or contribute to them. The bulk of this readme was also written by tfrdidi; I improved the English slightly to make it clearer.
## Usage
## Installation
```python recovery.py <path to files recovered by Photorec> <destination>```
First install the package [exifread](https://pypi.python.org/pypi/ExifRead):
`pip install exifread`
## Run the sorter
Then run the sorter:
`python recovery.py <path to files recovered by Photorec> <destination>`
This copies the recovered files to their file type folder in the destination directory. The recovered files are not modified. If a file already exists in the destination directory, it is skipped. This means that the program can be interrupted with Ctrl+C and then continued at a later point by running it again.
The first output of the program is the number of files to copy. Counting them may take anything from a few minutes to a few hours depending on the number of recovered files. After that, the program will output feedback every ~2000 processed files.
The first output of the program is the number of files to copy. Counting them might take a few minutes depending on the number of recovered files. Afterwards you get some feedback on the processed files.
### Parameters
For an overview of all arguments, run with the `-h` option: `python recovery.py -h`.
#### Max numbers of files per folder
All directories contain a maximum of 500 files by default. If there are more for a file type, numbered subdirectories are created. If you want another file-limit, e.g. 1000, pass that number as the third parameter when running the program:
```python recovery.py <path to files recovered by Photorec> <destination> 1000```
`python recovery.py <path to files recovered by Photorec> <destination> -n1000`
## Adjust event distance
#### Folder for each month
By default, an event is defined as a 4-day time span in which photos have been taken. If you want to reduce or increase this, simply adjust the variable ```minEventDelta``` in ```jpgHelper.py```. This variable contains the delta between events in seconds.
sPRF usually sorts your photos by year:
```
destination
|- 2015
|- 1.jpg
|- 2.jpg
|- ...
|- 2016
|- ...
```
Sometimes you might want to sort each year by month:
`python recovery.py <path to files recovered by Photorec> <destination> -m`
Now you get:
```
destination
|- 2015
|- 1
|- 1.jpg
|- 2.jpg
|- 2
|- 3.jpg
|- 4.jpg
|- ...
|- 2016
|- ...
```
#### Keep original filenames
Use the -k parameter to keep the original filenames:
`python recovery.py <path to files recovered by Photorec> <destination> -k`
#### Adjust event distance
If you want to reduce or increase the time span between events, simply use the parameter -d followed by the number of days. The default is 4:
`python recovery.py <path to files recovered by Photorec> <destination> -d10`

123
recovery.py Normal file → Executable file
View File

@ -1,12 +1,10 @@
#!/usr/bin/env python2
#!/usr/bin/env python
import os
import os.path
import sys
import jpgSorter, numberOfFilesPerFolderLimiter
import shutil
from time import localtime, strftime
import math
import multiprocessing as mp
import shutil
import jpgSorter
import numberOfFilesPerFolderLimiter
def getNumberOfFilesInFolderRecursively(start_path = '.'):
@ -15,53 +13,81 @@ def getNumberOfFilesInFolderRecursively(start_path = '.'):
for f in filenames:
fp = os.path.join(dirpath, f)
if(os.path.isfile(fp)):
numberOfFiles += 1
numberOfFiles += 1
return numberOfFiles
def getNumberOfFilesInFolder(path):
    """Return how many entries os.listdir reports directly inside path."""
    entries = os.listdir(path)
    return len(entries)
def log(logString):
    """Print logString prefixed with the current local time as HH:MM:SS."""
    timestamp = strftime("%H:%M:%S", localtime())
    print(timestamp + ": " + logString)
def moveFile(file, destination):
    """Copy file into the per-extension subfolder of destination.

    The target folder is named after the upper-cased file extension and the
    copy is named '<fileCounter>.<extension in lower case>'. An already
    existing target file is left untouched.
    """
    # NOTE: `root` and `fileCounter` are not parameters; they are looked up
    # as module-level names when this function is called.
    extension = os.path.splitext(file)[1][1:].upper()
    sourcePath = os.path.join(root, file)

    destinationDirectory = os.path.join(destination, extension)
    if not os.path.exists(destinationDirectory):
        os.mkdir(destinationDirectory)

    fileName = str(fileCounter) + "." + extension.lower()
    destinationFile = os.path.join(destinationDirectory, fileName)
    if os.path.exists(destinationFile):
        return
    shutil.copy(sourcePath, destinationFile)
def get_args(argv=None):
    """Parse the command-line arguments of the sorter.

    argv: optional list of argument strings; when None, argparse falls back
        to sys.argv[1:], which is what existing callers get.

    Returns an argparse.Namespace with the attributes source, destination,
    max_per_dir, split_months, keep_filename and min_event_delta.
    Invalid arguments make argparse print a usage message and raise
    SystemExit.
    """
    import argparse

    def positiveInt(text):
        # The per-folder limit is used as a divisor when folders are split,
        # so zero or negative values must be rejected up front.
        try:
            value = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid int value: %r" % text)
        if value < 1:
            raise argparse.ArgumentTypeError("must be at least 1, got %r" % text)
        return value

    description = (
        "Sort files recovered by Photorec.\n"
        "The input files are first copied to the destination, sorted by file type.\n"
        "Then JPG files are sorted based on creation year (and optionally month).\n"
        "Finally any directories containing more than a maximum number of files are accordingly split into separate directories."
    )

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('source', metavar='src', type=str,
                        help='source directory with files recovered by Photorec')
    parser.add_argument('destination', metavar='dest', type=str,
                        help='destination directory to write sorted files to')
    parser.add_argument('-n', '--max-per-dir', type=positiveInt, default=500,
                        help='maximum number of files per directory')
    parser.add_argument('-m', '--split-months', action='store_true',
                        help='split JPEG files not only by year but by month as well')
    # '--keep-filename' matches the dashed style of the other options; the
    # original '--keep_filename' spelling stays accepted. The destination
    # attribute is 'keep_filename' in both cases.
    parser.add_argument('-k', '--keep-filename', '--keep_filename', action='store_true',
                        help='keeps the original filenames when copying')
    parser.add_argument('-d', '--min-event-delta', type=int, default=4,
                        help='minimum gap in days between two events')

    return parser.parse_args(argv)
maxNumberOfFilesPerFolder = 500
splitMonths = False
source = None
destination = None
keepFilename = False
if(len(sys.argv) < 3):
print("Enter source and destination: python sort.py source/path destination/path")
args = get_args()
source = args.source
destination = args.destination
maxNumberOfFilesPerFolder = args.max_per_dir
splitMonths = args.split_months
keepFilename = args.keep_filename
minEventDeltaDays = args.min_event_delta
print("Reading from source '%s', writing to destination '%s' (max %i files per directory, splitting by year %s)." %
(source, destination, maxNumberOfFilesPerFolder, splitMonths and "and month" or "only"))
if keepFilename:
print("I will keep you filenames as they are")
else:
source = sys.argv[1]
print("Source directory: " + source)
destination = sys.argv[2]
print("Destination directory: " + destination)
if(len(sys.argv) > 3):
maxNumberOfFilesPerFolder = int(sys.argv[3])
print("I will rename your files like '1.jpg'")
while ((source is None) or (not os.path.exists(source))):
source = input('Enter a valid source directory\n')
source = input('Enter a valid source directory\n')
while ((destination is None) or (not os.path.exists(destination))):
destination = input('Enter a valid destination directory\n')
destination = input('Enter a valid destination directory\n')
fileNumber = getNumberOfFilesInFolderRecursively(source)
onePercentFiles = int(fileNumber/100)
@ -72,26 +98,29 @@ print("Files to copy: " + totalAmountToCopy)
fileCounter = 0
for root, dirs, files in os.walk(source, topdown=False):
for file in files:
extension = os.path.splitext(file)[1][1:].upper()
sourcePath = os.path.join(root, file)
destinationDirectory = os.path.join(destination, extension)
for file in files:
extension = os.path.splitext(file)[1][1:].upper()
sourcePath = os.path.join(root, file)
if not os.path.exists(destinationDirectory):
os.mkdir(destinationDirectory)
fileName = str(fileCounter) + "." + extension.lower()
destinationFile = os.path.join(destinationDirectory, fileName)
if not os.path.exists(destinationFile):
shutil.copy2(sourcePath, destinationFile)
destinationDirectory = os.path.join(destination, extension)
fileCounter += 1
if((fileCounter % onePercentFiles) is 0):
log(str(fileCounter) + " / " + totalAmountToCopy + " processed.")
if not os.path.exists(destinationDirectory):
os.mkdir(destinationDirectory)
if keepFilename:
fileName = file
else:
fileName = str(fileCounter) + "." + extension.lower()
destinationFile = os.path.join(destinationDirectory, fileName)
if not os.path.exists(destinationFile):
shutil.copy2(sourcePath, destinationFile)
fileCounter += 1
if((fileCounter % onePercentFiles) == 0):
log(str(fileCounter) + " / " + totalAmountToCopy + " processed.")
log("start special file treatment")
jpgSorter.postprocessImages(os.path.join(destination, "JPG"))
jpgSorter.postprocessImages(os.path.join(destination, "JPG"), minEventDeltaDays, splitMonths)
log("assure max file per folder number")
numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder)