Merge pull request #2 from woutgg/master

Add option to allow sorting jpg files into both year and month directories
This commit is contained in:
Lukas Hahmann 2017-11-30 21:31:34 +01:00 committed by GitHub
commit 42e00be703
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 121 additions and 90 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.pyc

View File

@ -5,7 +5,7 @@ from time import localtime, strftime, strptime, mktime
import shutil
minEventDelta = 60 * 60 * 24 * 4 # 4 days in seconds
unknownDateFolderName = "Datum unbekannt"
unknownDateFolderName = "date-unknown"
def getMinimumCreationTime(exif_data):
creationTime = None
@ -18,7 +18,7 @@ def getMinimumCreationTime(exif_data):
if (dateTimeOriginal is None):
# case 1/9: dateTime, dateTimeOriginal, and dateTimeDigitized = None
# case 2/9: dateTime and dateTimeOriginal = None, then use dateTimeDigitized
creationTime = dateTimeDigitized
creationTime = dateTimeDigitized
else:
# case 3/9: dateTime and dateTimeDigitized = None, then use dateTimeOriginal
# case 4/9: dateTime = None, prefere dateTimeOriginal over dateTimeDigitized
@ -33,7 +33,7 @@ def postprocessImage(images, imageDirectory, fileName):
imagePath = os.path.join(imageDirectory, fileName)
image = open(imagePath, 'rb')
creationTime = None
try:
try:
exifTags = exifread.process_file(image, details=False)
creationTime = getMinimumCreationTime(exifTags)
except:
@ -51,25 +51,29 @@ def postprocessImage(images, imageDirectory, fileName):
images.append((mktime(creationTime), imagePath))
image.close()
def createPath(newPath):
    """Create the directory newPath, including any missing parents.

    Does nothing when the path already exists.

    Args:
        newPath: directory path to create.

    Raises:
        OSError: if the directory could not be created and the path still
            does not exist afterwards (e.g. permission denied).
    """
    # Attempt the creation directly instead of checking os.path.exists()
    # first: a check-then-create sequence can fail if something else creates
    # the directory between the two calls.
    try:
        os.makedirs(newPath)
    except OSError:
        # An already existing path is the expected, harmless case; any other
        # failure leaves the path missing and is re-raised.
        if not os.path.exists(newPath):
            raise
def createNewFolder(destinationRoot, year, eventNumber):
yearPath = os.path.join(destinationRoot, year)
if not os.path.exists(yearPath):
os.mkdir(yearPath)
eventPath = os.path.join(yearPath, str(eventNumber))
if not os.path.exists(eventPath):
os.mkdir(eventPath)
def createNewFolder(destinationRoot, year, month, eventNumber):
    """Create the folder for one event below destinationRoot.

    The folder is 'year/month/eventNumber' when month is given and
    'year/eventNumber' when month is None.

    Args:
        destinationRoot: root directory the folder is created under.
        year: path component for the year.
        month: path component for the month, or None to omit that level.
        eventNumber: event counter; converted with str() for the last
            path component.
    """
    components = [destinationRoot, year]
    if month is not None:
        components.append(month)
    components.append(str(eventNumber))
    createPath(os.path.join(*components))
def createUnknownDateFolder(destinationRoot):
path = os.path.join(destinationRoot, unknownDateFolderName)
if not os.path.exists(path):
os.mkdir(path)
createPath(path)
def writeImages(images, destinationRoot):
def writeImages(images, destinationRoot, splitByMonth=False):
sortedImages = sorted(images)
previousTime = None
eventNumber = 0
previousDestination = None
today = strftime("%d/%m/%Y")
for imageTuple in sortedImages:
@ -77,6 +81,7 @@ def writeImages(images, destinationRoot):
destinationFilePath = ""
t = localtime(imageTuple[0])
year = strftime("%Y", t)
month = splitByMonth and strftime("%m", t) or None
creationDate = strftime("%d/%m/%Y", t)
fileName = ntpath.basename(imageTuple[1])
@ -84,21 +89,25 @@ def writeImages(images, destinationRoot):
createUnknownDateFolder(destinationRoot)
destination = os.path.join(destinationRoot, unknownDateFolderName)
destinationFilePath = os.path.join(destination, fileName)
else:
if (previousTime == None) or ((previousTime + minEventDelta) < imageTuple[0]):
previousTime = imageTuple[0]
eventNumber = eventNumber + 1
createNewFolder(destinationRoot, year, eventNumber)
createNewFolder(destinationRoot, year, month, eventNumber)
previousTime = imageTuple[0]
destination = os.path.join(destinationRoot, year, str(eventNumber))
# it may be possible that an event covers 2 years.
# in such a case put all the images to the even in the old year
if not (os.path.exists(destination)):
destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber))
destComponents = [destinationRoot, year, month, str(eventNumber)]
destComponents = [v for v in destComponents if v is not None]
destination = os.path.join(*destComponents)
# it may be possible that an event covers 2 years.
# in such a case put all the images to the event in the old year
if not (os.path.exists(destination)):
destination = previousDestination
# destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber))
previousDestination = destination
destinationFilePath = os.path.join(destination, fileName)
if not (os.path.exists(destinationFilePath)):
@ -108,10 +117,10 @@ def writeImages(images, destinationRoot):
os.remove(imageTuple[1])
def postprocessImages(imageDirectory):
def postprocessImages(imageDirectory, splitByMonth):
images = []
for root, dirs, files in os.walk(imageDirectory):
for file in files:
postprocessImage(images, imageDirectory, file)
writeImages(images, imageDirectory)
writeImages(images, imageDirectory, splitByMonth)

View File

@ -5,23 +5,23 @@ import shutil
def limitFilesPerFolder(folder, maxNumberOfFilesPerFolder):
for root, dirs, files in os.walk(folder, topdown=False):
for dir in dirs:
dirPath = os.path.join(root, dir)
filesInFolder = len(os.listdir(dirPath))
if(filesInFolder > maxNumberOfFilesPerFolder):
numberOfSubfolders = ((filesInFolder - 1) // maxNumberOfFilesPerFolder) + 1
for subFolderNumber in range(1, numberOfSubfolders+1):
subFolderPath = os.path.join(dirPath, str(subFolderNumber))
if not os.path.exists(subFolderPath):
os.mkdir(subFolderPath)
fileCounter = 1
for file in os.listdir(dirPath):
source = os.path.join(dirPath, file)
if os.path.isfile(source):
destDir = str(((fileCounter - 1) // maxNumberOfFilesPerFolder) + 1)
destination = os.path.join(dirPath, destDir, file)
shutil.move(source, destination)
fileCounter += 1
for root, dirs, files in os.walk(folder, topdown=False):
for dir in dirs:
dirPath = os.path.join(root, dir)
filesInFolder = len(os.listdir(dirPath))
if(filesInFolder > maxNumberOfFilesPerFolder):
numberOfSubfolders = ((filesInFolder - 1) // maxNumberOfFilesPerFolder) + 1
for subFolderNumber in range(1, numberOfSubfolders+1):
subFolderPath = os.path.join(dirPath, str(subFolderNumber))
if not os.path.exists(subFolderPath):
os.mkdir(subFolderPath)
fileCounter = 1
for file in os.listdir(dirPath):
source = os.path.join(dirPath, file)
if os.path.isfile(source):
destDir = str(((fileCounter - 1) // maxNumberOfFilesPerFolder) + 1)
destination = os.path.join(dirPath, destDir, file)
shutil.move(source, destination)
fileCounter += 1

View File

@ -1,8 +1,8 @@
# Sort files recovered by Photorec
Photorec does a great job when recovering deleted files. But the result is a huge, unsorted, unnamed amount of files. Especially for external hard drives serving as backup of all the personal data, sortig them is an endless job.
Photorec does a great job recovering deleted files. But the result is a huge number of unsorted, unnamed files. Especially for external hard drives serving as a backup of all your personal data, sorting them is an endless job.
This program helps you sorting your files. First of all, the **files are copied to own folders for each file type**. Second, **jpgs are distinguished by the year** when they have been taken **and by the event**. We thereby define an event as a time span during them photos are taken. It has a delta of 4 days without a photo to another event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually.
This program helps you sort your files. First, the **files are copied into separate folders for each file type**. Second, **jpgs are grouped by the year, and optionally by the month as well,** in which they were taken **and by the event**. An event is defined as a time span during which photos were taken; a gap of more than 4 days without a photo starts a new event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually.
## Usage
@ -15,9 +15,11 @@ The first output of the programm is the number of files to copy. To count them m
All directories contain at most 500 files. If one contains more, numbered subdirectories are created. If you want a different file limit, e.g. 1000, pass that number with the `-n` option when running the program:
```python recovery.py <path to files recovered by Photorec> <destination> 1000```
```python recovery.py <path to files recovered by Photorec> <destination> -n1000```
For an overview of all arguments, run with the `-h` option: ```python recovery.py -h```.
## Adjust event distance
For the case you want to reduce or increase the timespan between events, simply adjust the variable ```minEventDelta``` in ```jpgHelper.py```. This variable contains the delta between events in seconds.
If you want to reduce or increase the time span between events, simply adjust the variable ```minEventDelta``` in ```jpgSorter.py```. This variable contains the delta between events in seconds.

103
recovery.py Normal file → Executable file
View File

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python
import os
import os.path
import sys
@ -15,53 +15,72 @@ def getNumberOfFilesInFolderRecursively(start_path = '.'):
for f in filenames:
fp = os.path.join(dirpath, f)
if(os.path.isfile(fp)):
numberOfFiles += 1
numberOfFiles += 1
return numberOfFiles
def getNumberOfFilesInFolder(path):
return len(os.listdir(path))
return len(os.listdir(path))
def log(logString):
print(strftime("%H:%M:%S", localtime()) + ": " + logString)
print(strftime("%H:%M:%S", localtime()) + ": " + logString)
def moveFile(file, destination):
extension = os.path.splitext(file)[1][1:].upper()
sourcePath = os.path.join(root, file)
destinationDirectory = os.path.join(destination, extension)
extension = os.path.splitext(file)[1][1:].upper()
sourcePath = os.path.join(root, file)
if not os.path.exists(destinationDirectory):
os.mkdir(destinationDirectory)
fileName = str(fileCounter) + "." + extension.lower()
destinationFile = os.path.join(destinationDirectory, fileName)
if not os.path.exists(destinationFile):
shutil.copy(sourcePath, destinationFile)
destinationDirectory = os.path.join(destination, extension)
if not os.path.exists(destinationDirectory):
os.mkdir(destinationDirectory)
fileName = str(fileCounter) + "." + extension.lower()
destinationFile = os.path.join(destinationDirectory, fileName)
if not os.path.exists(destinationFile):
shutil.copy(sourcePath, destinationFile)
def get_args():
    """Parse the command-line arguments of the sorting script.

    Recognised arguments:
        source (positional): directory with files recovered by Photorec.
        destination (positional): directory the sorted files are written to.
        -n / --max-per-dir: maximum number of files per directory
            (positive integer, default 500).
        -m / --split-months: also split JPEG files by month.

    Returns:
        argparse.Namespace with the attributes ``source``, ``destination``,
        ``max_per_dir`` and ``split_months``.

    Raises:
        SystemExit: raised by argparse on invalid arguments or ``-h``.
    """
    import argparse

    class _HelpFormatter(argparse.ArgumentDefaultsHelpFormatter,
                         argparse.RawDescriptionHelpFormatter):
        """Show argument defaults and keep the description's line breaks.

        ArgumentDefaultsHelpFormatter alone re-wraps the description, which
        discards the explicit newlines used below.
        """

    def _positive_int(value):
        # Reject 0 and negative limits at parse time: the limit is later
        # used as a divisor when over-full directories are split.
        try:
            number = int(value)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid int value: %r" % (value,))
        if number < 1:
            raise argparse.ArgumentTypeError("must be at least 1, got %d" % number)
        return number

    description = (
        "Sort files recovered by Photorec.\n"
        "The input files are first copied to the destination, sorted by file type.\n"
        "Then JPG files are sorted based on creation year (and optionally month).\n"
        "Finally any directories containing more than a maximum number of files are accordingly split into separate directories."
    )

    parser = argparse.ArgumentParser(description=description, formatter_class=_HelpFormatter)
    parser.add_argument('source', metavar='src', type=str, help='source directory with files recovered by Photorec')
    parser.add_argument('destination', metavar='dest', type=str, help='destination directory to write sorted files to')
    parser.add_argument('-n', '--max-per-dir', type=_positive_int, default=500, required=False, help='maximum number of files per directory')
    parser.add_argument('-m', '--split-months', action='store_true', required=False, help='split JPEG files not only by year but by month as well')

    return parser.parse_args()
maxNumberOfFilesPerFolder = 500
splitMonths = False
source = None
destination = None
if(len(sys.argv) < 3):
print("Enter source and destination: python sort.py source/path destination/path")
else:
source = sys.argv[1]
print("Source directory: " + source)
destination = sys.argv[2]
print("Destination directory: " + destination)
if(len(sys.argv) > 3):
maxNumberOfFilesPerFolder = int(sys.argv[3])
args = get_args()
source = args.source
destination = args.destination
maxNumberOfFilesPerFolder = args.max_per_dir
splitMonths = args.split_months
print("Reading from source '%s', writing to destination '%s' (max %i files per directory, splitting by year %s)." %
(source, destination, maxNumberOfFilesPerFolder, splitMonths and "and month" or "only"))
while ((source is None) or (not os.path.exists(source))):
source = input('Enter a valid source directory\n')
source = input('Enter a valid source directory\n')
while ((destination is None) or (not os.path.exists(destination))):
destination = input('Enter a valid destination directory\n')
destination = input('Enter a valid destination directory\n')
fileNumber = getNumberOfFilesInFolderRecursively(source)
onePercentFiles = int(fileNumber/100)
@ -72,26 +91,26 @@ print("Files to copy: " + totalAmountToCopy)
fileCounter = 0
for root, dirs, files in os.walk(source, topdown=False):
for file in files:
extension = os.path.splitext(file)[1][1:].upper()
sourcePath = os.path.join(root, file)
destinationDirectory = os.path.join(destination, extension)
for file in files:
extension = os.path.splitext(file)[1][1:].upper()
sourcePath = os.path.join(root, file)
if not os.path.exists(destinationDirectory):
os.mkdir(destinationDirectory)
fileName = str(fileCounter) + "." + extension.lower()
destinationFile = os.path.join(destinationDirectory, fileName)
if not os.path.exists(destinationFile):
shutil.copy2(sourcePath, destinationFile)
destinationDirectory = os.path.join(destination, extension)
fileCounter += 1
if((fileCounter % onePercentFiles) is 0):
log(str(fileCounter) + " / " + totalAmountToCopy + " processed.")
if not os.path.exists(destinationDirectory):
os.mkdir(destinationDirectory)
fileName = str(fileCounter) + "." + extension.lower()
destinationFile = os.path.join(destinationDirectory, fileName)
if not os.path.exists(destinationFile):
shutil.copy2(sourcePath, destinationFile)
fileCounter += 1
if((fileCounter % onePercentFiles) is 0):
log(str(fileCounter) + " / " + totalAmountToCopy + " processed.")
log("start special file treatment")
jpgSorter.postprocessImages(os.path.join(destination, "JPG"))
jpgSorter.postprocessImages(os.path.join(destination, "JPG"), splitMonths)
log("assure max file per folder number")
numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder)