Merge pull request #2 from woutgg/master
Add option to allow sorting jpg files into both year and month directories
This commit is contained in:
commit
42e00be703
|
@ -0,0 +1 @@
|
|||
*.pyc
|
59
jpgSorter.py
59
jpgSorter.py
|
@ -5,7 +5,7 @@ from time import localtime, strftime, strptime, mktime
|
|||
import shutil
|
||||
|
||||
minEventDelta = 60 * 60 * 24 * 4 # 4 days in seconds
|
||||
unknownDateFolderName = "Datum unbekannt"
|
||||
unknownDateFolderName = "date-unknown"
|
||||
|
||||
def getMinimumCreationTime(exif_data):
|
||||
creationTime = None
|
||||
|
@ -18,7 +18,7 @@ def getMinimumCreationTime(exif_data):
|
|||
if (dateTimeOriginal is None):
|
||||
# case 1/9: dateTime, dateTimeOriginal, and dateTimeDigitized = None
|
||||
# case 2/9: dateTime and dateTimeOriginal = None, then use dateTimeDigitized
|
||||
creationTime = dateTimeDigitized
|
||||
creationTime = dateTimeDigitized
|
||||
else:
|
||||
# case 3/9: dateTime and dateTimeDigitized = None, then use dateTimeOriginal
|
||||
# case 4/9: dateTime = None, prefere dateTimeOriginal over dateTimeDigitized
|
||||
|
@ -33,7 +33,7 @@ def postprocessImage(images, imageDirectory, fileName):
|
|||
imagePath = os.path.join(imageDirectory, fileName)
|
||||
image = open(imagePath, 'rb')
|
||||
creationTime = None
|
||||
try:
|
||||
try:
|
||||
exifTags = exifread.process_file(image, details=False)
|
||||
creationTime = getMinimumCreationTime(exifTags)
|
||||
except:
|
||||
|
@ -51,25 +51,29 @@ def postprocessImage(images, imageDirectory, fileName):
|
|||
images.append((mktime(creationTime), imagePath))
|
||||
image.close()
|
||||
|
||||
def createPath(newPath):
    """Create the directory ``newPath`` together with any missing parents.

    Calling this for a directory that already exists is a no-op.

    Raises:
        OSError: if the directory cannot be created, or if ``newPath``
            already exists but is not a directory.
    """
    try:
        os.makedirs(newPath)
    except OSError:
        # Attempt the creation first and inspect the result afterwards: a
        # separate "exists?" check beforehand can be invalidated by another
        # process between the check and the creation.
        if not os.path.isdir(newPath):
            raise
|
||||
|
||||
def createNewFolder(destinationRoot, year, eventNumber):
|
||||
yearPath = os.path.join(destinationRoot, year)
|
||||
if not os.path.exists(yearPath):
|
||||
os.mkdir(yearPath)
|
||||
eventPath = os.path.join(yearPath, str(eventNumber))
|
||||
if not os.path.exists(eventPath):
|
||||
os.mkdir(eventPath)
|
||||
def createNewFolder(destinationRoot, year, month, eventNumber):
    """Create the folder that holds the images of one event.

    The folder is ``<destinationRoot>/<year>/<month>/<eventNumber>``. Pass
    ``None`` for ``month`` to leave out the month level and get
    ``<destinationRoot>/<year>/<eventNumber>`` instead.
    """
    components = [destinationRoot, year]
    if month is not None:
        components.append(month)
    components.append(str(eventNumber))

    createPath(os.path.join(*components))
|
||||
|
||||
def createUnknownDateFolder(destinationRoot):
    """Create the ``unknownDateFolderName`` folder below ``destinationRoot``."""
    createPath(os.path.join(destinationRoot, unknownDateFolderName))
|
||||
|
||||
|
||||
def writeImages(images, destinationRoot):
|
||||
def writeImages(images, destinationRoot, splitByMonth=False):
|
||||
sortedImages = sorted(images)
|
||||
previousTime = None
|
||||
eventNumber = 0
|
||||
previousDestination = None
|
||||
today = strftime("%d/%m/%Y")
|
||||
|
||||
for imageTuple in sortedImages:
|
||||
|
@ -77,6 +81,7 @@ def writeImages(images, destinationRoot):
|
|||
destinationFilePath = ""
|
||||
t = localtime(imageTuple[0])
|
||||
year = strftime("%Y", t)
|
||||
month = splitByMonth and strftime("%m", t) or None
|
||||
creationDate = strftime("%d/%m/%Y", t)
|
||||
fileName = ntpath.basename(imageTuple[1])
|
||||
|
||||
|
@ -84,21 +89,25 @@ def writeImages(images, destinationRoot):
|
|||
createUnknownDateFolder(destinationRoot)
|
||||
destination = os.path.join(destinationRoot, unknownDateFolderName)
|
||||
destinationFilePath = os.path.join(destination, fileName)
|
||||
|
||||
|
||||
else:
|
||||
if (previousTime == None) or ((previousTime + minEventDelta) < imageTuple[0]):
|
||||
previousTime = imageTuple[0]
|
||||
eventNumber = eventNumber + 1
|
||||
createNewFolder(destinationRoot, year, eventNumber)
|
||||
|
||||
createNewFolder(destinationRoot, year, month, eventNumber)
|
||||
|
||||
previousTime = imageTuple[0]
|
||||
|
||||
destination = os.path.join(destinationRoot, year, str(eventNumber))
|
||||
# it may be possible that an event covers 2 years.
|
||||
# in such a case put all the images to the even in the old year
|
||||
if not (os.path.exists(destination)):
|
||||
destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber))
|
||||
destComponents = [destinationRoot, year, month, str(eventNumber)]
|
||||
destComponents = [v for v in destComponents if v is not None]
|
||||
destination = os.path.join(*destComponents)
|
||||
|
||||
# it may be possible that an event covers 2 years.
|
||||
# in such a case put all the images to the event in the old year
|
||||
if not (os.path.exists(destination)):
|
||||
destination = previousDestination
|
||||
# destination = os.path.join(destinationRoot, str(int(year) - 1), str(eventNumber))
|
||||
|
||||
previousDestination = destination
|
||||
destinationFilePath = os.path.join(destination, fileName)
|
||||
|
||||
if not (os.path.exists(destinationFilePath)):
|
||||
|
@ -108,10 +117,10 @@ def writeImages(images, destinationRoot):
|
|||
os.remove(imageTuple[1])
|
||||
|
||||
|
||||
def postprocessImages(imageDirectory, splitByMonth=False):
    """Collect every file found under ``imageDirectory`` and sort it into folders.

    Args:
        imageDirectory: directory that is walked for image files and that
            also serves as the destination root for the sorted folders.
        splitByMonth: forwarded to ``writeImages``. Defaults to ``False`` so
            that the earlier one-argument call keeps working and the
            default matches the one of ``writeImages``.
    """
    images = []
    for root, dirs, files in os.walk(imageDirectory):
        for fileName in files:
            # NOTE(review): the walk descends into sub-directories, yet the
            # file name is handed over together with imageDirectory rather
            # than the directory it was found in. Confirm that the
            # directory is expected to be flat at this point.
            postprocessImage(images, imageDirectory, fileName)

    writeImages(images, imageDirectory, splitByMonth)
|
||||
|
|
|
@ -5,23 +5,23 @@ import shutil
|
|||
|
||||
|
||||
def limitFilesPerFolder(folder, maxNumberOfFilesPerFolder):
    """Split over-full directories below ``folder`` into numbered sub-folders.

    Every directory found below ``folder`` (``folder`` itself is not
    checked) that directly contains more than ``maxNumberOfFilesPerFolder``
    regular files gets sub-folders named ``1``, ``2``, ... and its files are
    moved into them in sorted name order, ``maxNumberOfFilesPerFolder`` per
    sub-folder.

    Only regular files count towards the limit. Sub-directories are neither
    counted nor moved, so a directory that merely holds many sub-directories
    is left untouched and no empty numbered folders are created.

    Args:
        folder: root of the directory tree to process.
        maxNumberOfFilesPerFolder: highest number of files a directory may
            contain directly.
    """
    for root, dirs, files in os.walk(folder, topdown=False):
        for dirName in dirs:
            dirPath = os.path.join(root, dirName)
            # Sorted so that the distribution over the sub-folders is
            # reproducible; os.listdir returns entries in arbitrary order.
            fileNames = sorted(
                name for name in os.listdir(dirPath)
                if os.path.isfile(os.path.join(dirPath, name))
            )
            if len(fileNames) <= maxNumberOfFilesPerFolder:
                continue

            for index, fileName in enumerate(fileNames):
                subFolderName = str(index // maxNumberOfFilesPerFolder + 1)
                subFolderPath = os.path.join(dirPath, subFolderName)
                if not os.path.exists(subFolderPath):
                    os.mkdir(subFolderPath)
                shutil.move(os.path.join(dirPath, fileName),
                            os.path.join(subFolderPath, fileName))
|
||||
|
||||
|
||||
|
|
10
readme.md
10
readme.md
|
@ -1,8 +1,8 @@
|
|||
# Sort files recovered by Photorec
|
||||
|
||||
Photorec does a great job when recovering deleted files. But the result is a huge, unsorted, unnamed amount of files. Especially for external hard drives serving as backup of all the personal data, sortig them is an endless job.
|
||||
Photorec does a great job when recovering deleted files. But the result is a huge, unsorted, unnamed amount of files. Especially for external hard drives serving as backup of all the personal data, sorting them is an endless job.
|
||||
|
||||
This program helps you sorting your files. First of all, the **files are copied to own folders for each file type**. Second, **jpgs are distinguished by the year** when they have been taken **and by the event**. We thereby define an event as a time span during them photos are taken. It has a delta of 4 days without a photo to another event. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually.
|
||||
This program helps you sort your files. First of all, the **files are copied to their own folders for each file type**. Second, **jpgs are distinguished by the year, and optionally by the month as well,** in which they were taken **and by the event**. We define an event as a time span during which photos are taken; a gap of 4 days without a photo separates one event from the next. If no date from the past can be detected, these jpgs are put into one folder to be sorted manually.
|
||||
|
||||
|
||||
## Usage
|
||||
|
@ -15,9 +15,11 @@ The first output of the programm is the number of files to copy. To count them m
|
|||
|
||||
All directories contain a maximum of 500 files. If one contains more, numbered subdirectories are created. If you want another file limit, e.g. 1000, just pass that number with the `-n` option when running the program:
|
||||
|
||||
```python recovery.py <path to files recovered by Photorec> <destination> 1000```
|
||||
```python recovery.py <path to files recovered by Photorec> <destination> -n1000```
|
||||
|
||||
For an overview of all arguments, run with the `-h` option: ```python recovery.py -h```.
|
||||
|
||||
|
||||
## Adjust event distance
|
||||
|
||||
For the case you want to reduce or increase the timespan between events, simply adjust the variable ```minEventDelta``` in ```jpgHelper.py```. This variable contains the delta between events in seconds.
|
||||
If you want to reduce or increase the time span between events, simply adjust the variable ```minEventDelta``` in ```jpgSorter.py```. This variable contains the delta between events in seconds.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python2
|
||||
#!/usr/bin/env python
|
||||
import os
|
||||
import os.path
|
||||
import sys
|
||||
|
@ -15,53 +15,72 @@ def getNumberOfFilesInFolderRecursively(start_path = '.'):
|
|||
for f in filenames:
|
||||
fp = os.path.join(dirpath, f)
|
||||
if(os.path.isfile(fp)):
|
||||
numberOfFiles += 1
|
||||
numberOfFiles += 1
|
||||
return numberOfFiles
|
||||
|
||||
|
||||
def getNumberOfFilesInFolder(path):
    """Return the number of entries directly inside ``path``.

    Sub-directories are counted as well; the listing is not recursive.
    """
    entries = os.listdir(path)
    return len(entries)
|
||||
|
||||
|
||||
def log(logString):
    """Print ``logString`` prefixed with the current local time (HH:MM:SS)."""
    timestamp = strftime("%H:%M:%S", localtime())
    print(timestamp + ": " + logString)
|
||||
|
||||
|
||||
def moveFile(file, destination):
    """Copy ``file`` into the folder for its extension below ``destination``.

    The target folder is named after the upper-cased file extension and is
    created when missing. The copy is named ``<fileCounter>.<extension>``
    with a lower-cased extension. An already existing target file is left
    untouched. Despite the name, the source file is copied, not moved.

    NOTE(review): ``root`` and ``fileCounter`` are not parameters; they are
    read from module scope, so this only works while the module-level copy
    loop has set them. Consider passing them in explicitly.
    """
    extension = os.path.splitext(file)[1][1:].upper()
    sourcePath = os.path.join(root, file)

    targetDirectory = os.path.join(destination, extension)
    if not os.path.exists(targetDirectory):
        os.mkdir(targetDirectory)

    targetName = str(fileCounter) + "." + extension.lower()
    targetFile = os.path.join(targetDirectory, targetName)
    if os.path.exists(targetFile):
        return
    shutil.copy(sourcePath, targetFile)
|
||||
|
||||
|
||||
def get_args(argv=None):
    """Parse the command line of the sorting script.

    Args:
        argv: list of argument strings to parse. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is the behaviour of
            the plain ``get_args()`` call.

    Returns:
        The ``argparse.Namespace`` with the attributes ``source``,
        ``destination``, ``max_per_dir`` (int, default 500) and
        ``split_months`` (bool, default ``False``).
    """
    import argparse

    description = (
        "Sort files recoverd by Photorec.\n"
        "The input files are first copied to the destination, sorted by file type.\n"
        "Then JPG files are sorted based on creation year (and optionally month).\n"
        "Finally any directories containing more than a maximum number of files are accordingly split into separate directories."
    )

    parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('source', metavar='src', type=str, help='source directory with files recovered by Photorec')
    parser.add_argument('destination', metavar='dest', type=str, help='destination directory to write sorted files to')
    parser.add_argument('-n', '--max-per-dir', type=int, default=500, required=False, help='maximum number of files per directory')
    parser.add_argument('-m', '--split-months', action='store_true', required=False, help='split JPEG files not only by year but by month as well')

    return parser.parse_args(argv)
|
||||
|
||||
|
||||
|
||||
maxNumberOfFilesPerFolder = 500
|
||||
splitMonths = False
|
||||
source = None
|
||||
destination = None
|
||||
|
||||
if(len(sys.argv) < 3):
|
||||
print("Enter source and destination: python sort.py source/path destination/path")
|
||||
else:
|
||||
source = sys.argv[1]
|
||||
print("Source directory: " + source)
|
||||
destination = sys.argv[2]
|
||||
print("Destination directory: " + destination)
|
||||
|
||||
if(len(sys.argv) > 3):
|
||||
maxNumberOfFilesPerFolder = int(sys.argv[3])
|
||||
args = get_args()
|
||||
source = args.source
|
||||
destination = args.destination
|
||||
maxNumberOfFilesPerFolder = args.max_per_dir
|
||||
splitMonths = args.split_months
|
||||
|
||||
print("Reading from source '%s', writing to destination '%s' (max %i files per directory, splitting by year %s)." %
|
||||
(source, destination, maxNumberOfFilesPerFolder, splitMonths and "and month" or "only"))
|
||||
|
||||
while ((source is None) or (not os.path.exists(source))):
|
||||
source = input('Enter a valid source directory\n')
|
||||
source = input('Enter a valid source directory\n')
|
||||
while ((destination is None) or (not os.path.exists(destination))):
|
||||
destination = input('Enter a valid destination directory\n')
|
||||
destination = input('Enter a valid destination directory\n')
|
||||
|
||||
fileNumber = getNumberOfFilesInFolderRecursively(source)
|
||||
onePercentFiles = int(fileNumber/100)
|
||||
|
@ -72,26 +91,26 @@ print("Files to copy: " + totalAmountToCopy)
|
|||
# Copy every source file into a folder named after its upper-cased extension.
# The copies are named after a running counter, which keeps them unique
# across all source directories.
fileCounter = 0
for root, dirs, files in os.walk(source, topdown=False):

    for file in files:
        extension = os.path.splitext(file)[1][1:].upper()
        sourcePath = os.path.join(root, file)

        destinationDirectory = os.path.join(destination, extension)

        if not os.path.exists(destinationDirectory):
            os.mkdir(destinationDirectory)

        fileName = str(fileCounter) + "." + extension.lower()
        destinationFile = os.path.join(destinationDirectory, fileName)
        if not os.path.exists(destinationFile):
            shutil.copy2(sourcePath, destinationFile)

        fileCounter += 1
        # onePercentFiles can be 0, which would make the modulo raise
        # ZeroDivisionError; progress output is skipped in that case.
        # Compared with == because identity checks against an int literal
        # are not guaranteed to hold.
        if onePercentFiles > 0 and (fileCounter % onePercentFiles) == 0:
            log(str(fileCounter) + " / " + totalAmountToCopy + " processed.")

log("start special file treatment")
jpgSorter.postprocessImages(os.path.join(destination, "JPG"), splitMonths)

log("assure max file per folder number")
numberOfFilesPerFolderLimiter.limitFilesPerFolder(destination, maxNumberOfFilesPerFolder)
|
||||
|
|
Loading…
Reference in New Issue