Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions elodie/external/pyexiftool.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,15 +337,23 @@ def get_metadata_batch(self, filenames):
The return value will have the format described in the
documentation of :py:meth:`execute_json()`.
"""
return self.execute_json(*filenames)
data = self.execute_json(*filenames)
if isinstance(data, list):
return data
return []

def get_metadata(self, filename):
    """Return meta-data for a single file.

    The returned dictionary has the format described in the
    documentation of :py:meth:`execute_json()`.

    Returns ``None`` when :py:meth:`execute_json()` yields anything
    other than a non-empty list whose first entry is a dictionary.
    """
    data = self.execute_json(filename)
    # Validate before indexing so a missing or empty result cannot
    # raise TypeError/IndexError here.
    if not isinstance(data, list) or not data:
        return None
    first = data[0]
    return first if isinstance(first, dict) else None

def get_tags_batch(self, tags, filenames):
"""Return only specified tags for the given files.
Expand All @@ -368,15 +376,21 @@ def get_tags_batch(self, tags, filenames):
"an iterable of strings")
params = ["-" + t for t in tags]
params.extend(filenames)
return self.execute_json(*params)
data = self.execute_json(*params)
if isinstance(data, list):
return data
return []

def get_tags(self, tags, filename):
    """Return only specified tags for a single file.

    The returned dictionary has the format described in the
    documentation of :py:meth:`execute_json()`.

    Returns ``None`` when the batch lookup produced no entries.
    """
    data = self.get_tags_batch(tags, [filename])
    # An empty batch result has no first entry to hand back.
    return data[0] if data else None

def get_tag_batch(self, tag, filenames):
"""Extract a single tag from the given files.
Expand All @@ -390,9 +404,14 @@ def get_tag_batch(self, tag, filenames):
non-existent tags, in the same order as ``filenames``.
"""
data = self.get_tags_batch([tag], filenames)
if len(data) == 0:
return [None for _ in filenames]
result = []
for d in data:
d.pop("SourceFile")
if not isinstance(d, dict):
result.append(None)
continue
d.pop("SourceFile", None)
result.append(next(iter(d.values()), None))
return result

Expand All @@ -402,7 +421,10 @@ def get_tag(self, tag, filename):
The return value is the value of the specified tag, or
``None`` if this tag was not found in the file.
"""
return self.get_tag_batch(tag, [filename])[0]
data = self.get_tag_batch(tag, [filename])
if len(data) == 0:
return None
return data[0]

def set_tags_batch(self, tags, filenames):
"""Writes the values of the specified tags for the given files.
Expand Down
86 changes: 70 additions & 16 deletions elodie/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import re
import shutil
import time
import calendar
from send2trash import send2trash

from elodie import compatability
Expand Down Expand Up @@ -45,10 +46,58 @@ def __init__(self):
# See build failures in Python3 here.
# https://travis-ci.org/jmathai/elodie/builds/483012902
self.whitespace_regex = '[ \t\n\r\f\v]+'
# Disallow path separators and filesystem-invalid characters in a single path component.
self.invalid_path_component_regex = r'[<>:"/\\|?*\x00-\x1f]'
self.windows_reserved_names = {
'CON', 'PRN', 'AUX', 'NUL',
'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',
}

# Instantiate a plugins object
self.plugins = Plugins()

def sanitize_path_component(self, value):
    """Sanitize a single folder/file path component for cross-platform safety.

    :param value: the path component to clean, or None.
    :returns: None when ``value`` is None; an empty string when nothing
        is left after cleaning; otherwise the cleaned component.
    """
    if value is None:
        return value

    # Characters matched by the configured pattern become dashes.
    value = re.sub(self.invalid_path_component_regex, '-', value)

    # Also replace the running platform's own separators, in case the
    # configured pattern does not cover them.
    for separator in (os.sep, os.altsep):
        if separator:
            value = value.replace(separator, '-')

    # Drop trailing spaces and dots.
    value = value.rstrip(' .')
    if not value:
        return ''

    # Windows has reserved device names which cannot be used as path
    # components. Compare the part before the first dot, ignoring case
    # and trailing spaces, so that "NUL .txt" is caught like "NUL.txt".
    stem = value.split('.', 1)[0].rstrip(' ').upper()
    if stem in self.windows_reserved_names:
        value = '_%s' % value

    return value

def _safe_timestamp(self, date_taken):
    """Convert struct_time to timestamp with a pre-epoch fallback.

    :param date_taken: time tuple to convert.
    :returns: seconds since the epoch, or None when neither conversion
        succeeds (including when ``date_taken`` is not a time tuple).
    """
    conversion_errors = (OverflowError, OSError, ValueError, TypeError)
    # time.mktime() reads the tuple as local time and is tried first;
    # calendar.timegm() reads it as UTC, so the fallback value is not
    # adjusted for the local offset.
    for convert in (time.mktime, calendar.timegm):
        try:
            return convert(date_taken)
        except conversion_errors:
            continue
    return None

def _safe_set_mtime(self, file_path, mtime):
    """Set a file's modification time, reporting failure instead of raising.

    :param file_path: path of the file to update.
    :param mtime: modification time to apply; the access time is set
        to the current time.
    :returns: True when the times were applied, False when os.utime
        raised OverflowError, OSError or ValueError.
    """
    access_and_modified = (time.time(), mtime)
    try:
        os.utime(file_path, access_and_modified)
    except (OverflowError, OSError, ValueError):
        log.warn('Unable to set mtime for %s using %s' % (file_path, mtime))
        return False
    return True
def _file_operation(self, operation_type, src, dst=None):
"""Perform file operation with dry-run support."""
if constants.dry_run:
Expand Down Expand Up @@ -208,7 +257,7 @@ def get_file_name(self, metadata):
# This helps when re-running the program on file
# which were already processed.
this_value = re.sub(
'^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-',
r'^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-',
'',
metadata['base_name']
)
Expand All @@ -234,12 +283,16 @@ def get_file_name(self, metadata):
name,
)
else:
this_value = self.sanitize_path_component(this_value)
name = re.sub(
'%{}'.format(part),
this_value,
name,
)

# Final guard to avoid unsafe separators from custom templates.
name = self.sanitize_path_component(name)

config = load_config()

if('File' in config and 'capitalization' in config['File'] and config['File']['capitalization'] == 'upper'):
Expand Down Expand Up @@ -281,7 +334,7 @@ def get_file_name_definition(self):
# name.
# I.e. %date-%original_name-%title.%extension => ['date', 'original_name', 'title', 'extension'] #noqa
path_parts = re.findall(
'(\%[a-z_]+)',
r'(%[a-z_]+)',
config_file['name']
)

Expand Down Expand Up @@ -341,7 +394,7 @@ def get_folder_path_definition(self):
# I.e. %foo/%bar => ['foo', 'bar']
# I.e. %foo/%bar|%example|"something" => ['foo', 'bar|example|"something"']
path_parts = re.findall(
'(\%[^/]+)',
r'(%[^/]+)',
config_directory['full_path']
)

Expand Down Expand Up @@ -385,7 +438,9 @@ def get_folder_path(self, metadata, path_parts=None):
part, mask = this_part
this_path = self.get_dynamic_path(part, mask, metadata)
if this_path:
path.append(this_path.strip())
this_path = self.sanitize_path_component(this_path).strip()
if len(this_path) > 0:
path.append(this_path)
# We break as soon as we have a value to append
# Else we continue for fallbacks
break
Expand Down Expand Up @@ -644,7 +699,7 @@ def set_utime_from_metadata(self, metadata, file_path):
date_taken = metadata['date_taken']
base_name = metadata['base_name']
year_month_day_match = re.search(
'^(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})',
r'^(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})',
base_name
)
if(year_month_day_match is not None):
Expand All @@ -654,18 +709,17 @@ def set_utime_from_metadata(self, metadata, file_path):
'%Y-%m-%d %H:%M:%S'
)

if not constants.dry_run:
os.utime(file_path, (time.time(), time.mktime(date_taken)))
else:
print(f"[DRY-RUN] Would set utime from date pattern for: {file_path}")
date_taken_in_seconds = self._safe_timestamp(date_taken)
if(date_taken_in_seconds is None):
log.warn('Could not convert date_taken to timestamp for %s' % file_path)
return

if not constants.dry_run:
self._safe_set_mtime(file_path, date_taken_in_seconds)
elif year_month_day_match is not None:
print(f"[DRY-RUN] Would set utime from date pattern for: {file_path}")
else:
# We don't make any assumptions about time zones and
# assume local time zone.
date_taken_in_seconds = time.mktime(date_taken)
if not constants.dry_run:
os.utime(file_path, (time.time(), (date_taken_in_seconds)))
else:
print(f"[DRY-RUN] Would set utime from metadata for: {file_path}")
print(f"[DRY-RUN] Would set utime from metadata for: {file_path}")

def should_exclude(self, path, regex_list=set(), needs_compiled=False):
if(len(regex_list) == 0):
Expand Down
9 changes: 6 additions & 3 deletions elodie/media/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,19 @@ def get_coordinate(self, type='latitude'):
def get_exiftool_attributes(self):
    """Get attributes for the media object from exiftool.

    The result is cached on ``self.exif_metadata`` and reused on
    later calls.

    :returns: dict, or None if exiftool metadata was unavailable.
    """
    source = self.source

    # Cache exif metadata results and use if already exists for media
    if self.exif_metadata is None:
        try:
            self.exif_metadata = ExifTool().get_metadata(source)
        except Exception:
            # Best effort: any failure while reading metadata is
            # treated the same as "no metadata".
            self.exif_metadata = None

    # Empty metadata is reported as None as well.
    return self.exif_metadata or None

Expand Down
10 changes: 5 additions & 5 deletions elodie/media/photo.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_date_taken(self):

exif = self.get_exiftool_attributes()
if not exif:
return seconds_since_epoch
return time.gmtime(seconds_since_epoch)

# We need to parse a string from EXIF into a timestamp.
# EXIF DateTimeOriginal and EXIF DateTime are both stored
Expand All @@ -71,10 +71,10 @@ def get_date_taken(self):
dt, tm = exif[key].split(' ')
dt_list = compile(r'-|:').split(dt)
dt_list = dt_list + compile(r'-|:').split(tm)
dt_list = map(int, dt_list)
time_tuple = datetime(*dt_list).timetuple()
seconds_since_epoch = time.mktime(time_tuple)
break
dt_list = list(map(int, dt_list))
# Build a struct_time directly from EXIF to support
# pre-epoch dates on platforms where mktime can fail.
return datetime(*dt_list).utctimetuple()
except BaseException as e:
log.error(e)
pass
Expand Down
6 changes: 5 additions & 1 deletion elodie/media/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

from json import dumps, loads
import calendar
import os
from shutil import copy2, copyfileobj
import time
Expand Down Expand Up @@ -106,7 +107,10 @@ def set_date_taken(self, passed_in_time):
if(time is None):
return False

seconds_since_epoch = time.mktime(passed_in_time.timetuple())
try:
seconds_since_epoch = time.mktime(passed_in_time.timetuple())
except (OverflowError, OSError, ValueError):
seconds_since_epoch = calendar.timegm(passed_in_time.utctimetuple())
status = self.write_metadata(date_taken=seconds_since_epoch)
self.reset_cache()
return status
Expand Down
38 changes: 18 additions & 20 deletions elodie/media/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from __future__ import division

# load modules
from datetime import datetime
from datetime import datetime, timezone

import os
import re
Expand Down Expand Up @@ -66,8 +66,16 @@ def get_date_taken(self):

source = self.source
seconds_since_epoch = min(os.path.getmtime(source), os.path.getctime(source)) # noqa
fallback_date = datetime.fromtimestamp(seconds_since_epoch, timezone.utc)
best_date = fallback_date
found_exif_date = False

exif = self.get_exiftool_attributes()
if not exif:
if(seconds_since_epoch == 0):
return None
return fallback_date.utctimetuple()

for date_key in self.exif_map['date_taken']:
if date_key in exif:
# Example date strings we want to parse
Expand All @@ -76,28 +84,18 @@ def get_date_taken(self):
date = re.search('([0-9: ]+)([-+][0-9:]+)?', exif[date_key])
if(date is not None):
date_string = date.group(1)
date_offset = date.group(2)
try:
exif_seconds_since_epoch = time.mktime(
datetime.strptime(
date_string,
'%Y:%m:%d %H:%M:%S'
).timetuple()
)
if(exif_seconds_since_epoch < seconds_since_epoch):
seconds_since_epoch = exif_seconds_since_epoch
if date_offset is not None:
offset_parts = date_offset[1:].split(':')
offset_seconds = int(offset_parts[0]) * 3600
offset_seconds = offset_seconds + int(offset_parts[1]) * 60 # noqa
if date_offset[0] == '-':
seconds_since_epoch - offset_seconds
elif date_offset[0] == '+':
seconds_since_epoch + offset_seconds
exif_date = datetime.strptime(
date_string,
'%Y:%m:%d %H:%M:%S'
).replace(tzinfo=timezone.utc)
if(exif_date < best_date):
best_date = exif_date
found_exif_date = True
except:
pass

if(seconds_since_epoch == 0):
if(seconds_since_epoch == 0 and found_exif_date is False):
return None

return time.gmtime(seconds_since_epoch)
return best_date.utctimetuple()
16 changes: 15 additions & 1 deletion elodie/tests/external_pyexiftool_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,18 @@ def test_exiftool_with_non_ascii_file():
if os.path.exists(test_file):
os.remove(test_file)
if os.path.exists(test_dir):
os.rmdir(test_dir)
os.rmdir(test_dir)

def test_get_metadata_returns_none_when_execute_json_fails():
    """get_metadata() must return None, not raise, when execute_json gives None."""
    exiftool = ExifTool()
    with patch.object(exiftool, 'execute_json', return_value=None):
        metadata = exiftool.get_metadata("/tmp/test.jpg")
    assert metadata is None

def test_get_metadata_returns_none_when_execute_json_is_empty():
    """get_metadata() must return None, not raise, when execute_json gives []."""
    exiftool = ExifTool()
    with patch.object(exiftool, 'execute_json', return_value=[]):
        metadata = exiftool.get_metadata("/tmp/test.jpg")
    assert metadata is None
Loading