From c6d0df5e2820eae323f92d400ff4affafd0eb73e Mon Sep 17 00:00:00 2001 From: Thierry Perraut Date: Sat, 7 Feb 2026 14:36:57 -0800 Subject: [PATCH 1/2] Harden unicode/exif handling and sanitize generated paths (cherry picked from commit eafdf57ede0a2b9341f57c7461d54a3803c15b45) --- elodie/external/pyexiftool.py | 34 ++++++++++++++++++---- elodie/filesystem.py | 37 +++++++++++++++++++++++- elodie/media/media.py | 9 ++++-- elodie/tests/external_pyexiftool_test.py | 16 +++++++++- elodie/tests/filesystem_test.py | 22 ++++++++++++++ elodie/tests/media/media_test.py | 15 ++++++++++ 6 files changed, 122 insertions(+), 11 deletions(-) diff --git a/elodie/external/pyexiftool.py b/elodie/external/pyexiftool.py index 5adebbe6..29a07af1 100644 --- a/elodie/external/pyexiftool.py +++ b/elodie/external/pyexiftool.py @@ -337,7 +337,10 @@ def get_metadata_batch(self, filenames): The return value will have the format described in the documentation of :py:meth:`execute_json()`. """ - return self.execute_json(*filenames) + data = self.execute_json(*filenames) + if isinstance(data, list): + return data + return [] def get_metadata(self, filename): """Return meta-data for a single file. @@ -345,7 +348,12 @@ def get_metadata(self, filename): The returned dictionary has the format described in the documentation of :py:meth:`execute_json()`. """ - return self.execute_json(filename)[0] + data = self.execute_json(filename) + if not isinstance(data, list) or len(data) == 0: + return None + if not isinstance(data[0], dict): + return None + return data[0] def get_tags_batch(self, tags, filenames): """Return only specified tags for the given files. @@ -368,7 +376,10 @@ def get_tags_batch(self, tags, filenames): "an iterable of strings") params = ["-" + t for t in tags] params.extend(filenames) - return self.execute_json(*params) + data = self.execute_json(*params) + if isinstance(data, list): + return data + return [] def get_tags(self, tags, filename): """Return only specified tags for a single file. @@ -376,7 +387,10 @@ def get_tags(self, tags, filename): The returned dictionary has the format described in the documentation of :py:meth:`execute_json()`. """ - return self.get_tags_batch(tags, [filename])[0] + data = self.get_tags_batch(tags, [filename]) + if len(data) == 0: + return None + return data[0] def get_tag_batch(self, tag, filenames): """Extract a single tag from the given files. @@ -390,9 +404,14 @@ def get_tag_batch(self, tag, filenames): non-existent tags, in the same order as ``filenames``. """ data = self.get_tags_batch([tag], filenames) + if len(data) == 0: + return [None for _ in filenames] result = [] for d in data: - d.pop("SourceFile") + if not isinstance(d, dict): + result.append(None) + continue + d.pop("SourceFile", None) result.append(next(iter(d.values()), None)) return result @@ -402,7 +421,10 @@ def get_tag(self, tag, filename): The return value is the value of the specified tag, or ``None`` if this tag was not found in the file. """ - return self.get_tag_batch(tag, [filename])[0] + data = self.get_tag_batch(tag, [filename]) + if len(data) == 0: + return None + return data[0] def set_tags_batch(self, tags, filenames): """Writes the values of the specified tags for the given files. diff --git a/elodie/filesystem.py b/elodie/filesystem.py index 7f514101..7e1801b0 100644 --- a/elodie/filesystem.py +++ b/elodie/filesystem.py @@ -45,10 +45,39 @@ def __init__(self): # See build failures in Python3 here. # https://travis-ci.org/jmathai/elodie/builds/483012902 self.whitespace_regex = '[ \t\n\r\f\v]+' + # Disallow path separators and filesystem-invalid characters in a single path component. + self.invalid_path_component_regex = r'[<>:"/\\|?*\x00-\x1f]' + self.windows_reserved_names = { + 'CON', 'PRN', 'AUX', 'NUL', + 'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9', + 'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9', + } # Instantiate a plugins object self.plugins = Plugins() + def sanitize_path_component(self, value): + """Sanitize a single folder/file path component for cross-platform safety.""" + if value is None: + return value + + value = re.sub(self.invalid_path_component_regex, '-', value) + + if os.sep: + value = value.replace(os.sep, '-') + if os.altsep: + value = value.replace(os.altsep, '-') + + value = value.rstrip(' .') + if len(value) == 0: + return '' + + # Windows has reserved device names which cannot be used as path components. + stem = value.split('.', 1)[0].upper() + if stem in self.windows_reserved_names: + value = '_%s' % value + + return value def _file_operation(self, operation_type, src, dst=None): """Perform file operation with dry-run support.""" if constants.dry_run: @@ -234,12 +263,16 @@ def get_file_name(self, metadata): name, ) else: + this_value = self.sanitize_path_component(this_value) name = re.sub( '%{}'.format(part), this_value, name, ) + # Final guard to avoid unsafe separators from custom templates. + name = self.sanitize_path_component(name) + config = load_config() if('File' in config and 'capitalization' in config['File'] and config['File']['capitalization'] == 'upper'): @@ -385,7 +418,9 @@ def get_folder_path(self, metadata, path_parts=None): part, mask = this_part this_path = self.get_dynamic_path(part, mask, metadata) if this_path: - path.append(this_path.strip()) + this_path = self.sanitize_path_component(this_path).strip() + if len(this_path) > 0: + path.append(this_path) # We break as soon as we have a value to append # Else we continue for fallbacks break diff --git a/elodie/media/media.py b/elodie/media/media.py index 788b670d..a862ca86 100644 --- a/elodie/media/media.py +++ b/elodie/media/media.py @@ -119,16 +119,19 @@ def get_coordinate(self, type='latitude'): def get_exiftool_attributes(self): """Get attributes for the media object from exiftool. - :returns: dict, or False if exiftool was not available. + :returns: dict, or None if exiftool metadata was unavailable. """ source = self.source #Cache exif metadata results and use if already exists for media if(self.exif_metadata is None): - self.exif_metadata = ExifTool().get_metadata(source) + try: + self.exif_metadata = ExifTool().get_metadata(source) + except Exception: + self.exif_metadata = None if not self.exif_metadata: - return False + return None return self.exif_metadata diff --git a/elodie/tests/external_pyexiftool_test.py b/elodie/tests/external_pyexiftool_test.py index 1cd2a458..a51b5d11 100644 --- a/elodie/tests/external_pyexiftool_test.py +++ b/elodie/tests/external_pyexiftool_test.py @@ -81,4 +81,18 @@ def test_exiftool_with_non_ascii_file(): if os.path.exists(test_file): os.remove(test_file) if os.path.exists(test_dir): - os.rmdir(test_dir) \ No newline at end of file + os.rmdir(test_dir) + +def test_get_metadata_returns_none_when_execute_json_fails(): + """get_metadata() should not crash when execute_json returns None.""" + et = ExifTool() + with patch.object(et, 'execute_json', return_value=None): + result = et.get_metadata("/tmp/test.jpg") + assert result is None + +def test_get_metadata_returns_none_when_execute_json_is_empty(): + """get_metadata() should not crash when execute_json returns an empty list.""" + et = ExifTool() + with patch.object(et, 'execute_json', return_value=[]): + result = et.get_metadata("/tmp/test.jpg") + assert result is None diff --git a/elodie/tests/filesystem_test.py b/elodie/tests/filesystem_test.py index c5cf9c2e..c62c2686 100644 --- a/elodie/tests/filesystem_test.py +++ b/elodie/tests/filesystem_test.py @@ -251,6 +251,20 @@ def test_get_file_name_with_uppercase_and_spaces(): assert file_name == helper.path_tz_fix('2015-12-05_00-59-26-plain-with-spaces-and-uppercase-123.jpg'), file_name +def test_get_file_name_sanitizes_invalid_path_characters(): + filesystem = FileSystem() + media = Photo(helper.get_file('with-title.jpg')) + metadata = media.get_metadata() + metadata['title'] = 'nami cc aapi / 中文部公众讲座 : 1?*' + + file_name = filesystem.get_file_name(metadata) + + assert '/' not in file_name, file_name + assert '\\' not in file_name, file_name + assert ':' not in file_name, file_name + assert '?' not in file_name, file_name + assert '*' not in file_name, file_name + @mock.patch('elodie.config.get_config_file', return_value='%s/config.ini-filename-custom' % gettempdir()) def test_get_file_name_custom(mock_get_config_file): with open(mock_get_config_file.return_value, 'w') as f: @@ -395,6 +409,14 @@ def test_get_folder_path_with_location(): assert path == os.path.join('2015-12-Dec','Sunnyvale'), path +@mock.patch('elodie.filesystem.geolocation.place_name', return_value={'default': u'Bellevue/WA', 'city': u'Bellevue/WA'}) +def test_get_folder_path_sanitizes_location_separator(mock_place_name): + filesystem = FileSystem() + media = Photo(helper.get_file('with-location.jpg')) + path = filesystem.get_folder_path(media.get_metadata()) + + assert path == os.path.join('2015-12-Dec', 'Bellevue-WA'), path + @mock.patch('elodie.config.get_config_file', return_value='%s/config.ini-original-with-camera-make-and-model' % gettempdir()) def test_get_folder_path_with_camera_make_and_model(mock_get_config_file): with open(mock_get_config_file.return_value, 'w') as f: diff --git a/elodie/tests/media/media_test.py b/elodie/tests/media/media_test.py index f435bb45..871a81ba 100644 --- a/elodie/tests/media/media_test.py +++ b/elodie/tests/media/media_test.py @@ -9,6 +9,7 @@ import string import tempfile import time +from unittest.mock import patch sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))) sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) @@ -18,6 +19,7 @@ from elodie.media.media import Media from elodie.media.photo import Photo from elodie.media.video import Video +from elodie.external.pyexiftool import ExifTool os.environ['TZ'] = 'GMT' @@ -89,6 +91,19 @@ def test_get_original_name_invalid_file(): assert original_name is None, original_name +def test_get_original_name_when_exiftool_metadata_is_unavailable(): + temporary_folder, folder = helper.create_working_folder() + + origin = '%s/%s' % (folder, 'plain.jpg') + file = helper.get_file('plain.jpg') + shutil.copyfile(file, origin) + + media = Media.get_class_by_file(origin, [Photo]) + with patch.object(ExifTool, 'get_metadata', return_value=None): + original_name = media.get_original_name() + + assert original_name is None, original_name + def test_set_original_name_when_exists(): temporary_folder, folder = helper.create_working_folder() From 4b6def2176a770af0e3f45d7247e67558bf2d957 Mon Sep 17 00:00:00 2001 From: Thierry Perraut Date: Sat, 7 Feb 2026 14:42:35 -0800 Subject: [PATCH 2/2] Make Ctrl+C interruption and ExifTool shutdown/restart reliable (cherry picked from commit 269f81e8190d8c5356b13ef93f7492db559612c9) --- elodie.py | 284 ++++++++++++++--------- elodie/external/pyexiftool.py | 137 +++++++++-- elodie/tests/elodie_test.py | 76 ++++++ elodie/tests/external_pyexiftool_test.py | 160 +++++++++++++ 4 files changed, 527 insertions(+), 130 deletions(-) diff --git a/elodie.py b/elodie.py index e516e2aa..1d081ceb 100755 --- a/elodie.py +++ b/elodie.py @@ -3,6 +3,7 @@ from __future__ import print_function import os import re +import signal import sys from datetime import datetime @@ -36,6 +37,28 @@ FILESYSTEM = FileSystem() + +class _GracefulInterruptHandler(object): + """Track SIGINT requests so commands can stop cleanly between files.""" + + def __init__(self): + self.interrupted = False + self._previous_sigint_handler = None + + def __enter__(self): + self._previous_sigint_handler = signal.getsignal(signal.SIGINT) + signal.signal(signal.SIGINT, self._handle_interrupt) + return self + + def __exit__(self, exc_type, exc_value, traceback): + signal.signal(signal.SIGINT, self._previous_sigint_handler) + return False + + def _handle_interrupt(self, signum, frame): + if not self.interrupted: + log.warn('Interrupt requested. Finishing current file before stopping.') + self.interrupted = True + def import_file(_file, destination, album_from_folder, trash, allow_duplicates, location=None, time=None): _file = _decode(_file) @@ -127,6 +150,7 @@ def _import(destination, source, file, album_from_folder, trash, allow_duplicate constants.debug = debug constants.dry_run = dry_run has_errors = False + interrupted = False result = Result() destination = _decode(destination) @@ -156,19 +180,27 @@ def _import(destination, source, file, album_from_folder, trash, allow_duplicate if not FILESYSTEM.should_exclude(path, exclude_regex_list, True): files.add(path) - for current_file in files: - dest_path = import_file(current_file, destination, album_from_folder, - trash, allow_duplicates, location, time) - if dest_path: - result.append((current_file, True)) - elif not allow_duplicates: - result.append((current_file, None)) # duplicate - else: - result.append((current_file, False)) # error - has_errors = has_errors is True or not dest_path + with _GracefulInterruptHandler() as interrupt_handler: + for current_file in files: + if interrupt_handler.interrupted: + interrupted = True + break + + dest_path = import_file(current_file, destination, album_from_folder, + trash, allow_duplicates, location, time) + if dest_path: + result.append((current_file, True)) + elif not allow_duplicates: + result.append((current_file, None)) # duplicate + else: + result.append((current_file, False)) # error + has_errors = has_errors is True or not dest_path result.write() + if interrupted: + sys.exit(130) + if has_errors: sys.exit(1) @@ -182,6 +214,7 @@ def _generate_db(source, debug): """Regenerate the hash.json database which contains all of the sha256 signatures of media files. The hash.json file is located at ~/.elodie/. """ constants.debug = debug + interrupted = False result = Result() source = os.path.abspath(os.path.expanduser(source)) @@ -193,39 +226,55 @@ def _generate_db(source, debug): db.backup_hash_db() db.reset_hash_db() - for current_file in FILESYSTEM.get_all_files(source): - result.append((current_file, True)) - db.add_hash(db.checksum(current_file), current_file) - log.progress() + with _GracefulInterruptHandler() as interrupt_handler: + for current_file in FILESYSTEM.get_all_files(source): + if interrupt_handler.interrupted: + interrupted = True + break + result.append((current_file, True)) + db.add_hash(db.checksum(current_file), current_file) + log.progress() db.update_hash_db() log.progress('', True) result.write() + if interrupted: + sys.exit(130) + @click.command('verify') @click.option('--debug', default=False, is_flag=True, help='Show more verbose debug output.') def _verify(debug): constants.debug = debug + interrupted = False result = Result() db = Db() - for checksum, file_path in db.all(): - if not os.path.isfile(file_path): - result.append((file_path, False)) - log.progress('x') - continue - - actual_checksum = db.checksum(file_path) - if checksum == actual_checksum: - result.append((file_path, True)) - log.progress() - else: - result.append((file_path, False)) - log.progress('x') + with _GracefulInterruptHandler() as interrupt_handler: + for checksum, file_path in db.all(): + if interrupt_handler.interrupted: + interrupted = True + break + + if not os.path.isfile(file_path): + result.append((file_path, False)) + log.progress('f') + continue + + actual_checksum = db.checksum(file_path) + if checksum == actual_checksum: + result.append((file_path, True)) + log.progress() + else: + result.append((file_path, False)) + log.progress('c') log.progress('', True) result.write() + if interrupted: + sys.exit(130) + def update_location(media, file_path, location_name): """Update location exif metadata of media. @@ -281,6 +330,7 @@ def _update(album, location, time, title, paths, debug, dry_run): constants.debug = debug constants.dry_run = dry_run has_errors = False + interrupted = False result = Result() files = set() @@ -291,94 +341,102 @@ def _update(album, location, time, title, paths, debug, dry_run): else: files.add(path) - for current_file in files: - if not os.path.exists(current_file): - has_errors = True - result.append((current_file, False)) - log.warn('Could not find %s' % current_file) - log.all('{"source":"%s", "error_msg":"Could not find %s"}' % - (current_file, current_file)) - continue - - current_file = os.path.expanduser(current_file) - - # The destination folder structure could contain any number of levels - # So we calculate that and traverse up the tree. - # '/path/to/file/photo.jpg' -> '/path/to/file' -> - # ['path','to','file'] -> ['path','to'] -> '/path/to' - current_directory = os.path.dirname(current_file) - destination_depth = -1 * len(FILESYSTEM.get_folder_path_definition()) - destination = os.sep.join( - os.path.normpath( - current_directory - ).split(os.sep)[:destination_depth] - ) - - media = Media.get_class_by_file(current_file, get_all_subclasses()) - if not media: - continue - - updated = False - if location: - update_location(media, current_file, location) - updated = True - if time: - update_time(media, current_file, time) - updated = True - if album: - media.set_album(album) - updated = True - - # Updating a title can be problematic when doing it 2+ times on a file. - # You would end up with img_001.jpg -> img_001-first-title.jpg -> - # img_001-first-title-second-title.jpg. - # To resolve that we have to track the prior title (if there was one. - # Then we massage the updated_media's metadata['base_name'] to remove - # the old title. - # Since FileSystem.get_file_name() relies on base_name it will properly - # rename the file by updating the title instead of appending it. - remove_old_title_from_name = False - if title: - # We call get_metadata() to cache it before making any changes - metadata = media.get_metadata() - title_update_status = media.set_title(title) - original_title = metadata['title'] - if title_update_status and original_title: - # @TODO: We should move this to a shared method since - # FileSystem.get_file_name() does it too. - original_title = re.sub(r'\W+', '-', original_title.lower()) - original_base_name = metadata['base_name'] - remove_old_title_from_name = True - updated = True - - if updated: - updated_media = Media.get_class_by_file(current_file, - get_all_subclasses()) - # See comments above on why we have to do this when titles - # get updated. - if remove_old_title_from_name and len(original_title) > 0: - updated_media.get_metadata() - updated_media.set_metadata_basename( - original_base_name.replace('-%s' % original_title, '')) - - dest_path = FILESYSTEM.process_file(current_file, destination, - updated_media, move=True, allowDuplicate=True) - log.info(u'%s -> %s' % (current_file, dest_path)) - log.all('{"source":"%s", "destination":"%s"}' % (current_file, - dest_path)) - # If the folder we moved the file out of or its parent are empty - # we delete it. - FILESYSTEM.delete_directory_if_empty(os.path.dirname(current_file)) - FILESYSTEM.delete_directory_if_empty( - os.path.dirname(os.path.dirname(current_file))) - result.append((current_file, bool(dest_path))) - # Trip has_errors to False if it's already False or dest_path is. - has_errors = has_errors is True or not dest_path - else: - has_errors = False - result.append((current_file, False)) + with _GracefulInterruptHandler() as interrupt_handler: + for current_file in files: + if interrupt_handler.interrupted: + interrupted = True + break + + if not os.path.exists(current_file): + has_errors = True + result.append((current_file, False)) + log.warn('Could not find %s' % current_file) + log.all('{"source":"%s", "error_msg":"Could not find %s"}' % + (current_file, current_file)) + continue + + current_file = os.path.expanduser(current_file) + + # The destination folder structure could contain any number of levels + # So we calculate that and traverse up the tree. + # '/path/to/file/photo.jpg' -> '/path/to/file' -> + # ['path','to','file'] -> ['path','to'] -> '/path/to' + current_directory = os.path.dirname(current_file) + destination_depth = -1 * len(FILESYSTEM.get_folder_path_definition()) + destination = os.sep.join( + os.path.normpath( + current_directory + ).split(os.sep)[:destination_depth] + ) + + media = Media.get_class_by_file(current_file, get_all_subclasses()) + if not media: + continue + + updated = False + if location: + update_location(media, current_file, location) + updated = True + if time: + update_time(media, current_file, time) + updated = True + if album: + media.set_album(album) + updated = True + + # Updating a title can be problematic when doing it 2+ times on a file. + # You would end up with img_001.jpg -> img_001-first-title.jpg -> + # img_001-first-title-second-title.jpg. + # To resolve that we have to track the prior title (if there was one. + # Then we massage the updated_media's metadata['base_name'] to remove + # the old title. + # Since FileSystem.get_file_name() relies on base_name it will properly + # rename the file by updating the title instead of appending it. + remove_old_title_from_name = False + if title: + # We call get_metadata() to cache it before making any changes + metadata = media.get_metadata() + title_update_status = media.set_title(title) + original_title = metadata['title'] + if title_update_status and original_title: + # @TODO: We should move this to a shared method since + # FileSystem.get_file_name() does it too. + original_title = re.sub(r'\W+', '-', original_title.lower()) + original_base_name = metadata['base_name'] + remove_old_title_from_name = True + updated = True + + if updated: + updated_media = Media.get_class_by_file(current_file, + get_all_subclasses()) + # See comments above on why we have to do this when titles + # get updated. + if remove_old_title_from_name and len(original_title) > 0: + updated_media.get_metadata() + updated_media.set_metadata_basename( + original_base_name.replace('-%s' % original_title, '')) + + dest_path = FILESYSTEM.process_file(current_file, destination, + updated_media, move=True, allowDuplicate=True) + log.info(u'%s -> %s' % (current_file, dest_path)) + log.all('{"source":"%s", "destination":"%s"}' % (current_file, + dest_path)) + # If the folder we moved the file out of or its parent are empty + # we delete it. + FILESYSTEM.delete_directory_if_empty(os.path.dirname(current_file)) + FILESYSTEM.delete_directory_if_empty( + os.path.dirname(os.path.dirname(current_file))) + result.append((current_file, bool(dest_path))) + # Trip has_errors to False if it's already False or dest_path is. + has_errors = has_errors is True or not dest_path + else: + has_errors = False + result.append((current_file, False)) result.write() + + if interrupted: + sys.exit(130) if has_errors: sys.exit(1) diff --git a/elodie/external/pyexiftool.py b/elodie/external/pyexiftool.py index 29a07af1..ac36e3d1 100644 --- a/elodie/external/pyexiftool.py +++ b/elodie/external/pyexiftool.py @@ -64,6 +64,7 @@ import warnings import logging import codecs +import errno from future.utils import with_metaclass @@ -72,6 +73,11 @@ except NameError: basestring = (bytes, str) +try: + BrokenPipeError +except NameError: # pragma: no cover (Python 2 compatibility) + BrokenPipeError = IOError + executable = "exiftool" """The name of the executable to run. @@ -225,6 +231,46 @@ def __init__(self, executable_=None, addedargs=None): self.running = False + def _is_pipe_io_error(self, error): + """Return True when the subprocess pipe is already closed/broken.""" + if isinstance(error, BrokenPipeError): + return True + + err_no = getattr(error, "errno", None) + if err_no in (errno.EPIPE, errno.EINVAL, 109): + return True + + # "I/O operation on closed file" can surface as ValueError. + if isinstance(error, ValueError): + return "closed file" in str(error).lower() + + return False + + def _cleanup_process(self): + """Reset internal process state and close any open pipe handles.""" + process = getattr(self, "_process", None) + if process is not None: + for stream_name in ("stdin", "stdout", "stderr"): + stream = getattr(process, stream_name, None) + if stream is not None and hasattr(stream, "close"): + try: + stream.close() + except Exception: + pass + del self._process + self.running = False + + def _ensure_running(self): + """Ensure exiftool process is alive, restarting when needed.""" + process = getattr(self, "_process", None) + if self.running and process is not None and process.poll() is None: + return True + + self._cleanup_process() + self.start() + process = getattr(self, "_process", None) + return self.running and process is not None and process.poll() is None + def start(self): """Start an ``exiftool`` process in batch mode for this instance. @@ -252,13 +298,27 @@ def terminate(self): If the subprocess isn't running, this method will do nothing. """ - if not self.running: + if not hasattr(self, "_process"): + self.running = False return - self._process.stdin.write(b"-stay_open\nFalse\n") - self._process.stdin.flush() - self._process.communicate() - del self._process - self.running = False + try: + sent_terminate = False + if self._process.poll() is None: + try: + self._process.stdin.write(b"-stay_open\nFalse\n") + self._process.stdin.flush() + sent_terminate = True + except (OSError, ValueError) as e: + if not self._is_pipe_io_error(e): + raise + + if sent_terminate: + self._process.communicate() + elif self._process.poll() is None: + self._process.terminate() + self._process.communicate() + finally: + self._cleanup_process() def __enter__(self): self.start() @@ -268,7 +328,10 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.terminate() def __del__(self): - self.terminate() + try: + self.terminate() + except Exception: + pass def execute(self, *params): """Execute the given batch of parameters with ``exiftool``. @@ -289,15 +352,32 @@ def execute(self, *params): .. note:: This is considered a low-level method, and should rarely be needed by application developers. """ - if not self.running: + if not self._ensure_running(): raise ValueError("ExifTool instance not running.") - self._process.stdin.write(b"\n".join(params + (b"-execute\n",))) - self._process.stdin.flush() - output = b"" - fd = self._process.stdout.fileno() - while not output[-32:].strip().endswith(sentinel): - output += os.read(fd, block_size) - return output.strip()[:-len(sentinel)] + + for attempt in range(2): + try: + self._process.stdin.write(b"\n".join(params + (b"-execute\n",))) + self._process.stdin.flush() + + output = b"" + fd = self._process.stdout.fileno() + while not output[-32:].strip().endswith(sentinel): + chunk = os.read(fd, block_size) + if chunk == b"": + raise OSError(errno.EPIPE, "ExifTool stdout closed unexpectedly") + output += chunk + return output.strip()[:-len(sentinel)] + except (OSError, ValueError) as e: + if not self._is_pipe_io_error(e): + raise + logging.warning("ExifTool pipe error during execute; restarting process.") + self._cleanup_process() + if attempt == 0 and self._ensure_running(): + continue + return b"" + + return b"" def execute_json(self, *params): """Execute the given batch of parameters and parse the JSON output. @@ -327,9 +407,32 @@ def execute_json(self, *params): # http://stackoverflow.com/a/5552623/1318758 # https://github.com/jmathai/elodie/issues/127 try: - return json.loads(self.execute(b"-j", *params).decode("utf-8")) + raw = self.execute(b"-j", *params) + if not raw: + return + return json.loads(raw.decode("utf-8")) except UnicodeDecodeError as e: - return json.loads(self.execute(b"-j", *params).decode("latin-1")) + try: + raw = self.execute(b"-j", *params) + if not raw: + return + return json.loads(raw.decode("latin-1")) + except UnicodeDecodeError as e: + # sys.stderr.write("An exception occurred: ", e) + logging.critical(params) # log exception info at CRITICAL log level + + logging.critical(e, exc_info=True) # log exception info at CRITICAL log level + return + except Exception as e: + # sys.stderr.write("An exception occurred: ", e) + logging.critical(e, exc_info=True) # log exception info at CRITICAL log level + return + except Exception as e: + # Handle the exception + logging.critical(e, exc_info=True) # log exception info at CRITICAL log level + # sys.stderr.write("An exception occurred: ", e) + # raise ValueError("Other Exception happened") + return def get_metadata_batch(self, filenames): """Return all meta-data for the given files. diff --git a/elodie/tests/elodie_test.py b/elodie/tests/elodie_test.py index 69d53973..dc6c145a 100644 --- a/elodie/tests/elodie_test.py +++ b/elodie/tests/elodie_test.py @@ -780,6 +780,82 @@ def test_regenerate_valid_source_with_invalid_files(): assert '3c19a5d751cf19e093b7447297731124d9cc987d3f91a9d1872c3b1c1b15639a' in db.hash_db, db.hash_db assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' not in db.hash_db, db.hash_db +def test_import_stops_cleanly_on_interrupt_request(): + class FakeInterruptHandler(object): + def __init__(self): + self.interrupted = False + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + return False + + fake_interrupt = FakeInterruptHandler() + import_calls = {'count': 0} + + def fake_import_file(*args, **kwargs): + import_calls['count'] += 1 + fake_interrupt.interrupted = True + return '/tmp/imported-file.jpg' + + runner = CliRunner() + with mock.patch.object(elodie, '_GracefulInterruptHandler', return_value=fake_interrupt): + with mock.patch.object(elodie, 'import_file', side_effect=fake_import_file): + with mock.patch.object(elodie, 'load_config', return_value={}): + result = runner.invoke( + elodie._import, + ['--destination', '/tmp/elodie-import', '/tmp/a.jpg', '/tmp/b.jpg'] + ) + + assert result.exit_code == 130, result.output + assert import_calls['count'] == 1, import_calls + +def test_generate_db_stops_cleanly_on_interrupt_request(): + class FakeInterruptHandler(object): + def __init__(self): + self.interrupted = False + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + return False + + class FakeDb(object): + def __init__(self, interrupt_handler): + self.interrupt_handler = interrupt_handler + self.update_calls = 0 + self.hash_db = {} + + def backup_hash_db(self): + return None + + def reset_hash_db(self): + self.hash_db = {} + + def checksum(self, file_path): + return file_path + + def add_hash(self, key, value): + self.hash_db[key] = value + self.interrupt_handler.interrupted = True + + def update_hash_db(self): + self.update_calls += 1 + + fake_interrupt = FakeInterruptHandler() + fake_db = FakeDb(fake_interrupt) + + runner = CliRunner() + with mock.patch.object(elodie, '_GracefulInterruptHandler', return_value=fake_interrupt): + with mock.patch.object(elodie, 'Db', return_value=fake_db): + with mock.patch.object(elodie.FILESYSTEM, 'get_all_files', return_value=['/tmp/a.jpg', '/tmp/b.jpg']): + result = runner.invoke(elodie._generate_db, ['--source', helper.temp_dir()]) + + assert result.exit_code == 130, result.output + assert fake_db.update_calls == 1 + def test_verify_ok(): temporary_folder, folder = helper.create_working_folder() diff --git a/elodie/tests/external_pyexiftool_test.py b/elodie/tests/external_pyexiftool_test.py index a51b5d11..c9701f3e 100644 --- a/elodie/tests/external_pyexiftool_test.py +++ b/elodie/tests/external_pyexiftool_test.py @@ -96,3 +96,163 @@ def test_get_metadata_returns_none_when_execute_json_is_empty(): with patch.object(et, 'execute_json', return_value=[]): result = et.get_metadata("/tmp/test.jpg") assert result is None + +def test_terminate_handles_broken_stdin_pipe(): + class FakeStdin(object): + def write(self, data): + return len(data) + + def flush(self): + raise OSError(22, "Invalid argument") + + class FakeProcess(object): + def __init__(self): + self.stdin = FakeStdin() + self.stdout = None + self.stderr = None + + def poll(self): + return None + + def terminate(self): + return None + + def communicate(self): + return (b"", b"") + + class FakeExifTool(object): + def __init__(self): + self.running = True + self._process = FakeProcess() + + def _is_pipe_io_error(self, error): + return ExifTool._is_pipe_io_error(self, error) + + def _cleanup_process(self): + return ExifTool._cleanup_process(self) + + fake = FakeExifTool() + ExifTool.terminate(fake) + + assert fake.running is False + +def test_execute_returns_empty_when_stdin_pipe_is_invalid(): + class FakeStdin(object): + def write(self, data): + return len(data) + + def flush(self): + raise OSError(22, "Invalid argument") + + class FakeStdout(object): + def fileno(self): + return 0 + + class FakeProcess(object): + def __init__(self): + self.stdin = FakeStdin() + self.stdout = FakeStdout() + self.stderr = None + + def poll(self): + return None + + class FakeExifTool(object): + def __init__(self): + self.running = True + self._process = FakeProcess() + + def _is_pipe_io_error(self, error): + return ExifTool._is_pipe_io_error(self, error) + + def start(self): + self._process = FakeProcess() + self.running = True + + def _cleanup_process(self): + return ExifTool._cleanup_process(self) + + def _ensure_running(self): + return ExifTool._ensure_running(self) + + fake = FakeExifTool() + result = ExifTool.execute(fake, b"-ver") + + assert result == b"" + assert fake.running is False + +def test_execute_restarts_after_pipe_error(): + class FakeStdinBroken(object): + def write(self, data): + return len(data) + + def flush(self): + raise OSError(22, "Invalid argument") + + def close(self): + return None + + class FakeStdinOk(object): + def write(self, data): + return len(data) + + def flush(self): + return None + + def close(self): + return None + + class FakeStdout(object): + def __init__(self, fd_value): + self.fd_value = fd_value + + def fileno(self): + return self.fd_value + + def close(self): + return None + + class FakeProcess(object): + def __init__(self, stdin, stdout): + self.stdin = stdin + self.stdout = stdout + self.stderr = None + + def poll(self): + return None + + class FakeExifTool(object): + def __init__(self): + self.running = False + self._start_calls = 0 + self._processes = [ + FakeProcess(FakeStdinBroken(), FakeStdout(1001)), + FakeProcess(FakeStdinOk(), FakeStdout(1002)), + ] + + def start(self): + self._process = self._processes[self._start_calls] + self._start_calls += 1 + self.running = True + + def _is_pipe_io_error(self, error): + return ExifTool._is_pipe_io_error(self, error) + + def _cleanup_process(self): + return ExifTool._cleanup_process(self) + + def _ensure_running(self): + return ExifTool._ensure_running(self) + + fake = FakeExifTool() + + def fake_read(fd, blocksize): + assert fd == 1002 + return b'ok\n{ready}' + + with patch('elodie.external.pyexiftool.os.read', side_effect=fake_read): + result = ExifTool.execute(fake, b'-ver') + + assert result == b'ok\n' + assert fake._start_calls == 2 + assert fake.running is True