diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index aa137764f..a1c241b4b 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -11,6 +11,7 @@ import hashlib import json import logging +import re import os import shutil import fnmatch @@ -747,6 +748,46 @@ def get_preppers(self): for prepper in sorted(preps, key=lambda x: x.priority): yield prepper(options=self.opts) + def _prep_load_auditlogs(self): + """ + Pre-load all audit logs from archives to all applicable preppers. + """ + self.log_debug("Pre-loading audit logs from all archives") + parsers_dict = {p.map_file_key.split('_')[0]: p for p in self.parsers} + parser_audits_map = [] + for prepper in self.get_preppers(): + if prepper.audit_logs_re and prepper.name in parsers_dict.keys(): + parser_audits_map.append(( + prepper.audit_logs_re, + prepper, + parsers_dict[prepper.name] + )) + for archive in self.report_paths: + # archives are not yet extracted so we can't easily iterate over + # globbed files. So let's assume logrotated files follow just the + # most typical scenario: audit.log -> audit.log.1 -> audit.log.2 + # -> ... and check just those files for as long as they exist. 
+ _file = 'var/log/audit/audit.log' + n = 0 + while True: + content = archive.get_file_content(_file) + if not content: + break + for line in content.splitlines(): + try: + for reg, prepper, parser in parser_audits_map: + matches = re.findall(reg, line) + if matches: + for item in matches: + if item not in prepper.skip_list: + parser.mapping.add(item) + except Exception as err: + self.log_debug( + f"Failed to prep content from {_file}: {err}" + ) + n += 1 + _file = f'var/log/audit/audit.log.{n}' + def preload_all_archives_into_maps(self): """Before doing the actual obfuscation, if we have multiple archives to obfuscate then we need to preload each of them into the mappings @@ -757,6 +798,7 @@ def preload_all_archives_into_maps(self): for prepper in self.get_preppers(): for archive in self.report_paths: self._prepare_archive_with_prepper(archive, prepper) + self._prep_load_auditlogs() self.main_archive.set_parsers(self.parsers) def obfuscate_report(self, archive): # pylint: disable=too-many-branches diff --git a/sos/cleaner/preppers/__init__.py b/sos/cleaner/preppers/__init__.py index 790c9e152..89f5560e9 100644 --- a/sos/cleaner/preppers/__init__.py +++ b/sos/cleaner/preppers/__init__.py @@ -46,6 +46,8 @@ class SoSPrepper(): name = 'Undefined' priority = 100 + skip_list = [] + audit_logs_re = None def __init__(self, options): self.regex_items = { diff --git a/sos/cleaner/preppers/hostname.py b/sos/cleaner/preppers/hostname.py index 2843c03cf..c0c6f85c9 100644 --- a/sos/cleaner/preppers/hostname.py +++ b/sos/cleaner/preppers/hostname.py @@ -24,6 +24,7 @@ class HostnamePrepper(SoSPrepper): """ name = 'hostname' + audit_logs_re = r'\shostname=(\S+)' def _get_items_for_hostname(self, archive): items = [] diff --git a/sos/cleaner/preppers/usernames.py b/sos/cleaner/preppers/usernames.py index 688ab06f6..8f8bb8c40 100644 --- a/sos/cleaner/preppers/usernames.py +++ b/sos/cleaner/preppers/usernames.py @@ -35,6 +35,8 @@ class UsernamePrepper(SoSPrepper): 'wtmp', ] + 
audit_logs_re = r'(?:UID|AUID)=(?:")?(\w+)(?:")?' + def _get_items_for_username(self, archive): items = set() _files = [