diff --git a/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs b/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs index 92dd3c2..156fdbf 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs +++ b/RecursiveExtractor.Tests/ExtractorTests/ExpectedNumFilesTests.cs @@ -38,6 +38,7 @@ public static TheoryData ArchiveData { "TestData.a",3 }, { "TestData.bsd.ar",3 }, { "TestData.iso",3 }, + { "TestDataRockRidge.iso",2 }, { "TestData.vhdx",3 }, { "EmptyFile.txt", 1 }, { "TestDataArchivesNested.zip", RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? 54 : 52 }, @@ -79,6 +80,7 @@ public static TheoryData NoRecursionData { "TestData.a", 3 }, { "TestData.bsd.ar", 3 }, { "TestData.iso", 3 }, + { "TestDataRockRidge.iso", 2 }, { "TestData.vhdx", 3 }, { "EmptyFile.txt", 1 }, { "TestDataArchivesNested.zip", 14 }, diff --git a/RecursiveExtractor.Tests/ExtractorTests/FileMetadataTests.cs b/RecursiveExtractor.Tests/ExtractorTests/FileMetadataTests.cs index 1700217..5ffa462 100644 --- a/RecursiveExtractor.Tests/ExtractorTests/FileMetadataTests.cs +++ b/RecursiveExtractor.Tests/ExtractorTests/FileMetadataTests.cs @@ -101,6 +101,8 @@ public void MetadataDefaults_AreNull() Assert.Null(metadata.IsExecutable); Assert.Null(metadata.IsSetUid); Assert.Null(metadata.IsSetGid); + Assert.Null(metadata.FileAttributes); + Assert.Null(metadata.SecurityDescriptorSddl); } [Fact] @@ -140,4 +142,110 @@ public void FileEntry_MetadataDefaultsToNull() var entry = new FileEntry("test.txt", stream); Assert.Null(entry.Metadata); } + + [Fact] + public async Task IsoEntries_MetadataIsNullWithoutRockRidge() + { + // TestData.iso does not have RockRidge extensions, so Unix metadata is not available + var extractor = new Extractor(); + var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", "TestData.iso"); + var results = await extractor.ExtractAsync(path, new ExtractorOptions() { Recurse = false }).ToListAsync(); + + Assert.NotEmpty(results); + foreach (var entry in results) + { + // Without RockRidge extensions, metadata should be null + Assert.Null(entry.Metadata); + } + } + + [Fact] + public void IsoEntries_MetadataIsNullWithoutRockRidge_Sync() + { + var extractor = new Extractor(); + var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", "TestData.iso"); + var results = extractor.Extract(path, new ExtractorOptions() { Recurse = false }).ToList(); + + Assert.NotEmpty(results); + foreach (var entry in results) + { + Assert.Null(entry.Metadata); + } + } + + [Fact] + public async Task IsoRockRidgeEntries_HaveMetadata() + { + // TestDataRockRidge.iso has RockRidge extensions with Unix permissions + var extractor = new Extractor(); + var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", "TestDataRockRidge.iso"); + var results = await extractor.ExtractAsync(path, new ExtractorOptions() { Recurse = false }).ToListAsync(); + + Assert.NotEmpty(results); + foreach (var entry in results) + { + Assert.NotNull(entry.Metadata); + Assert.NotNull(entry.Metadata!.Mode); + Assert.NotNull(entry.Metadata.Uid); + Assert.NotNull(entry.Metadata.Gid); + } + } + + [Fact] + public void IsoRockRidgeEntries_HaveMetadata_Sync() + { + var extractor = new Extractor(); + var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", "TestDataRockRidge.iso"); + var results = extractor.Extract(path, new ExtractorOptions() { Recurse = false }).ToList(); + + Assert.NotEmpty(results); + foreach (var entry in results) + { + Assert.NotNull(entry.Metadata); + Assert.NotNull(entry.Metadata!.Mode); + Assert.NotNull(entry.Metadata.Uid); + Assert.NotNull(entry.Metadata.Gid); + } + } + + [Fact] + public async Task VhdxNtfsEntries_HaveWindowsMetadata() + { + // TestData.vhdx contains an NTFS file system that implements IDosFileSystem and IWindowsFileSystem + var extractor = new Extractor(); + var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", "TestData.vhdx"); + var results = await extractor.ExtractAsync(path, new ExtractorOptions() { Recurse = false }).ToListAsync(); + + Assert.NotEmpty(results); + foreach (var entry in results) + { + Assert.NotNull(entry.Metadata); + // NTFS provides Windows file attributes + Assert.NotNull(entry.Metadata!.FileAttributes); + // NTFS provides security descriptors + Assert.NotNull(entry.Metadata.SecurityDescriptorSddl); + Assert.Contains("D:", entry.Metadata.SecurityDescriptorSddl); // DACL present + // NTFS does not provide Unix metadata + Assert.Null(entry.Metadata.Mode); + Assert.Null(entry.Metadata.Uid); + Assert.Null(entry.Metadata.Gid); + } + } + + [Fact] + public void VhdxNtfsEntries_HaveWindowsMetadata_Sync() + { + var extractor = new Extractor(); + var path = Path.Combine(Directory.GetCurrentDirectory(), "TestData", "TestDataArchives", "TestData.vhdx"); + var results = extractor.Extract(path, new ExtractorOptions() { Recurse = false }).ToList(); + + Assert.NotEmpty(results); + foreach (var entry in results) + { + Assert.NotNull(entry.Metadata); + Assert.NotNull(entry.Metadata!.FileAttributes); + Assert.NotNull(entry.Metadata.SecurityDescriptorSddl); + Assert.Null(entry.Metadata.Mode); + } + } } diff --git a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj index 248021b..e529ded 100644 --- a/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj +++ b/RecursiveExtractor.Tests/RecursiveExtractor.Tests.csproj @@ -165,6 +165,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/RecursiveExtractor.Tests/TestData/TestDataArchives/TestDataRockRidge.iso b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestDataRockRidge.iso new file mode 100644 index 0000000..f7a9291 Binary files /dev/null and b/RecursiveExtractor.Tests/TestData/TestDataArchives/TestDataRockRidge.iso differ diff --git a/RecursiveExtractor/Extractors/DiscCommon.cs b/RecursiveExtractor/Extractors/DiscCommon.cs index d15c8fc..7ffed75 100644 --- a/RecursiveExtractor/Extractors/DiscCommon.cs +++ b/RecursiveExtractor/Extractors/DiscCommon.cs @@ -14,6 +14,102 @@ public static class DiscCommon { private static readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger(); + /// + /// Tries to extract file metadata from a DiscUtils file system entry. + /// For file systems implementing (Ext, Xfs, Btrfs, HfsPlus), + /// returns permissions, UID, and GID. + /// For file systems implementing (NTFS, FAT, WIM), + /// returns Windows file attributes. + /// For file systems implementing (NTFS, WIM), + /// also returns the security descriptor in SDDL format. + /// Returns null for file systems that support none of these interfaces. + /// + /// The opened disc file system + /// Path of the file within the file system + /// Populated or null when not available + internal static FileEntryMetadata? TryGetFileMetadata(DiscFileSystem fs, string filePath) + { + FileEntryMetadata? metadata = null; + + if (fs is IUnixFileSystem unixFs) + { + try + { + var info = unixFs.GetUnixFileInfo(filePath); + metadata = new FileEntryMetadata + { + Mode = (long)info.Permissions, + Uid = info.UserId, + Gid = info.GroupId + }; + } + catch (Exception e) + { + Logger.Debug(e, "Could not retrieve Unix metadata for {0}", filePath); + } + } + + if (fs is IDosFileSystem dosFs) + { + try + { + var winInfo = dosFs.GetFileStandardInformation(filePath); + metadata ??= new FileEntryMetadata(); + metadata.FileAttributes = winInfo.FileAttributes; + } + catch (Exception e) + { + Logger.Debug(e, "Could not retrieve DOS file attributes for {0}", filePath); + } + } + + if (fs is IWindowsFileSystem windowsFs) + { + try + { + var securityDescriptor = windowsFs.GetSecurity(filePath); + if (securityDescriptor != null) + { + metadata ??= new FileEntryMetadata(); + metadata.SecurityDescriptorSddl = securityDescriptor.GetSddlForm( + DiscUtils.Core.WindowsSecurity.AccessControl.AccessControlSections.All); + } + } + catch (Exception e) + { + Logger.Debug(e, "Could not retrieve security descriptor for {0}", filePath); + } + } + + return metadata; + } + + /// + /// Pre-collects metadata for all files while the file system is still open. + /// Used by extractors (e.g., ISO) where the file system is disposed before files are processed. + /// + /// The opened disc file system + /// The file entries to collect metadata for + /// A dictionary mapping file paths to metadata, or null if the file system does not support metadata + internal static Dictionary? CollectMetadata(DiscFileSystem fs, DiscFileInfo[] fileInfos) + { + if (fs is not IUnixFileSystem && fs is not IDosFileSystem && fs is not IWindowsFileSystem) + { + return null; + } + + var result = new Dictionary(); + foreach (var fi in fileInfos) + { + var metadata = TryGetFileMetadata(fs, fi.FullName); + if (metadata != null) + { + result[fi.FullName] = metadata; + } + } + return result; + } + /// /// Dump the FileEntries from a Logical Volume asynchronously /// @@ -59,6 +155,7 @@ public static async IAsyncEnumerable DumpLogicalVolumeAsync(LogicalVo if (fileStream != null && fi != null) { var newFileEntry = await FileEntry.FromStreamAsync($"{volume.Identity}{Path.DirectorySeparatorChar}{fi.FullName}", fileStream, parent, fi.CreationTime, fi.LastWriteTime, fi.LastAccessTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false); + newFileEntry.Metadata = TryGetFileMetadata(fs, file); if (options.Recurse || topLevel) { await foreach (var entry in Context.ExtractAsync(newFileEntry, options, governor, false)) @@ -124,6 +221,7 @@ public static IEnumerable DumpLogicalVolume(LogicalVolumeInfo volume, if (fileStream != null) { var newFileEntry = new FileEntry($"{volume.Identity}{Path.DirectorySeparatorChar}{file}", fileStream, parent, false, creation, modification, access, memoryStreamCutoff: options.MemoryStreamCutoff); + newFileEntry.Metadata = TryGetFileMetadata(fs, file); if (options.Recurse || topLevel) { foreach (var extractedFile in Context.Extract(newFileEntry, options, governor, false)) diff --git a/RecursiveExtractor/Extractors/IsoExtractor.cs b/RecursiveExtractor/Extractors/IsoExtractor.cs index 3756b31..da94c09 100644 --- a/RecursiveExtractor/Extractors/IsoExtractor.cs +++ b/RecursiveExtractor/Extractors/IsoExtractor.cs @@ -31,11 +31,13 @@ public IsoExtractor(Extractor context) public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) { DiscUtils.DiscFileInfo[]? entries = null; + Dictionary? metadataByPath = null; var failed = false; try { using var cd = new CDReader(fileEntry.Content, true); entries = cd.Root.GetFiles("*.*", SearchOption.AllDirectories).ToArray(); + metadataByPath = DiscCommon.CollectMetadata(cd, entries); } catch (Exception e) { @@ -69,6 +71,10 @@ public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, Extra { var name = fileInfo.FullName.Replace('/', Path.DirectorySeparatorChar); var newFileEntry = await FileEntry.FromStreamAsync(name, stream, fileEntry, fileInfo.CreationTime, fileInfo.LastWriteTime, fileInfo.LastAccessTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false); + if (metadataByPath != null && metadataByPath.TryGetValue(fileInfo.FullName, out var entryMetadata)) + { + newFileEntry.Metadata = entryMetadata; + } if (options.Recurse || topLevel) { await foreach (var entry in Context.ExtractAsync(newFileEntry, options, governor, false)) @@ -92,11 +98,13 @@ public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, Extra public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) { DiscUtils.DiscFileInfo[]? entries = null; + Dictionary? metadataByPath = null; var failed = false; try { using var cd = new CDReader(fileEntry.Content, true); entries = cd.Root.GetFiles("*.*", SearchOption.AllDirectories).ToArray(); + metadataByPath = DiscCommon.CollectMetadata(cd, entries); } catch(Exception e) { @@ -130,6 +138,10 @@ public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions opti { var name = fileInfo.FullName.Replace('/', Path.DirectorySeparatorChar); var newFileEntry = new FileEntry(name, stream, fileEntry, createTime: file.CreationTime, modifyTime: file.LastWriteTime, accessTime: file.LastAccessTime, memoryStreamCutoff: options.MemoryStreamCutoff); + if (metadataByPath != null && metadataByPath.TryGetValue(fileInfo.FullName, out var entryMetadata)) + { + newFileEntry.Metadata = entryMetadata; + } if (options.Recurse || topLevel) { foreach (var entry in Context.Extract(newFileEntry, options, governor, false)) diff --git a/RecursiveExtractor/Extractors/UdfExtractor.cs b/RecursiveExtractor/Extractors/UdfExtractor.cs index efc016c..27d1bb5 100644 --- a/RecursiveExtractor/Extractors/UdfExtractor.cs +++ b/RecursiveExtractor/Extractors/UdfExtractor.cs @@ -31,11 +31,13 @@ public UdfExtractor(Extractor context) public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) { DiscUtils.DiscFileInfo[]? entries = null; + Dictionary? metadataByPath = null; var failed = false; try { using var cd = new UdfReader(fileEntry.Content); entries = cd.Root.GetFiles("*.*", SearchOption.AllDirectories).ToArray(); + metadataByPath = DiscCommon.CollectMetadata(cd, entries); } catch (Exception e) { @@ -69,6 +71,10 @@ public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, Extra { var name = fileInfo.FullName.Replace('/', Path.DirectorySeparatorChar); var newFileEntry = await FileEntry.FromStreamAsync(name, stream, fileEntry, fileInfo.CreationTime, fileInfo.LastWriteTime, fileInfo.LastAccessTime, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false); + if (metadataByPath != null && metadataByPath.TryGetValue(fileInfo.FullName, out var entryMetadata)) + { + newFileEntry.Metadata = entryMetadata; + } if (options.Recurse || topLevel) { await foreach (var entry in Context.ExtractAsync(newFileEntry, options, governor, false)) @@ -92,11 +98,13 @@ public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, Extra public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor, bool topLevel = true) { DiscUtils.DiscFileInfo[]? entries = null; + Dictionary? metadataByPath = null; var failed = false; try { using var cd = new UdfReader(fileEntry.Content); entries = cd.Root.GetFiles("*.*", SearchOption.AllDirectories).ToArray(); + metadataByPath = DiscCommon.CollectMetadata(cd, entries); } catch(Exception e) { @@ -130,6 +138,10 @@ public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions opti { var name = fileInfo.FullName.Replace('/', Path.DirectorySeparatorChar); var newFileEntry = new FileEntry(name, stream, fileEntry, createTime: file.CreationTime, modifyTime: file.LastWriteTime, accessTime: file.LastAccessTime, memoryStreamCutoff: options.MemoryStreamCutoff); + if (metadataByPath != null && metadataByPath.TryGetValue(fileInfo.FullName, out var entryMetadata)) + { + newFileEntry.Metadata = entryMetadata; + } if (options.Recurse || topLevel) { foreach (var entry in Context.Extract(newFileEntry, options, governor, false)) diff --git a/RecursiveExtractor/Extractors/WimExtractor.cs b/RecursiveExtractor/Extractors/WimExtractor.cs index fe7fd3d..4cf9d51 100644 --- a/RecursiveExtractor/Extractors/WimExtractor.cs +++ b/RecursiveExtractor/Extractors/WimExtractor.cs @@ -58,6 +58,7 @@ public async IAsyncEnumerable ExtractAsync(FileEntry fileEntry, Extra { var name = file.Replace('\\', Path.DirectorySeparatorChar); var newFileEntry = await FileEntry.FromStreamAsync($"{image.FriendlyName}{Path.DirectorySeparatorChar}{name}", stream, fileEntry, memoryStreamCutoff: options.MemoryStreamCutoff).ConfigureAwait(false); + newFileEntry.Metadata = DiscCommon.TryGetFileMetadata(image, file); if (options.Recurse || topLevel) { @@ -128,6 +129,7 @@ public IEnumerable Extract(FileEntry fileEntry, ExtractorOptions opti var name = file.Replace('\\', Path.DirectorySeparatorChar); var newFileEntry = new FileEntry($"{image.FriendlyName}{Path.DirectorySeparatorChar}{name}", stream, fileEntry, memoryStreamCutoff: options.MemoryStreamCutoff); + newFileEntry.Metadata = DiscCommon.TryGetFileMetadata(image, file); if (options.Recurse || topLevel) { foreach (var extractedFile in Context.Extract(newFileEntry, options, governor, false)) diff --git a/RecursiveExtractor/FileEntryMetadata.cs b/RecursiveExtractor/FileEntryMetadata.cs index 4254c44..f7ae560 100644 --- a/RecursiveExtractor/FileEntryMetadata.cs +++ b/RecursiveExtractor/FileEntryMetadata.cs @@ -1,5 +1,7 @@ // Copyright (c) Microsoft Corporation. Licensed under the MIT License. +using System.IO; + namespace Microsoft.CST.RecursiveExtractor { /// @@ -43,5 +45,19 @@ public class FileEntryMetadata /// Null if not available from the archive format. /// public long? Gid { get; set; } + + /// + /// The Windows file attributes (e.g., ReadOnly, Hidden, System, Archive). + /// Available for NTFS, FAT, and WIM file systems. + /// Null if not available from the archive format. + /// + public FileAttributes? FileAttributes { get; set; } + + /// + /// The NTFS security descriptor in SDDL (Security Descriptor Definition Language) format. + /// Available for NTFS and WIM file systems that implement IWindowsFileSystem. + /// Null if not available from the archive format. + /// + public string? SecurityDescriptorSddl { get; set; } } } diff --git a/nuget.config b/nuget.config index 227ad0c..ba47b6a 100644 --- a/nuget.config +++ b/nuget.config @@ -1,7 +1,7 @@ - + - \ No newline at end of file +