diff --git a/base/manager.go b/base/manager.go index 4b999b5..b3c7cd3 100644 --- a/base/manager.go +++ b/base/manager.go @@ -2,16 +2,17 @@ package base import ( "context" - "github.com/viant/afs/file" - "github.com/viant/afs/object" - "github.com/viant/afs/option" - "github.com/viant/afs/storage" - "github.com/viant/afs/url" "io" "os" "path" "reflect" "sync" + + "github.com/viant/afs/file" + "github.com/viant/afs/object" + "github.com/viant/afs/option" + "github.com/viant/afs/storage" + "github.com/viant/afs/url" ) // Manager represents Storager base manager @@ -67,8 +68,10 @@ func (m *Manager) List(ctx context.Context, URL string, options ...storage.Optio _, name := path.Split(URLPath) if files[0].Name() == "" { + // To verify: this seems to be a special handler for embedded URLs files[0] = file.NewInfo(name, files[0].Size(), files[0].Mode(), files[0].ModTime(), files[0].IsDir()) } + fileURL := "" for i := 0; i < len(files); i++ { if i == 0 && files[i].Name() == name { diff --git a/list_test.go b/list_test.go index 1424e41..cb072f3 100644 --- a/list_test.go +++ b/list_test.go @@ -3,16 +3,17 @@ package afs import ( "context" "fmt" + "os" + "path" + "testing" + "github.com/stretchr/testify/assert" "github.com/viant/afs/asset" "github.com/viant/afs/file" "github.com/viant/afs/option" - "os" - "path" - "testing" ) -func TestService_List(t *testing.T) { +func TestServiceList(t *testing.T) { baseDir := os.TempDir() ctx := context.Background() @@ -24,8 +25,8 @@ func TestService_List(t *testing.T) { assets []*asset.Resource }{ { - description: "multi resource walk", - baseLocation: "file://localhost/" + path.Join(baseDir, "service_walk"), + description: "l2 walk", + baseLocation: "file://localhost" + path.Join(baseDir, "service_walk_l2"), assets: []*asset.Resource{ asset.NewFile("foo1.txt", []byte("abc"), 0644), asset.NewDir("s1", file.DefaultDirOsMode), @@ -37,8 +38,8 @@ func TestService_List(t *testing.T) { }, }, { - description: "multi resource walk", - baseLocation: "file://localhost/" + path.Join(baseDir, "service_walk"), + description: "l3 walk", + baseLocation: "file://localhost" + path.Join(baseDir, "service_walk_l3"), assets: []*asset.Resource{ asset.NewFile("foo1.txt", []byte("abc"), 0644), asset.NewDir("r/s1/s1", file.DefaultDirOsMode), @@ -59,20 +60,93 @@ func TestService_List(t *testing.T) { err := asset.Create(fileManager, useCase.baseLocation, useCase.assets) assert.Nil(t, err, useCase.description) - actuals := map[string]bool{} + removePrefixLen := len(useCase.baseLocation) + + // Non-recursive List + { + actuals := map[string]bool{} + objects, err := service.List(ctx, useCase.baseLocation) + assert.Nil(t, err) + for _, object := range objects { + URL := object.URL() + + alsoRemoveSlash := 0 + if len(URL) > removePrefixLen { + alsoRemoveSlash = 1 + } + + renamed := string(URL[removePrefixLen+alsoRemoveSlash:]) + + actuals[renamed] = true + } - objects, err := service.List(ctx, useCase.baseLocation, option.NewRecursive(true)) - assert.Nil(t, err) - for _, object := range objects { - URL := object.URL() - relative := string(URL[len(useCase.baseLocation):]) - actuals[relative] = true + assert.Nil(t, err, useCase.description) + for _, asset := range useCase.assets { + dir := path.Dir(asset.Name) + if dir != "." { + continue + } + + _, ok := actuals[asset.Name] + assert.True(t, ok, fmt.Sprintf(useCase.description+" missing %v, %v", asset.Name, actuals)) + } + + assetMap := map[string]bool{} + for _, asset := range useCase.assets { + assetMap[asset.Name] = true + + // include all subdirs as visible items + dir := path.Dir(asset.Name) + for dir != "." && assetMap[dir] != true { + assetMap[dir] = true + dir = path.Dir(dir) + } + } + + // non-recursive list includes root named "" + assetMap[""] = true + + for filename := range actuals { + _, ok := assetMap[filename] + assert.True(t, ok, fmt.Sprintf(useCase.description+" unexpected %v, %v", filename, actuals)) + } } - assert.Nil(t, err, useCase.description) - for _, asset := range useCase.assets { - _, ok := actuals[asset.Name] - assert.True(t, ok, fmt.Sprintf(useCase.description+" %v, %v", asset.Name, actuals)) + // Recursive List + { + actuals := map[string]bool{} + objects, err := service.List(ctx, useCase.baseLocation, option.NewRecursive(true)) + assert.Nil(t, err) + for _, object := range objects { + URL := object.URL() + relative := string(URL[removePrefixLen+1:]) + actuals[relative] = true + } + + assert.Nil(t, err, useCase.description) + for _, asset := range useCase.assets { + _, ok := actuals[asset.Name] + assert.True(t, ok, fmt.Sprintf(useCase.description+" missing %v, %v", asset.Name, actuals)) + } + + assetMap := map[string]bool{} + for _, asset := range useCase.assets { + assetMap[asset.Name] = true + + // include all subdirs as visible items + dir := path.Dir(asset.Name) + for dir != "." && assetMap[dir] != true { + assetMap[dir] = true + dir = path.Dir(dir) + } + } + + // recursive list does not include root named "" + + for filename := range actuals { + _, ok := assetMap[filename] + assert.True(t, ok, fmt.Sprintf(useCase.description+" unexpected %v, %v", filename, actuals)) + } } } diff --git a/zip/manager_test.go b/zip/manager_test.go index ee99ea2..48b50a8 100644 --- a/zip/manager_test.go +++ b/zip/manager_test.go @@ -1,25 +1,33 @@ package zip_test import ( + "archive/zip" "context" "fmt" + "os" + "path" + "runtime" + "testing" + "github.com/stretchr/testify/assert" "github.com/viant/afs" "github.com/viant/afs/option" "github.com/viant/afs/storage" - "path" - "runtime" - "testing" ) type useCaseFn func(s afs.Service, ctx context.Context, url string) ([]storage.Object, error) func TestNew(t *testing.T) { testCases(t, func(service afs.Service, ctx context.Context, url string) ([]storage.Object, error) { - return service.List(ctx, url) + return service.List(ctx, url) }) } +func TestNoCache(t *testing.T) { + testCases(t, func(service afs.Service, ctx context.Context, url string) ([]storage.Object, error) { + return service.List(ctx, url, &option.NoCache{Source: option.NoCacheBaseURL}) + }) +} func testCases(t *testing.T, callList useCaseFn) { _, filename, _, _ := runtime.Caller(0) @@ -30,20 +38,38 @@ func testCases(t *testing.T, callList useCaseFn) { description string URL string expect map[string]bool + isDir bool }{ { - description: "list war classes", - URL: fmt.Sprintf("file:%v/test/app.war/zip://localhost/WEB-INF/classes", baseDir), + description: "list war web-inf directory", + URL: fmt.Sprintf("file:%vtest/app.war/zip://localhost/WEB-INF", baseDir), + expect: map[string]bool{ + // this is the listed directory + "WEB-INF": true, + + "web.xml": true, + "HelloWorld.class": true, + "config.properties": true, + }, + isDir: true, + }, + + { + description: "list war classes directory", + URL: fmt.Sprintf("file:%vtest/app.war/zip://localhost/WEB-INF/classes", baseDir), expect: map[string]bool{ - "classes": true, + // this is the listed directory + "classes": true, + "HelloWorld.class": true, "config.properties": true, }, + isDir: true, }, { - description: "list war classes", - URL: fmt.Sprintf("file:%v/test/app.war/zip://localhost/WEB-INF/classes/config.properties", baseDir), + description: "list war file only", + URL: fmt.Sprintf("file:%vtest/app.war/zip://localhost/WEB-INF/classes/config.properties", baseDir), expect: map[string]bool{ "config.properties": true, }, @@ -54,16 +80,158 @@ func testCases(t *testing.T, callList useCaseFn) { service := afs.New() objects, err := callList(service, ctx, useCase.URL) assert.Nil(t, err, useCase.description) - assert.EqualValues(t, len(useCase.expect), len(objects)) + + cleanObjects := make([]storage.Object, 0) for _, obj := range objects { - assert.True(t, useCase.expect[obj.Name()], useCase.description+" "+obj.URL()) + if obj.Name() == "" { + continue + } + cleanObjects = append(cleanObjects, obj) + } + + assert.EqualValues(t, len(useCase.expect), len(cleanObjects)) + for _, obj := range cleanObjects { + assert.True(t, useCase.expect[obj.Name()], fmt.Sprintf("%s missing [%s] [%s]", useCase.description, obj.Name(), obj.URL())) } } } -func TestNoCache(t *testing.T) { - testCases(t, func(service afs.Service, ctx context.Context, url string) ([]storage.Object, error) { - return service.List(ctx, url, &option.NoCache{Source: option.NoCacheBaseURL}) - }) +func TestCopyFromZip(t *testing.T) { + ctx := context.Background() + + service := afs.New() + + // Create a temporary file for the zip + tempFile, err := os.CreateTemp("", "test_nosubdir_*.zip") + assert.Nil(t, err) + + defer tempFile.Close() + defer os.Remove(tempFile.Name()) + + // Create a new zip writer + zipWriter := zip.NewWriter(tempFile) + + // Add files to the zip + files := []struct { + path string + content string + }{ + {"a.txt", "a"}, + {"b.txt", "b"}, + } + + // Write each file to the zip + for _, file := range files { + w, err := zipWriter.Create(file.path) + assert.Nil(t, err) + _, err = w.Write([]byte(file.content)) + assert.Nil(t, err) + } + + // Close the zip writer to flush the contents + err = zipWriter.Close() + assert.Nil(t, err) + + srcURL := fmt.Sprintf("file:%s/zip://localhost", tempFile.Name()) + + // Verify zip contents + zipObjects, err := service.List(ctx, srcURL) + assert.Nil(t, err) + // 3 objects - root dir, 2 files + assert.EqualValues(t, 3, len(zipObjects), "expected 3 objects in zip, got %+V", zipObjects) + + // Manually create files like a ZIP might but without using ZIP + manualURL := path.Join(os.TempDir(), "nosubdir_manual/") + os.MkdirAll(manualURL, 0755) + for _, file := range files { + os.WriteFile(path.Join(manualURL, file.path), []byte(file.content), 0644) + } + + // Create a temporary destination directory + destURL := path.Join(os.TempDir(), "nosubdir_test/") + defer os.RemoveAll(destURL) + + // Copy from zip to filesystem + err = service.Copy(ctx, srcURL, destURL) + assert.Nil(t, err) + + // Verify destination contents + objects, err := service.List(ctx, destURL) + assert.Nil(t, err) + // 3 objects - root dir, 2 files + assert.EqualValues(t, 3, len(objects), "expected 3 objects in dest, got %+V", objects) +} + +func TestCreateZipWithoutDirEntries(t *testing.T) { + ctx := context.Background() + service := afs.New() + + // Test creating a zip with depth 2 directory structure without explicit directory entries + // Create a temporary file for the zip + tempFile, err := os.CreateTemp("", "test_no_dir_entries_*.zip") + assert.Nil(t, err) + defer tempFile.Close() + defer os.Remove(tempFile.Name()) + + // Create a new zip writer + zipWriter := zip.NewWriter(tempFile) + defer zipWriter.Close() + + // Add files with directory paths but no explicit directory entries + files := []struct { + path string + content string + }{ + {"dir1/file1.txt", "content of file1"}, + {"dir1/file2.txt", "content of file2"}, + {"dir1/subdir/file3.txt", "content of file3"}, + {"dir1/subdir/file4.txt", "content of file4"}, + {"dir2/file5.txt", "content of file5"}, + } + + // Write each file to the zip + for _, file := range files { + w, err := zipWriter.Create(file.path) + assert.Nil(t, err) + _, err = w.Write([]byte(file.content)) + assert.Nil(t, err) + } + + // Close the zip writer to flush the contents + err = zipWriter.Close() + assert.Nil(t, err) + + // Verify the zip contents + zipURL := fmt.Sprintf("file:%s/zip://localhost", tempFile.Name()) + // this test unfortunately is invalid since we're changing how List works... + objects, err := service.List(ctx, zipURL) + assert.Nil(t, err) + + // We should see both directories and files, even though we didn't explicitly create directory entries + expectedPaths := map[string]bool{ + "file1.txt": true, + "file2.txt": true, + "file3.txt": true, + "file4.txt": true, + "file5.txt": true, + } + + foundPaths := map[string]bool{} + + // Check that we can find all expected paths + for _, obj := range objects { + name := obj.Name() + if name == "" { + // directories don't get names in List() + continue + } + + assert.True(t, expectedPaths[name], "Found unexpected object: %v, url: %v", name, obj.URL()) + assert.False(t, foundPaths[name], "Found repeat object: %v, url: %v", name, obj.URL()) + foundPaths[name] = true + } + + // Check that we found all expected paths + assert.Equal(t, len(expectedPaths), len(foundPaths), "Some expected paths were not found: %v ", foundPaths) } diff --git a/zip/storager_test.go b/zip/storager_test.go index 1068a6d..ae0b7f8 100644 --- a/zip/storager_test.go +++ b/zip/storager_test.go @@ -3,15 +3,17 @@ package zip import ( "bytes" "context" - "github.com/stretchr/testify/assert" - "github.com/viant/afs/asset" - "github.com/viant/afs/mem" - "github.com/viant/afs/storage" + "fmt" "io" "io/ioutil" "os" "path" "testing" + + "github.com/stretchr/testify/assert" + "github.com/viant/afs/asset" + "github.com/viant/afs/mem" + "github.com/viant/afs/storage" ) func TestNewStorager(t *testing.T) { @@ -74,7 +76,8 @@ func TestNewStorager(t *testing.T) { assert.Nil(t, err, useCase.description) objects, err = storager.List(ctx, parent) assert.Nil(t, err, useCase.description) - assert.EqualValues(t, 2, len(objects), useCase.description) + + assert.EqualValues(t, 2, len(objects), fmt.Sprintf("%s: expected objects", useCase.description)) ok, _ = storager.Exists(ctx, useCase.resource.Name) assert.True(t, ok, useCase.description) diff --git a/zip/walker.go b/zip/walker.go index 5cae429..8ed60fd 100644 --- a/zip/walker.go +++ b/zip/walker.go @@ -4,13 +4,15 @@ import ( "archive/zip" "bytes" "context" + "io" + "io/ioutil" + "os" + "path" + "github.com/viant/afs/file" "github.com/viant/afs/option" "github.com/viant/afs/storage" "github.com/viant/afs/url" - "io" - "io/ioutil" - "path" ) type walker struct { @@ -42,8 +44,13 @@ func (w *walker) open(ctx context.Context, URL string, options ...storage.Option return bytes.NewReader(w.data), len(w.data), nil } +// Walk walks the zip file and calls the handler for each file and directory. +// This is inconsistent with the expected behavior of other List() operations. +// List() for a file system requires the recursive flag, and in the recursive case, does not include the root directory. +// This function will always be recursive, and includes the root directory. func (w *walker) Walk(ctx context.Context, URL string, handler storage.OnVisit, options ...storage.Option) error { URL = url.Normalize(URL, file.Scheme) + readerAt, size, err := w.open(ctx, URL, options...) if err != nil { return err @@ -55,21 +62,78 @@ func (w *walker) Walk(ctx context.Context, URL string, handler storage.OnVisit, if err != nil { return err } - //cache is only used if sym link are used + + processedDirs := make(map[string]bool) + + // cache is only used if sym link are used for _, fileHandler := range reader.File { parentPath, name := path.Split(fileHandler.Name) + if parentPath != "" { + // Split() returns a trailing slash + parentPath = parentPath[:len(parentPath)-1] + } + fileInfo := fileHandler.FileInfo() - info := file.NewInfo(name, fileInfo.Size(), fileInfo.Mode(), fileInfo.ModTime(), fileInfo.IsDir()) + fileModTime := fileInfo.ModTime() + + dirPath := parentPath + + // For files, process parent directories first (implicit directories) + // Note that the root directory has dirPath of "" (instead of "." or "/"). + // This is because of the for condition and behavior of path.Dir() + for !fileHandler.Mode().IsDir() && dirPath != "." && dirPath != "/" { + if !processedDirs[dirPath] { + // markPath is the path to be used to mark the directory as processed + var markPath string + + var parentDir string + + // special case for root, we need to establish a fake initial dir + if dirPath == "" { + markPath = "" + parentDir = "" + } else { + markPath = dirPath + parentDir = path.Dir(dirPath) + } + + // Create virtual directory entry. + // There is a use case of List() where there is a prefix filter, which breaks if the directory FileInfo has a name. + // However, this results in a bunch of pretty much identical directory FileInfos. + // For Copy(), this will cause a bunch of repeat directories to be created. + dirInfo := file.NewInfo("", 0, os.ModeDir|0755, fileModTime, true) + + // Visit the directory + shallContinue, err := handler(ctx, URL, parentDir, dirInfo, nil) + if err != nil || !shallContinue { + return err + } + + processedDirs[markPath] = true + } + + dirPath = path.Dir(dirPath) + } + + // note that in the case of a directory, the name is always "" due to path.Split() + info := file.NewInfo(name, + fileInfo.Size(), fileInfo.Mode(), fileModTime, fileInfo.IsDir()) + var reader io.ReadCloser if !fileHandler.Mode().IsDir() { if reader, err = fileHandler.Open(); err != nil { return err } + } else { + // handle case if the entry is a directory + processedDirs[fileHandler.Name] = true } + shallContinue, err := handler(ctx, URL, parentPath, info, reader) if reader != nil { err = reader.Close() } + if err != nil || !shallContinue { return err } @@ -77,12 +141,12 @@ func (w *walker) Walk(ctx context.Context, URL string, handler storage.OnVisit, return nil } -//NewWalker returns a walker +// NewWalker returns a walker func newWalker(download storage.Opener) *walker { return &walker{Opener: download} } -//NewWalker returns a walker +// NewWalker returns a walker func NewWalker(downloader storage.Opener) storage.Walker { return newWalker(downloader) }