isolate: (refactor) extract blacklisting into common package

BUG=692940

Review-Url: https://codereview.chromium.org/2981243002
diff --git a/client/archiver/directory.go b/client/archiver/directory.go
index 6a9c367..ecb38d0 100644
--- a/client/archiver/directory.go
+++ b/client/archiver/directory.go
@@ -22,6 +22,7 @@
 	"os"
 	"path/filepath"
 
+	"github.com/luci/luci-go/client/internal/common"
 	"github.com/luci/luci-go/common/isolated"
 	"github.com/luci/luci-go/common/isolatedclient"
 	"github.com/luci/luci-go/common/runtime/tracer"
@@ -31,15 +32,11 @@
 	fullPath string
 	relPath  string
 	info     os.FileInfo
-	err      error
 }
 
 // walk() enumerates a directory tree synchronously and sends the items to
 // channel c.
-//
-// blacklist is a list of globs of files to ignore. Each blacklist glob is
-// relative to root.
-func walk(root string, blacklist []string, c chan<- *walkItem) {
+func walk(root string, fsView common.FilesystemView, c chan<- *walkItem) {
 	// TODO(maruel): Walk() sorts the file names list, which is not needed here
 	// and slows things down. Options:
 	// #1 Use os.File.Readdir() directly. It's in the stdlib and works fine, but
@@ -63,56 +60,43 @@
 	total := 0
 	end := tracer.Span(root, "walk:"+filepath.Base(root), nil)
 	defer func() { end(tracer.Args{"root": root, "total": total}) }()
-	// Check patterns upfront, so it has consistent behavior w.r.t. bad glob
-	// patterns.
-	for _, b := range blacklist {
-		if _, err := filepath.Match(b, b); err != nil {
-			c <- &walkItem{err: fmt.Errorf("bad blacklist pattern \"%s\"", b)}
-			return
-		}
-	}
-	err := filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
+
+	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
 		total++
+
 		if err != nil {
-			return fmt.Errorf("walk(%q): %v", p, err)
+			return fmt.Errorf("walk(%q): %v", path, err)
 		}
 
-		relPath, err := filepath.Rel(root, p)
+		relPath, err := fsView.RelativePath(path)
 		if err != nil {
-			return fmt.Errorf("walk: calculating relative path(%q): %v", p, err)
+			return fmt.Errorf("walk(%q): %v", path, err)
 		}
 
-		// filepath.Rel is documented to call filepath.Clean on its result before returning it,
-		// which results in "." for an empty relative path.
-		if relPath == "." {
-			return nil // Root directory.
+		if relPath == "" { // empty string indicates skip.
+			return returnSkip(info)
 		}
 
-		for _, b := range blacklist {
-			matched, _ := filepath.Match(b, relPath)
-			if !matched {
-				// Also check at the base file name.
-				matched, _ = filepath.Match(b, filepath.Base(relPath))
-			}
-			if matched {
-				// Must not return io.SkipDir for file, filepath.walk() handles this
-				// badly.
-				if info.IsDir() {
-					return filepath.SkipDir
-				}
-				return nil
-			}
+		if !info.IsDir() {
+			c <- &walkItem{fullPath: path, relPath: relPath, info: info}
 		}
-		if info.IsDir() {
-			return nil
-		}
-		c <- &walkItem{fullPath: p, relPath: relPath, info: info}
 		return nil
 	})
+
 	if err != nil {
 		// No point continuing if an error occurred during walk.
 		log.Fatalf("Unable to walk %q: %v", root, err)
 	}
+
+}
+
+// returnSkip returns the return value expected from a filepath.WalkFunc in the case where no more processing of file should occur.
+func returnSkip(file os.FileInfo) error {
+	if file.IsDir() {
+		// Must not return io.SkipDir for file, filepath.walk() handles this badly.
+		return filepath.SkipDir
+	}
+	return nil
 }
 
 // PushDirectory walks a directory at root and creates a .isolated file.
@@ -132,28 +116,31 @@
 	end := tracer.Span(a, "PushDirectory", tracer.Args{"path": relDir, "root": root})
 	defer func() { end(tracer.Args{"total": total}) }()
 	c := make(chan *walkItem)
+
+	displayName := filepath.Base(root) + ".isolated"
+	s := &Item{DisplayName: displayName}
+	fsView, err := common.NewFilesystemView(root, blacklist)
+	if err != nil {
+		s.SetErr(err)
+		return s
+	}
+
 	go func() {
-		walk(root, blacklist, c)
+		walk(root, fsView, c)
 		close(c)
 	}()
 
-	displayName := filepath.Base(root) + ".isolated"
 	i := isolated.Isolated{
 		Algo:    "sha-1",
 		Files:   map[string]isolated.File{},
 		Version: isolated.IsolatedFormatVersion,
 	}
 	items := []*Item{}
-	s := &Item{DisplayName: displayName}
 	for item := range c {
 		if s.Error() != nil {
 			// Empty the queue.
 			continue
 		}
-		if item.err != nil {
-			s.SetErr(item.err)
-			continue
-		}
 		total++
 		if relDir != "" {
 			item.relPath = filepath.Join(relDir, item.relPath)
diff --git a/client/archiver/directory_test.go b/client/archiver/directory_test.go
index b509dfa..f810df7 100644
--- a/client/archiver/directory_test.go
+++ b/client/archiver/directory_test.go
@@ -16,12 +16,10 @@
 
 import (
 	"encoding/json"
-	"errors"
 	"io/ioutil"
 	"net/http/httptest"
 	"os"
 	"path/filepath"
-	"sync"
 	"testing"
 
 	"golang.org/x/net/context"
@@ -35,25 +33,6 @@
 	. "github.com/smartystreets/goconvey/convey"
 )
 
-func TestWalkBadRegexp(t *testing.T) {
-	Convey(`A bad regexp should fail when walking a directory.`, t, func() {
-		ch := make(chan *walkItem)
-		var wg sync.WaitGroup
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			defer close(ch)
-			walk("inexistent", []string{"a["}, ch)
-		}()
-		item := <-ch
-		So(item, ShouldResemble, &walkItem{err: errors.New("bad blacklist pattern \"a[\"")})
-		item, ok := <-ch
-		So(item, ShouldBeNil)
-		So(ok, ShouldBeFalse)
-		wg.Wait()
-	})
-}
-
 func TestPushDirectory(t *testing.T) {
 	// Uploads a real directory. 2 times the same file.
 	t.Parallel()
diff --git a/client/cmd/isolate/common.go b/client/cmd/isolate/common.go
index ed01f43..3a5ffdd 100644
--- a/client/cmd/isolate/common.go
+++ b/client/cmd/isolate/common.go
@@ -89,7 +89,7 @@
 	f.StringVar(&c.Isolate, "i", "", "Alias for --isolate")
 	f.StringVar(&c.Isolated, "isolated", "", ".isolated file to generate or read")
 	f.StringVar(&c.Isolated, "s", "", "Alias for --isolated")
-	f.Var(&c.Blacklist, "blacklist", "List of regexp to use as blacklist filter when uploading directories")
+	f.Var(&c.Blacklist, "blacklist", "List of globs to use as blacklist filter when uploading directories")
 	f.Var(&c.ConfigVariables, "config-variable", "Config variables are used to determine which conditions should be matched when loading a .isolate file, default: [].")
 	f.Var(&c.PathVariables, "path-variable", "Path variables are used to replace file paths when loading a .isolate file, default: {}")
 	f.Var(&c.ExtraVariables, "extra-variable", "Extraneous variables are replaced on the command entry and on paths in the .isolate file but are not considered relative paths.")
diff --git a/client/internal/common/filesystem_view.go b/client/internal/common/filesystem_view.go
new file mode 100644
index 0000000..42bd502
--- /dev/null
+++ b/client/internal/common/filesystem_view.go
@@ -0,0 +1,81 @@
+// Copyright 2017 The LUCI Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package common
+
+import (
+	"fmt"
+	"path/filepath"
+)
+
+// FilesystemView provides a filtered "view" of a filesystem.
+// It translates absolute paths to relative paths based on its configured root path.
+// It also hides any paths which match a blacklist entry.
+type FilesystemView struct {
+	root      string
+	blacklist []string
+}
+
+// NewFilesystemView returns a FilesystemView based on the supplied root and blacklist, or
+// an error if blacklist contains a bad pattern.
+// root is the the base path used by RelativePath to calulate relative paths.
+// blacklist is a list of globs of files to ignore.  See RelativePath for more information.
+func NewFilesystemView(root string, blacklist []string) (FilesystemView, error) {
+	for _, b := range blacklist {
+		if _, err := filepath.Match(b, b); err != nil {
+			return FilesystemView{}, fmt.Errorf("bad blacklist pattern \"%s\"", b)
+		}
+	}
+	return FilesystemView{root: root, blacklist: blacklist}, nil
+}
+
+// RelativePath returns a version of path which is relative to the FilesystemView root,
+// or an empty string if path matches a blacklist entry.
+//
+// Blacklist globs are matched against the entirety of each of:
+//  * the path relative to the FilesystemView root.
+//  * the basename return by filepath.Base(path).
+// See filepath.Match for details about the format of blacklist globs.
+func (ff FilesystemView) RelativePath(path string) (string, error) {
+	relPath, err := filepath.Rel(ff.root, path)
+	if err != nil {
+		return "", fmt.Errorf("calculating relative path(%q): %v", path, err)
+	}
+	if ff.skipRelPath(relPath) {
+		relPath = ""
+	}
+	return relPath, nil
+}
+
+func (ff FilesystemView) skipRelPath(relPath string) bool {
+	// filepath.Rel is documented to call filepath.Clean on its result before returning it,
+	// which results in "." for an empty relative path.
+	if relPath == "." { // Root directory.
+		return false
+	}
+
+	for _, glob := range ff.blacklist {
+		if match(glob, relPath) || match(glob, filepath.Base(relPath)) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// match is equivalent to filepath.Match, but assumes that pattern is valid.
+func match(pattern, name string) bool {
+	matched, _ := filepath.Match(pattern, name)
+	return matched
+}
diff --git a/client/internal/common/filesystem_view_linux_test.go b/client/internal/common/filesystem_view_linux_test.go
new file mode 100644
index 0000000..4fdca5c
--- /dev/null
+++ b/client/internal/common/filesystem_view_linux_test.go
@@ -0,0 +1,226 @@
+// Copyright 2017 The LUCI Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package common
+
+import (
+	"testing"
+
+	. "github.com/smartystreets/goconvey/convey"
+)
+
+func TestValidatesBlacklist(t *testing.T) {
+	type testCase struct {
+		desc      string
+		blacklist []string
+		wantErr   bool
+	}
+
+	testCases := []testCase{
+		{
+			desc:      "no patterns",
+			blacklist: []string{},
+			wantErr:   false,
+		},
+		{
+			desc:      "good pattern",
+			blacklist: []string{"*"},
+			wantErr:   false,
+		},
+		{
+			desc:      "good patterns",
+			blacklist: []string{"a*", "b*"},
+			wantErr:   false,
+		},
+		{
+			desc:      "bad pattern",
+			blacklist: []string{"["},
+			wantErr:   true,
+		},
+		{
+			desc:      "bad first pattern",
+			blacklist: []string{"[", "*"},
+			wantErr:   true,
+		},
+		{
+			desc:      "bad last pattern",
+			blacklist: []string{"*", "["},
+			wantErr:   true,
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		Convey(tc.desc, t, func() {
+			_, err := NewFilesystemView("/root", tc.blacklist)
+			if tc.wantErr {
+				So(err, ShouldNotBeNil)
+			} else {
+				So(err, ShouldBeNil)
+			}
+		})
+	}
+
+}
+
+func TestCalculatesRelativePaths(t *testing.T) {
+	type testCase struct {
+		desc        string
+		root        string
+		absPath     string
+		wantRelPath string
+		wantErr     bool
+	}
+
+	testCases := []testCase{
+		{
+			desc:        "no trailing slash",
+			root:        "/a",
+			absPath:     "/a/b",
+			wantRelPath: "b",
+			wantErr:     false,
+		},
+		{
+			desc:        "trailing slash",
+			root:        "/a",
+			absPath:     "/a/b/",
+			wantRelPath: "b",
+			wantErr:     false,
+		},
+		{
+			desc:        "no common root",
+			root:        "/a",
+			absPath:     "/x/y",
+			wantRelPath: "../x/y",
+			wantErr:     false,
+		},
+		{
+			desc:        "no computable relative path",
+			root:        "/a",
+			absPath:     "./x/y",
+			wantRelPath: "",
+			wantErr:     true,
+		},
+		{
+			desc:        "root",
+			root:        "/a",
+			absPath:     "/a",
+			wantRelPath: ".",
+			wantErr:     false,
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		Convey(tc.desc, t, func() {
+			fsView, err := NewFilesystemView(tc.root, nil)
+
+			So(err, ShouldBeNil)
+
+			relPath, err := fsView.RelativePath(tc.absPath)
+			So(relPath, ShouldEqual, tc.wantRelPath)
+			if tc.wantErr {
+				So(err, ShouldNotBeNil)
+			} else {
+				So(err, ShouldBeNil)
+			}
+		})
+	}
+}
+
+func TestAppliesBlacklist(t *testing.T) {
+	type testCase struct {
+		desc        string
+		root        string
+		blacklist   []string
+		absPath     string
+		wantRelPath string
+	}
+
+	testCases := []testCase{
+		{
+			desc:        "no blacklist",
+			root:        "/a",
+			blacklist:   []string{},
+			absPath:     "/a/x/y",
+			wantRelPath: "x/y",
+		},
+		{
+			desc:        "blacklist matches relative path",
+			root:        "/a",
+			blacklist:   []string{"?/z"},
+			absPath:     "/a/x/z",
+			wantRelPath: "",
+		},
+		{
+			desc:        "blacklist doesn't match relative path",
+			root:        "/a",
+			blacklist:   []string{"?/z"},
+			absPath:     "/a/x/y",
+			wantRelPath: "x/y",
+		},
+		{
+			desc:        "blacklist matches basename",
+			root:        "/a",
+			blacklist:   []string{"z"},
+			absPath:     "/a/x/z",
+			wantRelPath: "",
+		},
+		{
+			desc:        "blacklist doesn't match basename",
+			root:        "/a",
+			blacklist:   []string{"z"},
+			absPath:     "/a/z/y",
+			wantRelPath: "z/y",
+		},
+		{
+			desc:        "root never matches blacklist",
+			root:        "/a",
+			blacklist:   []string{"?"},
+			absPath:     "/a",
+			wantRelPath: ".",
+		},
+		{
+			desc:        "only one blacklist need match path (1)",
+			root:        "/a",
+			blacklist:   []string{"z", "abc"},
+			absPath:     "/a/x/z",
+			wantRelPath: "",
+		},
+		{
+			desc:        "only one blacklist need match path (2)",
+			root:        "/a",
+			blacklist:   []string{"abc", "z"},
+			absPath:     "/a/x/z",
+			wantRelPath: "",
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		Convey(tc.desc, t, func() {
+			fsView, err := NewFilesystemView(tc.root, tc.blacklist)
+
+			// These test cases contain only valid blacklists.
+			// Invalid blacklists are tested in TestValidatesBlacklist.
+			So(err, ShouldBeNil)
+
+			relPath, err := fsView.RelativePath(tc.absPath)
+			// These test cases contain only valid relative paths.
+			// non-computable relative paths are tested in TestCalculatesRelativePaths.
+			So(err, ShouldBeNil)
+			So(relPath, ShouldEqual, tc.wantRelPath)
+		})
+	}
+}