| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package verify |
| |
| import ( |
| "bytes" |
| "crypto/sha256" |
| "encoding/binary" |
| "encoding/hex" |
| "fmt" |
| "hash" |
| "io" |
| "os" |
| "path/filepath" |
| "sort" |
| "strconv" |
| "strings" |
| |
| "github.com/pkg/errors" |
| ) |
| |
| // HashVersion is an arbitrary number that identifies the hash algorithm used by |
| // the directory hasher. |
| // |
| // 1: SHA256, as implemented in crypto/sha256 |
| const HashVersion = 1 |
| |
| const osPathSeparator = string(filepath.Separator) |
| |
| // lineEndingReader is an `io.Reader` that converts CRLF sequences to LF. |
| // |
| // When cloning or checking out repositories, some Version Control Systems, |
| // VCSs, on some supported Go operating system targets, GOOS, will |
| // automatically convert line endings that end in a single line feed byte, LF, |
| // to line endings that end in a two byte sequence of carriage return, CR, |
| // followed by LF. This LF to CRLF conversion would cause otherwise identical |
| // versioned files to have different on-disk contents simply based on which VCS |
| // and GOOS are involved. Different file contents for the same file would cause |
| // the resultant hashes to differ. To ensure file contents normalize to the |
| // same hash, this structure wraps an io.Reader and translates all CRLF |
| // sequences to LF as the file's contents are read. |
| type lineEndingReader struct { |
| src io.Reader // source io.Reader from which this reads |
| prevReadEndedCR bool // used to track whether final byte of previous Read was CR |
| } |
| |
| // newLineEndingReader returns a new lineEndingReader that reads from the |
| // specified source io.Reader. |
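| // |
| // A minimal usage sketch; `strings.NewReader` stands in for any source |
| // whose contents may contain CRLF line endings: |
| // |
| //    r := newLineEndingReader(strings.NewReader("a\r\nb\r\n")) |
| //    var buf bytes.Buffer |
| //    _, _ = io.Copy(&buf, r) |
| //    // buf.String() == "a\nb\n" |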
| func newLineEndingReader(src io.Reader) *lineEndingReader { |
| return &lineEndingReader{src: src} |
| } |
| |
| var crlf = []byte("\r\n") |
| |
| // Read consumes bytes from the structure's source io.Reader to fill the |
| // specified slice of bytes. It converts all CRLF byte sequences to LF, and |
| // handles cases where a CR and LF straddle two Read operations. |
| func (f *lineEndingReader) Read(buf []byte) (int, error) { |
| buflen := len(buf) |
| if f.prevReadEndedCR { |
| // Read one fewer byte so we have room, in case the first byte of the |
| // upcoming Read is not an LF, to insert the trailing CR from the |
| // previous Read. |
| buflen-- |
| } |
| nr, er := f.src.Read(buf[:buflen]) |
| if nr > 0 { |
| if f.prevReadEndedCR && buf[0] != '\n' { |
| // Having a CRLF split across two Read operations is rare, so the |
| // performance impact of copying the entire buffer to the right by one |
| // byte, while suboptimal, will at least not happen very often. This |
| // negative performance impact is mitigated somewhat on many Go |
| // compilation architectures, GOARCH, because the `copy` builtin uses a |
| // machine opcode for performing the memory copy on possibly |
| // overlapping regions of memory. This machine opcode is not |
| // instantaneous and does require multiple CPU cycles to complete, but |
| // is significantly faster than the application looping through bytes. |
| copy(buf[1:nr+1], buf[:nr]) // shift data to right one byte |
| buf[0] = '\r' // insert the previous skipped CR byte at start of buf |
| nr++ // pretend we read one more byte |
| } |
| |
| // Remove any CRLF sequences in the buffer, locating them with |
| // `bytes.Index` because, like the `copy` builtin on many GOARCHs, it |
| // takes advantage of a machine opcode to search for byte patterns. |
| var searchOffset int // index within buffer from which each loop's search commences; set to the index of the end of the previous loop. |
| var shiftCount int // each subsequent shift operation needs to shift bytes to the left by one more position than the shift that preceded it. |
| previousIndex := -1 // index of previously found CRLF; -1 means no previous index |
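| // For example, given buf = "a\r\nb\r\nc" and nr == 7: the first CRLF is |
| // found at index 1 (nothing shifts yet), the second at index 4, which |
| // shifts "\nb" left by one; the final shift below the loop moves "\nc" |
| // left by two, leaving "a\nb\nc" with nr == 5. |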
| for { |
| index := bytes.Index(buf[searchOffset:nr], crlf) |
| if index == -1 { |
| break |
| } |
| index += searchOffset // convert relative index to absolute |
| if previousIndex != -1 { |
| // shift substring between previous index and this index |
| copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:index]) |
| shiftCount++ // next shift needs to be 1 byte to the left |
| } |
| previousIndex = index |
| searchOffset = index + 2 // start next search after len(crlf) |
| } |
| if previousIndex != -1 { |
| // handle final shift |
| copy(buf[previousIndex-shiftCount:], buf[previousIndex+1:nr]) |
| shiftCount++ |
| } |
| nr -= shiftCount // shorten byte read count by number of shifts executed |
| |
| // When the final byte from a read operation is CR, do not emit it |
| // until we ensure the first byte of the next read is not LF. |
| if f.prevReadEndedCR = buf[nr-1] == '\r'; f.prevReadEndedCR { |
| nr-- // pretend byte was never read from source |
| } |
| } else if f.prevReadEndedCR { |
| // Reading from the source returned nothing, but this struct is sitting |
| // on a trailing CR from the previous Read, so give it to the client now. |
| buf[0] = '\r' |
| nr = 1 |
| er = nil |
| f.prevReadEndedCR = false // prevent infinite loop |
| } |
| return nr, er |
| } |
| |
| // writeBytesWithNull writes the specified data to the specified hash, followed |
| // by a NUL byte, in order to make accidental hash collisions less likely. |
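| // |
| // For instance, without the separator, hashing the sequence ("ab", "c") and |
| // the sequence ("a", "bc") would feed identical bytes to the hash; with the |
| // NUL byte, the hash sees "ab\x00c\x00" versus "a\x00bc\x00", which differ. |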
| func writeBytesWithNull(h hash.Hash, data []byte) { |
| // Ignore the return values from writing to the hash, because a |
| // hash.Hash's Write method never returns an error. |
| _, _ = h.Write(append(data, 0)) |
| } |
| |
| // dirWalkClosure is used to reduce the number of allocations involved in |
| // closing over these variables. |
| type dirWalkClosure struct { |
| someCopyBuffer []byte // allocate once and reuse for each file copy |
| someModeBytes []byte // allocate once and reuse for each node |
| someDirLen int |
| someHash hash.Hash |
| } |
| |
| // DigestFromDirectory returns a hash of the specified directory contents, which |
| // will match the hash computed for any directory on any supported Go platform |
| // whose contents exactly match the specified directory. |
| // |
| // This function ignores any file system node named `vendor`, `.bzr`, `.git`, |
| // `.hg`, and `.svn`, as these are typically used as Version Control System |
| // (VCS) directories. |
| // |
| // Other than the `vendor` and VCS directories mentioned above, the calculated |
| // hash includes the pathname to every discovered file system node, whether it |
| // is an empty directory, a non-empty directory, an empty file, or a non-empty file. |
| // |
| // Symbolic links are excluded, as they are not considered valid elements in the |
| // definition of a Go module. |
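| // |
| // A minimal usage sketch (error handling elided; the path is hypothetical): |
| // |
| //    vd, err := DigestFromDirectory("vendor/github.com/alice/alice1") |
| //    if err != nil { |
| //        // handle error |
| //    } |
| //    fmt.Println(vd) // prints the HashVersion, a colon, then the hex digest |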
| func DigestFromDirectory(osDirname string) (VersionedDigest, error) { |
| osDirname = filepath.Clean(osDirname) |
| |
| // Create a single hash instance for the entire operation, rather than a new |
| // hash for each node we encounter. |
| |
| closure := dirWalkClosure{ |
| someCopyBuffer: make([]byte, 4*1024), // only allocate a single page |
| someModeBytes: make([]byte, 4), // scratch place to store encoded os.FileMode (uint32) |
| someDirLen: len(osDirname) + len(osPathSeparator), |
| someHash: sha256.New(), |
| } |
| |
| err := filepath.Walk(osDirname, func(osPathname string, info os.FileInfo, err error) error { |
| if err != nil { |
| return err |
| } |
| |
| // Completely ignore symlinks. |
| if info.Mode()&os.ModeSymlink != 0 { |
| return nil |
| } |
| |
| var osRelative string |
| if len(osPathname) > closure.someDirLen { |
| osRelative = osPathname[closure.someDirLen:] |
| } |
| |
| switch filepath.Base(osRelative) { |
| case "vendor", ".bzr", ".git", ".hg", ".svn": |
| return filepath.SkipDir |
| } |
| |
| // We could make our own enum-like data type for encoding the file type, |
| // but Go's runtime already gives us architecture-independent file |
| // modes, as discussed in `os/types.go`: |
| // |
| // Go's runtime FileMode type has the same definition on all systems, so |
| // that information about files can be moved from one system to |
| // another portably. |
| var mt os.FileMode |
| |
| // We only care about the bits that identify the type of a file system |
| // node, and can ignore the append, exclusive, temporary, setuid, setgid, |
| // permission, and sticky bits, which share the FileMode bit field with |
| // the bits that declare the type of the file system node. |
| modeType := info.Mode() & os.ModeType |
| var shouldSkip bool // skip some types of file system nodes |
| |
| switch { |
| case modeType&os.ModeDir > 0: |
| mt = os.ModeDir |
| // This func does not need to enumerate children, because |
| // filepath.Walk will do that for us. |
| shouldSkip = true |
| case modeType&os.ModeNamedPipe > 0: |
| mt = os.ModeNamedPipe |
| shouldSkip = true |
| case modeType&os.ModeSocket > 0: |
| mt = os.ModeSocket |
| shouldSkip = true |
| case modeType&os.ModeDevice > 0: |
| mt = os.ModeDevice |
| shouldSkip = true |
| } |
| |
| // Write the relative pathname to the hash, because the hash is a function |
| // of the node names, node types, and node contents. An added benefit is |
| // that empty directories, named pipes, sockets, and devices also affect |
| // the hash. Use `filepath.ToSlash` to ensure the relative pathname is |
| // os-agnostic. |
| writeBytesWithNull(closure.someHash, []byte(filepath.ToSlash(osRelative))) |
| |
| binary.LittleEndian.PutUint32(closure.someModeBytes, uint32(mt)) // encode the type of mode |
| writeBytesWithNull(closure.someHash, closure.someModeBytes) // and write to hash |
| |
| if shouldSkip { |
| return nil // nothing more to do for some of the node types |
| } |
| |
| // If we get here, node is a regular file. |
| fh, err := os.Open(osPathname) |
| if err != nil { |
| return errors.Wrap(err, "cannot Open") |
| } |
| |
| var bytesWritten int64 |
| bytesWritten, err = io.CopyBuffer(closure.someHash, newLineEndingReader(fh), closure.someCopyBuffer) // fast copy of file contents to hash |
| err = errors.Wrap(err, "cannot Copy") // errors.Wrap only wraps non-nil, so skip extra check |
| writeBytesWithNull(closure.someHash, []byte(strconv.FormatInt(bytesWritten, 10))) // 10: format file size as base 10 integer |
| |
| // Close the file handle to the open file without masking |
| // possible previous error value. |
| if er := fh.Close(); err == nil { |
| err = errors.Wrap(er, "cannot Close") |
| } |
| return err |
| }) |
| |
| if err != nil { |
| return VersionedDigest{}, err |
| } |
| |
| return VersionedDigest{ |
| HashVersion: HashVersion, |
| Digest: closure.someHash.Sum(nil), |
| }, nil |
| } |
| |
| // VendorStatus represents one of a handful of possible status conditions for a |
| // particular file system node in the vendor directory tree. |
| type VendorStatus uint8 |
| |
| const ( |
| // NotInLock is used when a file system node exists for which there is no |
| // corresponding dependency in the lock file. |
| NotInLock VendorStatus = iota |
| |
| // NotInTree is used when a lock file dependency exists for which there is |
| // no corresponding file system node. |
| NotInTree |
| |
| // NoMismatch is used when the digest for a dependency listed in the |
| // lock file matches what is calculated from the file system. |
| NoMismatch |
| |
| // EmptyDigestInLock is used when the digest for a dependency listed in the |
| // lock file is the empty string. While this is a special case of |
| // DigestMismatchInLock, separating the cases is a desired feature. |
| EmptyDigestInLock |
| |
| // DigestMismatchInLock is used when the digest for a dependency listed in |
| // the lock file does not match what is calculated from the file system. |
| DigestMismatchInLock |
| |
| // HashVersionMismatch indicates that the hashing algorithm used to generate |
| // the digest being compared against is not the same as the one used by the |
| // current program. |
| HashVersionMismatch |
| ) |
| |
| func (ls VendorStatus) String() string { |
| switch ls { |
| case NotInLock: |
| return "not in lock" |
| case NotInTree: |
| return "not in tree" |
| case NoMismatch: |
| return "match" |
| case EmptyDigestInLock: |
| return "empty digest in lock" |
| case DigestMismatchInLock: |
| return "mismatch" |
| case HashVersionMismatch: |
| return "hasher changed" |
| } |
| return "unknown" |
| } |
| |
| // fsnode is used to track which file system nodes are required by the lock |
| // file. When a directory is found whose name matches one of the declared |
| // projects in the lock file, e.g., "github.com/alice/alice1", an fsnode is |
| // created for that directory, but not for any of its children. Every other |
| // file system node encountered results in an fsnode being created to |
| // represent it. |
| type fsnode struct { |
| osRelative string // os-specific relative path of a resource under vendor root |
| isRequiredAncestor bool // true iff this node or one of its descendants is in the lock file |
| myIndex, parentIndex int // index of this node and its parent in the tree's slice |
| } |
| |
| // VersionedDigest comprises both a hash digest and an integer indicating |
| // the version of the hash algorithm that produced the digest. |
| type VersionedDigest struct { |
| HashVersion int |
| Digest []byte |
| } |
| |
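| // String returns the canonical string form of a VersionedDigest: the hash |
| // version, a colon, then the hex-encoded digest. For example, a |
| // VersionedDigest with HashVersion 1 and Digest {0xc0, 0xff, 0xee} yields |
| // "1:c0ffee". |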
| func (vd VersionedDigest) String() string { |
| return fmt.Sprintf("%d:%s", vd.HashVersion, hex.EncodeToString(vd.Digest)) |
| } |
| |
| // IsEmpty indicates if the VersionedDigest is the zero value. |
| func (vd VersionedDigest) IsEmpty() bool { |
| return vd.HashVersion == 0 && len(vd.Digest) == 0 |
| } |
| |
| // ParseVersionedDigest decodes the string representation of versioned digest |
| // information - a colon-separated string with a version number in the first |
| // part and the hex-encoded hash digest in the second - as a VersionedDigest. |
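| // |
| // For example: |
| // |
| //    vd, err := ParseVersionedDigest("1:c0ffee") |
| //    // vd.HashVersion == 1; vd.Digest == []byte{0xc0, 0xff, 0xee}; err == nil |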
| func ParseVersionedDigest(input string) (VersionedDigest, error) { |
| var vd VersionedDigest |
| var err error |
| |
| parts := strings.Split(input, ":") |
| if len(parts) != 2 { |
| return VersionedDigest{}, errors.Errorf("expected two colon-separated components in the versioned hash digest, got %q", input) |
| } |
| if vd.Digest, err = hex.DecodeString(parts[1]); err != nil { |
| return VersionedDigest{}, err |
| } |
| if vd.HashVersion, err = strconv.Atoi(parts[0]); err != nil { |
| return VersionedDigest{}, err |
| } |
| |
| return vd, nil |
| } |
| |
| // CheckDepTree verifies a dependency tree according to expected digest sums, |
| // and returns an associative array of file system nodes and their respective |
| // vendor status conditions. |
| // |
| // The keys to the expected digest sums associative array represent the |
| // project's dependencies, and each is required to be expressed using the |
| // solidus character, `/`, as its path separator. For example, even on a GOOS |
| // platform where the file system path separator is a character other than |
| // solidus, one particular dependency would be represented as |
| // "github.com/alice/alice1". |
| func CheckDepTree(osDirname string, wantDigests map[string]VersionedDigest) (map[string]VendorStatus, error) { |
| osDirname = filepath.Clean(osDirname) |
| |
| // Create associative array to store the results of calling this function. |
| slashStatus := make(map[string]VendorStatus) |
| |
| // Ensure top level pathname is a directory |
| fi, err := os.Stat(osDirname) |
| if err != nil { |
| // If the dir doesn't exist at all, that's OK - just consider all the |
| // wanted paths absent. |
| if os.IsNotExist(err) { |
| for path := range wantDigests { |
| slashStatus[path] = NotInTree |
| } |
| return slashStatus, nil |
| } |
| return nil, errors.Wrap(err, "cannot Stat") |
| } |
| |
| if !fi.IsDir() { |
| return nil, errors.Errorf("cannot verify non directory: %q", osDirname) |
| } |
| |
| // Initialize the work queue with a node representing the specified |
| // directory, declaring its relative pathname under that directory to be |
| // the empty string. |
| currentNode := &fsnode{osRelative: "", parentIndex: -1, isRequiredAncestor: true} |
| queue := []*fsnode{currentNode} // queue of directories that must be inspected |
| |
| // In order to identify all file system nodes that are not in the lock file, |
| // represented by the specified expected sums parameter, and in order to |
| // only report the top level of a subdirectory of file system nodes, rather |
| // than every node internal to them, we will create a tree of nodes stored |
| // in a slice. We do this because we cannot predict the depth at which |
| // project roots occur. Some project roots sit shallower, and some deeper, |
| // than the typical three directory levels below the vendor root |
| // directory. |
| // |
| // For the following few examples, assume the vendor root directory below: |
| // |
| // github.com/alice/alice1/a1.go |
| // github.com/alice/alice2/a2.go |
| // github.com/bob/bob1/b1.go |
| // github.com/bob/bob2/b2.go |
| // launchpad.net/nifty/n1.go |
| // |
| // 1) If only the `alice1` and `alice2` projects were in the lock file, we'd |
| // prefer the output to state that both `github.com/bob` and |
| // `launchpad.net/nifty` are `NotInLock`. |
| // |
| // 2) If `alice1`, `alice2`, and `bob1` were in the lock file, we'd want to |
| // report `github.com/bob/bob2` and `launchpad.net/nifty` as `NotInLock`. |
| // |
| // 3) If none of `alice1`, `alice2`, `bob1`, or `bob2` were in the lock |
| // file, the entire `github.com` directory would be reported as `NotInLock`, |
| // along with `launchpad.net/nifty`. |
| // |
| // Each node in our tree has the slice index of its parent node, so once we |
| // can categorically state a particular directory is required because it is |
| // in the lock file, we can mark all of its ancestors as also being |
| // required. Then, when we finish walking the directory hierarchy, any nodes |
| // which are not required but have a required parent will be marked as |
| // `NotInLock`. |
| nodes := []*fsnode{currentNode} |
| |
| // Mark directories of expected projects as required. When each respective |
| // project is later found while traversing the vendor root hierarchy, its |
| // status will be updated to reflect whether its digest is empty, or |
| // whether it matches the expected digest. |
| for slashPathname := range wantDigests { |
| slashStatus[slashPathname] = NotInTree |
| } |
| |
| for len(queue) > 0 { |
| // Pop a node from the top of the queue (depth-first traversal, reverse |
| // lexicographical order inside a directory), clearing the value stored |
| // in the slice's backing array as we proceed. |
| lq1 := len(queue) - 1 |
| currentNode, queue[lq1], queue = queue[lq1], nil, queue[:lq1] |
| slashPathname := filepath.ToSlash(currentNode.osRelative) |
| osPathname := filepath.Join(osDirname, currentNode.osRelative) |
| |
| if expectedSum, ok := wantDigests[slashPathname]; ok { |
| ls := EmptyDigestInLock |
| if expectedSum.HashVersion != HashVersion { |
| if !expectedSum.IsEmpty() { |
| ls = HashVersionMismatch |
| } |
| } else if len(expectedSum.Digest) > 0 { |
| projectSum, err := DigestFromDirectory(osPathname) |
| if err != nil { |
| return nil, errors.Wrap(err, "cannot compute dependency hash") |
| } |
| if bytes.Equal(projectSum.Digest, expectedSum.Digest) { |
| ls = NoMismatch |
| } else { |
| ls = DigestMismatchInLock |
| } |
| } |
| slashStatus[slashPathname] = ls |
| |
| // Mark the current node and all of its parents as required. |
| for i := currentNode.myIndex; i != -1; i = nodes[i].parentIndex { |
| nodes[i].isRequiredAncestor = true |
| } |
| |
| // There is no need to process this directory's contents, because we |
| // already accounted for them when calculating its digest. |
| continue |
| } |
| |
| osChildrenNames, err := sortedChildrenFromDirname(osPathname) |
| if err != nil { |
| return nil, errors.Wrap(err, "cannot get sorted list of directory children") |
| } |
| for _, osChildName := range osChildrenNames { |
| switch osChildName { |
| case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn": |
| // skip |
| default: |
| osChildRelative := filepath.Join(currentNode.osRelative, osChildName) |
| osChildPathname := filepath.Join(osDirname, osChildRelative) |
| |
| // Create a new fsnode for this file system node, with a parent |
| // index set to the index of the current node. |
| otherNode := &fsnode{osRelative: osChildRelative, myIndex: len(nodes), parentIndex: currentNode.myIndex} |
| |
| fi, err := os.Stat(osChildPathname) |
| if err != nil { |
| return nil, errors.Wrap(err, "cannot Stat") |
| } |
| nodes = append(nodes, otherNode) // Track all file system nodes... |
| if fi.IsDir() { |
| queue = append(queue, otherNode) // but only need to add directories to the work queue. |
| } |
| } |
| } |
| } |
| |
| // Ignoring the first node in the list, walk the nodes from last to first. |
| // Whenever the current node is not required but its parent is, mark the |
| // current node as `NotInLock`. |
| for len(nodes) > 1 { |
| // Pop a node from the end of the nodes slice, clearing the value stored |
| // in the slice's backing array as we proceed. |
| ln1 := len(nodes) - 1 |
| currentNode, nodes[ln1], nodes = nodes[ln1], nil, nodes[:ln1] |
| |
| if !currentNode.isRequiredAncestor && nodes[currentNode.parentIndex].isRequiredAncestor { |
| slashStatus[filepath.ToSlash(currentNode.osRelative)] = NotInLock |
| } |
| } |
| currentNode, nodes = nil, nil |
| |
| return slashStatus, nil |
| } |
| |
| // sortedChildrenFromDirname returns a lexicographically sorted list of child |
| // nodes for the specified directory. |
| func sortedChildrenFromDirname(osDirname string) ([]string, error) { |
| fh, err := os.Open(osDirname) |
| if err != nil { |
| return nil, errors.Wrap(err, "cannot Open") |
| } |
| |
| osChildrenNames, err := fh.Readdirnames(0) // 0: read names of all children |
| if err != nil { |
| return nil, errors.Wrap(err, "cannot Readdirnames") |
| } |
| sort.Strings(osChildrenNames) |
| |
| // Close the file handle to the open directory without masking possible |
| // previous error value. |
| if er := fh.Close(); err == nil { |
| err = errors.Wrap(er, "cannot Close") |
| } |
| return osChildrenNames, err |
| } |