blob: 239fc32bbca99685189c5b1599fd91a9f7e99609 [file] [log] [blame]
// Copyright 2016 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package archiver
import (
"archive/tar"
"crypto"
"io"
"os"
"sort"
"go.chromium.org/luci/common/iotools"
"go.chromium.org/luci/common/isolated"
)
// osOpen wraps os.Open to allow faking out during tests.
//
// On failure it returns an untyped nil io.ReadCloser together with the error.
// Returning os.Open's results directly would wrap the nil *os.File in a
// non-nil interface value, so a caller's `rc != nil` check would wrongly
// succeed.
var osOpen = func(name string) (io.ReadCloser, error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	return f, nil
}
// itemBundle is a slice of *Item that will be archived together into a
// single tar.
type itemBundle struct {
// items holds the files that make up the bundle, in the order in which they
// are written to the tar.
items []*Item
// itemSize is the total size (in bytes) of the constituent files. It will be
// smaller than the resultant tar, which also carries per-file headers,
// padding and a trailer.
itemSize int64
}
// shardItems splits items into a sequence of itemBundles, where threshold is
// the maximum size that each resulting tar should reach.
//
// The items slice is sorted by path in place before it is split, so that the
// same set of items always yields the same bundles (and therefore the same
// isolated hashes).
//
// shardItems does not access the filesystem.
func shardItems(items []*Item, threshold int64) []*itemBundle {
	sort.Sort(itemByPath(items))

	var bundles []*itemBundle
	remaining := items
	for len(remaining) > 0 {
		// Peel one bundle off the front; whatever did not fit is handled
		// by the next iteration.
		next, rest := oneBundle(remaining, threshold)
		bundles = append(bundles, next)
		remaining = rest
	}
	return bundles
}
// oneBundle gathers the leading run of items whose estimated tar size stays
// within threshold into a single itemBundle. It returns that bundle together
// with the items that were left over (nil when every item was consumed).
//
// The first item is always accepted, even if it exceeds threshold on its own,
// so a non-empty input always produces a non-empty bundle.
func oneBundle(items []*Item, threshold int64) (*itemBundle, []*Item) {
	const (
		tarBlock   = 512          // size of one tar record
		tarTrailer = 2 * tarBlock // two trailing blank records
	)

	bundle := new(itemBundle)
	tarTotal := int64(tarTrailer)
	for n := 0; n < len(items); n++ {
		size := items[n].Size
		// One header record plus the payload padded up to a whole record.
		entry := (size + 2*tarBlock - 1) &^ (tarBlock - 1)
		if n != 0 && tarTotal+entry > threshold {
			return bundle, items[n:]
		}
		bundle.items = items[:n+1]
		bundle.itemSize += size
		tarTotal += entry
	}
	return bundle, nil
}
// Digest builds the bundle's tar, streaming it through a hasher created from
// h, and returns the resulting digest along with the number of bytes the tar
// occupies. Any error hit while writing the tar is returned with an empty
// digest and a zero size.
func (b *itemBundle) Digest(h crypto.Hash) (isolated.HexDigest, int64, error) {
	hasher := h.New()
	// Count the bytes on their way into the hasher so the tar's size is
	// known without buffering it.
	counter := &iotools.CountingWriter{Writer: hasher}

	err := b.writeTar(counter)
	if err != nil {
		return "", 0, err
	}

	digest := isolated.Sum(hasher)
	return digest, counter.Count, nil
}
// Contents returns an io.ReadCloser that yields the bundle's tar.
//
// The tar is produced by a separate goroutine writing into a pipe. Once
// writing ends, the pipe is closed with writeTar's result, so a failure to
// build the tar reaches the reader as a read error. The returned error is
// always nil.
func (b *itemBundle) Contents() (io.ReadCloser, error) {
	reader, writer := io.Pipe()
	go func() {
		tarErr := b.writeTar(writer)
		// A nil tarErr makes this behave like a plain Close (reader sees EOF).
		writer.CloseWithError(tarErr)
	}()
	return reader, nil
}
// writeTar writes every item of the bundle to w as a tar archive, in the
// order given by b.items. Each entry is a regular file that carries only the
// item's relative path, mode and size in its header. The first error
// encountered aborts the write and is returned.
func (b *itemBundle) writeTar(w io.Writer) error {
	archive := tar.NewWriter(w)

	// appendFile streams the file at path into the current tar entry and
	// closes the file afterwards, whether or not the copy succeeded.
	appendFile := func(path string) error {
		src, err := osOpen(path)
		if err != nil {
			return err
		}
		defer src.Close()
		_, err = io.Copy(archive, src)
		return err
	}

	for _, it := range b.items {
		header := &tar.Header{
			Name:     it.RelPath,
			Mode:     int64(it.Mode),
			Typeflag: tar.TypeReg,
			Size:     it.Size,
		}
		if err := archive.WriteHeader(header); err != nil {
			return err
		}
		if err := appendFile(it.Path); err != nil {
			return err
		}
	}

	// Close flushes the remaining padding and writes the tar trailer.
	return archive.Close()
}
// itemByPath is a slice of *Item that satisfies sort.Interface, ordering its
// elements by RelPath.
type itemByPath []*Item

// Len reports how many items the slice holds.
func (p itemByPath) Len() int {
	return len(p)
}

// Swap exchanges the items at indices i and j.
func (p itemByPath) Swap(i, j int) {
	tmp := p[i]
	p[i] = p[j]
	p[j] = tmp
}

// Less reports whether the item at index i has a RelPath that sorts before
// the RelPath of the item at index j.
func (p itemByPath) Less(i, j int) bool {
	left := p[i].RelPath
	right := p[j].RelPath
	return left < right
}