blob: b2d62f3c51c171e10ff237fb25f5e6acbd03341a [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar
// TODO(dsymonds):
// - pax extensions
import (
"bytes"
"errors"
"io"
"io/ioutil"
"os"
"strconv"
"strings"
"time"
)
var (
ErrHeader = errors.New("archive/tar: invalid tar header")
)
const maxNanoSecondIntSize = 9
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
r io.Reader
err error
nb int64 // number of unread bytes for current file entry
pad int64 // amount of padding (ignored) after current file entry
}
// NewReader creates a new Reader reading from r.
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
// Next advances to the next entry in the tar archive.
func (tr *Reader) Next() (*Header, error) {
var hdr *Header
if tr.err == nil {
tr.skipUnread()
}
if tr.err != nil {
return hdr, tr.err
}
hdr = tr.readHeader()
if hdr == nil {
return hdr, tr.err
}
// Check for PAX/GNU header.
switch hdr.Typeflag {
case TypeXHeader:
// PAX extended header
headers, err := parsePAX(tr)
if err != nil {
return nil, err
}
// We actually read the whole file,
// but this skips alignment padding
tr.skipUnread()
hdr = tr.readHeader()
mergePAX(hdr, headers)
return hdr, nil
case TypeGNULongName:
// We have a GNU long name header. Its contents are the real file name.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
hdr, err := tr.Next()
hdr.Name = cString(realname)
return hdr, err
case TypeGNULongLink:
// We have a GNU long link header.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
hdr, err := tr.Next()
hdr.Linkname = cString(realname)
return hdr, err
}
return hdr, tr.err
}
// mergePAX merges well known headers according to PAX standard.
// In general headers with the same name as those found
// in the header struct overwrite those found in the header
// struct with higher precision or longer values. Esp. useful
// for name and linkname fields.
func mergePAX(hdr *Header, headers map[string]string) error {
for k, v := range headers {
switch k {
case paxPath:
hdr.Name = v
case paxLinkpath:
hdr.Linkname = v
case paxGname:
hdr.Gname = v
case paxUname:
hdr.Uname = v
case paxUid:
uid, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Uid = int(uid)
case paxGid:
gid, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Gid = int(gid)
case paxAtime:
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.AccessTime = t
case paxMtime:
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.ModTime = t
case paxCtime:
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.ChangeTime = t
case paxSize:
size, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Size = int64(size)
}
}
return nil
}
// parsePAXTime takes a string of the form %d.%d as described in
// the PAX specification.
func parsePAXTime(t string) (time.Time, error) {
buf := []byte(t)
pos := bytes.IndexByte(buf, '.')
var seconds, nanoseconds int64
var err error
if pos == -1 {
seconds, err = strconv.ParseInt(t, 10, 0)
if err != nil {
return time.Time{}, err
}
} else {
seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
if err != nil {
return time.Time{}, err
}
nano_buf := string(buf[pos+1:])
// Pad as needed before converting to a decimal.
// For example .030 -> .030000000 -> 30000000 nanoseconds
if len(nano_buf) < maxNanoSecondIntSize {
// Right pad
nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
} else if len(nano_buf) > maxNanoSecondIntSize {
// Right truncate
nano_buf = nano_buf[:maxNanoSecondIntSize]
}
nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
if err != nil {
return time.Time{}, err
}
}
ts := time.Unix(seconds, nanoseconds)
return ts, nil
}
// parsePAX parses PAX headers.
// If an extended header (type 'x') is invalid, ErrHeader is returned
func parsePAX(r io.Reader) (map[string]string, error) {
buf, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
headers := make(map[string]string)
// Each record is constructed as
// "%d %s=%s\n", length, keyword, value
for len(buf) > 0 {
// or the header was empty to start with.
var sp int
// The size field ends at the first space.
sp = bytes.IndexByte(buf, ' ')
if sp == -1 {
return nil, ErrHeader
}
// Parse the first token as a decimal integer.
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
if err != nil {
return nil, ErrHeader
}
// Extract everything between the decimal and the n -1 on the
// beginning to to eat the ' ', -1 on the end to skip the newline.
var record []byte
record, buf = buf[sp+1:n-1], buf[n:]
// The first equals is guaranteed to mark the end of the key.
// Everything else is value.
eq := bytes.IndexByte(record, '=')
if eq == -1 {
return nil, ErrHeader
}
key, value := record[:eq], record[eq+1:]
headers[string(key)] = string(value)
}
return headers, nil
}
// cString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func cString(b []byte) string {
n := 0
for n < len(b) && b[n] != 0 {
n++
}
return string(b[0:n])
}
func (tr *Reader) octal(b []byte) int64 {
// Check for binary format first.
if len(b) > 0 && b[0]&0x80 != 0 {
var x int64
for i, c := range b {
if i == 0 {
c &= 0x7f // ignore signal bit in first byte
}
x = x<<8 | int64(c)
}
return x
}
// Because unused fields are filled with NULs, we need
// to skip leading NULs. Fields may also be padded with
// spaces or NULs.
// So we remove leading and trailing NULs and spaces to
// be sure.
b = bytes.Trim(b, " \x00")
if len(b) == 0 {
return 0
}
x, err := strconv.ParseUint(cString(b), 8, 64)
if err != nil {
tr.err = err
}
return int64(x)
}
// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
func (tr *Reader) skipUnread() {
nr := tr.nb + tr.pad // number of bytes to skip
tr.nb, tr.pad = 0, 0
if sr, ok := tr.r.(io.Seeker); ok {
if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
return
}
}
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
}
func (tr *Reader) verifyChecksum(header []byte) bool {
if tr.err != nil {
return false
}
given := tr.octal(header[148:156])
unsigned, signed := checksum(header)
return given == unsigned || given == signed
}
func (tr *Reader) readHeader() *Header {
header := make([]byte, blockSize)
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
}
// Two blocks of zero bytes marks the end of the archive.
if bytes.Equal(header, zeroBlock[0:blockSize]) {
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
}
if bytes.Equal(header, zeroBlock[0:blockSize]) {
tr.err = io.EOF
} else {
tr.err = ErrHeader // zero block and then non-zero block
}
return nil
}
if !tr.verifyChecksum(header) {
tr.err = ErrHeader
return nil
}
// Unpack
hdr := new(Header)
s := slicer(header)
hdr.Name = cString(s.next(100))
hdr.Mode = tr.octal(s.next(8))
hdr.Uid = int(tr.octal(s.next(8)))
hdr.Gid = int(tr.octal(s.next(8)))
hdr.Size = tr.octal(s.next(12))
hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
s.next(8) // chksum
hdr.Typeflag = s.next(1)[0]
hdr.Linkname = cString(s.next(100))
// The remainder of the header depends on the value of magic.
// The original (v7) version of tar had no explicit magic field,
// so its magic bytes, like the rest of the block, are NULs.
magic := string(s.next(8)) // contains version field as well.
var format string
switch magic {
case "ustar\x0000": // POSIX tar (1003.1-1988)
if string(header[508:512]) == "tar\x00" {
format = "star"
} else {
format = "posix"
}
case "ustar \x00": // old GNU tar
format = "gnu"
}
switch format {
case "posix", "gnu", "star":
hdr.Uname = cString(s.next(32))
hdr.Gname = cString(s.next(32))
devmajor := s.next(8)
devminor := s.next(8)
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
hdr.Devmajor = tr.octal(devmajor)
hdr.Devminor = tr.octal(devminor)
}
var prefix string
switch format {
case "posix", "gnu":
prefix = cString(s.next(155))
case "star":
prefix = cString(s.next(131))
hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
}
if len(prefix) > 0 {
hdr.Name = prefix + "/" + hdr.Name
}
}
if tr.err != nil {
tr.err = ErrHeader
return nil
}
// Maximum value of hdr.Size is 64 GB (12 octal digits),
// so there's no risk of int64 overflowing.
tr.nb = int64(hdr.Size)
tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two
return hdr
}
// Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry.
func (tr *Reader) Read(b []byte) (n int, err error) {
if tr.nb == 0 {
// file consumed
return 0, io.EOF
}
if int64(len(b)) > tr.nb {
b = b[0:tr.nb]
}
n, err = tr.r.Read(b)
tr.nb -= int64(n)
if err == io.EOF && tr.nb > 0 {
err = io.ErrUnexpectedEOF
}
tr.err = err
return
}