Merge pull request #1025 from mcuadros/eoie

plumbing: format/index: support for EOIE extension
diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go
index 0d13066..2166e0a 100644
--- a/plumbing/format/packfile/packfile.go
+++ b/plumbing/format/packfile/packfile.go
@@ -114,62 +114,6 @@
 	return h, err
 }
 
-func (p *Packfile) getObjectData(
-	h *ObjectHeader,
-) (typ plumbing.ObjectType, size int64, err error) {
-	switch h.Type {
-	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
-		typ = h.Type
-		size = h.Length
-	case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
-		buf := bufPool.Get().(*bytes.Buffer)
-		buf.Reset()
-		defer bufPool.Put(buf)
-
-		_, _, err = p.s.NextObject(buf)
-		if err != nil {
-			return
-		}
-
-		delta := buf.Bytes()
-		_, delta = decodeLEB128(delta) // skip src size
-		sz, _ := decodeLEB128(delta)
-		size = int64(sz)
-
-		var offset int64
-		if h.Type == plumbing.REFDeltaObject {
-			offset, err = p.FindOffset(h.Reference)
-			if err != nil {
-				return
-			}
-		} else {
-			offset = h.OffsetReference
-		}
-
-		if baseType, ok := p.offsetToType[offset]; ok {
-			typ = baseType
-		} else {
-			if _, err = p.s.SeekFromStart(offset); err != nil {
-				return
-			}
-
-			h, err = p.nextObjectHeader()
-			if err != nil {
-				return
-			}
-
-			typ, _, err = p.getObjectData(h)
-			if err != nil {
-				return
-			}
-		}
-	default:
-		err = ErrInvalidObject.AddDetails("type %q", h.Type)
-	}
-
-	return
-}
-
 func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
 	switch h.Type {
 	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go
index 28582b5..5a62d63 100644
--- a/plumbing/format/packfile/parser.go
+++ b/plumbing/format/packfile/parser.go
@@ -38,15 +38,14 @@
 // Parser decodes a packfile and calls any observer associated to it. Is used
 // to generate indexes.
 type Parser struct {
-	storage          storer.EncodedObjectStorer
-	scanner          *Scanner
-	count            uint32
-	oi               []*objectInfo
-	oiByHash         map[plumbing.Hash]*objectInfo
-	oiByOffset       map[int64]*objectInfo
-	hashOffset       map[plumbing.Hash]int64
-	pendingRefDeltas map[plumbing.Hash][]*objectInfo
-	checksum         plumbing.Hash
+	storage    storer.EncodedObjectStorer
+	scanner    *Scanner
+	count      uint32
+	oi         []*objectInfo
+	oiByHash   map[plumbing.Hash]*objectInfo
+	oiByOffset map[int64]*objectInfo
+	hashOffset map[plumbing.Hash]int64
+	checksum   plumbing.Hash
 
 	cache *cache.BufferLRU
 	// delta content by offset, only used if source is not seekable
@@ -78,13 +77,12 @@
 	}
 
 	return &Parser{
-		storage:          storage,
-		scanner:          scanner,
-		ob:               ob,
-		count:            0,
-		cache:            cache.NewBufferLRUDefault(),
-		pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo),
-		deltas:           deltas,
+		storage: storage,
+		scanner: scanner,
+		ob:      ob,
+		count:   0,
+		cache:   cache.NewBufferLRUDefault(),
+		deltas:  deltas,
 	}, nil
 }
 
@@ -150,10 +148,6 @@
 		return plumbing.ZeroHash, err
 	}
 
-	if len(p.pendingRefDeltas) > 0 {
-		return plumbing.ZeroHash, ErrReferenceDeltaNotFound
-	}
-
 	if err := p.onFooter(p.checksum); err != nil {
 		return plumbing.ZeroHash, err
 	}
@@ -205,18 +199,21 @@
 			parent.Children = append(parent.Children, ota)
 		case plumbing.REFDeltaObject:
 			delta = true
-
 			parent, ok := p.oiByHash[oh.Reference]
-			if ok {
-				ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
-				parent.Children = append(parent.Children, ota)
-			} else {
-				ota = newBaseObject(oh.Offset, oh.Length, t)
-				p.pendingRefDeltas[oh.Reference] = append(
-					p.pendingRefDeltas[oh.Reference],
-					ota,
-				)
+			if !ok {
+				// The referenced object is not present in this packfile,
+				// so this must be a "thin" pack.
+				parent = &objectInfo{ // placeholder parent
+					SHA1:        oh.Reference,
+					ExternalRef: true, // mark as an external reference that must be resolved
+					Type:        plumbing.AnyObject,
+					DiskType:    plumbing.AnyObject,
+				}
+				p.oiByHash[oh.Reference] = parent
 			}
+			ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
+			parent.Children = append(parent.Children, ota)
+
 		default:
 			ota = newBaseObject(oh.Offset, oh.Length, t)
 		}
@@ -297,16 +294,20 @@
 	return nil
 }
 
-func (p *Parser) get(o *objectInfo) ([]byte, error) {
-	b, ok := p.cache.Get(o.Offset)
+func (p *Parser) get(o *objectInfo) (b []byte, err error) {
+	var ok bool
+	if !o.ExternalRef { // skip cache check for placeholder parents
+		b, ok = p.cache.Get(o.Offset)
+	}
+
 	// If it's not on the cache and is not a delta we can try to find it in the
-	// storage, if there's one.
+	// storage, if there's one. External refs always take this path.
 	if !ok && p.storage != nil && !o.Type.IsDelta() {
-		var err error
 		e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1)
 		if err != nil {
 			return nil, err
 		}
+		o.Type = e.Type()
 
 		r, err := e.Reader()
 		if err != nil {
@@ -323,6 +324,11 @@
 		return b, nil
 	}
 
+	if o.ExternalRef {
+		// we were not able to resolve a ref in a thin pack
+		return nil, ErrReferenceDeltaNotFound
+	}
+
 	var data []byte
 	if o.DiskType.IsDelta() {
 		base, err := p.get(o.Parent)
@@ -335,7 +341,6 @@
 			return nil, err
 		}
 	} else {
-		var err error
 		data, err = p.readData(o)
 		if err != nil {
 			return nil, err
@@ -367,14 +372,6 @@
 		return nil, err
 	}
 
-	if pending, ok := p.pendingRefDeltas[o.SHA1]; ok {
-		for _, po := range pending {
-			po.Parent = o
-			o.Children = append(o.Children, po)
-		}
-		delete(p.pendingRefDeltas, o.SHA1)
-	}
-
 	if p.storage != nil {
 		obj := new(plumbing.MemoryObject)
 		obj.SetSize(o.Size())
@@ -447,10 +444,11 @@
 }
 
 type objectInfo struct {
-	Offset   int64
-	Length   int64
-	Type     plumbing.ObjectType
-	DiskType plumbing.ObjectType
+	Offset      int64
+	Length      int64
+	Type        plumbing.ObjectType
+	DiskType    plumbing.ObjectType
+	ExternalRef bool // indicates this is an external reference in a thin pack file
 
 	Crc32 uint32
 
diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go
index 012a140..6e7c84b 100644
--- a/plumbing/format/packfile/parser_test.go
+++ b/plumbing/format/packfile/parser_test.go
@@ -1,10 +1,13 @@
 package packfile_test
 
 import (
+	"io"
 	"testing"
 
+	git "gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
+	"gopkg.in/src-d/go-git.v4/plumbing/storer"
 
 	. "gopkg.in/check.v1"
 	"gopkg.in/src-d/go-git-fixtures.v3"
@@ -74,6 +77,53 @@
 	c.Assert(obs.objects, DeepEquals, objs)
 }
 
+func (s *ParserSuite) TestThinPack(c *C) {
+
+	// Initialize an empty repository
+	fs, err := git.PlainInit(c.MkDir(), true)
+	c.Assert(err, IsNil)
+
+	// Try to parse a thin pack without having the required objects in the repo to
+	// see if the correct errors are returned
+	thinpack := fixtures.ByTag("thinpack").One()
+	scanner := packfile.NewScanner(thinpack.Packfile())
+	parser, err := packfile.NewParserWithStorage(scanner, fs.Storer) // NewParserWithStorage writes all parsed objects to the storer
+	c.Assert(err, IsNil)
+
+	_, err = parser.Parse()
+	c.Assert(err, Equals, plumbing.ErrObjectNotFound)
+
+	// start over with a clean repo
+	fs, err = git.PlainInit(c.MkDir(), true)
+	c.Assert(err, IsNil)
+
+	// Now unpack a base packfile into our empty repo:
+	f := fixtures.ByURL("https://github.com/spinnaker/spinnaker.git").One()
+	w, err := fs.Storer.(storer.PackfileWriter).PackfileWriter()
+	c.Assert(err, IsNil)
+	_, err = io.Copy(w, f.Packfile())
+	c.Assert(err, IsNil)
+	w.Close()
+
+	// Check that the test object that will come with our thin pack is *not* in the repo
+	_, err = fs.Storer.EncodedObject(plumbing.CommitObject, thinpack.Head)
+	c.Assert(err, Equals, plumbing.ErrObjectNotFound)
+
+	// Now unpack the thin pack:
+	scanner = packfile.NewScanner(thinpack.Packfile())
+	parser, err = packfile.NewParserWithStorage(scanner, fs.Storer) // NewParserWithStorage writes all parsed objects to the storer
+	c.Assert(err, IsNil)
+
+	h, err := parser.Parse()
+	c.Assert(err, IsNil)
+	c.Assert(h, Equals, plumbing.NewHash("1288734cbe0b95892e663221d94b95de1f5d7be8"))
+
+	// Check that our test object is now accessible
+	_, err = fs.Storer.EncodedObject(plumbing.CommitObject, thinpack.Head)
+	c.Assert(err, IsNil)
+
+}
+
 type observerObject struct {
 	hash   string
 	otype  plumbing.ObjectType
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index 6cd2d4c..57dcbb4 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -61,6 +61,11 @@
 	return nil
 }
 
+// Reindex indexes all packfiles again. Useful if git changed packfiles externally.
+func (s *ObjectStorage) Reindex() {
+	s.index = nil
+}
+
 func (s *ObjectStorage) loadIdxFile(h plumbing.Hash) (err error) {
 	f, err := s.dir.ObjectPackIdx(h)
 	if err != nil {
diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go
index 4e6bbfb..77eb31d 100644
--- a/storage/filesystem/object_test.go
+++ b/storage/filesystem/object_test.go
@@ -1,8 +1,11 @@
 package filesystem
 
 import (
+	"fmt"
+	"io"
 	"io/ioutil"
 	"os"
+	"path/filepath"
 	"testing"
 
 	"gopkg.in/src-d/go-git.v4/plumbing"
@@ -204,6 +207,59 @@
 	})
 }
 
+func copyFile(c *C, dstDir, dstFilename string, srcFile *os.File) {
+	_, err := srcFile.Seek(0, 0)
+	c.Assert(err, IsNil)
+
+	err = os.MkdirAll(dstDir, 0750|os.ModeDir)
+	c.Assert(err, IsNil)
+
+	dst, err := os.OpenFile(filepath.Join(dstDir, dstFilename), os.O_CREATE|os.O_WRONLY, 0666)
+	c.Assert(err, IsNil)
+	defer dst.Close()
+
+	_, err = io.Copy(dst, srcFile)
+	c.Assert(err, IsNil)
+}
+
+// TestPackfileReindex tests that externally-added packfiles are considered by go-git
+// after calling the Reindex method
+func (s *FsSuite) TestPackfileReindex(c *C) {
+	// obtain a standalone packfile that is not part of any other repository
+	// in the fixtures:
+	packFixture := fixtures.ByTag("packfile").ByTag("standalone").One()
+	packFile := packFixture.Packfile()
+	idxFile := packFixture.Idx()
+	packFilename := packFixture.PackfileHash.String()
+	testObjectHash := plumbing.NewHash("a771b1e94141480861332fd0e4684d33071306c6") // this is an object we know exists in the standalone packfile
+	fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
+		fs := f.DotGit()
+		storer := NewStorage(fs, cache.NewObjectLRUDefault())
+
+		// check that our test object is NOT found
+		_, err := storer.EncodedObject(plumbing.CommitObject, testObjectHash)
+		c.Assert(err, Equals, plumbing.ErrObjectNotFound)
+
+		// add the external packfile+idx to the packs folder
+		// this simulates a git bundle unbundle command, or a repack, for example.
+		copyFile(c, filepath.Join(storer.Filesystem().Root(), "objects", "pack"),
+			fmt.Sprintf("pack-%s.pack", packFilename), packFile)
+		copyFile(c, filepath.Join(storer.Filesystem().Root(), "objects", "pack"),
+			fmt.Sprintf("pack-%s.idx", packFilename), idxFile)
+
+		// check that we cannot still retrieve the test object
+		_, err = storer.EncodedObject(plumbing.CommitObject, testObjectHash)
+		c.Assert(err, Equals, plumbing.ErrObjectNotFound)
+
+		storer.Reindex() // actually reindex
+
+		// Now check that the test object can be retrieved
+		_, err = storer.EncodedObject(plumbing.CommitObject, testObjectHash)
+		c.Assert(err, IsNil)
+
+	})
+}
+
 func (s *FsSuite) TestPackfileIterKeepDescriptors(c *C) {
 	fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
 		fs := f.DotGit()