Improve gitiles.Log.

* implement paging
* add docs
* add tests

BUG=646067

Review-Url: https://codereview.chromium.org/2983513002
diff --git a/common/api/gitiles/gitiles.go b/common/api/gitiles/gitiles.go
index 39f71f5..9d9ec0b 100644
--- a/common/api/gitiles/gitiles.go
+++ b/common/api/gitiles/gitiles.go
@@ -24,8 +24,10 @@
 	"strings"
 	"time"
 
-	"github.com/luci/luci-go/server/auth"
+	"github.com/luci/luci-go/common/errors"
+	"github.com/luci/luci-go/common/retry/transient"
 	"golang.org/x/net/context"
+	"golang.org/x/net/context/ctxhttp"
 )
 
 // User is the author or the committer returned from gitiles.
@@ -50,15 +52,15 @@
 	Message   string   `json:"message"`
 }
 
-// LogResponse is the JSON response from querying gitiles for a log request.
-type LogResponse struct {
-	Log  []Commit `json:"log"`
-	Next string   `json:"next"`
+// ValidateRepoURL validates gitiles repository URL for use in this package.
+func ValidateRepoURL(repoURL string) error {
+	_, err := NormalizeRepoURL(repoURL)
+	return err
 }
 
-// fixURL validates and normalizes a repoURL and treeish, and returns the
-// log JSON gitiles URL.
-func fixURL(repoURL, treeish string) (string, error) {
+// NormalizeRepoURL returns canonical for gitiles URL of the repo including "a/" path prefix.
+// error is returned if validation fails.
+func NormalizeRepoURL(repoURL string) (string, error) {
 	u, err := url.Parse(repoURL)
 	if err != nil {
 		return "", err
@@ -67,46 +69,134 @@
 		return "", fmt.Errorf("%s should start with https://", repoURL)
 	}
 	if !strings.HasSuffix(u.Host, ".googlesource.com") {
-		return "", fmt.Errorf("Only .googlesource.com repos supported")
+		return "", errors.New("only .googlesource.com repos supported")
 	}
-	// Use the authenticated URL
-	u.Path = "a/" + u.Path
-	URL := fmt.Sprintf("%s/+log/%s?format=JSON", u.String(), treeish)
-	return URL, nil
+	if u.Fragment != "" {
+		return "", errors.New("no fragments allowed in repoURL")
+	}
+	if u.Path == "" || u.Path == "/" {
+		return "", errors.New("path to repo is empty")
+	}
+	if !strings.HasPrefix(u.Path, "/") {
+		u.Path = "/" + u.Path
+	}
+	if !strings.HasPrefix(u.Path, "/a/") {
+		// Use the authenticated URL
+		u.Path = "/a" + u.Path
+	}
+
+	u.Path = strings.TrimRight(u.Path, "/")
+	u.Path = strings.TrimSuffix(u.Path, ".git")
+	return u.String(), nil
 }
 
-// Log returns a list of commits based on a repo and treeish (usually
-// a branch). This should be equivilent of a "git log <treeish>" call in
-// that repository.
-func Log(c context.Context, repoURL, treeish string, limit int) ([]Commit, error) {
-	// TODO(hinoka): Respect the limit.
-	URL, err := fixURL(repoURL, treeish)
+// Client is Gitiles client.
+type Client struct {
+	Client *http.Client
+
+	// Used for testing only.
+	mockRepoURL string
+}
+
+// Log returns a list of commits based on a repo and treeish.
+// This should be equivalent of a "git log <treeish>" call in that repository.
+//
+// treeish can be either:
+//   (1) a git revision as 40-char string or its prefix so long as its unique in repo.
+//   (2) a ref such as "refs/heads/branch" or just "branch"
+//   (3) a ref defined as n-th parent of R in the form "R~n".
+//       For example, "master~2" or "deadbeef~1".
+//   (4) a range between two revisions in the form "CHILD..PREDECESSOR", where
+//       CHILD and PREDECESSOR are each specified in either (1), (2) or (3)
+//       formats listed above.
+//       For example, "foo..ba1", "master..refs/branch-heads/1.2.3",
+//         or even "master~5..master~9".
+//
+//
+// If the returned log has a commit with 2+ parents, the order of commits after
+// that is whatever Gitiles returns, which currently means ordered
+// by topological sort first, and then by commit timestamps.
+//
+// This means that if Log(C) contains commit A, Log(A) will not necessarily return
+// a subsequence of Log(C) (though definitely a subset). For example,
+//
+//     common... -> base ------> A ----> C
+//                       \            /
+//                        --> B ------
+//
+//     ----commit timestamp increases--->
+//
+//  Log(A) = [A, base, common...]
+//  Log(B) = [B, base, common...]
+//  Log(C) = [C, A, B, base, common...]
+//
+func (c *Client) Log(ctx context.Context, repoURL, treeish string, limit int) ([]Commit, error) {
+	repoURL, err := NormalizeRepoURL(repoURL)
 	if err != nil {
 		return nil, err
 	}
-	t, err := auth.GetRPCTransport(c, auth.AsSelf, auth.WithScopes(
-		"https://www.googleapis.com/auth/gerritcodereview",
-	))
-	if err != nil {
+	if limit < 1 {
+		return nil, fmt.Errorf("limit must be at least 1, but %d provided", limit)
+	}
+	subPath := fmt.Sprintf("+log/%s?format=JSON", url.PathEscape(treeish))
+	resp := &logResponse{}
+	if err := c.get(ctx, repoURL, subPath, resp); err != nil {
 		return nil, err
 	}
-	client := http.Client{Transport: t}
-	r, err := client.Get(URL)
-	if err != nil {
-		return nil, err
+	result := resp.Log
+	for {
+		if resp.Next == "" || len(result) >= limit {
+			if len(result) > limit {
+				result = result[:limit]
+			}
+			return result, nil
+		}
+		nextPath := subPath + "&s=" + resp.Next
+		resp = &logResponse{}
+		if err := c.get(ctx, repoURL, nextPath, resp); err != nil {
+			return nil, err
+		}
+		result = append(result, resp.Log...)
 	}
-	if r.StatusCode != 200 {
-		return nil, fmt.Errorf("Failed to fetch %s, status code %d", URL, r.StatusCode)
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// logResponse is the JSON response from querying gitiles for a log request.
+type logResponse struct {
+	Log  []Commit `json:"log"`
+	Next string   `json:"next"`
+}
+
+func (c *Client) get(ctx context.Context, repoURL, subPath string, result interface{}) error {
+	URL := fmt.Sprintf("%s/%s", repoURL, subPath)
+	if c.mockRepoURL != "" {
+		URL = fmt.Sprintf("%s/%s", c.mockRepoURL, subPath)
+	}
+	r, err := ctxhttp.Get(ctx, c.Client, URL)
+	if err != nil {
+		return transient.Tag.Apply(err)
 	}
 	defer r.Body.Close()
+	if r.StatusCode != 200 {
+		err = fmt.Errorf("failed to fetch %s, status code %d", URL, r.StatusCode)
+		if r.StatusCode >= 500 {
+			// TODO(tandrii): consider retrying.
+			err = transient.Tag.Apply(err)
+		}
+		return err
+	}
 	// Strip out the jsonp header, which is ")]}'"
 	trash := make([]byte, 4)
-	r.Body.Read(trash) // Read the jsonp header
-	commits := LogResponse{}
-	if err := json.NewDecoder(r.Body).Decode(&commits); err != nil {
-		return nil, err
+	cnt, err := r.Body.Read(trash)
+	if err != nil {
+		return errors.Annotate(err, "unexpected response from Gitiles").Err()
 	}
-	// TODO(hinoka): If there is a page and we have gotten less than the limit,
-	// keep making requests for the next page until we have enough commits.
-	return commits.Log, nil
+	if cnt != 4 || ")]}'" != string(trash) {
+		return errors.New("unexpected response from Gitiles")
+	}
+	if err = json.NewDecoder(r.Body).Decode(result); err != nil {
+		return errors.Annotate(err, "failed to decode Gitiles response into %T", result).Err()
+	}
+	return nil
 }
diff --git a/common/api/gitiles/gitiles_test.go b/common/api/gitiles/gitiles_test.go
new file mode 100644
index 0000000..b5e2290
--- /dev/null
+++ b/common/api/gitiles/gitiles_test.go
@@ -0,0 +1,179 @@
+// Copyright 2017 The LUCI Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gitiles
+
+import (
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"golang.org/x/net/context"
+
+	. "github.com/smartystreets/goconvey/convey"
+)
+
+func TestRepoURL(t *testing.T) {
+	t.Parallel()
+	Convey("Malformed", t, func() {
+		f := func(arg string) {
+			So(ValidateRepoURL(arg), ShouldNotBeNil)
+			_, err := NormalizeRepoURL(arg)
+			So(err, ShouldNotBeNil)
+		}
+
+		f("wtf/\\is\this")
+		f("https://example.com/repo.git")
+		f("http://bad-protocol.googlesource.com/repo.git")
+		f("https://a.googlesource.com")
+		f("https://a.googlesource.com/")
+		f("a.googlesource.com/no-protocol.git")
+		f("https://a.googlesource.com/no-protocol#fragment")
+	})
+
+	Convey("OK", t, func() {
+		f := func(arg, exp string) {
+			So(ValidateRepoURL(arg), ShouldBeNil)
+			act, err := NormalizeRepoURL(arg)
+			So(err, ShouldBeNil)
+			So(act, ShouldEqual, exp)
+		}
+
+		f("https://chromium.googlesource.com/repo.git",
+			"https://chromium.googlesource.com/a/repo")
+		f("https://chromium.googlesource.com/repo/",
+			"https://chromium.googlesource.com/a/repo")
+		f("https://chromium.googlesource.com/a/repo",
+			"https://chromium.googlesource.com/a/repo")
+		f("https://chromium.googlesource.com/parent/repo.git/",
+			"https://chromium.googlesource.com/a/parent/repo")
+	})
+}
+
+func TestLog(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+
+	Convey("Log with bad URL", t, func() {
+		srv, c := newMockClient(func(w http.ResponseWriter, r *http.Request) {})
+		defer srv.Close()
+		_, err := c.Log(ctx, "bad://repo.git/", "master", 10)
+		So(err, ShouldNotBeNil)
+	})
+
+	Convey("Log w/o pages", t, func() {
+		srv, c := newMockClient(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(200)
+			w.Header().Set("Content-Type", "application/json")
+			fmt.Fprintf(w, ")]}'\n{\"log\": [%s, %s]}\n", fake_commit1_str, fake_commit2_str)
+		})
+		defer srv.Close()
+
+		Convey("Return All", func() {
+			commits, err := c.Log(ctx, "https://c.googlesource.com/repo",
+				"master..8de6836858c99e48f3c58164ab717bda728e95dd", 10)
+			So(err, ShouldBeNil)
+			So(len(commits), ShouldEqual, 2)
+			So(commits[0].Author.Name, ShouldEqual, "Author 1")
+			So(commits[1].Commit, ShouldEqual, "dc1dbf1aa56e4dd4cbfaab61c4d30a35adce5f40")
+		})
+
+		Convey("DO not exceed limit", func() {
+			commits, err := c.Log(ctx, "https://c.googlesource.com/repo",
+				"master..8de6836858c99e48f3c58164ab717bda728e95dd", 1)
+			So(err, ShouldBeNil)
+			So(len(commits), ShouldEqual, 1)
+			So(commits[0].Author.Name, ShouldEqual, "Author 1")
+		})
+	})
+
+	Convey("Log Paging", t, func() {
+		cursor_sent := ""
+		srv, c := newMockClient(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(200)
+			w.Header().Set("Content-Type", "application/json")
+			if next := r.URL.Query().Get("s"); next == "" {
+				fmt.Fprintf(w, ")]}'\n{\"log\": [%s], \"next\": \"next_cursor_value\"}\n", fake_commit1_str)
+			} else {
+				cursor_sent = next
+				fmt.Fprintf(w, ")]}'\n{\"log\": [%s]}\n", fake_commit2_str)
+			}
+		})
+		defer srv.Close()
+
+		Convey("Page till no cursor", func() {
+			commits, err := c.Log(ctx, "https://c.googlesource.com/repo",
+				"master..8de6836858c99e48f3c58164ab717bda728e95dd", 10)
+			So(err, ShouldBeNil)
+			So(cursor_sent, ShouldEqual, "next_cursor_value")
+			So(len(commits), ShouldEqual, 2)
+			So(commits[0].Author.Name, ShouldEqual, "Author 1")
+			So(commits[1].Commit, ShouldEqual, "dc1dbf1aa56e4dd4cbfaab61c4d30a35adce5f40")
+		})
+
+		Convey("Page till limit", func() {
+			commits, err := c.Log(ctx, "https://c.googlesource.com/repo",
+				"master", 1)
+			So(err, ShouldBeNil)
+			So(cursor_sent, ShouldEqual, "")
+			So(len(commits), ShouldEqual, 1)
+			So(commits[0].Author.Name, ShouldEqual, "Author 1")
+		})
+	})
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+var (
+	fake_commit1_str = `{
+		"commit": "0b2c5409e58a71c691b05454b55cc5580cc822d1",
+		"tree": "3c6f95bc757698cd6aca3c49f88f640fd145ea69",
+		"parents": [ "dc1dbf1aa56e4dd4cbfaab61c4d30a35adce5f40" ],
+		"author": {
+			"name": "Author 1",
+			"email": "author1@example.com",
+			"time": "Mon Jul 17 15:02:43 2017"
+		},
+		"committer": {
+			"name": "Commit Bot",
+			"email": "commit-bot@chromium.org",
+			"time": "Mon Jul 17 15:02:43 2017"
+		},
+		"message": "Import wpt@d96d68ed964f9bfc2bb248c2d2fab7a8870dc685\\n\\nCr-Commit-Position: refs/heads/master@{#487078}"
+	}`
+	fake_commit2_str = `{
+		"commit": "dc1dbf1aa56e4dd4cbfaab61c4d30a35adce5f40",
+		"tree": "1ba2335c07915c31597b97a8d824aecc85a996f6",
+		"parents": ["8de6836858c99e48f3c58164ab717bda728e95dd"],
+		"author": {
+			"name": "Author 2",
+			"email": "author-2@example.com",
+			"time": "Mon Jul 17 15:01:13 2017"
+		},
+		"committer": {
+			"name": "Commit Bot",
+			"email": "commit-bot@chromium.org",
+			"time": "Mon Jul 17 15:01:13 2017"
+		},
+		"message": "[Web Payments] User weak ptr in Payment Request\u0027s error callback\\n\\nBug: 742329\\nReviewed-on: https://chromium-review.googlesource.com/570982\\nCr-Commit-Position: refs/heads/master@{#487077}"
+  }`
+)
+
+////////////////////////////////////////////////////////////////////////////////
+
+func newMockClient(handler func(w http.ResponseWriter, r *http.Request)) (*httptest.Server, *Client) {
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	return srv, &Client{Client: http.DefaultClient, mockRepoURL: srv.URL}
+}
diff --git a/milo/git/history.go b/milo/git/history.go
index efcfb20..dfed1c4 100644
--- a/milo/git/history.go
+++ b/milo/git/history.go
@@ -20,6 +20,7 @@
 	"encoding/hex"
 	"fmt"
 	"io/ioutil"
+	"net/http"
 	"regexp"
 
 	"golang.org/x/net/context"
@@ -32,6 +33,7 @@
 	"github.com/luci/luci-go/common/errors"
 	"github.com/luci/luci-go/common/logging"
 	"github.com/luci/luci-go/common/proto/google"
+	"github.com/luci/luci-go/server/auth"
 
 	milo "github.com/luci/luci-go/milo/api/proto"
 )
@@ -151,7 +153,14 @@
 	ret := &milo.ConsoleGitInfo{}
 	cacheKey := fmt.Sprintf("GetHistory|%s|%s|%d", url, commitish, limit)
 	err = protoCache(c, useCache, cacheKey, ret, func() error {
-		rawEntries, err := gitiles.Log(c, url, commitish, limit)
+		t, err := auth.GetRPCTransport(c, auth.AsSelf, auth.WithScopes(
+			"https://www.googleapis.com/auth/gerritcodereview",
+		))
+		if err != nil {
+			return errors.Annotate(err, "getting RPC Transport").Err()
+		}
+		g := &gitiles.Client{Client: &http.Client{Transport: t}}
+		rawEntries, err := g.Log(c, url, commitish, limit)
 		if err != nil {
 			return errors.Annotate(err, "GetHistory").Err()
 		}
diff --git a/server/auth/config.go b/server/auth/config.go
index 079e7df..1470829 100644
--- a/server/auth/config.go
+++ b/server/auth/config.go
@@ -32,7 +32,7 @@
 // various interfaces used by the library.
 //
 // It lives in the context and must be installed there by some root middleware
-// (via ModifyContext call).
+// (via ModifyConfig call).
 type Config struct {
 	// DBProvider is a callback that returns most recent DB instance.
 	//