// Copyright 2024 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package artifactexporter handles uploading artifacts to BigQuery.
// It is the replacement for the legacy artifact exporter in bqexp.
package artifactexporter
import (
"bufio"
"context"
"fmt"
"hash/fnv"
"io"
"sort"
"strings"
"unicode/utf8"
"cloud.google.com/go/spanner"
repb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2"
"google.golang.org/genproto/googleapis/bytestream"
"google.golang.org/grpc/codes"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/timestamppb"
"go.chromium.org/luci/common/errors"
"go.chromium.org/luci/common/logging"
"go.chromium.org/luci/common/sync/parallel"
"go.chromium.org/luci/common/tsmon/field"
"go.chromium.org/luci/common/tsmon/metric"
"go.chromium.org/luci/grpc/grpcutil"
"go.chromium.org/luci/server"
"go.chromium.org/luci/server/auth/realms"
"go.chromium.org/luci/server/span"
"go.chromium.org/luci/server/tq"
"go.chromium.org/luci/resultdb/bqutil"
"go.chromium.org/luci/resultdb/internal/artifactcontent"
"go.chromium.org/luci/resultdb/internal/artifacts"
"go.chromium.org/luci/resultdb/internal/config"
"go.chromium.org/luci/resultdb/internal/invocations"
"go.chromium.org/luci/resultdb/internal/spanutil"
"go.chromium.org/luci/resultdb/internal/tasks/taskspb"
"go.chromium.org/luci/resultdb/pbutil"
bqpb "go.chromium.org/luci/resultdb/proto/bq"
pb "go.chromium.org/luci/resultdb/proto/v1"
)
// MaxShardContentSize is the maximum content size in a BQ row.
// Artifact content bigger than this size needs to be sharded.
// Leave 10 KB for other fields; the rest is content.
const MaxShardContentSize = bqutil.RowMaxBytes - 10*1024
// MaxRBECasBatchSize is the batch size limit when we read artifact content.
// TODO(nqmtuan): Call the Capacity API to find out the exact size limit for
// batch operations.
// For now, hardcode it to 2 MB. This should be under the limit,
// since BatchUpdateBlobs in BatchCreateArtifacts can handle up to 10MB.
const MaxRBECasBatchSize = 2 * 1024 * 1024 // 2 MB
// ArtifactRequestOverhead is the overhead (in bytes) applied to each artifact
// when calculating the request size.
const ArtifactRequestOverhead = 100
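// Artifact contains the metadata of an artifact to be exported.
// The artifact content is not stored here; it is fetched from RBE-CAS
// (keyed by RBECASHash) when the BigQuery rows are created.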
type Artifact struct {
InvocationID string
TestID string
ResultID string
ArtifactID string
ContentType string
Size int64
RBECASHash string
TestStatus pb.TestStatus
}
// ExportArtifactsTask describes how to route the export-artifacts task.
var ExportArtifactsTask = tq.RegisterTaskClass(tq.TaskClass{
ID: "export-artifacts",
Prototype: &taskspb.ExportArtifacts{},
Kind: tq.Transactional,
Queue: "artifactexporter",
RoutingPrefix: "/internal/tasks/artifactexporter", // for routing to "artifactexporter" service
})
var (
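// ErrInvalidUTF8 is returned when artifact content is not valid UTF-8.
// Artifacts with such content are logged and skipped rather than retried.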
ErrInvalidUTF8 = fmt.Errorf("invalid UTF-8 character")
)
var (
artifactExportCounter = metric.NewCounter(
"resultdb/artifacts/bqexport",
"The number of artifacts rows to export to BigQuery, grouped by project and status.",
nil,
// The LUCI Project.
field.String("project"),
// The status of the export.
// Possible values:
// - "success": The export was successful.
// - "failure_input": There was an error with the input artifact
// (e.g. artifact contains invalid UTF-8 character).
// - "failure_bq": There was an error with BigQuery (e.g. throttling, load shedding),
// which made the artifact failed to export.
field.String("status"),
)
artifactContentCounter = metric.NewCounter(
"resultdb/artifacts/content",
"The number of artifacts for a particular content type.",
nil,
// The LUCI Project.
field.String("project"),
// The content type group of the artifact.
// Possible values: "text", "nontext", "empty".
// We record the group instead of the actual value to prevent
// an explosion in cardinality.
field.String("content_type"),
)
)
// Options is artifact exporter configuration.
type Options struct {
// ArtifactRBEInstance is the name of the RBE instance to use for artifact
// storage. Example: "projects/luci-resultdb/instances/artifacts".
ArtifactRBEInstance string
}
// BQExportClient is the interface for exporting artifacts.
type BQExportClient interface {
InsertArtifactRows(ctx context.Context, rows []*bqpb.TextArtifactRow) error
}
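// artifactExporter downloads artifact content from RBE-CAS and exports
// it to BigQuery.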
type artifactExporter struct {
rbecasInstance string
// bytestreamClient is the client to stream big artifacts from RBE-CAS.
bytestreamClient bytestream.ByteStreamClient
// casClient is used to read artifacts in batch.
casClient repb.ContentAddressableStorageClient
// Client to export to BigQuery.
bqExportClient BQExportClient
}
// InitServer initializes an artifactexporter server.
func InitServer(srv *server.Server, opts Options) error {
if opts.ArtifactRBEInstance == "" {
return errors.Reason("No rbe instance specified").Err()
}
conn, err := artifactcontent.RBEConn(srv.Context)
if err != nil {
return err
}
bqClient, err := NewClient(srv.Context, srv.Options.CloudProject)
if err != nil {
return errors.Annotate(err, "create bq export client").Err()
}
srv.RegisterCleanup(func(ctx context.Context) {
err := conn.Close()
if err != nil {
logging.Errorf(ctx, "Cleaning up artifact RBE connection: %s", err)
}
err = bqClient.Close()
if err != nil {
logging.Errorf(ctx, "Cleaning up BigQuery export client: %s", err)
}
})
ae := &artifactExporter{
rbecasInstance: opts.ArtifactRBEInstance,
bytestreamClient: bytestream.NewByteStreamClient(conn),
casClient: repb.NewContentAddressableStorageClient(conn),
bqExportClient: bqClient,
}
ExportArtifactsTask.AttachHandler(func(ctx context.Context, msg proto.Message) error {
task := msg.(*taskspb.ExportArtifacts)
return ae.exportArtifacts(ctx, invocations.ID(task.InvocationId))
})
return nil
}
// Schedule schedules an export task for a finalized invocation.
func Schedule(ctx context.Context, invID invocations.ID) error {
return tq.AddTask(ctx, &tq.Task{
Title: string(invID),
Payload: &taskspb.ExportArtifacts{InvocationId: string(invID)},
})
}
// exportArtifacts reads all text artifacts (including artifact content
// in RBE-CAS) for an invocation and exports them to BigQuery.
func (ae *artifactExporter) exportArtifacts(ctx context.Context, invID invocations.ID) error {
logging.Infof(ctx, "Exporting artifacts for invocation ID %s", invID)
shouldUpload, err := shouldUploadToBQ(ctx)
if err != nil {
return errors.Annotate(err, "getting config").Err()
}
if !shouldUpload {
logging.Infof(ctx, "Uploading to BigQuery is disabled")
return nil
}
ctx, cancel := span.ReadOnlyTransaction(ctx)
defer cancel()
// Get the invocation.
inv, err := invocations.Read(ctx, invID)
if err != nil {
return errors.Annotate(err, "error reading exported invocation").Err()
}
if inv.State != pb.Invocation_FINALIZED {
return errors.Reason("invocation not finalized").Err()
}
// Query for artifacts.
project, _ := realms.Split(inv.Realm)
artifacts, err := ae.queryTextArtifacts(ctx, invID, project)
if err != nil {
return errors.Annotate(err, "query text artifacts").Err()
}
logging.Infof(ctx, "Found %d text artifacts for invocation %v", len(artifacts), invID)
percent, err := percentOfArtifactsToBQ(ctx)
if err != nil {
return errors.Annotate(err, "read percent from config").Err()
}
artifacts, err = throttleArtifactsForBQ(artifacts, percent)
if err != nil {
return errors.Annotate(err, "throttle artifacts for bq").Err()
}
logging.Infof(ctx, "Found %d text artifact after throttling", len(artifacts))
if len(artifacts) == 0 {
return nil
}
// Get artifact content.
// Channel to push the artifact rows.
// In theory, some artifact content may be too big to
// fit into memory (we allow artifacts up to 640MB [1]).
// We need to "break" them into reasonably sized chunks.
// [1] https://source.corp.google.com/h/chromium/infra/infra_superproject/+/main:data/k8s/projects/luci-resultdb/resultdb.star;l=307?q=max-artifact-content-stream-length
rowC := make(chan *bqpb.TextArtifactRow, 10)
err = parallel.FanOutIn(func(work chan<- func() error) {
// Download artifacts content and put the rows in a channel.
work <- func() error {
defer close(rowC)
err := ae.downloadMultipleArtifactContent(ctx, artifacts, inv, rowC, MaxShardContentSize, MaxRBECasBatchSize)
if err != nil {
return errors.Annotate(err, "download multiple artifact content").Err()
}
return nil
}
// Upload the rows to BQ.
work <- func() error {
err := ae.exportToBigQuery(ctx, rowC)
if err != nil {
return errors.Annotate(err, "export to bigquery").Err()
}
return nil
}
})
return err
}
// exportToBigQuery reads rows from channel rowC and exports them to BigQuery.
// It groups the rows into chunks and appends them to the default stream.
// There is a caveat in using the default stream:
// if the exporter fails in the middle (with some data already exported to BQ)
// and is then retried, duplicate rows may be inserted into BQ.
// One alternative is to use the pending type [1] instead of the default stream,
// which allows an atomic commit operation. The problem is that it has a limit
// of 10k CreateWriteStream calls per hour, which is not enough for the number
// of invocations we have (1.5 mil/day).
// Another alternative is to add a field to the Artifacts Spanner table to mark
// which rows have been exported.
// [1] https://cloud.google.com/bigquery/docs/write-api#pending_type
func (ae *artifactExporter) exportToBigQuery(ctx context.Context, rowC chan *bqpb.TextArtifactRow) error {
// rows contains the rows that will be sent to AppendRows.
rows := []*bqpb.TextArtifactRow{}
currentSize := 0
for row := range rowC {
// Group the rows together such that the total size of the batch
// does not exceed the AppendRows max size.
// The bqutil package also does batching, but we don't want to send it
// a group of rows just slightly bigger than one batch (which would
// result in a big batch followed by a tiny one), so we make sure the
// rows we send fit in exactly one batch.
rowSize := bqutil.RowSize(row)
// The max batch size would be exceeded; send whatever we have.
if currentSize+rowSize > bqutil.RowMaxBytes {
err := ae.bqExportClient.InsertArtifactRows(ctx, rows)
if err != nil {
artifactExportCounter.Add(ctx, int64(len(rows)), row.Project, "failure_bq")
return errors.Annotate(err, "insert artifact rows").Err()
}
artifactExportCounter.Add(ctx, int64(len(rows)), row.Project, "success")
// Reset
rows = []*bqpb.TextArtifactRow{}
currentSize = 0
}
rows = append(rows, row)
currentSize += rowSize
}
// Upload the remaining rows.
if currentSize > 0 {
err := ae.bqExportClient.InsertArtifactRows(ctx, rows)
if err != nil {
artifactExportCounter.Add(ctx, int64(len(rows)), rows[0].Project, "failure_bq")
return errors.Annotate(err, "insert artifact rows").Err()
}
artifactExportCounter.Add(ctx, int64(len(rows)), rows[0].Project, "success")
}
return nil
}
// queryTextArtifacts queries all text artifacts contained in an invocation.
// The query also joins with the TestResults table for the test status.
// The content of the artifacts is not populated in this function;
// this keeps the size of the slice small enough to fit in memory.
func (ae *artifactExporter) queryTextArtifacts(ctx context.Context, invID invocations.ID, project string) ([]*Artifact, error) {
st := spanner.NewStatement(`
SELECT
tr.TestId,
tr.ResultId,
a.ArtifactId,
a.ContentType,
a.Size,
a.RBECASHash,
IFNULL(tr.Status, 0)
FROM Artifacts a
LEFT JOIN TestResults tr
ON a.InvocationId = tr.InvocationId
AND a.ParentId = CONCAT("tr/", tr.TestId , "/" , tr.ResultId)
WHERE a.InvocationId=@invID
ORDER BY tr.TestId, tr.ResultId, a.ArtifactId
`)
st.Params = spanutil.ToSpannerMap(map[string]any{
"invID": invID,
})
results := []*Artifact{}
it := span.Query(ctx, st)
var b spanutil.Buffer
err := it.Do(func(r *spanner.Row) error {
a := &Artifact{InvocationID: string(invID)}
err := b.FromSpanner(r, &a.TestID, &a.ResultID, &a.ArtifactID, &a.ContentType, &a.Size, &a.RBECASHash, &a.TestStatus)
if err != nil {
return errors.Annotate(err, "read row").Err()
}
if a.ContentType == "" {
artifactContentCounter.Add(ctx, 1, project, "empty")
} else {
if pbutil.IsTextArtifact(a.ContentType) {
artifactContentCounter.Add(ctx, 1, project, "text")
results = append(results, a)
} else {
artifactContentCounter.Add(ctx, 1, project, "nontext")
}
}
return nil
})
if err != nil {
return nil, errors.Annotate(err, "query artifact").Err()
}
return results, nil
}
// downloadMultipleArtifactContent downloads the content of multiple artifacts
// in parallel and creates TextArtifactRows for each artifact.
// The rows are pushed into rowC.
// Artifacts with content size smaller than or equal to MaxRBECasBatchSize are downloaded in batches.
// Artifacts with content size bigger than MaxRBECasBatchSize are downloaded in a streaming manner.
func (ae *artifactExporter) downloadMultipleArtifactContent(ctx context.Context, artifacts []*Artifact, inv *pb.Invocation, rowC chan *bqpb.TextArtifactRow, maxShardContentSize, maxRBECasBatchSize int) error {
project, _ := realms.Split(inv.Realm)
// Sort the artifacts by size to make batching easier.
sort.Slice(artifacts, func(i, j int) bool {
return artifacts[i].Size < artifacts[j].Size
})
// Limit to 10 workers to avoid possible OOM.
return parallel.WorkPool(10, func(work chan<- func() error) {
// Smaller artifacts should be downloaded in batches.
currentBatchSize := 0
batch := []*Artifact{}
bigArtifactStartIndex := -1
for i, artifact := range artifacts {
if artifactSizeWithOverhead(artifact.Size) > maxRBECasBatchSize {
bigArtifactStartIndex = i
break
}
// The batch limit would be exceeded; download whatever is in the batch.
if currentBatchSize+artifactSizeWithOverhead(artifact.Size) > maxRBECasBatchSize {
b := batch
work <- func() error {
err := ae.batchDownloadArtifacts(ctx, b, inv, rowC)
if err != nil {
return errors.Annotate(err, "batch download artifacts").Err()
}
return nil
}
// Reset
currentBatchSize = 0
batch = []*Artifact{}
}
batch = append(batch, artifact)
// Add ArtifactRequestOverhead bytes for the overhead of the request.
currentBatchSize += artifactSizeWithOverhead(artifact.Size)
}
// Download whatever is left in the batch.
if len(batch) > 0 {
b := batch
work <- func() error {
err := ae.batchDownloadArtifacts(ctx, b, inv, rowC)
if err != nil {
return errors.Annotate(err, "batch download artifacts").Err()
}
return nil
}
}
// Download the big artifacts in a streaming manner.
if bigArtifactStartIndex > -1 {
for i := bigArtifactStartIndex; i < len(artifacts); i++ {
artifact := artifacts[i]
work <- func() error {
err := ae.streamArtifactContent(ctx, artifact, inv, rowC, maxShardContentSize)
if err != nil {
// We don't want to retry on this error. Just log a warning.
if errors.Is(err, ErrInvalidUTF8) {
logging.Fields{
"InvocationID": artifact.InvocationID,
"TestID": artifact.TestID,
"ResultID": artifact.ResultID,
"ArtifactID": artifact.ArtifactID,
}.Warningf(ctx, "Test result artifact has invalid UTF-8")
artifactExportCounter.Add(ctx, 1, project, "failure_input")
} else {
return errors.Annotate(err, "download artifact content inv_id = %q test id =%q result_id=%q artifact_id = %q", artifact.InvocationID, artifact.TestID, artifact.ResultID, artifact.ArtifactID).Err()
}
}
return nil
}
}
}
})
}
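// artifactSizeWithOverhead returns the number of bytes an artifact
// contributes to a batch request, including the per-artifact request
// overhead.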
func artifactSizeWithOverhead(artifactSize int64) int {
return int(artifactSize) + ArtifactRequestOverhead
}
// batchDownloadArtifacts downloads artifact content from RBE-CAS in a batch.
// It generates one TextArtifactRow per artifact and pushes them into rowC.
func (ae *artifactExporter) batchDownloadArtifacts(ctx context.Context, batch []*Artifact, inv *pb.Invocation, rowC chan *bqpb.TextArtifactRow) error {
logging.Infof(ctx, "batch download artifacts for %d artifacts", len(batch))
req := &repb.BatchReadBlobsRequest{InstanceName: ae.rbecasInstance}
for _, artifact := range batch {
req.Digests = append(req.Digests, &repb.Digest{
Hash: artifacts.TrimHashPrefix(artifact.RBECASHash),
SizeBytes: artifact.Size,
})
}
resp, err := ae.casClient.BatchReadBlobs(ctx, req)
if err != nil {
// InvalidArgument or ResourceExhausted suggests something is wrong with the
// input, so we should not retry in these cases.
// ResourceExhausted may happen when we try to download more than the capacity.
if grpcutil.Code(err) == codes.InvalidArgument {
logging.Errorf(ctx, "BatchReadBlobs: invalid argument for invocation %q. Error: %v", inv.Name, err.Error())
return nil
}
if grpcutil.Code(err) == codes.ResourceExhausted {
logging.Errorf(ctx, "BatchReadBlobs: resource exhausted for invocation %q. Error: %v", inv.Name, err.Error())
return nil
}
return errors.Annotate(err, "batch read blobs").Err()
}
project, realm := realms.Split(inv.Realm)
for i, r := range resp.GetResponses() {
artifact := batch[i]
c := codes.Code(r.GetStatus().GetCode())
loggingFields := logging.Fields{
"InvocationID": artifact.InvocationID,
"TestID": artifact.TestID,
"ResultID": artifact.ResultID,
"ArtifactID": artifact.ArtifactID,
}
// There is no point retrying if we get these codes;
// we'll just log the error.
if c == codes.InvalidArgument {
loggingFields.Warningf(ctx, "Invalid artifact")
artifactExportCounter.Add(ctx, 1, project, "failure_input")
continue
}
if c == codes.NotFound {
loggingFields.Warningf(ctx, "Not found artifact")
artifactExportCounter.Add(ctx, 1, project, "failure_input")
continue
}
// Perhaps something is wrong with RBE; we should retry.
if c != codes.OK {
loggingFields.Errorf(ctx, "Error downloading artifact. Code = %v message = %q", c, r.Status.GetMessage())
return errors.Reason("downloading artifact").Err()
}
// Check the data to make sure it is valid UTF-8.
if !utf8.Valid(r.Data) {
loggingFields.Warningf(ctx, "Invalid UTF-8 content")
artifactExportCounter.Add(ctx, 1, project, "failure_input")
continue
}
// Everything is OK; send to rowC.
// This is guaranteed to be a small artifact, so we need only one shard.
row := &bqpb.TextArtifactRow{
Project: project,
Realm: realm,
InvocationId: string(artifact.InvocationID),
TestId: artifact.TestID,
ResultId: artifact.ResultID,
ArtifactId: artifact.ArtifactID,
ContentType: artifact.ContentType,
Content: string(r.Data),
ArtifactContentSize: int32(artifact.Size),
ShardContentSize: int32(artifact.Size),
TestStatus: testStatusToString(artifact.TestStatus),
PartitionTime: timestamppb.New(inv.CreateTime.AsTime()),
ArtifactShard: fmt.Sprintf("%s:0", artifact.ArtifactID),
}
rowC <- row
}
return nil
}
// streamArtifactContent downloads artifact content from RBE-CAS in a streaming manner.
// It generates one or more TextArtifactRows for the content and pushes them into rowC.
func (ae *artifactExporter) streamArtifactContent(ctx context.Context, a *Artifact, inv *pb.Invocation, rowC chan *bqpb.TextArtifactRow, maxShardContentSize int) error {
ac := artifactcontent.Reader{
RBEInstance: ae.rbecasInstance,
Hash: a.RBECASHash,
Size: a.Size,
}
var str strings.Builder
shardID := 0
project, realm := realms.Split(inv.Realm)
input := func() *bqpb.TextArtifactRow {
return &bqpb.TextArtifactRow{
Project: project,
Realm: realm,
InvocationId: string(a.InvocationID),
TestId: a.TestID,
ResultId: a.ResultID,
ArtifactId: a.ArtifactID,
ShardId: int32(shardID),
ContentType: a.ContentType,
Content: str.String(),
ArtifactContentSize: int32(a.Size),
ShardContentSize: int32(str.Len()),
TestStatus: testStatusToString(a.TestStatus),
PartitionTime: timestamppb.New(inv.CreateTime.AsTime()),
ArtifactShard: fmt.Sprintf("%s:%d", a.ArtifactID, shardID),
// We don't populate NumShards here because we don't know
// exactly how many shards we need until we finish scanning.
// We are also not sure this field is useful (e.g. from BigQuery, we
// can just query all artifacts and take the max ShardId).
NumShards: 0,
}
}
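// Stream the content and split it into shards: scan rune by rune and
// flush a shard to rowC whenever adding the next rune would exceed
// maxShardContentSize.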
err := ac.DownloadRBECASContent(ctx, ae.bytestreamClient, func(ctx context.Context, pr io.Reader) error {
sc := bufio.NewScanner(pr)
// Read by runes, so we will know if the input contains an invalid
// Unicode character and can exit early.
// Invalid characters would cause proto.Marshal to fail for the row,
// so we don't support them.
sc.Split(bufio.ScanRunes)
for sc.Scan() {
// Detect an invalid rune and stop.
// Note: bufio.ScanRunes replaces an erroneous encoding with U+FFFD,
// so a legitimate U+FFFD in the input is also rejected here.
r, _ := utf8.DecodeRune(sc.Bytes())
if r == utf8.RuneError {
return ErrInvalidUTF8
}
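// Adding this rune would exceed the shard size limit; flush the
// current shard to rowC before continuing.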
if str.Len()+len(sc.Bytes()) > maxShardContentSize {
select {
case <-ctx.Done():
return ctx.Err()
case rowC <- input():
}
shardID++
str.Reset()
}
str.Write(sc.Bytes())
}
if err := sc.Err(); err != nil {
return errors.Annotate(err, "scanner error").Err()
}
if str.Len() > 0 {
select {
case <-ctx.Done():
return ctx.Err()
case rowC <- input():
}
}
return nil
})
if err != nil {
return errors.Annotate(err, "read artifact content").Err()
}
return nil
}
// throttleArtifactsForBQ limits the artifacts being exported to BigQuery
// based on a percentage. It allows us to roll out the feature slowly.
func throttleArtifactsForBQ(artifacts []*Artifact, percent int) ([]*Artifact, error) {
results := []*Artifact{}
for _, artifact := range artifacts {
hashStr := fmt.Sprintf("%s%s", artifact.TestID, artifact.ArtifactID)
hashVal := hash64([]byte(hashStr)) % 100
// This complements the throttle function in BatchCreateArtifact.
// For example, if percent = 2, only choose hashVal = 98, 99.
// This allows a smoother rollout of the service.
if (hashVal + uint64(percent)) >= 100 {
results = append(results, artifact)
}
}
return results, nil
}
// hash64 returns a hash value (uint64) for a given string.
func hash64(bt []byte) uint64 {
hasher := fnv.New64a()
hasher.Write(bt)
return hasher.Sum64()
}
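// testStatusToString returns the enum name of a test status,
// mapping STATUS_UNSPECIFIED to an empty string.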
func testStatusToString(status pb.TestStatus) string {
if status == pb.TestStatus_STATUS_UNSPECIFIED {
return ""
}
return pb.TestStatus_name[int32(status)]
}
// shouldUploadToBQ returns true if we should upload artifacts to BigQuery.
func shouldUploadToBQ(ctx context.Context) (bool, error) {
cfg, err := config.GetServiceConfig(ctx)
if err != nil {
return false, errors.Annotate(err, "get service config").Err()
}
return cfg.GetBqArtifactExporterServiceConfig().GetEnabled(), nil
}
// percentOfArtifactsToBQ returns the percentage of artifacts to be uploaded.
// The return value is an integer in [0, 100].
func percentOfArtifactsToBQ(ctx context.Context) (int, error) {
cfg, err := config.GetServiceConfig(ctx)
if err != nil {
return 0, errors.Annotate(err, "get service config").Err()
}
return int(cfg.GetBqArtifactExporterServiceConfig().GetExportPercent()), nil
}