resultdb/internal/artifacts/artifact_line.go - infra/luci/luci-go - Git at Google

 // Copyright 2024 The LUCI Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package artifacts

 import (
 	"bytes"
 	"fmt"
 	"math"
 	"mime"
 	"path/filepath"
 	"regexp"
 	"strconv"
 	"strings"
 	"time"

 	"google.golang.org/protobuf/proto"
 	"google.golang.org/protobuf/types/known/timestamppb"

 	"go.chromium.org/luci/common/errors"

 	pb "go.chromium.org/luci/resultdb/proto/v1"
 )

 // ToLogLines splits the content of an artifact into log lines and
 // annotates each line with a best-effort timestamp and severity.
 //
 // artifactID and contentType are only used to decide whether the artifact
 // can be processed as a log; an error is returned when it cannot.
 // year is the fallback year for timestamps whose format carries no year.
 // maxLines caps the number of lines considered; a value <= 0 means no cap.
 // maxBytes caps the cumulative proto.Size of the returned lines: the first
 // line that pushes the running total above it, and every line after it,
 // is dropped. An error is returned when not even the first line fits.
 func ToLogLines(artifactID string, contentType string, content []byte, year, maxLines, maxBytes int) ([]*pb.ArtifactLine, error) {
 	if !isLogSupportedArtifact(artifactID, contentType) {
 		return nil, errors.New("unsupported file type")
 	}

 	rawLines := bytes.Split(content, []byte("\n"))
 	if maxLines > 0 && maxLines < len(rawLines) {
 		rawLines = rawLines[:maxLines]
 	}

 	lines := make([]*pb.ArtifactLine, 0, len(rawLines))
 	usedBytes := 0
 	for i := range rawLines {
 		text := string(rawLines[i])
 		// Best effort: a line without a recognizable timestamp keeps a
 		// nil Timestamp, so the extraction error is deliberately dropped.
 		ts, _ := extractTimestamp(text, year)
 		level := extractSeverity(text)
 		entry := &pb.ArtifactLine{
 			Number:    int64(i) + 1,
 			Timestamp: ts,
 			Severity:  level,
 			Content:   rawLines[i],
 		}

 		// The line that crosses the budget is not included in the result.
 		if usedBytes += proto.Size(entry); usedBytes > maxBytes {
 			break
 		}
 		lines = append(lines, entry)
 	}

 	// The file contains lines but not even the first one fits the budget.
 	if len(lines) == 0 && len(rawLines) > 0 {
 		return nil, errors.New(fmt.Sprintf("first file line content exceeds maximum size limit: %d bytes", maxBytes))
 	}
 	return lines, nil
 }

 // SupportedContentTypes lists the non-"text" content types that are still
 // accepted for log line processing. isSupportedContentType only checks
 // whether a key is present; the boolean value itself is not consulted.
 var SupportedContentTypes = map[string]bool{
 	"application/octet-stream": true,
 	"application/x-gzip":       true,
 }

 // isLogSupportedArtifact reports whether an artifact can be processed as a
 // log: its content type must be acceptable and its ID must not carry the
 // extension of a known non-text file type. The ID is only inspected when
 // the content type check passes.
 func isLogSupportedArtifact(artifactID string, contentType string) bool {
 	if !isSupportedContentType(contentType) {
 		return false
 	}
 	return !isNonTextFile(artifactID)
 }

 // isSupportedContentType reports whether contentType is acceptable for log
 // processing: empty, starting with "text", or a key of
 // SupportedContentTypes.
 func isSupportedContentType(contentType string) bool {
 	switch {
 	case contentType == "":
 		// No content type available: let the other checks decide
 		// whether the file is supported.
 		return true
 	case strings.HasPrefix(contentType, "text"):
 		return true
 	}
 	_, listed := SupportedContentTypes[contentType]
 	return listed
 }

 // isNonTextFile reports whether filePath has an extension whose MIME type
 // is known and is not a "text/" type. Paths without an extension, and
 // extensions with no known MIME type, are not considered non-text.
 func isNonTextFile(filePath string) bool {
 	ext := filepath.Ext(filePath)
 	if ext == "" {
 		return false
 	}
 	mimeType := mime.TypeByExtension(ext)
 	return mimeType != "" && !strings.HasPrefix(mimeType, "text/")
 }

 // logTimestamp describes one recognized log timestamp format: how to find
 // it in a log line and how to convert the matched text into a time.
 type logTimestamp struct {
 	// The regexp for the supported log timestamp
 	Regexp *regexp.Regexp
 	// The time layout defining the parser format used by time parser lib
 	Layout string
 	// Specific time zone, e.g. America/Los_Angeles
 	Zone string
 	// Specifies whether the timestamp is in sec.millisec epoch format
 	IsEpoch bool
 	// Specifies whether the year is missing from the timestamp.
 	// When set, extractTimestamp prepends the fallback year and a "/" to
 	// the matched text before parsing it with Layout.
 	YearMissing bool
 }

 // Time layout and time zone names used when handling log timestamps.
 const (
 	// RFC3339FullDate is a time layout holding only year, month and day.
 	RFC3339FullDate = "2006-01-02"
 	// PstTimeZone is the IANA time zone name for US Pacific time.
 	PstTimeZone     = "America/Los_Angeles"
 	// UtcTimeZone is the time zone name for UTC.
 	UtcTimeZone     = "UTC"
 )

 // supportedLogTimestamps lists the timestamp formats that extractTimestamp
 // recognizes. Entries are tried in order and the first entry whose Regexp
 // matches the line is used, so the order of the entries is significant.
 //
 // For entries with YearMissing set, the Layout starts with "2006/" because
 // the fallback year and a "/" are prepended to the matched text.
 var supportedLogTimestamps = []logTimestamp{
 	// tast and upstart log style: 2021-12-17T07:33:58.266026Z
 	{
 		Regexp: regexp.MustCompile(
 			`(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}).(\d{3,6})[A-Z]`),
 		Layout: "2006-01-02T15:04:05.999999Z07:00",
 		Zone:   UtcTimeZone,
 	},
 	// gzipped files timestamps: 03-12 19:08:48.511
 	{
 		Regexp: regexp.MustCompile(
 			`^(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}).(\d{3})`),
 		Layout:      "2006/01-02 15:04:05.000",
 		Zone:        UtcTimeZone,
 		YearMissing: true,
 	},
 	// tast steam out timestamps: 2022-10-18 15:10:19
 	{
 		Regexp:      regexp.MustCompile(`(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})`),
 		Layout:      "2006-01-02 15:04:05",
 		Zone:        PstTimeZone,
 		YearMissing: false,
 	},
 	// tast log.txt timestamps: 2022/10/18 15:10:19
 	{
 		Regexp:      regexp.MustCompile(`(\d{4})\/(\d{2})\/(\d{2}) (\d{2}):(\d{2}):(\d{2})`),
 		Layout:      "2006/01/02 15:04:05",
 		Zone:        PstTimeZone,
 		YearMissing: false,
 	},
 	// <tast_test>_logs.txt timestamps: Jun  9 22:15:15
 	{
 		Regexp:      regexp.MustCompile(`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)  (\d{1,}) (\d{2}):(\d{2}):(\d{2})`),
 		Layout:      "2006/Jan  2 15:04:05",
 		Zone:        PstTimeZone,
 		YearMissing: true,
 	},
 	// Format for timestamps like: 12/16 22:34:29.742
 	//
 	// The fractional part is optional and may have any number of digits.
 	// The layout deliberately has no fractional field: when parsing, the
 	// time package accepts a fractional second right after the seconds
 	// field even if the layout does not mention one, whereas a ".000"
 	// layout would reject every match that lacks exactly three digits.
 	// The separator is restricted to '.' or ',' so that a space followed
 	// by digits is not captured as a fraction.
 	{
 		Regexp:      regexp.MustCompile(`(\d{2})\/(\d{2}) (\d{2}):(\d{2}):(\d{2})(?:[.,](\d+))?`),
 		Layout:      "2006/01/02 15:04:05",
 		Zone:        PstTimeZone,
 		YearMissing: true,
 	},
 	// ISO UTC format: 2019-09-30T09:49:45.355431+00:00
 	{
 		Regexp: regexp.MustCompile(
 			`(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}).(\d{3})\d+[\+\-]\d{2}:\d{2}`),
 		Layout: "2006-01-02T15:04:05.000000-07:00",
 		Zone:   UtcTimeZone,
 	},
 	// chrome log style timestamps, eg: 1119/064727.250343
 	{
 		Regexp:      regexp.MustCompile(`(\d{2})(\d{2})\/(\d{2})(\d{2})(\d{2})\.(\d{6})`),
 		Layout:      "2006/0102/150405.000000",
 		Zone:        PstTimeZone,
 		YearMissing: true,
 	},
 	// audit.log style timestamps: msg=audit(1571729288.142:53)
 	{
 		Regexp:      regexp.MustCompile(`msg=audit\((\d+)\.(\d+):(\d+)`),
 		IsEpoch:     true,
 		Zone:        UtcTimeZone,
 		YearMissing: true,
 	},
 	// journal.log style dates: Nov 01 02:58:43
 	{
 		Regexp: regexp.MustCompile(
 			`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{1,}) ([0-9]{2}):([0-9]{2}):([0-9]{2})`),
 		Layout:      "2006/Jan 02 15:04:05",
 		Zone:        PstTimeZone,
 		YearMissing: true,
 	},
 	// vmlog timestamps: 0620/065447
 	{
 		Regexp:      regexp.MustCompile(`(\d{2})(\d{2})/(\d{2})(\d{2})(\d{2})`),
 		Layout:      "2006/0102/150405",
 		Zone:        PstTimeZone,
 		YearMissing: true,
 	},
 	// labstation timestamps: Jun19 20:18
 	{
 		Regexp: regexp.MustCompile(
 			`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)([0-9]{2}) ([0-9]{2}):([0-9]{2})`),
 		Layout:      "2006/Jan02 15:04",
 		Zone:        PstTimeZone,
 		YearMissing: true,
 	},
 }

 // extractTimestamp returns the first timestamp recognized in line, trying
 // the entries of supportedLogTimestamps in order.
 //
 // year is prepended to the matched text for formats flagged YearMissing.
 // An error is returned when no format yields a timestamp, or when the text
 // matched by the first matching non-epoch format cannot be parsed with
 // that format's layout.
 func extractTimestamp(line string, year int) (*timestamppb.Timestamp, error) {
 	for i := range supportedLogTimestamps {
 		format := &supportedLogTimestamps[i]
 		m := format.Regexp.FindStringSubmatch(line)

 		// Epoch formats already carry seconds and milliseconds as plain
 		// numbers, so the time parser is skipped for them.
 		if format.IsEpoch {
 			// All three capture groups are needed.
 			if len(m) <= 3 {
 				continue
 			}
 			// A number that does not fit in 64 bits only disqualifies this
 			// format; the remaining formats are still tried.
 			sec, err := strconv.ParseInt(m[1], 10, 64)
 			if err != nil {
 				continue
 			}
 			milliSec, err := strconv.ParseInt(m[2], 10, 64)
 			if err != nil {
 				continue
 			}
 			// NOTE(review): in audit records the number after ':' looks like
 			// an event serial number rather than a time component; confirm
 			// that adding it as nanoseconds is intended.
 			nanoSec, err := strconv.ParseInt(m[3], 10, 64)
 			if err != nil {
 				continue
 			}

 			nanoSec += milliSec * int64(math.Pow10(6))
 			return timestamppb.New(time.Unix(sec, nanoSec)), nil
 		}

 		if len(m) <= 1 {
 			continue
 		}
 		ts := m[0]
 		if format.YearMissing {
 			ts = fmt.Sprintf("%d/%s", year, ts)
 		}
 		t, err := time.ParseInLocation(format.Layout, ts, zone(format.Zone))
 		if err != nil {
 			return nil, err
 		}
 		return timestamppb.New(t), nil
 	}
 	return nil, errors.New("invalid timestamp format provided.")
 }

 // zone resolves a time zone name to its location.
 // Names that cannot be loaded resolve to UTC.
 func zone(name string) *time.Location {
 	if loc, err := time.LoadLocation(name); err == nil {
 		return loc
 	}
 	return time.UTC
 }

 // severityRegexes are the recognized log severity patterns found in
 // most of the artifacts that we process, severity extraction is best effort
 // and more severities can be added in the future.
 //
 // Each pattern has two alternatives with one capture group each: the
 // severity word (optionally wrapped in parentheses and/or colons and
 // followed by whitespace or `|`), or the single severity letter surrounded
 // by whitespace inside the line.
 //
 // Recognized spellings:
 // * FATAL = FATAL or F.
 // * ERROR = ERROR or ERR or E.
 // * WARNING = WARNING or WARNIN or WARNI or WARN or W.
 // * NOTICE = NOTICE or NOTIC or N.
 // * INFO = INFO or I.
 // * DEBUG = DEBUG or D.
 // * VERBOSE = VERBOSE or VERBOSE1 or V.
 var severityRegexes = []regexp.Regexp{
 	*regexp.MustCompile(`\s*\:*\(?(FATAL)\)?\:*[\s\|]+|.+\s(F)\s.+`),
 	*regexp.MustCompile(`\s*\:*\(?(ERROR|ERR)\)?\:*[\s\|]+|.+\s(E)\s.+`),
 	// WARN is listed last so that the longer spellings are preferred.
 	*regexp.MustCompile(`\s*\:*\(?(WARNING|WARNIN|WARNI|WARN)\)?\:*[\s\|]+|.+\s(W)\s.+`),
 	*regexp.MustCompile(`\s*\:*\(?(NOTICE|NOTIC)\)?\:*[\s\|]+|.+\s(N)\s.+`),
 	*regexp.MustCompile(`\s*\:*\(?(INFO)\)?\:*[\s\|]+|.+\s(I)\s.+`),
 	*regexp.MustCompile(`\s*\:*\(?(DEBUG)\)?\:*[\s|]+|.+\s(D)\s.+`),
 	*regexp.MustCompile(`\s*\:*\(?(VERBOSE|VERBOSE1)\)?\:*[\s|]+|.+\s(V)\s.+`),
 }

 // extractSeverity derives the severity of a log line from the patterns in
 // severityRegexes and returns SEVERITY_UNSPECIFIED when none matches.
 //
 // Every pattern is evaluated; when several match, the token captured by
 // the last matching pattern determines the result.
 func extractSeverity(logLine string) pb.ArtifactLine_Severity {
 	token := ""
 	for i := range severityRegexes {
 		groups := severityRegexes[i].FindStringSubmatch(logLine)
 		// Within one pattern, take the first non-empty capture group.
 		for g := 1; g < len(groups); g++ {
 			if groups[g] != "" {
 				token = groups[g]
 				break
 			}
 		}
 	}

 	if token == "" {
 		return pb.ArtifactLine_SEVERITY_UNSPECIFIED
 	}

 	// The first letter of the captured token identifies the severity.
 	switch token[0] {
 	case 'F':
 		return pb.ArtifactLine_FATAL
 	case 'E':
 		return pb.ArtifactLine_ERROR
 	case 'W':
 		return pb.ArtifactLine_WARNING
 	case 'N':
 		return pb.ArtifactLine_NOTICE
 	case 'I':
 		return pb.ArtifactLine_INFO
 	case 'D':
 		return pb.ArtifactLine_DEBUG
 	case 'V':
 		return pb.ArtifactLine_VERBOSE
 	}
 	return pb.ArtifactLine_SEVERITY_UNSPECIFIED
 }
	// Copyright 2024 The LUCI Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	package artifacts

	import (
	"bytes"
	"fmt"
	"math"
	"mime"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"time"

	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/timestamppb"

	"go.chromium.org/luci/common/errors"

	pb "go.chromium.org/luci/resultdb/proto/v1"
	)

	// ToLogLines splits the content of an artifact into log lines and
	// annotates each line with a best-effort timestamp and severity.
	//
	// artifactID and contentType are only used to decide whether the artifact
	// can be processed as a log; an error is returned when it cannot.
	// year is the fallback year for timestamps whose format carries no year.
	// maxLines caps the number of lines considered; a value <= 0 means no cap.
	// maxBytes caps the cumulative proto.Size of the returned lines: the first
	// line that pushes the running total above it, and every line after it,
	// is dropped. An error is returned when not even the first line fits.
	func ToLogLines(artifactID string, contentType string, content []byte, year, maxLines, maxBytes int) ([]*pb.ArtifactLine, error) {
		if !isLogSupportedArtifact(artifactID, contentType) {
			return nil, errors.New("unsupported file type")
		}

		rawLines := bytes.Split(content, []byte("\n"))
		if maxLines > 0 && maxLines < len(rawLines) {
			rawLines = rawLines[:maxLines]
		}

		lines := make([]*pb.ArtifactLine, 0, len(rawLines))
		usedBytes := 0
		for i := range rawLines {
			text := string(rawLines[i])
			// Best effort: a line without a recognizable timestamp keeps a
			// nil Timestamp, so the extraction error is deliberately dropped.
			ts, _ := extractTimestamp(text, year)
			level := extractSeverity(text)
			entry := &pb.ArtifactLine{
				Number:    int64(i) + 1,
				Timestamp: ts,
				Severity:  level,
				Content:   rawLines[i],
			}

			// The line that crosses the budget is not included in the result.
			if usedBytes += proto.Size(entry); usedBytes > maxBytes {
				break
			}
			lines = append(lines, entry)
		}

		// The file contains lines but not even the first one fits the budget.
		if len(lines) == 0 && len(rawLines) > 0 {
			return nil, errors.New(fmt.Sprintf("first file line content exceeds maximum size limit: %d bytes", maxBytes))
		}
		return lines, nil
	}

	// SupportedContentTypes lists the non-"text" content types that are still
	// accepted for log line processing.
	var SupportedContentTypes = map[string]bool{
	"application/octet-stream": true,
	"application/x-gzip": true,
	}

	// isLogSupportedArtifact reports whether an artifact can be processed as a
	// log: its content type must be acceptable and its ID must not carry the
	// extension of a known non-text file type. The ID is only inspected when
	// the content type check passes.
	func isLogSupportedArtifact(artifactID string, contentType string) bool {
		if !isSupportedContentType(contentType) {
			return false
		}
		return !isNonTextFile(artifactID)
	}

	func isSupportedContentType(contentType string) bool {
	// If the content type is not available then
	// we allow other checks to validate if the file is supported.
	if contentType == "" {
	return true
	}
	_, ok := SupportedContentTypes[contentType]
	return strings.HasPrefix(contentType, "text") \|\| ok
	}

	// isNonTextFile reports whether filePath has an extension whose MIME type
	// is known and is not a "text/" type. Paths without an extension, and
	// extensions with no known MIME type, are not considered non-text.
	func isNonTextFile(filePath string) bool {
		ext := filepath.Ext(filePath)
		if ext == "" {
			return false
		}
		mimeType := mime.TypeByExtension(ext)
		return mimeType != "" && !strings.HasPrefix(mimeType, "text/")
	}

	// logTimestamp describes one recognized log timestamp format: how to find
	// it in a log line and how to convert the matched text into a time.
	type logTimestamp struct {
	// The regexp for the supported log timestamp
	Regexp *regexp.Regexp
	// The time layout defining the parser format used by time parser lib
	Layout string
	// Specific time zone, e.g. America/Los_Angeles
	Zone string
	// Specifies whether the timestamp is in sec.millisec epoch format
	IsEpoch bool
	// Specifies whether the year is missing from the timestamp.
	// When set, extractTimestamp prepends the fallback year and a "/" to
	// the matched text before parsing it with Layout.
	YearMissing bool
	}

	// Time layout and time zone names used when handling log timestamps.
	const (
	// RFC3339FullDate is a time layout holding only year, month and day.
	RFC3339FullDate = "2006-01-02"
	// PstTimeZone is the IANA time zone name for US Pacific time.
	PstTimeZone = "America/Los_Angeles"
	// UtcTimeZone is the time zone name for UTC.
	UtcTimeZone = "UTC"
	)

	// supportedLogTimestamps lists the timestamp formats that extractTimestamp
	// recognizes. Entries are tried in order and the first entry whose Regexp
	// matches the line is used, so the order of the entries is significant.
	//
	// For entries with YearMissing set, the Layout starts with "2006/" because
	// the fallback year and a "/" are prepended to the matched text.
	var supportedLogTimestamps = []logTimestamp{
		// tast and upstart log style: 2021-12-17T07:33:58.266026Z
		{
			Regexp: regexp.MustCompile(
				`(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}).(\d{3,6})[A-Z]`),
			Layout: "2006-01-02T15:04:05.999999Z07:00",
			Zone:   UtcTimeZone,
		},
		// gzipped files timestamps: 03-12 19:08:48.511
		{
			Regexp: regexp.MustCompile(
				`^(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}).(\d{3})`),
			Layout:      "2006/01-02 15:04:05.000",
			Zone:        UtcTimeZone,
			YearMissing: true,
		},
		// tast steam out timestamps: 2022-10-18 15:10:19
		{
			Regexp:      regexp.MustCompile(`(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})`),
			Layout:      "2006-01-02 15:04:05",
			Zone:        PstTimeZone,
			YearMissing: false,
		},
		// tast log.txt timestamps: 2022/10/18 15:10:19
		{
			Regexp:      regexp.MustCompile(`(\d{4})\/(\d{2})\/(\d{2}) (\d{2}):(\d{2}):(\d{2})`),
			Layout:      "2006/01/02 15:04:05",
			Zone:        PstTimeZone,
			YearMissing: false,
		},
		// <tast_test>_logs.txt timestamps: Jun  9 22:15:15
		// (single-digit days are padded with a second space).
		{
			Regexp:      regexp.MustCompile(`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)  (\d{1,}) (\d{2}):(\d{2}):(\d{2})`),
			Layout:      "2006/Jan  2 15:04:05",
			Zone:        PstTimeZone,
			YearMissing: true,
		},
		// Format for timestamps like: 12/16 22:34:29.742
		//
		// The fractional part is optional and may have any number of digits.
		// The layout deliberately has no fractional field: when parsing, the
		// time package accepts a fractional second right after the seconds
		// field even if the layout does not mention one, whereas a ".000"
		// layout would reject every match that lacks exactly three digits.
		// The separator is restricted to '.' or ',' so that a space followed
		// by digits is not captured as a fraction.
		{
			Regexp:      regexp.MustCompile(`(\d{2})\/(\d{2}) (\d{2}):(\d{2}):(\d{2})(?:[.,](\d+))?`),
			Layout:      "2006/01/02 15:04:05",
			Zone:        PstTimeZone,
			YearMissing: true,
		},
		// ISO UTC format: 2019-09-30T09:49:45.355431+00:00
		{
			Regexp: regexp.MustCompile(
				`(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}).(\d{3})\d+[\+\-]\d{2}:\d{2}`),
			Layout: "2006-01-02T15:04:05.000000-07:00",
			Zone:   UtcTimeZone,
		},
		// chrome log style timestamps, eg: 1119/064727.250343
		{
			Regexp:      regexp.MustCompile(`(\d{2})(\d{2})\/(\d{2})(\d{2})(\d{2})\.(\d{6})`),
			Layout:      "2006/0102/150405.000000",
			Zone:        PstTimeZone,
			YearMissing: true,
		},
		// audit.log style timestamps: msg=audit(1571729288.142:53)
		{
			Regexp:      regexp.MustCompile(`msg=audit\((\d+)\.(\d+):(\d+)`),
			IsEpoch:     true,
			Zone:        UtcTimeZone,
			YearMissing: true,
		},
		// journal.log style dates: Nov 01 02:58:43
		{
			Regexp: regexp.MustCompile(
				`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ([0-9]{1,}) ([0-9]{2}):([0-9]{2}):([0-9]{2})`),
			Layout:      "2006/Jan 02 15:04:05",
			Zone:        PstTimeZone,
			YearMissing: true,
		},
		// vmlog timestamps: 0620/065447
		{
			Regexp:      regexp.MustCompile(`(\d{2})(\d{2})/(\d{2})(\d{2})(\d{2})`),
			Layout:      "2006/0102/150405",
			Zone:        PstTimeZone,
			YearMissing: true,
		},
		// labstation timestamps: Jun19 20:18
		{
			Regexp: regexp.MustCompile(
				`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)([0-9]{2}) ([0-9]{2}):([0-9]{2})`),
			Layout:      "2006/Jan02 15:04",
			Zone:        PstTimeZone,
			YearMissing: true,
		},
	}

	// extractTimestamp returns the first timestamp recognized in line, trying
	// the entries of supportedLogTimestamps in order.
	//
	// year is prepended to the matched text for formats flagged YearMissing.
	// An error is returned when no format yields a timestamp, or when the text
	// matched by the first matching non-epoch format cannot be parsed with
	// that format's layout.
	func extractTimestamp(line string, year int) (*timestamppb.Timestamp, error) {
		for i := range supportedLogTimestamps {
			format := &supportedLogTimestamps[i]
			m := format.Regexp.FindStringSubmatch(line)

			// Epoch formats already carry seconds and milliseconds as plain
			// numbers, so the time parser is skipped for them.
			if format.IsEpoch {
				// All three capture groups are needed.
				if len(m) <= 3 {
					continue
				}
				// A number that does not fit in 64 bits only disqualifies this
				// format; the remaining formats are still tried.
				sec, err := strconv.ParseInt(m[1], 10, 64)
				if err != nil {
					continue
				}
				milliSec, err := strconv.ParseInt(m[2], 10, 64)
				if err != nil {
					continue
				}
				// NOTE(review): in audit records the number after ':' looks like
				// an event serial number rather than a time component; confirm
				// that adding it as nanoseconds is intended.
				nanoSec, err := strconv.ParseInt(m[3], 10, 64)
				if err != nil {
					continue
				}

				nanoSec += milliSec * int64(math.Pow10(6))
				return timestamppb.New(time.Unix(sec, nanoSec)), nil
			}

			if len(m) <= 1 {
				continue
			}
			ts := m[0]
			if format.YearMissing {
				ts = fmt.Sprintf("%d/%s", year, ts)
			}
			t, err := time.ParseInLocation(format.Layout, ts, zone(format.Zone))
			if err != nil {
				return nil, err
			}
			return timestamppb.New(t), nil
		}
		return nil, errors.New("invalid timestamp format provided.")
	}

	// zone resolves a time zone name to its location.
	// Names that cannot be loaded resolve to UTC.
	func zone(name string) *time.Location {
		if loc, err := time.LoadLocation(name); err == nil {
			return loc
		}
		return time.UTC
	}

	// severityRegexes are the recognized log severity patterns found in
	// most of the artifacts that we process, severity extraction is best effort
	// and more severities can be added in the future.
	//
	// Each pattern has two alternatives with one capture group each: the
	// severity word (optionally wrapped in parentheses and/or colons and
	// followed by whitespace or `|`), or the single severity letter surrounded
	// by whitespace inside the line.
	//
	// Recognized spellings:
	// * FATAL = FATAL or F.
	// * ERROR = ERROR or ERR or E.
	// * WARNING = WARNING or WARNIN or WARNI or WARN or W.
	// * NOTICE = NOTICE or NOTIC or N.
	// * INFO = INFO or I.
	// * DEBUG = DEBUG or D.
	// * VERBOSE = VERBOSE or VERBOSE1 or V.
	var severityRegexes = []regexp.Regexp{
		*regexp.MustCompile(`\s*\:*\(?(FATAL)\)?\:*[\s\|]+|.+\s(F)\s.+`),
		*regexp.MustCompile(`\s*\:*\(?(ERROR|ERR)\)?\:*[\s\|]+|.+\s(E)\s.+`),
		// WARN is listed last so that the longer spellings are preferred.
		*regexp.MustCompile(`\s*\:*\(?(WARNING|WARNIN|WARNI|WARN)\)?\:*[\s\|]+|.+\s(W)\s.+`),
		*regexp.MustCompile(`\s*\:*\(?(NOTICE|NOTIC)\)?\:*[\s\|]+|.+\s(N)\s.+`),
		*regexp.MustCompile(`\s*\:*\(?(INFO)\)?\:*[\s\|]+|.+\s(I)\s.+`),
		*regexp.MustCompile(`\s*\:*\(?(DEBUG)\)?\:*[\s|]+|.+\s(D)\s.+`),
		*regexp.MustCompile(`\s*\:*\(?(VERBOSE|VERBOSE1)\)?\:*[\s|]+|.+\s(V)\s.+`),
	}

	// extractSeverity derives the severity of a log line from the patterns in
	// severityRegexes and returns SEVERITY_UNSPECIFIED when none matches.
	//
	// Every pattern is evaluated; when several match, the token captured by
	// the last matching pattern determines the result.
	func extractSeverity(logLine string) pb.ArtifactLine_Severity {
		token := ""
		for i := range severityRegexes {
			groups := severityRegexes[i].FindStringSubmatch(logLine)
			// Within one pattern, take the first non-empty capture group.
			for g := 1; g < len(groups); g++ {
				if groups[g] != "" {
					token = groups[g]
					break
				}
			}
		}

		if token == "" {
			return pb.ArtifactLine_SEVERITY_UNSPECIFIED
		}

		// The first letter of the captured token identifies the severity.
		switch token[0] {
		case 'F':
			return pb.ArtifactLine_FATAL
		case 'E':
			return pb.ArtifactLine_ERROR
		case 'W':
			return pb.ArtifactLine_WARNING
		case 'N':
			return pb.ArtifactLine_NOTICE
		case 'I':
			return pb.ArtifactLine_INFO
		case 'D':
			return pb.ArtifactLine_DEBUG
		case 'V':
			return pb.ArtifactLine_VERBOSE
		}
		return pb.ArtifactLine_SEVERITY_UNSPECIFIED
	}