// Copyright 2018 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package interpreter contains customized Starlark interpreter.
//
// It is an opinionated wrapper around the basic single-file interpreter
// provided by go.starlark.net library. It implements 'load' and a new built-in
// 'exec' in a specific way to support running Starlark programs that consist of
// many files that reference each other, thus allowing decomposing code into
// small logical chunks and enabling code reuse.
//
//
// Modules and packages
//
// Two main new concepts are modules and packages. A package is a collection
// of Starlark files living under the same root. A module is just one such
// Starlark file. Furthermore, modules can either be "library-like" (executed
// via 'load' statement) or "script-like" (executed via 'exec' function).
//
// Library-like modules can load other library-like modules via 'load', but may
// not call 'exec'. Script-like modules may use both 'load' and 'exec'.
//
// Modules within a single package can refer to each other (in 'load' and
// 'exec') using their relative or absolute (if start with "//") paths.
//
// Packages also have identifiers, though they are local to the interpreter
// (e.g. not defined anywhere in the package itself). For that reason they are
// called "package aliases".
//
// Modules from one package may refer to modules from another package by using
// the following syntax:
//
//   load("@<package alias>//<path within the package>", ...)
//
// Presently the mapping between a package alias (e.g. 'stdlib') and package's
// source code (a collection of *.star files under a single root) is static and
// supplied by Go code that uses the interpreter. This may change in the future
// to allow loading packages dynamically, e.g. over the network.
//
//
// Special packages
//
// There are two special package aliases recognized natively by the interpreter:
// '__main__' and 'stdlib'.
//
// '__main__' is a conventional name of the package with top level user-supplied
// code. When printing module paths in stack traces and error message,
// "@__main__//<path>" are shown as simply "//<path>" to avoid confusing users
// who don't know what "@<alias>//" might mean. There's no other special
// handling.
//
// Module '@stdlib//builtins.star' (if present) is loaded before any other code.
// All its exported symbols that do not start with '_' are made available in the
// global namespace of all other modules (except ones loaded by
// '@stdlib//builtins.star' itself). This allows to implement in Starlark
// built-in global functions exposed by the interpreter.
//
//
// Exec'ing modules
//
// The built-in function 'exec' can be used to execute a module for its side
// effects and get its global dict as a return value. It works similarly to
// 'load', except it doesn't import any symbols into the caller's namespace, and
// it is not idempotent: calling 'exec' on already executed module is an error.
//
// Each module is executed with its own instance of starlark.Thread. Modules
// are either loaded as libraries (via 'load' call or LoadModule) or executed
// as scripts (via 'exec' or ExecModule), but not both. Attempting to load
// a module that was previous exec'ed (and vice versa) is an error.
//
// Consequently, each Starlark thread created by the nterpreter is either
// executing some 'load' or some 'exec'. This distinction is available to
// builtins through GetThreadKind function. Some builtins may check it. For
// example, a builtin that mutates a global state may return an error when it is
// called from a 'load' thread.
//
// Dicts of modules loaded via 'load' are reused, e.g. if two different scripts
// load the exact same module, they'll get the exact same symbols as a result.
// The loaded code always executes only once. The interpreter MAY load modules
// in parallel in the future, libraries must not rely on their loading order and
// must not have side effects.
//
// On the other hand, modules executed via 'exec' are guaranteed to be executed
// sequentially, and only once. Thus 'exec'-ed scripts essentially form a tree,
// traversed exactly once in the depth first order.
//
//
// Built-in symbols
//
// In addition to 'exec' builtin implemented by the interpreter itself, users
// of the interpreter can also supply arbitrary "predeclared" symbols they want
// to be available in the global scope of all modules. Predeclared symbols and
// '@stdlib//builtins.star' module explained above, are the primary mechanisms
// of making the interpreter do something useful.
package interpreter

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path"
	"strings"

	"go.starlark.net/starlark"
	"go.starlark.net/starlarkstruct"
)

var (
	// ErrNoModule is returned by loaders when they can't find a source code of
	// the requested module. It is also returned by LoadModule when it can't find
	// a module within an existing package.
	ErrNoModule = errors.New("no such module")

	// ErrNoPackage is returned by LoadModule when it can't find a package.
	ErrNoPackage = errors.New("no such package")
)

// ThreadKind is enumeration describing possible uses of a thread.
type ThreadKind int

const (
	// ThreadUnknown indicates a thread used in some custom way, not via
	// LoadModule or ExecModule.
	ThreadUnknown ThreadKind = iota

	// ThreadLoading indicates a thread that is evaluating a module referred to by
	// some load(...) statement.
	ThreadLoading

	// ThreadExecing indicates a thread that is evaluating a module referred to by
	// some exec(...) statement.
	ThreadExecing
)

const (
	// MainPkg is an alias of the package with user-supplied code.
	MainPkg = "__main__"
	// StdlibPkg is an alias of the package with the standard library.
	StdlibPkg = "stdlib"
)

const (
	// Key of context.Context inside starlark.Thread's local store.
	threadCtxKey = "interpreter.Context"
	// Key with *Interpreter that created the thread.
	threadIntrKey = "interpreter.Interpreter"
	// Key with TheadKind of the thread.
	threadKindKey = "interpreter.ThreadKind"
	// Key with the ModuleKey of the currently executing module.
	threadModKey = "interpreter.ModuleKey"
)

// Context returns a context of the thread created through Interpreter.
//
// Panics if the Starlark thread wasn't created through Interpreter.Thread().
func Context(th *starlark.Thread) context.Context {
	if ctx := th.Local(threadCtxKey); ctx != nil {
		return ctx.(context.Context)
	}
	panic("not an Interpreter thread, no context in its locals")
}

// GetThreadInterpreter returns Interpreter that created the Starlark thread.
//
// Panics if the Starlark thread wasn't created through Interpreter.Thread().
func GetThreadInterpreter(th *starlark.Thread) *Interpreter {
	if intr := th.Local(threadIntrKey); intr != nil {
		return intr.(*Interpreter)
	}
	panic("not an Interpreter thread, no Interpreter in its locals")
}

// GetThreadKind tells what sort of thread 'th' is: it is either inside some
// load(...), some exec(...) or some custom call (probably a callback called
// from native code).
//
// Panics if the Starlark thread wasn't created through Interpreter.Thread().
func GetThreadKind(th *starlark.Thread) ThreadKind {
	if k := th.Local(threadKindKey); k != nil {
		return k.(ThreadKind)
	}
	panic("not an Interpreter thread, no ThreadKind in its locals")
}

// GetThreadModuleKey returns a ModuleKey with the location of the module being
// processed by a current load(...) or exec(...) statement.
//
// It has no relation to the module that holds the top-level stack frame. For
// example, if a currently loading module 'A' calls a function in module 'B' and
// this function calls GetThreadModuleKey, it will see module 'A' as the result,
// even though the call goes through code in module 'B'.
//
// Returns nil if the current thread is not executing any load(...) or
// exec(...), i.e. it has ThreadUnknown kind.
func GetThreadModuleKey(th *starlark.Thread) *ModuleKey {
	if modKey, ok := th.Local(threadModKey).(ModuleKey); ok {
		return &modKey
	}
	return nil
}

// Loader knows how to load modules of some concrete package.
//
// It takes a module path relative to the package and returns either module's
// dict (e.g. for go native modules) or module's source code, to be interpreted.
//
// Returns ErrNoModule if there's no such module in the package.
//
// The source code is returned as 'string' to guarantee immutability and
// allowing efficient use of string Go constants.
type Loader func(path string) (dict starlark.StringDict, src string, err error)

// Interpreter knows how to execute starlark modules that can load or execute
// other starlark modules.
type Interpreter struct {
	// Predeclared is a dict with predeclared symbols that are available globally.
	//
	// They are available when loading stdlib and when executing user modules. Can
	// be used to extend the interpreter with native Go functions.
	Predeclared starlark.StringDict

	// Packages is a mapping from a package alias to a loader with package code.
	//
	// Users of Interpreter are expected to supply a loader for at least __main__
	// package.
	Packages map[string]Loader

	// Logger is called by Starlark's print(...) function.
	//
	// The callback takes the position in the starlark source code where
	// print(...) was called and the message it received.
	//
	// The default implementation just prints the message to stderr.
	Logger func(file string, line int, message string)

	// ThreadModifier is called whenever interpreter makes a new starlark.Thread.
	//
	// It can inject additional state into thread locals. Useful when hooking up
	// a thread to starlarktest's reporter in unit tests.
	ThreadModifier func(th *starlark.Thread)

	// PreExec is called before launching code through some 'exec' or ExecModule.
	//
	// It may modify the thread or some other global state in preparation for
	// executing a script.
	//
	// 'load' calls do not trigger PreExec/PostExec hooks.
	PreExec func(th *starlark.Thread, module ModuleKey)

	// PostExec is called after finishing running code through some 'exec' or
	// ExecModule.
	//
	// It is always called, even if the 'exec' failed. May restore the state
	// modified by PreExec. Note that PreExec/PostExec calls can nest (if an
	// 'exec'-ed script calls 'exec' itself).
	//
	// 'load' calls do not trigger PreExec/PostExec hooks.
	PostExec func(th *starlark.Thread, module ModuleKey)

	modules map[ModuleKey]*loadedModule // cache of the loaded modules
	execed  map[ModuleKey]struct{}      // a set of modules that were ever exec'ed
	visited []ModuleKey                 // all modules, in order of visits
	globals starlark.StringDict         // global symbols exposed to all modules
}

// ModuleKey is a key of a module within a cache of loaded modules.
//
// It identifies a package and a file within the package.
type ModuleKey struct {
	Package string // a package name, e.g. "stdlib"
	Path    string // path within the package, e.g. "abc/script.star"
}

// String returns a fully-qualified module name to use in error messages.
//
// If is either "@pkg//path" or just "//path" if pkg is "__main__". We omit the
// name of the top-level package with user-supplied code ("__main__") to avoid
// confusing users who are oblivious of packages.
func (key ModuleKey) String() string {
	pkg := ""
	if key.Package != MainPkg {
		pkg = "@" + key.Package
	}
	return pkg + "//" + key.Path
}

// makeModuleKey takes '[@pkg]//<path>' or '<path>', parses and normalizes it.
//
// Converts the path to be relative to the package root. Does some light
// validation, in particular checking the resulting path is doesn't star with
// '../'. Module loaders are expected to validate module paths more rigorously
// (since they interpret them anyway).
//
// 'th' is used to get the name of the currently executing package and a path to
// the currently executing module within it. It is required if 'ref' is not
// given as an absolute path (i.e. does NOT look like '@pkg//path').
func makeModuleKey(ref string, th *starlark.Thread) (key ModuleKey, err error) {
	defer func() {
		if err == nil && (strings.HasPrefix(key.Path, "../") || key.Path == "..") {
			err = errors.New("outside the package root")
		}
	}()

	// 'th' can be nil here if makeModuleKey is called by LoadModule or
	// ExecModule: they are entry points into Starlark code, there's no thread
	// yet when they start.
	var current *ModuleKey
	if th != nil {
		current = GetThreadModuleKey(th)
	}

	// Absolute paths start with '//' or '@'. Everything else is a relative path.
	hasPkg := strings.HasPrefix(ref, "@")
	isAbs := hasPkg || strings.HasPrefix(ref, "//")

	if !isAbs {
		if current == nil {
			err = errors.New("can't resolve relative module path: no current module information in thread locals")
		} else {
			key = ModuleKey{
				Package: current.Package,
				Path:    path.Join(path.Dir(current.Path), ref),
			}
		}
		return
	}

	idx := strings.Index(ref, "//")
	if idx == -1 || (!hasPkg && idx != 0) {
		err = errors.New("a module path should be either '//<path>', '<path>' or '@<package>//<path>'")
		return
	}

	if hasPkg {
		if key.Package = ref[1:idx]; key.Package == "" {
			err = errors.New("a package alias can't be empty")
			return
		}
	}
	key.Path = path.Clean(ref[idx+2:])

	// Grab the package name from thread locals, if given.
	if !hasPkg {
		if current == nil {
			err = errors.New("no current package name in thread locals")
		} else {
			key.Package = current.Package
		}
	}
	return
}

// loadedModule represents an executed starlark module.
type loadedModule struct {
	dict starlark.StringDict // global dict of the module after we executed it
	err  error               // non-nil if this module could not be loaded
}

// Init initializes the interpreter and loads '@stdlib//builtins.star'.
//
// Registers whatever was passed via Predeclared plus 'exec'. Then loads
// '@stdlib//builtins.star', which may define more symbols or override already
// defined ones. Whatever symbols not starting with '_' end up in the global
// dict of '@stdlib//builtins.star' module will become available as global
// symbols in all modules.
//
// The context ends up available to builtins through Context(...).
func (intr *Interpreter) Init(ctx context.Context) error {
	intr.modules = map[ModuleKey]*loadedModule{}
	intr.execed = map[ModuleKey]struct{}{}

	intr.globals = make(starlark.StringDict, len(intr.Predeclared)+1)
	for k, v := range intr.Predeclared {
		intr.globals[k] = v
	}
	intr.globals["exec"] = intr.execBuiltin()

	// Load the stdlib, if any.
	top, err := intr.LoadModule(ctx, StdlibPkg, "builtins.star")
	if err != nil && err != ErrNoModule && err != ErrNoPackage {
		return err
	}
	for k, v := range top {
		if !strings.HasPrefix(k, "_") {
			intr.globals[k] = v
		}
	}
	return nil
}

// LoadModule finds and loads a starlark module, returning its dict.
//
// This is similar to load(...) statement: caches the result of the execution.
// Modules are always loaded only once.
//
// 'pkg' is a package alias, it will be used to lookup the package loader in
// intr.Packages. 'path' is a module path (without leading '//') within
// the package.
//
// The context ends up available to builtins through Context(...).
func (intr *Interpreter) LoadModule(ctx context.Context, pkg, path string) (starlark.StringDict, error) {
	key, err := makeModuleKey(fmt.Sprintf("@%s//%s", pkg, path), nil)
	if err != nil {
		return nil, err
	}

	// If the module has been 'exec'-ed previously it is not allowed to be loaded.
	// Modules are either 'library-like' or 'script-like', not both.
	if _, yes := intr.execed[key]; yes {
		return nil, errors.New("the module has been exec'ed before and therefore is not loadable")
	}

	switch m, ok := intr.modules[key]; {
	case m != nil: // already loaded or attempted and failed
		return m.dict, m.err
	case ok:
		// This module is being loaded right now, Starlark stack trace will show
		// the sequence of load(...) calls that led to this cycle.
		return nil, errors.New("cycle in the module dependency graph")
	}

	// Add a placeholder to indicate we are loading this module to detect cycles.
	intr.modules[key] = nil

	m := &loadedModule{
		err: fmt.Errorf("panic when loading %q", key), // overwritten on non-panic
	}
	defer func() { intr.modules[key] = m }()

	m.dict, m.err = intr.runModule(ctx, key, ThreadLoading)
	return m.dict, m.err
}

// ExecModule finds and executes a starlark module, returning its dict.
//
// This is similar to exec(...) statement: always executes the module.
//
// 'pkg' is a package alias, it will be used to lookup the package loader in
// intr.Packages. 'path' is a module path (without leading '//') within
// the package.
//
// The context ends up available to builtins through Context(...).
func (intr *Interpreter) ExecModule(ctx context.Context, pkg, path string) (starlark.StringDict, error) {
	key, err := makeModuleKey(fmt.Sprintf("@%s//%s", pkg, path), nil)
	if err != nil {
		return nil, err
	}

	// If the module has been loaded previously it is not allowed to be 'exec'-ed.
	// Modules are either 'library-like' or 'script-like', not both.
	if _, yes := intr.modules[key]; yes {
		return nil, errors.New("the module has been loaded before and therefore is not executable")
	}

	// Reexecing a module is forbidden.
	if _, yes := intr.execed[key]; yes {
		return nil, errors.New("the module has already been executed, 'exec'-ing same code twice is forbidden")
	}
	intr.execed[key] = struct{}{}

	// Actually execute the code.
	return intr.runModule(ctx, key, ThreadExecing)
}

// Visited returns a list of modules visited by the interpreter.
//
// Includes both loaded and execed modules, successfully or not.
func (intr *Interpreter) Visited() []ModuleKey {
	return intr.visited
}

// LoadSource returns a body of a file inside a package.
//
// It doesn't have to be a Starlark file, can be any text file as long as
// the corresponding package Loader can find it.
//
// 'ref' is either an absolute reference to the file, in the same format as
// accepted by 'load' and 'exec' (i.e. "[@pkg]//path"), or a path relative to
// the currently executing module (i.e. just "path").
//
// Only Starlark threads started via LoadModule or ExecModule can be used with
// this function. Other threads don't have enough context to resolve paths
// correctly.
func (intr *Interpreter) LoadSource(th *starlark.Thread, ref string) (string, error) {
	if kind := GetThreadKind(th); kind != ThreadLoading && kind != ThreadExecing {
		return "", errors.New("wrong kind of thread (not enough information to " +
			"resolve the file reference), only threads that do 'load' and 'exec' can call this function")
	}

	target, err := makeModuleKey(ref, th)
	if err != nil {
		return "", err
	}

	dict, src, err := intr.invokeLoader(target)
	if err == nil && dict != nil {
		err = fmt.Errorf("it is a native Go module")
	}
	if err != nil {
		// Avoid term "module" since LoadSource is often used to load non-star
		// files.
		if err == ErrNoModule {
			err = fmt.Errorf("no such file")
		}
		return "", fmt.Errorf("cannot load %s: %s", target, err)
	}
	return src, nil
}

// Thread creates a new Starlark thread associated with the given context.
//
// Thread() can be used, for example, to invoke callbacks registered by the
// loaded Starlark code.
//
// The context ends up available to builtins through Context(...).
//
// The returned thread has no implementation of load(...) or exec(...). Use
// LoadModule or ExecModule to load top-level Starlark code instead. Note that
// load(...) statements are forbidden inside Starlark functions anyway.
func (intr *Interpreter) Thread(ctx context.Context) *starlark.Thread {
	th := &starlark.Thread{
		Print: func(th *starlark.Thread, msg string) {
			position := th.CallFrame(1).Pos
			if intr.Logger != nil {
				intr.Logger(position.Filename(), int(position.Line), msg)
			} else {
				fmt.Fprintf(os.Stderr, "[%s:%d] %s\n", position.Filename(), position.Line, msg)
			}
		},
	}
	th.SetLocal(threadCtxKey, ctx)
	th.SetLocal(threadIntrKey, intr)
	th.SetLocal(threadKindKey, ThreadUnknown)
	if intr.ThreadModifier != nil {
		intr.ThreadModifier(th)
	}
	return th
}

// execBuiltin returns exec(...) builtin symbol.
func (intr *Interpreter) execBuiltin() *starlark.Builtin {
	return starlark.NewBuiltin("exec", func(th *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
		var module starlark.String
		if err := starlark.UnpackArgs("exec", args, kwargs, "module", &module); err != nil {
			return nil, err
		}
		mod := module.GoString()

		// Only threads started via 'exec' (or equivalently ExecModule) can exec
		// other scripts. Modules that are loaded via load(...), or custom callbacks
		// from native code aren't allowed to call exec, since exec's impurity may
		// lead to unexpected results.
		if GetThreadKind(th) != ThreadExecing {
			return nil, fmt.Errorf("exec %s: forbidden in this context, only exec'ed scripts can exec other scripts", mod)
		}

		// See also th.Load in runModule.
		key, err := makeModuleKey(mod, th)
		if err != nil {
			return nil, err
		}
		dict, err := intr.ExecModule(Context(th), key.Package, key.Path)
		if err != nil {
			// Starlark stringifies errors using Error(). For EvalError, Error()
			// string is just a root cause, it does not include the backtrace where
			// the execution failed. Preserve it explicitly by sticking the backtrace
			// into the error message.
			//
			// Note that returning 'err' as is will preserve the backtrace inside
			// the execed module, but we'll lose the backtrace of the 'exec' call
			// itself, since EvalError object will just bubble to the top of the Go
			// call stack unmodified.
			if evalErr, ok := err.(*starlark.EvalError); ok {
				return nil, fmt.Errorf("exec %s failed: %s", mod, evalErr.Backtrace())
			}
			return nil, fmt.Errorf("cannot exec %s: %s", mod, err)
		}

		return starlarkstruct.FromStringDict(starlark.String("execed"), dict), nil
	})
}

// invokeLoader loads the module via the loader associated with the package.
func (intr *Interpreter) invokeLoader(key ModuleKey) (dict starlark.StringDict, src string, err error) {
	loader, ok := intr.Packages[key.Package]
	if !ok {
		return nil, "", ErrNoPackage
	}
	return loader(key.Path)
}

// runModule really loads and executes the module, used by both LoadModule and
// ExecModule.
func (intr *Interpreter) runModule(ctx context.Context, key ModuleKey, kind ThreadKind) (starlark.StringDict, error) {
	// Grab the source code.
	dict, src, err := intr.invokeLoader(key)
	switch {
	case err != nil:
		return nil, err
	case dict != nil:
		// This is a native module constructed in Go, no need to interpret it.
		return dict, nil
	}

	// Otherwise make a thread for executing the code. We do not reuse threads
	// between modules. All global state is passed through intr.globals,
	// intr.modules and ctx.
	th := intr.Thread(ctx)
	th.Load = func(th *starlark.Thread, module string) (starlark.StringDict, error) {
		key, err := makeModuleKey(module, th)
		if err != nil {
			return nil, err
		}
		dict, err := intr.LoadModule(ctx, key.Package, key.Path)
		// See comment in execBuiltin about why we extract EvalError backtrace into
		// new error.
		if evalErr, ok := err.(*starlark.EvalError); ok {
			err = fmt.Errorf("%s", evalErr.Backtrace())
		}
		return dict, err
	}

	// Let builtins know what this thread is doing. Some calls (most notably Exec
	// itself) are allowed only from exec'ing threads, not from load'ing ones.
	th.SetLocal(threadKindKey, kind)
	// Let builtins (and in particular makeModuleKey and LoadSource) know the
	// package and the module that the thread executes.
	th.SetLocal(threadModKey, key)

	if kind == ThreadExecing {
		if intr.PreExec != nil {
			intr.PreExec(th, key)
		}
		if intr.PostExec != nil {
			defer intr.PostExec(th, key)
		}
	}

	// Record we've been here.
	intr.visited = append(intr.visited, key)

	// Execute the module. It may 'load' or 'exec' other modules inside, which
	// will either call LoadModule or ExecModule.
	//
	// Use user-friendly module name (with omitted "@__main__") for error messages
	// and stack traces to avoid confusing the user.
	return starlark.ExecFile(th, key.String(), src, intr.globals)
}
