blob: da44ed0017e6216f4918601d7c279967c057ad85 [file] [log] [blame]
package cdp_test
import (
"context"
"encoding/json"
"errors"
"fmt"
"time"
"github.com/mafredri/cdp"
"github.com/mafredri/cdp/devtool"
"github.com/mafredri/cdp/protocol/dom"
"github.com/mafredri/cdp/protocol/network"
"github.com/mafredri/cdp/protocol/page"
"github.com/mafredri/cdp/protocol/runtime"
"github.com/mafredri/cdp/rpcc"
"golang.org/x/sync/errgroup"
)
// Cookie represents a browser cookie.
//
// setCookies turns each Cookie into a Network.setCookie request: Name
// and Value are passed to network.NewSetCookieArgs and URL is applied
// with SetURL.
type Cookie struct {
	URL   string `json:"url"`   // URL the cookie is set for.
	Name  string `json:"name"`  // Cookie name.
	Value string `json:"value"` // Cookie value.
}
// DocumentInfo contains information about the document.
//
// It is the decode target for the object that the JavaScript evaluated
// in Example_advanced resolves with.
type DocumentInfo struct {
	Title string `json:"title"` // Text of the page's <title> element.
}
var (
	// MyURL is the page the example navigates to; it is also the URL
	// every entry in Cookies is set for.
	MyURL = "https://google.com"

	// Cookies lists the cookies that are set before navigating.
	Cookies = []Cookie{
		{URL: MyURL, Name: "myauth", Value: "myvalue"},
		{URL: MyURL, Name: "mysetting1", Value: "myvalue1"},
		{URL: MyURL, Name: "mysetting2", Value: "myvalue2"},
		{URL: MyURL, Name: "mysetting3", Value: "myvalue3"},
	}
)
// Example_advanced walks through a complete session against the
// DevTools endpoint at http://localhost:9222: it connects to a page
// target, installs abort handlers, enables the DOM, Network, Page and
// Runtime domains, sets Cookies, navigates to MyURL, reads the document
// title by evaluating JavaScript and finally removes every <script> and
// <noscript> element from the DOM.
//
// Any failure is printed and ends the example early.
func Example_advanced() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	devt := devtool.New("http://localhost:9222")
	pt, err := devt.Get(ctx, devtool.Page)
	if err != nil {
		// Report the failure like every other step does instead of
		// returning without a trace.
		fmt.Println(err)
		return
	}

	// Connect to WebSocket URL (page) that speaks the Chrome DevTools Protocol.
	conn, err := rpcc.DialContext(ctx, pt.WebSocketDebuggerURL)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer conn.Close() // Cleanup.

	// Create a new CDP Client that uses conn.
	c := cdp.NewClient(conn)

	// Give enough capacity to avoid blocking any event listeners
	abort := make(chan error, 2)

	// Watch the abort channel. The first reported error cancels ctx,
	// which in turn aborts every call below that uses it.
	go func() {
		select {
		case <-ctx.Done():
		case err := <-abort:
			fmt.Printf("aborted: %s\n", err.Error())
			cancel()
		}
	}()

	// Setup event handlers early because domain events can be sent as
	// soon as Enable is called on the domain.
	if err = abortOnErrors(ctx, c, abort); err != nil {
		fmt.Println(err)
		return
	}

	if err = runBatch(
		// Enable all the domain events that we're interested in.
		func() error { return c.DOM.Enable(ctx) },
		func() error { return c.Network.Enable(ctx, nil) },
		func() error { return c.Page.Enable(ctx) },
		func() error { return c.Runtime.Enable(ctx) },

		func() error { return setCookies(ctx, c.Network, Cookies...) },
	); err != nil {
		fmt.Println(err)
		return
	}

	domLoadTimeout := 5 * time.Second
	err = navigate(ctx, c.Page, MyURL, domLoadTimeout)
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Printf("Navigated to: %s\n", MyURL)

	// Parse information from the document by evaluating JavaScript.
	// The expression resolves a promise after 500ms with an object
	// holding the title, which is why the evaluation awaits the promise
	// and asks for the result by value.
	expression := `
new Promise((resolve, reject) => {
setTimeout(() => {
const title = document.querySelector('title').innerText;
resolve({title});
}, 500);
});
`
	evalArgs := runtime.NewEvaluateArgs(expression).SetAwaitPromise(true).SetReturnByValue(true)
	eval, err := c.Runtime.Evaluate(ctx, evalArgs)
	if err != nil {
		fmt.Println(err)
		return
	}

	var info DocumentInfo
	if err = json.Unmarshal(eval.Result.Value, &info); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("Document title: %q\n", info.Title)

	// Fetch the document root node.
	doc, err := c.DOM.GetDocument(ctx, nil)
	if err != nil {
		fmt.Println(err)
		return
	}

	// Fetch all <script> and <noscript> elements so we can delete them.
	scriptIDs, err := c.DOM.QuerySelectorAll(ctx, dom.NewQuerySelectorAllArgs(doc.Root.NodeID, "script, noscript"))
	if err != nil {
		fmt.Println(err)
		return
	}

	if err = removeNodes(ctx, c.DOM, scriptIDs.NodeIDs...); err != nil {
		fmt.Println(err)
		return
	}
}
// abortOnErrors opens event streams for thrown exceptions
// (c.Runtime.ExceptionThrown) and failed resource loads
// (c.Network.LoadingFailed) and starts a goroutine that forwards them
// to abort as errors.
//
// The returned error is non-nil only when one of the streams could not
// be opened; in that case no goroutine is started and no stream is left
// open. The goroutine ends when ctx is done or when receiving from
// either stream fails, and closes both streams on its way out.
func abortOnErrors(ctx context.Context, c *cdp.Client, abort chan<- error) error {
	exceptionThrown, err := c.Runtime.ExceptionThrown(ctx)
	if err != nil {
		return err
	}

	loadingFailed, err := c.Network.LoadingFailed(ctx)
	if err != nil {
		// The goroutine that would normally close this stream is
		// never started, so close it here.
		exceptionThrown.Close()
		return err
	}

	// report delivers err to abort. It gives up when ctx is done so a
	// full channel without a reader cannot block the goroutine forever;
	// it returns false in that case.
	report := func(err error) bool {
		select {
		case abort <- err:
			return true
		case <-ctx.Done():
			return false
		}
	}

	go func() {
		defer exceptionThrown.Close() // Cleanup.
		defer loadingFailed.Close()
		for {
			select {
			case <-ctx.Done():
				return

			// Check for exceptions so we can abort as soon
			// as one is encountered.
			case <-exceptionThrown.Ready():
				ev, err := exceptionThrown.Recv()
				if err != nil {
					// This could be any one of: stream closed,
					// connection closed, context deadline or
					// unmarshal failed.
					report(err)
					return
				}

				// Ruh-roh! Let the caller know something went wrong.
				if !report(ev.ExceptionDetails) {
					return
				}

			// Check for non-canceled resources that failed
			// to load.
			case <-loadingFailed.Ready():
				ev, err := loadingFailed.Recv()
				if err != nil {
					report(err)
					return
				}

				// For now, most optional fields are pointers
				// and must be checked for nil.
				canceled := ev.Canceled != nil && *ev.Canceled

				if !canceled && !report(fmt.Errorf("request %s failed: %s", ev.RequestID, ev.ErrorText)) {
					return
				}
			}
		}
	}()
	return nil
}
// setCookies issues one SetCookie request per cookie through runBatch
// and returns the error runBatch reports. A request counts as failed
// when the call itself errors or when the reply's Success flag is false.
func setCookies(ctx context.Context, net cdp.Network, cookies ...Cookie) error {
	batch := make([]runBatchFunc, len(cookies))
	for i := range cookies {
		cookie := cookies[i]
		// Built per iteration so every closure owns its arguments.
		setArgs := network.NewSetCookieArgs(cookie.Name, cookie.Value).SetURL(cookie.URL)
		batch[i] = func() error {
			res, err := net.SetCookie(ctx, setArgs)
			switch {
			case err != nil:
				return err
			case !res.Success:
				return errors.New("could not set cookie")
			}
			return nil
		}
	}
	return runBatch(batch...)
}
// navigate loads url in the page and blocks until a
// DOMContentEventFired event is received. The whole operation is
// bounded by timeout; if it elapses first, the resulting error is
// returned.
func navigate(ctx context.Context, pageClient cdp.Page, url string, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// Page events must be enabled, otherwise the event never arrives.
	if err := pageClient.Enable(ctx); err != nil {
		return err
	}

	// Subscribe before navigating so the event cannot be missed.
	loaded, err := pageClient.DOMContentEventFired(ctx)
	if err != nil {
		return err
	}
	defer loaded.Close()

	if _, err = pageClient.Navigate(ctx, page.NewNavigateArgs(url)); err != nil {
		return err
	}

	// Block until the event arrives or ctx ends.
	_, err = loaded.Recv()
	return err
}
// removeNodes issues one RemoveNode request per node ID through
// runBatch and returns the error runBatch reports.
func removeNodes(ctx context.Context, domClient cdp.DOM, nodes ...dom.NodeID) error {
	batch := make([]runBatchFunc, len(nodes))
	for i, nodeID := range nodes {
		// Built per iteration so every closure owns its arguments.
		rmArgs := dom.NewRemoveNodeArgs(nodeID)
		batch[i] = func() error {
			return domClient.RemoveNode(ctx, rmArgs)
		}
	}
	return runBatch(batch...)
}
// runBatchFunc is a single unit of work accepted by runBatch.
type runBatchFunc func() error

// runBatch hands every function to an errgroup.Group and returns the
// result of waiting on that group.
func runBatch(fn ...runBatchFunc) error {
	var group errgroup.Group
	for i := range fn {
		group.Go(fn[i])
	}
	return group.Wait()
}