tools/site_compare/commands/scrape.py - chromium/src.git - Git at Google

 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Command for scraping images from a URL or list of URLs.

 Prerequisites:
   1. The command_line package from tools/site_compare
   2. Either the IE BHO or Firefox extension (or both)

 Installation:
   1. Build the IE BHO, or call regsvr32 on a prebuilt binary
   2. Add a file called "measurepageloadtimeextension@google.com" to
      the default Firefox profile directory under extensions, containing
      the path to the Firefox extension root

 Invoke with the command line arguments as documented within
 the command line.
 """

 import command_line

 from drivers import windowing
 from utils import browser_iterate

 def CreateCommand(cmdline):
   """Registers the "scrape" command and its arguments on a command line.

   Args:
     cmdline: object exposing AddCommand(); the "scrape" command is added to
       it, with ExecuteScrape passed as the final argument (presumably the
       handler invoked when the command runs -- confirm in command_line).
   """
   names = ["scrape"]
   summary = "Scrapes an image from a URL or series of URLs."
   scrape_cmd = cmdline.AddCommand(names, summary, None, ExecuteScrape)

   # Let browser_iterate add its own arguments before the scrape-specific ones.
   browser_iterate.SetupIterationCommandLine(scrape_cmd)

   # Optional text log.
   scrape_cmd.AddArgument(
     ["-log", "--logfile"],
     "File to write text output",
     type="string")

   # Mandatory output directory for the saved scrapes.
   scrape_cmd.AddArgument(
     ["-out", "--outdir"],
     "Directory to store scrapes",
     type="string",
     required=True)


 def ExecuteScrape(command):
   """Executes the Scrape command.

   Hands a callback to browser_iterate.Iterate; each time the callback runs it
   scrapes the given window and saves the image as a .bmp under the output
   directory.

   Args:
     command: mapping-like parsed command. Reads command["--logfile"]
       (optional path of a text log, opened for writing) and
       command["--outdir"] (directory that receives the scrapes).
   """
   log_path = command["--logfile"]
   log_file = open(log_path, "w") if log_path else None

   def ScrapeResult(url, proc, wnd, result):
     """Capture and save the scrape.

     Args:
       url: URL being scraped; used to derive the output filename.
       proc: unused here.
       wnd: window handed to windowing.ScrapeWindow.
       result: text written to the log file when one is open.
     """
     if log_file:
       log_file.write(result)

     # Scrape the page
     image = windowing.ScrapeWindow(wnd)
     filename = windowing.URLtoFilename(url, command["--outdir"], ".bmp")
     image.save(filename)

   try:
     browser_iterate.Iterate(command, ScrapeResult)
   finally:
     # Close the log even if iteration or a scrape raises, so the handle is
     # not leaked and buffered output reaches disk.
     if log_file:
       log_file.close()
	# Copyright (c) 2011 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Command for scraping images from a URL or list of URLs.

	Prerequisites:
	1. The command_line package from tools/site_compare
	2. Either the IE BHO or Firefox extension (or both)

	Installation:
	1. Build the IE BHO, or call regsvr32 on a prebuilt binary
	2. Add a file called "measurepageloadtimeextension@google.com" to
	the default Firefox profile directory under extensions, containing
	the path to the Firefox extension root

	Invoke with the command line arguments as documented within
	the command line.
	"""

	import command_line

	from drivers import windowing
	from utils import browser_iterate

	def CreateCommand(cmdline):
	  """Registers the "scrape" command and its arguments on a command line.

	  Args:
	    cmdline: object exposing AddCommand(); the "scrape" command is added to
	      it, with ExecuteScrape passed as the final argument (presumably the
	      handler invoked when the command runs -- confirm in command_line).
	  """
	  names = ["scrape"]
	  summary = "Scrapes an image from a URL or series of URLs."
	  scrape_cmd = cmdline.AddCommand(names, summary, None, ExecuteScrape)

	  # Let browser_iterate add its own arguments before the scrape-specific ones.
	  browser_iterate.SetupIterationCommandLine(scrape_cmd)

	  # Optional text log.
	  scrape_cmd.AddArgument(
	    ["-log", "--logfile"],
	    "File to write text output",
	    type="string")

	  # Mandatory output directory for the saved scrapes.
	  scrape_cmd.AddArgument(
	    ["-out", "--outdir"],
	    "Directory to store scrapes",
	    type="string",
	    required=True)


	def ExecuteScrape(command):
	  """Executes the Scrape command.

	  Hands a callback to browser_iterate.Iterate; each time the callback runs it
	  scrapes the given window and saves the image as a .bmp under the output
	  directory.

	  Args:
	    command: mapping-like parsed command. Reads command["--logfile"]
	      (optional path of a text log, opened for writing) and
	      command["--outdir"] (directory that receives the scrapes).
	  """
	  log_path = command["--logfile"]
	  log_file = open(log_path, "w") if log_path else None

	  def ScrapeResult(url, proc, wnd, result):
	    """Capture and save the scrape.

	    Args:
	      url: URL being scraped; used to derive the output filename.
	      proc: unused here.
	      wnd: window handed to windowing.ScrapeWindow.
	      result: text written to the log file when one is open.
	    """
	    if log_file:
	      log_file.write(result)

	    # Scrape the page
	    image = windowing.ScrapeWindow(wnd)
	    filename = windowing.URLtoFilename(url, command["--outdir"], ".bmp")
	    image.save(filename)

	  try:
	    browser_iterate.Iterate(command, ScrapeResult)
	  finally:
	    # Close the log even if iteration or a scrape raises, so the handle is
	    # not leaked and buffered output reaches disk.
	    if log_file:
	      log_file.close()