| # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Component for automatically creating masks of changing areas of a website. |
| |
| Works by repeated invocation of a browser and scraping of the resulting page. |
| Areas that differ will be added to the auto-generated mask. The mask generator |
| considers the mask complete when further scrapes fail to produce any differences |
| in the mask. |
| """ |
| |
| import os # Functions for walking the directory tree |
| import tempfile # Get a temporary directory to hold intermediates |
| import time # Used for sleep() and naming masks by time |
| |
| import command_line |
| import drivers |
| from PIL import Image |
| from PIL import ImageChops |
| import scrapers |
| |
| |
def CreateCommand(cmdline):
  """Inserts the command and arguments into a command line for parsing.

  Registers the "maskmaker" command on cmdline, passing ValidateMaskmaker and
  ExecuteMaskmaker as its callbacks, then declares every argument the command
  accepts together with the constraints between those arguments.

  Args:
    cmdline: object exposing AddCommand(). The command object it returns must
      provide AddArgument(), AddMutualExclusion(), AddDependency() and
      AddRequiredGroup().
  """
  cmd = cmdline.AddCommand(
      ["maskmaker"],
      "Automatically generates a mask from a list of URLs",
      ValidateMaskmaker,
      ExecuteMaskmaker)

  # Browser selection.
  cmd.AddArgument(
      ["-bp", "--browserpath"], "Full path to browser's executable",
      type="readfile", metaname="PATH")
  cmd.AddArgument(
      ["-b", "--browser"], "Which browser to use", type="string",
      default="chrome")
  cmd.AddArgument(
      ["-bv", "--browserver"], "Version of the browser", metaname="VERSION")

  # Where the generated masks go.
  cmd.AddArgument(
      ["-o", "--outdir"], "Directory to store generated masks", metaname="DIR",
      required=True)

  # URL source: exactly one of --url / --list, with optional line selection
  # that is only meaningful for --list.
  cmd.AddArgument(
      ["-u", "--url"], "URL to compare")
  cmd.AddArgument(
      ["-l", "--list"], "List of URLs to compare", type="readfile")
  cmd.AddMutualExclusion(["--url", "--list"])
  cmd.AddArgument(
      ["-s", "--startline"], "First line of URL list", type="int")
  cmd.AddArgument(
      ["-e", "--endline"], "Last line of URL list (exclusive)", type="int")
  cmd.AddArgument(
      ["-c", "--count"], "Number of lines of URL file to use", type="int")
  cmd.AddDependency("--startline", "--list")
  cmd.AddRequiredGroup(["--url", "--list"])
  cmd.AddDependency("--endline", "--list")
  cmd.AddDependency("--count", "--list")
  cmd.AddMutualExclusion(["--count", "--endline"])
  cmd.AddDependency("--count", "--startline")

  # Scrape timing and termination criteria.
  cmd.AddArgument(
      ["-t", "--timeout"], "Amount of time (seconds) to wait for browser to "
      "finish loading",
      type="int", default=60)
  cmd.AddArgument(
      ["-w", "--wait"],
      "Amount of time (in seconds) to wait between successive scrapes",
      type="int", default=60)
  cmd.AddArgument(
      ["-sc", "--scrapes"],
      "Number of successive scrapes which must result in no change to a mask "
      "before mask creation is considered complete", type="int", default=10)
  cmd.AddArgument(
      ["-sz", "--size"], "Browser window size", default=(800, 600),
      type="coords")
  cmd.AddArgument(["-sd", "--scrapedir"], "Directory to store scrapes")
  cmd.AddArgument(
      ["-gu", "--giveup"],
      "Number of times to scrape before giving up", type="int", default=50)
  # The two help-text fragments are concatenated by the parser, so the first
  # one needs its trailing space (it used to read "will bediscarded").
  cmd.AddArgument(
      ["-th", "--threshhold"],
      "Percentage of different pixels (0-100) above which the scrape will be "
      "discarded and the mask not updated.", type="int", default=100)
  # NOTE(review): "--er" carries two dashes, unlike the other short flags
  # ("-sc", "-sz", ...). Left untouched because existing callers may already
  # depend on it -- confirm whether "-er" was intended.
  cmd.AddArgument(
      ["--er", "--errors"],
      "Number of times a scrape can fail before giving up on the URL.",
      type="int", default=1)
| |
| |
def ValidateMaskmaker(command):
  """Checks the maskmaker arguments, raising ParseError when they are invalid.

  The only check performed is on --browserpath: when it is supplied, its file
  extension (compared case-insensitively) must be .exe, .com or .bat.

  Args:
    command: mapping of argument names (e.g. "--browserpath") to values.

  Raises:
    command_line.ParseError: --browserpath does not name an executable.
  """
  browser_path = command["--browserpath"]
  if not browser_path:
    # Nothing to validate when no explicit browser path was given.
    return

  extension = os.path.splitext(browser_path)[1].lower()
  if extension not in (".exe", ".com", ".bat"):
    raise command_line.ParseError("Browser filename must be an executable")
| |
| |
class _MaskmakerURL(object):
  """Helper class for holding information about a URL passed to maskmaker."""
  __slots__ = ['url', 'consecutive_successes', 'errors']

  def __init__(self, url):
    self.url = url
    # Number of scrapes in a row that matched the baseline under the mask.
    self.consecutive_successes = 0
    # Number of scrapes that reported an error.
    self.errors = 0


def _ReadURLList(command):
  """Returns the _MaskmakerURL objects selected by --url or --list."""
  if command["--url"]:
    return [_MaskmakerURL(command["--url"])]

  startline = command["--startline"]
  if command["--count"]:
    endline = startline + command["--count"]
  else:
    endline = command["--endline"]

  # Close the list file as soon as it has been read instead of leaking the
  # handle for the rest of the (potentially very long) run.
  with open(command["--list"], "r") as url_file:
    lines = url_file.readlines()
  return [_MaskmakerURL(line.strip()) for line in lines[startline:endline]]


def _LoadOrCreateMask(mask_filename, size):
  """Returns (mask, mask_filename) for an on-disk mask matching size.

  A missing mask is created all-white and saved. An existing mask of the
  wrong size is left alone and a size-suffixed filename is tried instead, so
  the filename returned may differ from the one passed in.
  """
  while True:
    try:
      mask = Image.open(mask_filename)
    except IOError:
      print(" %r does not exist, creating" % mask_filename)
      Image.new("1", size, 1).save(mask_filename)
      # Loop around and load the file that was just written.
      continue

    if mask.size == size:
      return mask, mask_filename

    print(" %r already exists and is the wrong size! (%r vs %r)" % (
        mask_filename, mask.size, size))
    mask_filename = "%s_%r%s" % (
        mask_filename[:-4], size, mask_filename[-4:])
    print(" Trying again as %r..." % mask_filename)


def _IsBlank(image):
  """Returns True when every pixel in every band of image is zero."""
  extrema = image.getextrema()
  # Multi-band images report one (min, max) pair per band; single-band images
  # report a bare (min, max) pair. Normalise to the multi-band shape.
  if not isinstance(extrema[0], tuple):
    extrema = (extrema,)
  return all(band_max == 0 for (_, band_max) in extrema)


def _ProcessURL(url, scraper, command, scrapedir):
  """Scrapes one URL and folds differences from its baseline into its mask.

  Updates url.errors and url.consecutive_successes in place.
  """
  size = command["--size"]
  scrapes = command["--scrapes"]

  print("Processing %r..." % url.url)
  mask_filename = drivers.windowing.URLtoFilename(
      url.url, command["--outdir"], ".bmp")
  mask, mask_filename = _LoadOrCreateMask(mask_filename, size)

  # Find the stored scrape path
  mask_scrape_dir = os.path.join(
      scrapedir, os.path.splitext(os.path.basename(mask_filename))[0])
  drivers.windowing.PreparePath(mask_scrape_dir)

  # Find the baseline image: the first entry in sorted order. Scrapes are
  # named by timestamp below, so for a directory holding only scrapes this is
  # the earliest one.
  mask_scrapes = sorted(os.listdir(mask_scrape_dir))
  if mask_scrapes:
    baseline = Image.open(os.path.join(mask_scrape_dir, mask_scrapes[0]))
  else:
    print(" No baseline image found, mask will not be updated")
    baseline = None

  mask_scrape_filename = os.path.join(
      mask_scrape_dir, time.strftime("%y%m%d-%H%M%S.bmp"))

  # Do the scrape
  result = scraper.Scrape(
      [url.url], mask_scrape_dir, size, (0, 0),
      command["--timeout"], path=command["--browserpath"],
      filename=mask_scrape_filename)

  if result:
    # Return value other than None means an error
    print(" Scrape failed with error '%r'" % result)
    url.errors += 1
    if url.errors >= command["--errors"]:
      print(" ** Exceeded maximum error count for this URL, giving up")
    return

  # Load the new scrape
  scrape = Image.open(mask_scrape_filename)

  if baseline is None:
    # This scrape becomes the baseline for later passes.
    return

  # Calculate the difference between the new scrape and the baseline,
  # subject to the current mask
  diff = ImageChops.multiply(ImageChops.difference(scrape, baseline),
                             mask.convert(scrape.mode))

  # If the difference is none, there's nothing to update
  if _IsBlank(diff):
    print(" Scrape identical to baseline, no change in mask")
    url.consecutive_successes += 1
    if url.consecutive_successes >= scrapes:
      print(" ** No change for %r scrapes, done!" % scrapes)
    return

  # Convert the difference to black and white: pixels where the scrape and
  # the baseline were identical become white, all others become black.
  #   1. convert("L") converts the image to grayscale, because point()
  #      operates on individual channels.
  #   2. point() maps grayscale value 0 to 255 and every other value to 0.
  #   3. The "1" mode argument makes point() emit a monochrome bitmap
  #      directly; converting straight to monochrome would dither instead.
  diff = diff.convert("L").point([255] + [0] * 255, "1")

  # Count the differing (black) pixels. getcolors() makes no ordering
  # promise, so pick the entry by colour instead of by position.
  diff_pixels = sum(count for count, color in diff.getcolors() if color == 0)

  # is this too much?
  diff_pixel_percent = diff_pixels * 100.0 / (mask.size[0] * mask.size[1])
  if diff_pixel_percent > command["--threshhold"]:
    print(" Scrape differed from baseline by %.2f percent, ignoring"
          % diff_pixel_percent)
  else:
    print(" Scrape differed in %d pixels, updating mask" % diff_pixels)
    mask = ImageChops.multiply(mask, diff)
    mask.save(mask_filename)

    # reset the number of consecutive "good" scrapes
    # NOTE(review): assumed to apply only when the mask was updated -- the
    # original indentation was not recoverable; confirm.
    url.consecutive_successes = 0


def ExecuteMaskmaker(command):
  """Performs automatic mask generation.

  Repeatedly scrapes every requested URL, comparing each new scrape with the
  URL's baseline scrape and blacking out differing pixels in the URL's mask.
  A URL is finished once --scrapes consecutive scrapes matched the baseline,
  and abandoned once it has accumulated --errors scrape errors. The whole run
  stops after --giveup passes. A summary is printed at the end.

  Args:
    command: mapping of argument names to values. Reads --url, --list,
      --startline, --endline, --count, --outdir, --scrapes, --errors, --size,
      --scrapedir, --browser, --browserver, --browserpath, --timeout, --wait,
      --giveup and --threshhold.
  """
  # Get the list of URLs to generate masks for
  url_list = _ReadURLList(command)

  complete_list = []
  error_list = []

  scrapes = command["--scrapes"]
  errors = command["--errors"]
  giveup = command["--giveup"]
  scrape_pass = 0

  scrapedir = command["--scrapedir"]
  if not scrapedir:
    scrapedir = tempfile.gettempdir()

  # Get the scraper
  scraper = scrapers.GetScraper((command["--browser"], command["--browserver"]))

  # Repeatedly iterate through the list of URLs until either every URL has
  # a successful mask or too many errors, or we've exceeded the giveup limit
  while url_list and scrape_pass < giveup:
    for url in url_list:
      _ProcessURL(url, scraper, command, scrapedir)

    # Remove URLs whose mask is deemed done
    complete_list.extend(
        [url for url in url_list if url.consecutive_successes >= scrapes])
    error_list.extend(
        [url for url in url_list if url.errors >= errors])
    url_list = [
        url for url in url_list if
        url.consecutive_successes < scrapes and
        url.errors < errors]

    scrape_pass += 1
    print("**Done with scrape pass %d\n" % scrape_pass)

    if not url_list:
      # Everything is finished: don't sleep for --wait seconds for nothing,
      # and don't claim the giveup limit was hit.
      break

    if scrape_pass >= giveup:
      print("**Exceeded giveup threshhold. Giving up.")
    else:
      print("Waiting %d seconds..." % command["--wait"])
      time.sleep(command["--wait"])

  print("")
  print("*** MASKMAKER COMPLETE ***")
  print("Summary report:")
  print(" %d masks successfully generated" % len(complete_list))
  for url in complete_list:
    print("  %s" % url.url)
  print(" %d masks failed with too many errors" % len(error_list))
  for url in error_list:
    print("  %s" % url.url)
  # URLs can only remain here when the giveup limit stopped the loop.
  if url_list:
    print(" %d masks were not completed before "
          "reaching the giveup threshhold" % len(url_list))
    for url in url_list:
      print("  %s" % url.url)