blob: 9a68b2aec9d063a5c3d39d92e7ce3eba4810e7ee [file] [log] [blame]
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from datetime import datetime
import logging
import os
import re
import requests
import sys
import time
import urllib
from urlparse import urlparse
import mozinfo
import errors
from parser import DirectoryParser
from timezones import PacificTimezone
from utils import urljoin
# Applications this module knows how to download
APPLICATIONS = ('b2g', 'firefox', 'fennec', 'thunderbird')

# Applications which ship every locale inside a single build
APPLICATIONS_MULTI_LOCALE = ('b2g', 'fennec')

# Maps an application to its server subfolder when the two names differ
APPLICATIONS_TO_FTP_DIRECTORY = {
    'fennec': 'mobile',
}

# Root URL below which all builds are published
BASE_URL = 'https://archive.mozilla.org/pub/'

# Number of bytes read per chunk while downloading a file
CHUNK_SIZE = 16 * 1024

# File extension used for a platform when none is given explicitly
DEFAULT_FILE_EXTENSIONS = {
    'android-api-9': 'apk',
    'android-api-11': 'apk',
    'android-x86': 'apk',
    'linux': 'tar.bz2',
    'linux64': 'tar.bz2',
    'mac': 'dmg',
    'mac64': 'dmg',
    'win32': 'exe',
    'win64': 'exe',
}

# Regex fragment identifying a platform inside file and folder names
PLATFORM_FRAGMENTS = {
    'android-api-9': r'android-arm',
    'android-api-11': r'android-arm',
    'android-x86': r'android-i386',
    'linux': r'linux-i686',
    'linux64': r'linux-x86_64',
    'mac': r'mac',
    'mac64': r'mac(64)?',
    'win32': r'win32',
    'win64': r'win64(-x86_64)?',
}
class Scraper(object):
    """Generic class to download an application from the Mozilla server.

    The constructor resolves locale, platform, file extension and base URL
    and then calls ``get_build_info()``.  Subclasses are expected to provide
    ``binary_regex``, ``path_regex`` and ``build_filename()``; the versions
    defined here raise ``errors.NotImplementedError``.
    """

    def __init__(self, destination=None, platform=None,
                 application='firefox', locale=None, extension=None,
                 username=None, password=None,
                 retry_attempts=0, retry_delay=10.,
                 is_stub_installer=False, timeout=None,
                 log_level='INFO',
                 base_url=BASE_URL):
        """Configure the scraper and retrieve the build information.

        :param destination: Target directory, or target file if the path has
            an extension. Defaults to the current working directory.
        :param platform: Platform key (see ``DEFAULT_FILE_EXTENSIONS``).
            Detected via ``detect_platform()`` if not given.
        :param application: Name of the application to download.
        :param locale: Locale of the build. Defaults to 'multi' for
            applications in ``APPLICATIONS_MULTI_LOCALE``, else 'en-US'.
        :param extension: File extension of the build. Derived from the
            platform if not given.
        :param username: User name for HTTP authentication.
        :param password: Password for HTTP authentication.
        :param retry_attempts: Attempt count at which a failing operation
            gives up; with 0 the first failure is raised immediately.
        :param retry_delay: Seconds to sleep before another attempt.
        :param is_stub_installer: Whether the stub installer is requested.
        :param timeout: Maximum number of seconds a download may take.
        :param log_level: Level set on this module's logger.
        :param base_url: Root URL the application folder is appended to.
        :raises errors.NotFoundError: If the build information cannot be
            retrieved within the allowed attempts.
        """
        # Private properties for caching
        self._filename = None
        self._binary = None

        self.destination = destination or os.getcwd()

        if locale:
            self.locale = locale
        elif application in APPLICATIONS_MULTI_LOCALE:
            self.locale = 'multi'
        else:
            self.locale = 'en-US'

        # Must come after the locale: subclasses read it in detect_platform()
        self.platform = platform or self.detect_platform()

        self.session = requests.Session()
        if (username, password) != (None, None):
            self.session.auth = (username, password)

        self.retry_attempts = retry_attempts
        self.retry_delay = retry_delay
        self.is_stub_installer = is_stub_installer
        self.timeout_download = timeout
        # this is the timeout used in requests.get. Unlike "auth",
        # it does not work if we attach it on the session, so we handle
        # it independently.
        self.timeout_network = 60.

        logging.basicConfig(format=' %(levelname)s | %(message)s')
        self.logger = logging.getLogger(self.__module__)
        self.logger.setLevel(log_level)

        # build the base URL
        self.application = application
        self.base_url = '%s/' % urljoin(
            base_url,
            APPLICATIONS_TO_FTP_DIRECTORY.get(self.application,
                                              self.application)
        )

        if extension:
            self.extension = extension
        elif self.application in APPLICATIONS_MULTI_LOCALE and \
                self.platform in ('win32', 'win64'):
            # builds for APPLICATIONS_MULTI_LOCALE only exist in zip
            self.extension = 'zip'
        else:
            self.extension = DEFAULT_FILE_EXTENSIONS[self.platform]

        attempt = 0
        while True:
            attempt += 1
            try:
                self.get_build_info()
                break
            except (errors.NotFoundError,
                    requests.exceptions.RequestException) as e:
                if attempt >= self.retry_attempts:
                    # Out of attempts: do not sleep, fail right away.
                    # 'response' may be missing or None (e.g. for
                    # connection errors), so check before using it.
                    response = getattr(e, 'response', None)
                    if response is not None and \
                            response.status_code == 404:
                        message = "Specified build has not been found"
                        raise errors.NotFoundError(message, response.url)
                    raise

                self.logger.warning("Build not found: '%s'" % str(e))
                self.logger.info('Will retry in %s seconds...' %
                                 (self.retry_delay))
                time.sleep(self.retry_delay)
                self.logger.info("Retrying... (attempt %s)" % attempt)

    def _create_directory_parser(self, url):
        """Return a DirectoryParser for the url using session and timeout"""
        return DirectoryParser(url,
                               session=self.session,
                               timeout=self.timeout_network)

    @property
    def binary(self):
        """Return the name of the build

        The name is the first entry of the remote folder which matches
        ``binary_regex``. It is cached after the first successful lookup.

        :raises errors.NotFoundError: If no matching entry can be found
            within the allowed attempts.
        """
        attempt = 0

        while self._binary is None:
            attempt += 1
            try:
                # Retrieve all entries from the remote virtual folder
                parser = self._create_directory_parser(self.path)
                if not parser.entries:
                    raise errors.NotFoundError('No entries found', self.path)

                # Download the first matched directory entry
                pattern = re.compile(self.binary_regex, re.IGNORECASE)
                for entry in parser.entries:
                    match = pattern.match(entry)
                    if match:
                        self._binary = match.group()
                        break
                else:
                    raise errors.NotFoundError("Binary not found in folder",
                                               self.path)
            except (errors.NotFoundError,
                    requests.exceptions.RequestException) as e:
                if attempt >= self.retry_attempts:
                    # Out of attempts: do not sleep, fail right away.
                    response = getattr(e, 'response', None)
                    if response is not None and \
                            response.status_code == 404:
                        message = "Specified build has not been found"
                        raise errors.NotFoundError(message, self.path)
                    raise

                self.logger.warning("Build not found: '%s'" % str(e))
                self.logger.info('Will retry in %s seconds...' %
                                 (self.retry_delay))
                time.sleep(self.retry_delay)
                self.logger.info("Retrying... (attempt %s)" % attempt)

        return self._binary

    @property
    def binary_regex(self):
        """Return the regex for the binary filename"""
        raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)

    @property
    def url(self):
        """Return the URL of the build"""
        return urljoin(self.path, self.binary)

    @property
    def path(self):
        """Return the path to the build folder"""
        return urljoin(self.base_url, self.path_regex)

    @property
    def path_regex(self):
        """Return the regex for the path to the build folder"""
        raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)

    @property
    def platform_regex(self):
        """Return the platform fragment of the URL"""
        return PLATFORM_FRAGMENTS[self.platform]

    @property
    def filename(self):
        """Return the local filename of the build

        The absolute path is computed once and cached.
        """
        if self._filename is None:
            if os.path.splitext(self.destination)[1]:
                # If the filename has been given make use of it
                target_file = self.destination
            else:
                # Otherwise create it from the build details
                target_file = os.path.join(self.destination,
                                           self.build_filename(self.binary))

            self._filename = os.path.abspath(target_file)

        return self._filename

    def get_build_info(self):
        """Returns additional build information in subclasses if necessary"""
        pass

    def build_filename(self, binary):
        """Return the proposed filename with extension for the binary"""
        raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)

    def detect_platform(self):
        """Detect the current platform"""
        # For Mac and Linux 32bit we do not need the bits appended
        if mozinfo.os == 'mac' or \
                (mozinfo.os == 'linux' and mozinfo.bits == 32):
            return mozinfo.os
        else:
            return "%s%d" % (mozinfo.os, mozinfo.bits)

    def download(self):
        """Download the specified file

        The content is written to '<filename>.part' first and renamed once
        the transfer has finished. An already existing target file is not
        downloaded again.

        :returns: The absolute path of the local file.
        :raises requests.exceptions.RequestException: If the request fails
            and no attempts are left.
        :raises errors.TimeoutError: If the download takes longer than the
            configured timeout and no attempts are left.
        """

        def total_seconds(td):
            # Keep backward compatibility with Python 2.6 which doesn't have
            # this method
            if hasattr(td, 'total_seconds'):
                return td.total_seconds()
            else:
                return (td.microseconds +
                        (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6

        # Don't re-download the file
        if os.path.isfile(os.path.abspath(self.filename)):
            self.logger.info("File has already been downloaded: %s" %
                             (self.filename))
            return self.filename

        directory = os.path.dirname(self.filename)
        if not os.path.isdir(directory):
            os.makedirs(directory)

        self.logger.info('Downloading from: %s' %
                         (urllib.unquote(self.url)))
        self.logger.info('Saving as: %s' % self.filename)

        tmp_file = self.filename + ".part"
        attempt = 0

        while True:
            attempt += 1
            try:
                start_time = datetime.now()

                # Enable streaming mode so we can download content in chunks
                r = self.session.get(self.url, stream=True)
                try:
                    r.raise_for_status()

                    with open(tmp_file, 'wb') as f:
                        # Stop on any empty chunk instead of comparing with
                        # '' so the loop also ends for byte strings.
                        while True:
                            chunk = r.raw.read(CHUNK_SIZE)
                            if not chunk:
                                break
                            f.write(chunk)

                            elapsed = total_seconds(datetime.now() -
                                                    start_time)
                            if self.timeout_download and \
                                    elapsed >= self.timeout_download:
                                raise errors.TimeoutError
                finally:
                    # Always release the connection of the streamed response
                    r.close()
                break
            except (requests.exceptions.RequestException,
                    errors.TimeoutError) as e:
                if os.path.isfile(tmp_file):
                    os.remove(tmp_file)

                if attempt >= self.retry_attempts:
                    raise

                self.logger.warning('Download failed: "%s"' % str(e))
                self.logger.info('Will retry in %s seconds...' %
                                 (self.retry_delay))
                # Wait exactly once per retry
                time.sleep(self.retry_delay)
                self.logger.info("Retrying... (attempt %s)" % attempt)

        os.rename(tmp_file, self.filename)

        return self.filename

    def show_matching_builds(self, builds):
        """Output the matching builds

        If more than ten builds are given only the first and the last five
        are listed.
        """
        if len(builds) > 10:
            listing = ' ... '.join([', '.join(builds[:5]),
                                    ', '.join(builds[-5:])])
        else:
            listing = ', '.join(builds)

        self.logger.info('Found %s build%s: %s' % (
            len(builds),
            's' if len(builds) > 1 else '',
            listing))
class DailyScraper(Scraper):
    """Class to download a daily build from the Mozilla server

    The build is selected by build id, by date plus optional build number,
    or - if neither is given - by the latest build of the branch.
    """

    def __init__(self, branch='mozilla-central', build_id=None, date=None,
                 build_number=None, *args, **kwargs):
        """Store the build selection and initialize the base scraper.

        :param branch: Name of the branch the build was created from.
        :param build_id: Build id in the format '%Y%m%d%H%M%S'.
        :param date: Date of the build in the format '%Y-%m-%d'.
        :param build_number: 1-based number of the build on that day.
        """
        self.branch = branch
        self.build_id = build_id
        self.date = date
        self.build_number = build_number

        Scraper.__init__(self, *args, **kwargs)

    def get_build_info(self):
        """Defines additional build information

        :raises ValueError: If the given date is not in a valid format.
        """
        # Internally we access builds via index
        if self.build_number is not None:
            self.build_index = int(self.build_number) - 1
        else:
            self.build_index = None

        if self.build_id:
            # A build id has been specified. Split up its components so the
            # date and time can be extracted:
            # '20111212042025' -> '2011-12-12 04:20:25'
            self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S')
        elif isinstance(self.date, datetime):
            # The date has already been resolved by an earlier call. This
            # method runs again on each retry, and parsing a datetime would
            # turn a retryable failure into a ValueError.
            pass
        elif self.date:
            # A date (without time) has been specified. Use its value and the
            # build index to find the requested build for that day.
            try:
                self.date = datetime.strptime(self.date, '%Y-%m-%d')
            except (TypeError, ValueError):
                raise ValueError('%s is not a valid date' % self.date)
        else:
            # If no build id nor date have been specified the latest available
            # build of the given branch has to be identified. We also have to
            # retrieve the date of the build via its build id.
            self.date = self.get_latest_build_date()

        self.builds, self.build_index = self.get_build_info_for_date(
            self.date, self.build_index)

    def get_latest_build_date(self):
        """ Returns date of latest available nightly build.

        The date is taken from the first line of the platform's status
        file in the 'latest' folder of the branch.

        :raises errors.NotFoundError: If no status file can be found.
        """
        # Compare by equality: "in ('fennec')" is a substring test on a
        # string, not a membership test on a tuple.
        if self.application != 'fennec':
            url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch)
        else:
            url = urljoin(self.base_url, 'nightly', 'latest-%s-%s/' %
                          (self.branch, self.platform))

        self.logger.info('Retrieving the build status file from %s' % url)
        parser = self._create_directory_parser(url)
        parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
        if not parser.entries:
            message = 'Status file for %s build cannot be found' % \
                self.platform_regex
            raise errors.NotFoundError(message, url)

        # Read status file for the platform, retrieve build id,
        # and convert to a date
        headers = {'Cache-Control': 'max-age=0'}

        # Use the network timeout so a stalled server cannot block forever
        r = self.session.get(url + parser.entries[-1], headers=headers,
                             timeout=self.timeout_network)
        try:
            r.raise_for_status()

            return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S')
        finally:
            r.close()

    def is_build_dir(self, folder_name):
        """Return whether or not the given dir contains a build."""
        # Cannot move up to base scraper due to parser.entries call in
        # get_build_info_for_date (see below)
        url = '%s/' % urljoin(self.base_url, self.monthly_build_list_regex,
                              folder_name)
        if self.application in APPLICATIONS_MULTI_LOCALE \
                and self.locale != 'multi':
            url = '%s/' % urljoin(url, self.locale)

        parser = self._create_directory_parser(url)

        pattern = re.compile(self.binary_regex, re.IGNORECASE)
        return any(pattern.match(entry) for entry in parser.entries)

    def get_build_info_for_date(self, date, build_index=None):
        """Return the build folders for the date and the selected index.

        :param date: datetime of the build. A time other than midnight is
            used to narrow the folders down to that timestamp.
        :param build_index: 0-based index of the build to select. If None
            the most recent build folder is selected.
        :returns: Tuple of (list of folder names, build index).
        :raises errors.NotFoundError: If no matching folder exists.
        """
        url = urljoin(self.base_url, self.monthly_build_list_regex)
        # Compare explicitly with midnight; the truthiness of time objects
        # differs between Python versions.
        has_time = date.time() != datetime.min.time()

        self.logger.info('Retrieving list of builds from %s' % url)
        parser = self._create_directory_parser(url)
        regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % {
            'DATE': date.strftime('%Y-%m-%d'),
            'BRANCH': self.branch,
            # ensure to select the correct subfolder for localized builds
            'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?',
            'PLATFORM': ('-' + self.platform
                         if self.application == 'fennec' else '')
        }
        parser.entries = parser.filter(regex)
        parser.entries = parser.filter(self.is_build_dir)

        if has_time:
            # If a time is included in the date, use it to determine the
            # build's index
            regex = r'.*%s.*' % date.strftime('%H-%M-%S')
            parser.entries = parser.filter(regex)

        if not parser.entries:
            date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d'
            message = 'Folder for builds on %s has not been found' % \
                date.strftime(date_format)
            raise errors.NotFoundError(message, url)

        self.show_matching_builds(parser.entries)

        # If no index has been given, set it to the last build of the day.
        if build_index is None:
            # Find the most recent non-empty entry.
            build_index = len(parser.entries)
            for build in reversed(parser.entries):
                build_index -= 1
                if not build_index or self.is_build_dir(build):
                    break

        self.logger.info('Selected build: %s' % parser.entries[build_index])

        return (parser.entries, build_index)

    @property
    def binary_regex(self):
        """Return the regex for the binary"""
        regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
        regex_suffix = {'android-api-9': r'\.%(EXT)s$',
                        'android-api-11': r'\.%(EXT)s$',
                        'android-x86': r'\.%(EXT)s$',
                        'linux': r'\.%(EXT)s$',
                        'linux64': r'\.%(EXT)s$',
                        'mac': r'\.%(EXT)s$',
                        'mac64': r'\.%(EXT)s$',
                        'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$',
                        'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'}
        regex = regex_base_name + regex_suffix[self.platform]

        return regex % {'APP': self.application,
                        'LOCALE': self.locale,
                        'PLATFORM': self.platform_regex,
                        'EXT': self.extension,
                        'STUB': '-stub' if self.is_stub_installer else ''}

    def build_filename(self, binary):
        """Return the proposed filename with extension for the binary"""
        try:
            # Get exact timestamp of the build to build the local file name
            folder = self.builds[self.build_index]
            timestamp = re.search(r'([\d\-]+)-\D.*', folder).group(1)
        except Exception:
            # If it's not available use the build's date
            timestamp = self.date.strftime('%Y-%m-%d')

        return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % {
            'TIMESTAMP': timestamp,
            'BRANCH': self.branch,
            'NAME': binary}

    @property
    def monthly_build_list_regex(self):
        """Return the regex for the folder containing builds of a month."""
        # Regex for possible builds for the given date
        return r'nightly/%(YEAR)s/%(MONTH)s/' % {
            'YEAR': self.date.year,
            'MONTH': str(self.date.month).zfill(2)}

    @property
    def path_regex(self):
        """Return the regex for the path to the build folder

        :raises errors.NotFoundError: If the build folder cannot be
            determined, e.g. because no builds have been retrieved.
        """
        try:
            path = '%s/' % urljoin(self.monthly_build_list_regex,
                                   self.builds[self.build_index])
            if self.application in APPLICATIONS_MULTI_LOCALE \
                    and self.locale != 'multi':
                path = '%s/' % urljoin(path, self.locale)
            return path
        except Exception:
            folder = urljoin(self.base_url, self.monthly_build_list_regex)
            raise errors.NotFoundError("Specified sub folder cannot be found",
                                       folder)
class DirectScraper(Scraper):
    """Scraper which fetches a file from an explicitly given URL"""

    def __init__(self, url, *args, **kwargs):
        """Remember the URL and hand all other arguments to the base class"""
        self._url = url

        Scraper.__init__(self, *args, **kwargs)

    @property
    def filename(self):
        """Return the absolute local path the file is saved to"""
        target = self.destination

        if not os.path.splitext(target)[1]:
            # Only a folder has been given, so take the name from the last
            # segment of the URL path - or the host name if that is empty.
            url_parts = urlparse(self.url)
            name = url_parts.path.rpartition('/')[-1]
            if not name:
                name = url_parts.hostname
            target = os.path.join(target, name)

        return os.path.abspath(target)

    @property
    def url(self):
        """Return the URL handed over at construction"""
        return self._url
class ReleaseScraper(Scraper):
    """Scraper for release builds on the Mozilla server"""

    def __init__(self, version, *args, **kwargs):
        """Remember the version and hand all other arguments to the base"""
        self.version = version

        Scraper.__init__(self, *args, **kwargs)

    @property
    def binary_regex(self):
        """Return the regex matching the file name of the binary"""
        linux_template = r'^%(APP)s-.*\.%(EXT)s$'
        mac_template = r'^%(APP)s.*\.%(EXT)s$'
        windows_template = r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$'

        templates = {'linux': linux_template,
                     'linux64': linux_template,
                     'mac': mac_template,
                     'mac64': mac_template,
                     'win32': windows_template,
                     'win64': windows_template}
        template = templates[self.platform]

        if self.is_stub_installer:
            stub = 'Stub'
        else:
            stub = ''

        return template % {'APP': self.application,
                           'EXT': self.extension,
                           'STUB': stub}

    @property
    def path_regex(self):
        """Return the regex for the folder which contains the build"""
        values = {'LOCALE': self.locale,
                  'PLATFORM': self.platform_regex,
                  'VERSION': self.version}

        return r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s/' % values

    @property
    def platform_regex(self):
        """Return the platform fragment used in the URL"""
        # For win64 the platform name itself is the folder name
        if self.platform != 'win64':
            return PLATFORM_FRAGMENTS[self.platform]

        return self.platform

    def build_filename(self, binary):
        """Return the local file name, including extension, for the binary"""
        if self.is_stub_installer:
            stub = '-stub'
        else:
            stub = ''

        values = {'APP': self.application,
                  'VERSION': self.version,
                  'LOCALE': self.locale,
                  'PLATFORM': self.platform,
                  'STUB': stub,
                  'EXT': self.extension}

        return ('%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s%(STUB)s'
                '.%(EXT)s') % values
class ReleaseCandidateScraper(ReleaseScraper):
    """Class to download a release candidate build from the Mozilla server"""

    def __init__(self, version, build_number=None, *args, **kwargs):
        """Store version and build number and initialize the base scraper.

        :param version: Version of the release candidate.
        :param build_number: Number N of the 'buildN' folder to use. If it
            is not given, or no such folder exists, the last listed
            candidate folder is used.
        """
        self.version = version
        self.build_number = build_number

        Scraper.__init__(self, *args, **kwargs)

    def get_build_info(self):
        """Defines additional build information

        :raises errors.NotFoundError: If the candidates folder is empty.
        """
        # Internally we access builds via index
        url = urljoin(self.base_url, self.candidate_build_list_regex)
        self.logger.info('Retrieving list of candidate builds from %s' % url)

        parser = self._create_directory_parser(url)
        if not parser.entries:
            message = 'Folder for specific candidate builds at %s has not ' \
                'been found' % url
            raise errors.NotFoundError(message, url)

        self.show_matching_builds(parser.entries)
        self.builds = parser.entries
        self.build_index = len(parser.entries) - 1

        if self.build_number and \
                ('build%s' % self.build_number) in self.builds:
            self.builds = ['build%s' % self.build_number]
            self.build_index = 0
            self.logger.info('Selected build: build%s' % self.build_number)
        else:
            # Log the folder which really gets used; its number does not
            # have to equal its position in the list.
            self.logger.info('Selected build: %s' %
                             self.builds[self.build_index])

    @property
    def candidate_build_list_regex(self):
        """Return the regex for the folder which contains the builds of
           a candidate build."""
        # Regex for possible builds for the given date
        return r'candidates/%(VERSION)s-candidates/' % {
            'VERSION': self.version}

    @property
    def path_regex(self):
        """Return the regex for the path to the build folder"""
        regex = r'%(PREFIX)s%(BUILD)s/%(PLATFORM)s/%(LOCALE)s/'
        return regex % {'PREFIX': self.candidate_build_list_regex,
                        'BUILD': self.builds[self.build_index],
                        'LOCALE': self.locale,
                        'PLATFORM': self.platform_regex}

    @property
    def platform_regex(self):
        """Return the platform fragment of the URL"""
        if self.platform == 'win64':
            return self.platform

        return PLATFORM_FRAGMENTS[self.platform]

    def build_filename(self, binary):
        """Return the proposed filename with extension for the binary"""
        template = '%(APP)s-%(VERSION)s-%(BUILD)s.%(LOCALE)s.' \
                   '%(PLATFORM)s%(STUB)s.%(EXT)s'
        return template % {'APP': self.application,
                           'VERSION': self.version,
                           'BUILD': self.builds[self.build_index],
                           'LOCALE': self.locale,
                           'PLATFORM': self.platform,
                           'STUB': '-stub' if self.is_stub_installer else '',
                           'EXT': self.extension}

    def download(self):
        """Download the specified file

        A build which cannot be found is logged and not raised.

        :returns: The absolute path of the local file, or None if the
            build has not been found.
        """
        try:
            # Try to download the signed candidate build. Hand the result
            # through so callers get the file name like in the base class.
            return Scraper.download(self)
        except errors.NotFoundError as e:
            self.logger.exception(str(e))
class TinderboxScraper(Scraper):
    """Class to download a tinderbox build from the Mozilla server.

    There are two ways to specify a unique build:
    1. If the date (%Y-%m-%d) is given and build_number is given where
       the build_number is the index of the build on the date
    2. If the build timestamp (UNIX) is given, and matches a specific build.
    """

    def __init__(self, branch='mozilla-central', build_number=None, date=None,
                 debug_build=False, *args, **kwargs):
        """Store the build selection and initialize the base scraper.

        :param branch: Name of the branch the build was created from.
        :param build_number: 1-based number of the build to select.
        :param date: Date in the format '%Y-%m-%d' or a UNIX timestamp.
        :param debug_build: Whether a debug build is requested.
        """
        self.branch = branch
        self.build_number = build_number
        self.debug_build = debug_build
        self.date = date

        self.timestamp = None
        # Currently any time in RelEng is based on the Pacific time zone.
        self.timezone = PacificTimezone()

        Scraper.__init__(self, *args, **kwargs)

    def get_build_info(self):
        """Defines additional build information

        :raises ValueError: If the date is neither in the format
            '%Y-%m-%d' nor a UNIX timestamp.
        """
        # Internally we access builds via index
        if self.build_number is not None:
            self.build_index = int(self.build_number) - 1
        else:
            self.build_index = None

        # This method runs again on each retry. Skip the parsing if an
        # earlier call has already converted the date, otherwise a
        # retryable failure would end up as a ValueError.
        if self.date is not None and not isinstance(self.date, datetime):
            try:
                # date is provided in the format 2013-07-23
                self.date = datetime.strptime(self.date, '%Y-%m-%d')
            except (TypeError, ValueError):
                try:
                    # date is provided as a unix timestamp
                    datetime.fromtimestamp(float(self.date))
                    self.timestamp = self.date
                except (TypeError, ValueError, OverflowError, OSError):
                    raise ValueError('%s is not a valid date' % self.date)

        self.locale_build = self.locale != 'en-US'
        # For localized builds we do not have to retrieve the list of builds
        # because only the last build is available
        if not self.locale_build:
            self.builds, self.build_index = self.get_build_info_for_index(
                self.build_index)

    @property
    def binary_regex(self):
        """Return the regex for the binary"""
        regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
        regex_suffix = {'linux': r'.*\.%(EXT)s$',
                        'linux64': r'.*\.%(EXT)s$',
                        'mac': r'.*\.%(EXT)s$',
                        'mac64': r'.*\.%(EXT)s$',
                        'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$',
                        'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'}

        regex = regex_base_name + regex_suffix[self.platform]

        # The file name uses the module-level fragments; platform_regex of
        # this class returns the fragments used in folder names instead.
        return regex % {'APP': self.application,
                        'LOCALE': self.locale,
                        'PLATFORM': PLATFORM_FRAGMENTS[self.platform],
                        'STUB': '-stub' if self.is_stub_installer else '',
                        'EXT': self.extension}

    def build_filename(self, binary):
        """Return the proposed filename with extension for the binary"""
        return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % {
            'TIMESTAMP': self.timestamp + '-' if self.timestamp else '',
            'BRANCH': self.branch,
            'DEBUG': '-debug' if self.debug_build else '',
            'NAME': binary}

    @property
    def build_list_regex(self):
        """Return the regex for the folder which contains the list of builds"""
        regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s/'

        return regex % {
            'BRANCH': self.branch,
            'PLATFORM': '' if self.locale_build else self.platform_regex,
            'L10N': 'l10n' if self.locale_build else '',
            'DEBUG': '-debug' if self.debug_build else ''}

    def date_matches(self, timestamp):
        """
        Determines whether the timestamp date is equal to the argument date
        """
        if self.date is None:
            return False

        timestamp = datetime.fromtimestamp(float(timestamp), self.timezone)
        return self.date.date() == timestamp.date()

    def detect_platform(self):
        """Detect the current platform"""
        platform = Scraper.detect_platform(self)

        # On OS X we have to special case the platform detection code and
        # fallback to 64 bit builds for the en-US locale
        if mozinfo.os == 'mac' and self.locale == 'en-US' and \
                mozinfo.bits == 64:
            platform = "%s%d" % (mozinfo.os, mozinfo.bits)

        return platform

    def is_build_dir(self, folder_name):
        """Return whether or not the given dir contains a build."""
        # Cannot move up to base scraper due to parser.entries call in
        # get_build_info_for_index (see below)
        url = '%s/' % urljoin(self.base_url, self.build_list_regex,
                              folder_name)

        if self.application in APPLICATIONS_MULTI_LOCALE \
                and self.locale != 'multi':
            url = '%s/' % urljoin(url, self.locale)

        parser = self._create_directory_parser(url)

        pattern = re.compile(self.binary_regex, re.IGNORECASE)
        return any(pattern.match(entry) for entry in parser.entries)

    def get_build_info_for_index(self, build_index=None):
        """Return the build folders and the index of the selected build.

        The folders are limited to the given timestamp or date, if any.

        :param build_index: 0-based index of the build to select. If None
            the most recent build folder is selected.
        :returns: Tuple of (list of folder names, build index).
        :raises errors.NotFoundError: If no matching folder exists.
        """
        url = urljoin(self.base_url, self.build_list_regex)

        self.logger.info('Retrieving list of builds from %s' % url)
        parser = self._create_directory_parser(url)
        parser.entries = parser.filter(r'^\d+$')

        if self.timestamp:
            # If a timestamp is given, retrieve the folder with the timestamp
            # as name
            if self.timestamp in parser.entries:
                parser.entries = [self.timestamp]
            else:
                parser.entries = []
        elif self.date:
            # If date is given, retrieve the subset of builds on that date.
            # Build a real list, it gets measured and indexed below.
            parser.entries = [entry for entry in parser.entries
                              if self.date_matches(entry)]

        if not parser.entries:
            message = 'No builds have been found'
            raise errors.NotFoundError(message, url)

        self.show_matching_builds(parser.entries)

        # If no index has been given, set it to the last build of the day.
        if build_index is None:
            # Find the most recent non-empty entry.
            build_index = len(parser.entries)
            for build in reversed(parser.entries):
                build_index -= 1
                if not build_index or self.is_build_dir(build):
                    break

        self.logger.info('Selected build: %s' % parser.entries[build_index])

        return (parser.entries, build_index)

    @property
    def path_regex(self):
        """Return the regex for the path to the build folder"""
        if self.locale_build:
            return self.build_list_regex

        return '%s/' % urljoin(self.build_list_regex,
                               self.builds[self.build_index])

    @property
    def platform_regex(self):
        """Return the platform fragment of the URL"""
        # Named differently than the module-level PLATFORM_FRAGMENTS so the
        # global is not shadowed.
        folder_fragments = {'linux': 'linux',
                            'linux64': 'linux64',
                            'mac': 'macosx64',
                            'mac64': 'macosx64',
                            'win32': 'win32',
                            'win64': 'win64'}

        return folder_fragments[self.platform]
class TryScraper(Scraper):
    """Scraper for try builds on the Mozilla server"""

    def __init__(self, changeset=None, debug_build=False, *args, **kwargs):
        """Remember changeset and debug flag, then set up the base class"""
        self.debug_build = debug_build
        self.changeset = changeset

        Scraper.__init__(self, *args, **kwargs)

    def get_build_info(self):
        """Look up the build folders and the index of the build to use"""
        build_info = self.get_build_info_for_index()
        self.builds, self.build_index = build_info

    @property
    def binary_regex(self):
        """Return the regex matching the file name of the binary"""
        prefix = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
        archive_suffix = r'.*\.%(EXT)s$'
        installer_suffix = r'.*(\.installer%(STUB)s)\.%(EXT)s$'

        suffixes = {'linux': archive_suffix,
                    'linux64': archive_suffix,
                    'mac': archive_suffix,
                    'mac64': archive_suffix,
                    'win32': installer_suffix,
                    'win64': installer_suffix}
        template = prefix + suffixes[self.platform]

        if self.is_stub_installer:
            stub = '-stub'
        else:
            stub = ''

        return template % {'APP': self.application,
                           'LOCALE': self.locale,
                           'PLATFORM': PLATFORM_FRAGMENTS[self.platform],
                           'STUB': stub,
                           'EXT': self.extension}

    def build_filename(self, binary):
        """Return the local file name, including extension, for the binary"""
        if self.debug_build:
            debug = '-debug'
        else:
            debug = ''

        values = {'CHANGESET': self.changeset,
                  'DEBUG': debug,
                  'NAME': binary}

        return '%(CHANGESET)s%(DEBUG)s-%(NAME)s' % values

    @property
    def build_list_regex(self):
        """Return the regex for the folder which lists the builds"""
        return 'try-builds/'

    def detect_platform(self):
        """Detect the platform of the current machine"""
        # OS X needs a special case: for the en-US locale on a 64 bit
        # system the 64 bit build is used.
        wants_mac64 = (mozinfo.os == 'mac' and self.locale == 'en-US' and
                       mozinfo.bits == 64)

        detected = Scraper.detect_platform(self)
        if wants_mac64:
            detected = "%s%d" % (mozinfo.os, mozinfo.bits)

        return detected

    def get_build_info_for_index(self, build_index=None):
        """Return the folders ending in the changeset, and the index 0

        Raises errors.NotFoundError if no folder matches.
        """
        list_url = urljoin(self.base_url, self.build_list_regex)
        self.logger.info('Retrieving list of builds from %s' % list_url)

        parser = self._create_directory_parser(list_url)
        parser.entries = parser.filter('.*-%s$' % self.changeset)
        if not parser.entries:
            raise errors.NotFoundError('No builds have been found', list_url)

        self.show_matching_builds(parser.entries)
        # The first matching folder is always the one which gets used
        selected = 0
        self.logger.info('Selected build: %s' % parser.entries[selected])

        return (parser.entries, selected)

    @property
    def path_regex(self):
        """Return the regex for the folder which contains the build"""
        if self.debug_build:
            debug = '-debug'
        else:
            debug = ''

        platform_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % {
            'PLATFORM': self.platform_regex,
            'DEBUG': debug}

        return urljoin(self.build_list_regex,
                       self.builds[self.build_index],
                       platform_dir)

    @property
    def platform_regex(self):
        """Return the platform fragment used in the URL"""
        folder_fragments = {'linux': 'linux',
                            'linux64': 'linux64',
                            'mac': 'macosx64',
                            'mac64': 'macosx64',
                            'win32': 'win32',
                            'win64': 'win64'}

        return folder_fragments[self.platform]