| # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import base64 |
| import xml.dom.minidom as minidom |
| from xml.parsers.expat import ExpatError |
| |
| import crash_utils |
| from repository_parser_interface import ParserInterface |
| |
| FILE_CHANGE_TYPE_MAP = { |
| 'add': 'A', |
| 'copy': 'C', |
| 'delete': 'D', |
| 'modify': 'M', |
| 'rename': 'R' |
| } |
| |
| |
| def _ConvertToFileChangeType(file_action): |
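| """Converts a googlesource file action (e.g. 'modify') into a single |
| uppercase letter, matching the values in FILE_CHANGE_TYPE_MAP. |
| """ |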
| # TODO(stgao): verify impact on code that checks the file change type. |
| return file_action[0].upper() |
| |
| |
| class GitParser(ParserInterface): |
| """Parser for Git repository in googlesource. |
| |
| Attributes: |
| parsed_deps: A map from component path to its repository name, regression, |
| etc. |
| url_parts_map: A map from url type to its url parts. These parts are |
| appended to the base url to form different urls. |
| """ |
| |
| def __init__(self, parsed_deps, url_parts_map): |
| self.component_to_url_map = parsed_deps |
| self.url_parts_map = url_parts_map |
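|  |
| # Illustrative (assumed) shapes of the two maps, inferred from how they |
| # are used below; the real values come from the caller's configuration: |
| #   parsed_deps:   {'src/': {'repository': 'https://host/repo', ...}, ...} |
| #   url_parts_map: {'changelog_url': ..., 'revision_url': ..., |
| #                   'diff_url': ..., 'blame_url': ...} |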
| |
| def ParseChangelog(self, component_path, range_start, range_end): |
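| """Parses the changelog between the given revision range. |
|  |
| Args: |
| component_path: Path of the component whose repository to parse. |
| range_start: Start of the regression range (googlesource excludes it |
| from the log, so it is parsed separately). |
| range_end: End of the regression range. |
|  |
| Returns: |
| A tuple (revision_map, file_to_revision_map). revision_map maps a git |
| hash to its revision information, and file_to_revision_map maps a file |
| path to (git hash, file change type) tuples. |
| """ |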
| file_to_revision_map = {} |
| revision_map = {} |
| base_url = self.component_to_url_map[component_path]['repository'] |
| changelog_url = base_url + self.url_parts_map['changelog_url'] |
| revision_url = base_url + self.url_parts_map['revision_url'] |
| |
| # Retrieve data from the url; return empty maps if it fails. The html url |
| # is the url from which the changelog can be parsed as html. |
| url = changelog_url % (range_start, range_end) |
| html_url = url + '?pretty=fuller' |
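| # Illustrative (assumed) example: with a changelog_url part such as |
| # '/+log/%s..%s', html_url would look like |
| # 'https://host/repo/+log/<range_start>..<range_end>?pretty=fuller'. |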
| response = crash_utils.GetDataFromURL(html_url) |
| if not response: |
| return (revision_map, file_to_revision_map) |
| |
| # Parse xml out of the returned string. If that fails, try parsing the |
| # changelog from JSON instead. |
| try: |
| dom = minidom.parseString(response) |
| except ExpatError: |
| self.ParseChangelogFromJSON(range_start, range_end, changelog_url, |
| revision_url, revision_map, |
| file_to_revision_map) |
| return (revision_map, file_to_revision_map) |
| |
| # The revision information is in the third div through the second-to-last |
| # one. |
| divs = dom.getElementsByTagName('div')[2:-1] |
| pres = dom.getElementsByTagName('pre') |
| uls = dom.getElementsByTagName('ul') |
| |
| # Each (div, pre, ul) triple contains the information for one CL, so the |
| # three lists should have the same length. |
| if not divs or len(divs) != len(pres) or len(pres) != len(uls): |
| self.ParseChangelogFromJSON(range_start, range_end, changelog_url, |
| revision_url, revision_map, |
| file_to_revision_map) |
| return (revision_map, file_to_revision_map) |
| |
| # Iterate through the divs and parse each revision. |
| for (div, pre, ul) in zip(divs, pres, uls): |
| # Create new revision object for each revision. |
| revision = {} |
| |
| # There must be exactly three <tr>s; otherwise this entry is malformed. |
| trs = div.getElementsByTagName('tr') |
| if len(trs) != 3: |
| continue |
| |
| # Retrieve git hash. |
| githash = trs[0].getElementsByTagName('a')[0].firstChild.nodeValue |
| |
| # Retrieve and set author. |
| author = trs[1].getElementsByTagName( |
| 'td')[0].firstChild.nodeValue.split('<')[0] |
| revision['author'] = author |
| revision['time'] = trs[1].getElementsByTagName( |
| 'td')[1].firstChild.nodeValue |
| |
| # Retrieve and set message. |
| revision['message'] = pre.firstChild.nodeValue |
| |
| # Set url of this CL. |
| revision_url_part = self.url_parts_map['revision_url'] % githash |
| revision['url'] = base_url + revision_url_part |
| |
| # Go through the changed files; they are in <li> elements. |
| lis = ul.getElementsByTagName('li') |
| for li in lis: |
| # Retrieve the path and action of the changed file. |
| file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue |
| file_change_type = li.getElementsByTagName('span')[ |
| 0].getAttribute('class') |
| |
| # Normalize the file action so that it matches the SVN parser's format. |
| file_change_type = _ConvertToFileChangeType(file_change_type) |
| |
| # Add the changed file to the map. |
| if file_path not in file_to_revision_map: |
| file_to_revision_map[file_path] = [] |
| file_to_revision_map[file_path].append((githash, file_change_type)) |
| |
| # Add this revision object to the map. |
| revision_map[githash] = revision |
| |
| # Also parse the revision at range_start, because googlesource does not |
| # include the start of the range in the log. |
| self.ParseRevision(revision_url, range_start, revision_map, |
| file_to_revision_map) |
| |
| return (revision_map, file_to_revision_map) |
| |
| def ParseChangelogFromJSON(self, range_start, range_end, changelog_url, |
| revision_url, revision_map, file_to_revision_map): |
| """Parses changelog by going over the JSON file. |
| |
| Args: |
| range_start: Start of the regression range. |
| range_end: End of the regression range. |
| changelog_url: The url to retrieve changelog from. |
| revision_url: The url to retrieve individual revision from. |
| revision_map: A map from a git hash to its revision information. |
| file_to_revision_map: A map from a file path to the git hashes in which |
| it was changed. |
| """ |
| # Compute the URL from the given range and retrieve the changelog. Stop |
| # if it fails. |
| changelog_url %= (range_start, range_end) |
| json_url = changelog_url + '?format=json' |
| response = crash_utils.GetDataFromURL(json_url) |
| if not response: |
| return |
| |
| # Parse the changelog from the returned object. The returned string starts |
| # with the XSSI-protection prefix ")]}'\n", so skip the first 5 characters. |
| revisions = crash_utils.LoadJSON(response[5:]) |
| if not revisions: |
| return |
| |
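| # Illustrative (assumed) shape of the gitiles JSON log, based on the keys |
| # read below: |
| #   )]}' |
| #   {"log": [{"commit": "<githash>", ...}, ...]} |
|  |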
| # Parse individual revision in the log. |
| for revision in revisions['log']: |
| githash = revision['commit'] |
| self.ParseRevision(revision_url, githash, revision_map, |
| file_to_revision_map) |
| |
| # Also parse the revision at range_start, because googlesource does not |
| # include it in the log. |
| self.ParseRevision(revision_url, range_start, revision_map, |
| file_to_revision_map) |
| |
| def ParseRevision(self, revision_url, githash, revision_map, |
| file_to_revision_map): |
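| """Parses a single revision from googlesource and adds it to the maps. |
|  |
| Args: |
| revision_url: The url to retrieve individual revision from. |
| githash: The git hash of the revision to parse. |
| revision_map: A map from a git hash to its revision information. |
| file_to_revision_map: A map from a file path to the git hashes in which |
| it was changed. |
| """ |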
| |
| # Retrieve data from the URL, return if it fails. |
| url = revision_url % githash |
| response = crash_utils.GetDataFromURL(url + '?format=json') |
| if not response: |
| return |
| |
| # Load JSON object from the string, skipping the ")]}'\n" prefix. If it |
| # fails, terminate the function. |
| json_revision = crash_utils.LoadJSON(response[5:]) |
| if not json_revision: |
| return |
| |
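| # Illustrative (assumed) shape of the revision JSON, based on the fields |
| # accessed below: |
| #   {"commit": "<githash>", |
| #    "author": {"name": ..., "time": ...}, |
| #    "message": ..., |
| #    "tree_diff": [{"type": "modify", "new_path": ...}, ...]} |
|  |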
| # Create a map for this revision and get the githash from the JSON object. |
| revision = {} |
| githash = json_revision['commit'] |
| |
| # Set author, message and URL of this CL. |
| revision['author'] = json_revision['author']['name'] |
| revision['time'] = json_revision['author']['time'] |
| revision['message'] = json_revision['message'] |
| revision['url'] = url |
| |
| # Iterate through the changed files. |
| for diff in json_revision['tree_diff']: |
| file_path = diff['new_path'] |
| file_change_type = diff['type'] |
| |
| # Normalize file action so that it fits with svn_repository_parser. |
| file_change_type = _ConvertToFileChangeType(file_change_type) |
| |
| # Add the file to the map. |
| if file_path not in file_to_revision_map: |
| file_to_revision_map[file_path] = [] |
| file_to_revision_map[file_path].append((githash, file_change_type)) |
| |
| # Add this CL to the map. |
| revision_map[githash] = revision |
| |
| return |
| |
| def ParseLineDiff(self, path, component, file_change_type, githash): |
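| """Parses the line diff of the given file in the given revision. |
|  |
| Args: |
| path: Path of the changed file. |
| component: Path of the component the file belongs to. |
| file_change_type: Single-letter change type of the file ('A', 'M', ...). |
| githash: Git hash of the revision that changed the file. |
|  |
| Returns: |
| A tuple (url, changed_line_numbers, changed_line_contents); the two lists |
| are empty if the diff cannot be retrieved or the file was not modified. |
| """ |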
| changed_line_numbers = [] |
| changed_line_contents = [] |
| base_url = self.component_to_url_map[component]['repository'] |
| backup_url = (base_url + self.url_parts_map['revision_url']) % githash |
| |
| # If the file was added, copied or renamed (not modified), there is no line |
| # diff to parse, so treat it as if it is not changed. |
| if file_change_type in ('A', 'C', 'R'): |
| # TODO(stgao): Maybe return whole file change for Add, Rename, and Copy? |
| return (backup_url, changed_line_numbers, changed_line_contents) |
| |
| # Retrieve the diff data from the URL; if it fails, return empty lists. |
| url = (base_url + self.url_parts_map['diff_url']) % (githash, path) |
| data = crash_utils.GetDataFromURL(url + '?format=text') |
| if not data: |
| return (backup_url, changed_line_numbers, changed_line_contents) |
| |
| # Decode the base64-encoded response into the lines of the diff. |
| diff = base64.b64decode(data).splitlines() |
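| # Illustrative example of the decoded diff (standard unified diff format): |
| #   @@ -10,3 +12,4 @@ |
| #   -removed line |
| #   +added line |
| # The number right after '+' in the '@@' header (12 here) is the starting |
| # line of the chunk in the new version of the file. |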
| |
| # Iterate through the lines in the diff. current_line is -1 while we are |
| # not yet inside a diff chunk. |
| current_line = -1 |
| for line in diff: |
| line = line.strip() |
| |
| # If line starts with @@, a new chunk starts. |
| if line.startswith('@@'): |
| current_line = int(line.split('+')[1].split(',')[0]) |
| |
| # If we are in a chunk. |
| elif current_line != -1: |
| # If line is either added or modified. |
| if line.startswith('+'): |
| changed_line_numbers.append(current_line) |
| # Strip the leading '+' from the diff line to get the content. |
| changed_line_contents.append(line[1:]) |
| |
| # Do not increment current line if the change is 'delete'. |
| if not line.startswith('-'): |
| current_line += 1 |
| |
| # Return the url without the '?format=text' suffix. |
| return (url, changed_line_numbers, changed_line_contents) |
| |
| def ParseBlameInfo(self, component, file_path, line, revision): |
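| """Parses the blame information of the given line of a file. |
|  |
| Args: |
| component: Path of the component the file belongs to. |
| file_path: Path of the file to get blame information for. |
| line: Line number to get blame information for. |
| revision: Revision of the file to run blame on. |
|  |
| Returns: |
| A tuple (content, revision, author, revision_url, message, time) for the |
| region containing the line, or None if it cannot be found. |
| """ |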
| base_url = self.component_to_url_map[component]['repository'] |
| |
| # Retrieve blame JSON file from googlesource. If it fails, return None. |
| url_part = self.url_parts_map['blame_url'] % (revision, file_path) |
| blame_url = base_url + url_part |
| json_string = crash_utils.GetDataFromURL(blame_url) |
| if not json_string: |
| return |
| |
| # Parse JSON object from the string. The returned string starts with the |
| # XSSI-protection prefix ")]}'\n", so skip the first 5 characters. |
| annotation = crash_utils.LoadJSON(json_string[5:]) |
| if not annotation: |
| return |
| |
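| # Illustrative (assumed) shape of one entry in annotation['regions'], based |
| # on the fields read below: |
| #   {"start": 120, "count": 5, "commit": "<githash>", "author": {"name": ...}} |
|  |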
| # Go through the regions; each region is a run of consecutive lines with |
| # the same author/revision. |
| for blame_line in annotation['regions']: |
| start = blame_line['start'] |
| count = blame_line['count'] |
| |
| # Check whether the line we want the blame info for falls in this region. |
| if start <= line <= start + count - 1: |
| # If we are in the right region, get the information from the line. |
| revision = blame_line['commit'] |
| author = blame_line['author']['name'] |
| revision_url_parts = self.url_parts_map['revision_url'] % revision |
| revision_url = base_url + revision_url_parts |
| # TODO(jeun): Add a way to get content from JSON object. |
| content = None |
| |
| (revision_info, _) = self.ParseChangelog(component, revision, revision) |
| message = revision_info[revision]['message'] |
| time = revision_info[revision]['time'] |
| return (content, revision, author, revision_url, message, time) |
| |
| # Return None if no region contains the requested line. |
| return None |