blob: 478dccbf8df52cc6e4651fbf66410b489f8537d5 [file] [log] [blame]
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Code for representing and manipulating handlers in app.yaml.
App.yaml requires developers to list handlers - specifications for how URL
requests in their app are handled. This module contains classes for
representing handlers, as well as functions for creating handlers from
specifications in appengine-web.xml and web.xml, manipulating and matching
paths, and ordering them correctly so that the yaml file preserves the semantics
of the user-specified xml.
In this module:
Handler: Ancestor class that provides pattern matching and other utilities.
SimpleHandler: A representation of path handling specified in XML - a path
along with properties detailing how it is handled.
OverlappedHandler: A representation of combinations of paths specified by
users in Xml. OverlappedHandlers combine properties, such as security
settings, from the SimpleHandlers that they are made up of.
GetOrderedIntersection: Returns an ordered list of Handlers that can be
written directly to Yaml.
"""
import fnmatch
import itertools
import re
class Handler(object):
"""Ancestor class for Handler manipulation. Patterns are globs.
(http://en.wikipedia.org/wiki/Glob_(programming)).
"""
ALL_PROPERTIES = [
'expiration',
'http_headers',
'required_role',
'transport_guarantee',
'type',
'welcome'
]
def __init__(self, pattern):
self.pattern = pattern
def _GetPattern(self):
return self._pattern
def _SetPattern(self, the_pattern):
self._pattern = the_pattern
self._regex = re.compile(re.escape(the_pattern).replace('\\*', '.*') + '$')
self.is_literal = '*' not in the_pattern
pattern = property(_GetPattern, _SetPattern)
@property
def regex(self):
return self._regex
def Regexify(self):
"""Returns a regex-looking string to write to Yaml."""
return self.pattern.replace('.', '\\.').replace('*', '.*')
def MatchesString(self, pattern_str):
"""Returns true if input path string is matched by glob pattern."""
return self._regex.match(pattern_str) is not None
def MatchesAll(self, other_glob):
"""Returns True if self matches everything other_glob matches."""
return self.MatchesString(other_glob.pattern)
def HasMoreSpecificPatternThan(self, other_handler):
"""Returns True if self is more specific than other_handler.
Priority in determining specificity is first determined by literal-ness,
second by length. This is according to the Java servlet spec for
mapping URL paths.
Args:
other_handler: another handler to compare against.
Returns:
True if self.pattern is a literal and other_handler.pattern is not,
False if vice versa, and otherwise True if self.pattern is longer.
"""
if self.is_literal != other_handler.is_literal:
return self.is_literal
return len(self.pattern) > len(other_handler.pattern)
def __eq__(self, other_handler):
return (isinstance(other_handler, Handler) and
self.__dict__ == other_handler.__dict__)
def IsFullyHandledBy(self, other_handler):
"""Returns True if self specifies something unique.
For example, If we have a Handler with pattern "foo*bar"
which has properties {'type': 'static'}, and other_handler
has pattern "foo*" with the same properties, then
other_handler does everything that self does.
Args:
other_handler: other handler to be matched against
Returns:
Boolean value of whether other_handler fully handles self.
"""
return (other_handler.MatchesAll(self) and
self._PropertiesMatch(other_handler))
def _PropertiesMatch(self, other):
"""Returns True if other.properties is superset of self.properties."""
for prop in Handler.ALL_PROPERTIES:
if self.GetProperty(prop) not in (None, other.GetProperty(prop)):
return False
return True
def _MakeHandlerList(*pattern_strings):
return [SimpleHandler(a) for a in pattern_strings]
class SimpleHandler(Handler):
"""Subclass of Handler which includes user-defined settings with urls.
SimpleHandlers should be treated as immutable.
"""
def __init__(self, pattern, properties=None):
super(SimpleHandler, self).__init__(pattern)
if properties:
self.properties = properties
else:
self.properties = {}
def __hash__(self):
return hash((self.pattern, tuple(sorted(self.properties.items()))))
def GetProperty(self, prop, default=None):
return self.properties.get(prop, default)
def CreateOverlappedHandler(self):
"""Creates a Combined Handler with self's pattern and self as a child."""
return OverlappedHandler(self.pattern, matchers=[self])
class OverlappedHandler(Handler):
"""Subclass of Handler which allows for the combination of SimpleHandlers.
An intuitive way to think about globbed patterns is as sets - for example, the
pattern "admin/*" is the set of all paths that are in the admin/ directory,
and the pattern "*.txt" is the set of all paths to text files. An
OverlappedHandler is designed to describe the intersection of the sets
of paths - ie the set of paths that is matched by EVERY one its
handler patterns.
In the Xml files, paths are specified in different places - in servlet
patterns, in static files includes, in security constraint specifications,
etc. There is often some overlap between the paths that are specified by
these patterns. Since App.Yaml does not have separate places to specify how
different paths are handled, but rather just lists paths along with a bunch
of ways that the paths is handled, we need to make sure that these properties
for various paths are being combined at some point in the translation process.
Thus an OverlappedHandler holds a list of "matchers" - ie. handlers with more
general patterns, to choose properties from. OverlappedHandlers do not
explicitly specify properties but rather choose the value from the most
specific "matcher" with the value of that property specified. The matchers
are SimpleHandlers.
Attributes:
pattern: inherited from Handler.
matchers: A list of SimpleHandlers. Matchers are handlers which happen to
match OverlappedHandler's pattern and whose properties are thus necessary
to track in order to make sure that OverlappedHandler's pattern is handled
correctly.
"""
def __init__(self, pattern, matchers=()):
super(OverlappedHandler, self).__init__(pattern)
self.matchers = []
for sub_handler in matchers:
self.AddMatchingHandler(sub_handler)
def GetProperty(self, prop, default=None):
"""Returns the property value of matcher with most specific pattern."""
largest_handler = None
prop_value = default
for sub_handler in self.matchers:
if sub_handler.GetProperty(prop) is not None:
if (not largest_handler or
sub_handler.HasMoreSpecificPatternThan(largest_handler)):
largest_handler = sub_handler
prop_value = sub_handler.GetProperty(prop)
return prop_value
def __eq__(self, other_handler):
return (isinstance(other_handler, OverlappedHandler) and
self.pattern == other_handler.pattern and
set(self.matchers) == set(other_handler.matchers))
def AddMatchingHandler(self, matcher):
"""Flattens the handler if it is overlapped and adds to matchers."""
if isinstance(matcher, SimpleHandler):
self.matchers.append(matcher)
else:
self.matchers.extend(matcher.matchers)
def GetOrderedIntersection(handler_list):
"""Implements algorithm for combining and reordering Handlers.
GetOrderedIntersection performs the heavy lifting of converting a randomly
ordered list of Handlers (globbed patterns, each with various potentially
conflicting properties attached), into an ordered list of handlers with
property values resolved.
The purpose of this process is to convert the Web.xml URL mapping scheme to
that of app.yaml. In Web.xml the most specific path, according to
literal-ness and length, is chosen. In app.yaml the first listed matching
path is chosen. Thus to preserve user preferences through the translation
process we order the patterns from specific to general.
For example, if three handlers are given as input (in any order):
"/admin/*" (security=admin)
"/*.png" (type=static)
"*" (type=dynamic, security=required)
we want to get this ordered list as output:
1. "/admin/*.png" (security=admin, type=static)
2. "/admin/*" (security=admin, type=dynamic)
3. "/*.png" (security=required, type=static)
4. "*" (type=dynamic, security=required).
so that the properties of any given path are those of the longest matching
path. The SimpleHandler and OverlappedHandler classes provide the logic for
attaching globbed patterns to the right properties and resolving potential
property value conflicts.
Args:
handler_list: List of SimpleHandlers in arbitrary order.
Returns:
An ordered list of OverlappedHandlers and SimpleHandlers. See the above
example for what this would look like.
"""
results = _Intersect(handler_list)
results = sorted(results, key=lambda h: h.pattern)
_ReorderHandlers(results)
_GivePropertiesFromGeneralToSpecific(results)
return _RemoveRedundantHandlers(results)
def _RemoveRedundantHandlers(handler_list):
"""Removes duplicated or unnecessary handlers from the list.
If a handler's pattern and functionality are fully matched by another handler
with a more general pattern, we do not need the first handler. At the same
time, we remove duplicates.
Args:
handler_list: list of ordered handlers with possibly redundant entries.
Returns:
new list which contains entries of handler_list, except redundant ones.
"""
no_duplicates = []
patterns_found_so_far = set()
for i in xrange(len(handler_list)):
current_handler = handler_list[i]
matched_by_later = False
for j in xrange(i + 1, len(handler_list)):
if current_handler.IsFullyHandledBy(handler_list[j]):
matched_by_later = True
break
if (not matched_by_later and
current_handler.pattern not in patterns_found_so_far):
no_duplicates.append(current_handler)
patterns_found_so_far.add(current_handler.pattern)
return no_duplicates
def _ReorderHandlers(handler_list):
"""Reorders handlers from specific to general for writing to yaml file.
This is a topological sort - ie. it makes sure that elements related to
each other are ordered correctly. In this case, we want to make sure that
any Handler with a pattern that matches all of the Handlers of another pattern
occurs later. Thus, we want to make sure that "foo*" occurs after "foo*bar",
but it does not matter how it is ordered relative to "*baz", since they have
an empty intersection of patterns.
The problem with using Python's built-in sorted is that it relies on the
class's less-than operator. We want an ordering such that
(handler1 < handler2) iff (not handler1.MatchesAll(handler2))
Then, since "foo*" and "*baz" do not contain each other, foo* < *baz == True
and *baz < foo* == True. This is a problem because Python's sorted does not
explicitly compare every pair of elements, but operates under the assumption
that if a < b and b < c, then a < c. Therefore, if we have
a = "foo*bar", b = "*baz", c = "foo*"
then a < b == True and b < c == True, so a < c is assumed to be True. This
often leads to wrong orderings.
Therefore, this function performs a topological sort
(http://en.wikipedia.org/wiki/Topological_sorting), reordering only those
patterns where one matches all of the other.
This is an in-place sort.
Args:
handler_list: Unordered list of handlers.
"""
for i, j in itertools.combinations(xrange(len(handler_list)), 2):
if handler_list[i].MatchesAll(handler_list[j]):
handler_list[i], handler_list[j] = handler_list[j], handler_list[i]
def _GivePropertiesFromGeneralToSpecific(handler_list):
"""Makes sure that handlers have all properties of more general ones.
Ex. Since "*" matches everything "admin/*" matches, we want everything
matched by "admin/*" to have all the properties specified to "*".
Therefore we give properties from the "*" handler to the "admin/*" handler.
If the "*" handler is a SimpleHandler, it carries its own properties, so it
becomes a child of the "admin/*" handler. Otherwise, its properties are
define by its children, so its children are copied to the "admin/*"
handler.
This is an in-place mutation of the list.
Args:
handler_list: List of ordered Handlers.
"""
for i, j in itertools.combinations(xrange(len(handler_list)), 2):
if handler_list[j].MatchesAll(handler_list[i]):
if isinstance(handler_list[i], SimpleHandler):
handler_list[i] = handler_list[i].CreateOverlappedHandler()
handler_list[i].AddMatchingHandler(handler_list[j])
def _Intersect(handler_list):
"""Returns an unordered list of all possible intersections of handlers."""
if not handler_list:
return set()
handlers = set([handler_list[0]])
for input_handler in handler_list[1:]:
new_handlers = set()
for g in handlers:
new_handlers |= _IntersectTwoHandlers(input_handler, g)
handlers = new_handlers
return list(handlers)
def _IntersectTwoHandlers(first_handler, second_handler):
"""Returns intersections of first_handler and second_handler patterns."""
shared_prefix = _SharedPrefix(first_handler.pattern, second_handler.pattern)
if shared_prefix:
return _HandleCommonPrefix(first_handler, second_handler, shared_prefix)
shared_suffix = _SharedSuffix(first_handler.pattern, second_handler.pattern)
if shared_suffix:
return _HandleCommonSuffix(first_handler, second_handler, shared_suffix)
handler_set = set()
handler_set |= _HandleWildcardCases(first_handler, second_handler)
handler_set |= _HandleWildcardCases(second_handler, first_handler)
handler_set |= set([first_handler, second_handler])
return handler_set
def _HandleWildcardCases(first_handler, second_handler):
"""Handle cases with trailing and leading wildcards.
This function finds the set of intersections of two handlers where one has a
leading wildcard (eg. *foo) in its pattern and at least one has a trailing
wildcard (eg. baz*) in its pattern. The arguments are not symmetric.
Args:
first_handler: A SimpleHandler instance
second_handler: A SimpleHandler instance
Returns:
A set of intersection patterns of the two Handlers. Example: If the
pattern of first_handler is abc* and that of the second is *xyz, we return
the intersection of the patterns, abc*xyz. Find more examples in the inline
comments.
"""
merged_handlers = set()
if len(first_handler.pattern) <= 1 or len(second_handler.pattern) <= 1:
return merged_handlers
if (first_handler.pattern[-1], second_handler.pattern[0]) != ('*', '*'):
return merged_handlers
first_no_star = first_handler.pattern[:-1]
merged_handlers.add(SimpleHandler(first_no_star + second_handler.pattern))
if second_handler.MatchesString(first_no_star):
merged_handlers.add(SimpleHandler(first_no_star))
return merged_handlers
def _HandleCommonPrefix(first_handler, second_handler, common_prefix):
"""Strips common literal prefix from handlers and intersects the substrings.
Ex. "abc" and "a*c" become "a | bc" and "a | *c". We find the set of
intersections of "bc" and "*c" and prepend "a" onto each member of that set.
By common literal prefix, we mean a prefix of the two patterns that contains
no wildcard characters; any string matched by either of the patterns must
begin with that prefix.
Args:
first_handler: A SimpleHandler.
second_handler: A SimpleHandler.
common_prefix: The shared literal prefix of the patterns of the two
handlers.
Returns:
The set of intersections of first_handler and second_handler. This is done
by stripping the common prefix to create new SimpleHandlers which we call
_IntersectTwoHandlers on, and then prepend the prefix to each member of that
set.
"""
stripped_first_handler = SimpleHandler(
first_handler.pattern[len(common_prefix):], first_handler.properties)
stripped_second_handler = SimpleHandler(
second_handler.pattern[len(common_prefix):], second_handler.properties)
stripped_handlers = _IntersectTwoHandlers(stripped_first_handler,
stripped_second_handler)
handlers = set()
for stripped_handler in stripped_handlers:
handlers.add(SimpleHandler(common_prefix + stripped_handler.pattern,
stripped_handler.properties))
return handlers
def _HandleCommonSuffix(first_handler, second_handler, common_suffix):
"""Strips matching suffix from handlers and intersects the substrings."""
stripped_first_handler = SimpleHandler(
first_handler.pattern[:-len(common_suffix)], first_handler.properties)
stripped_second_handler = SimpleHandler(
second_handler.pattern[:-len(common_suffix)], second_handler.properties)
stripped_handlers = _IntersectTwoHandlers(
stripped_first_handler, stripped_second_handler)
handlers = set()
for stripped_handler in stripped_handlers:
handlers.add(SimpleHandler(stripped_handler.pattern + common_suffix,
stripped_handler.properties))
return handlers
def _SharedPrefix(pattern1, pattern2):
"""Returns the shared prefix of two patterns.
Args:
pattern1: A handler's pattern string
pattern2: A handler's pattern string
Returns:
The shared prefix of the two patterns, up to the index of the first
wildcard of either pattern. For example, the shared prefix of "a*bd" and
"ac*d" is a. The shared prefix of "*x" and "y" is the empty string. The
shared prefix of "john" and "johnny" is the empty string, and the shared
prefix of "bc*" and "c*" is also the empty string.
"""
first_star1 = (pattern1 + '*').find('*')
first_star2 = (pattern2 + '*').find('*')
if (first_star1, first_star2) != (len(pattern1), len(pattern2)):
min_star = min(first_star1, first_star2)
if min_star and pattern1[:min_star] == pattern2[:min_star]:
return pattern1[:min_star]
return ''
def _SharedSuffix(pattern1, pattern2):
"""Returns the shared suffix of two patterns."""
return _SharedPrefix(pattern1[::-1], pattern2[::-1])[::-1]