blob: a934bf3823a03b1f16db8b10371a386d2383dd39 [file] [log] [blame]
#!/usr/bin/env python2.5
#
# Copyright 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""List objects represent a Google safe browsing blacklist."""
import hashprefix_trie
import util
import logging
import urlparse
class List(object):
"""
This represents a google safebrowsing list.
"""
def __init__(self, name):
self._name = name
# Time this list was last successfully updated from a download request.
self._update_time = None
# Trie that maps hashprefix to AddEntries.
self._prefix_trie = hashprefix_trie.HashprefixTrie()
# Map of addchunknum to map of hashprefix to AddEntry.
# Keys are only deleted from this when we get an AddDel.
self._chunknum_map = {}
# Maps addchunknum -> prefix -> SubEntry
self._subbed = {}
# Map of subchunknum to a list of SubEntry. Sometimes different subchunks
# will sub the same expression. In that case, _subbed will reference the
# most recent subchunk, and _subchunks will store all of the subchunks.
self._subchunks = {}
def Name(self):
return self._name
def SetUpdateTime(self, timestamp):
self._update_time = timestamp
def UpdateTime(self):
return self._update_time
def AddChunkMap(self):
"""
Returns the mapping of add chunks -> prefix -> AddEntry
"""
return self._chunknum_map
def SubChunkMap(self):
"""
Returns the mapping of sub chunks -> SubEntry
"""
return self._subchunks
def NumPrefixes(self):
"""
Return the number of prefixes in this list.
"""
return self._prefix_trie.Size()
def GetPrefixMatches(self, fullhash):
"""
Returns all AddEntry objects whose hash is a prefix of the given fullhash.
"""
return self._prefix_trie.GetPrefixMatches(fullhash)
def GotAddChunk(self, chunknum):
return self._chunknum_map.has_key(chunknum)
def GotSubChunk(self, chunknum):
return self._subchunks.has_key(chunknum)
def AddFullHash(self, fullhash, addchunknum, timestamp):
"""
Add the full hash for an existing prefix.
Return True if the expression was actually added, or False if it was
previously subbed, or if no prefix for fullhash has been received.
"""
for entry in self._prefix_trie.GetPrefixMatches(fullhash):
if entry.AddChunkNum() == addchunknum:
entry.SetFullHash(fullhash, timestamp)
return True
return False
def AddPrefix(self, hash, addchunknum):
"""Try to add a prefix for the list.
Args:
hash: either a hash-prefix or a full-hash.
addchunknum: the add chunk number.
Return:
True if the expression was added, or False if it was
previously subbed.
"""
if util.IsFullHash(hash):
prefix, fullhash = hash, hash
else:
prefix, fullhash = hash, None
# Check to see whether that add entry was previously subbed.
sub_entry = self._subbed.get(addchunknum, {}).get(prefix, None)
if sub_entry:
# This expression has been subbed.
logging.debug('Presubbed: %s:%d:%s', self.Name(), addchunknum,
util.Bin2Hex(prefix))
# We have to create an empty add chunk in case it doesn't exist so that we
# record that we have received the chunk.
self._chunknum_map.setdefault(addchunknum, {})
# We no longer need this sub entry since we received its corresponding add
# entry.
del self._subbed[addchunknum][prefix]
if not self._subbed[addchunknum]:
del self._subbed[addchunknum]
self._subchunks[sub_entry.SubNum()].remove(sub_entry)
return False
chunknum_prefixes = self._chunknum_map.setdefault(addchunknum, {})
if prefix in chunknum_prefixes:
logging.warning('Prefix %s already added from add chunk %d. Ignoring',
util.Bin2Hex(prefix), addchunknum)
return False
add_entry = AddEntry(prefix, addchunknum, fullhash=fullhash)
chunknum_prefixes[prefix] = add_entry
self._prefix_trie.Insert(prefix, add_entry)
return True
def RemovePrefix(self, prefix, subchunknum, addchunknum):
"""
Return True iff there is a prefix to remove.
"""
logmsg = '%s:%d:%s' % (self.Name(), addchunknum, util.Bin2Hex(prefix))
logging.debug('attempted sub: %s', logmsg)
# Lets see if we already have the corresponding add entry.
if addchunknum in self._chunknum_map:
# We have to create an empty sub chunk in case it does not exist so that
# we record that we received the sub chunk.
self._subchunks.setdefault(subchunknum, [])
# If an add entry exists we need to remove it. If the entry does not
# exist but the add chunk is empty we don't have to do anything.
add_entry = self._chunknum_map[addchunknum].get(prefix, None)
if add_entry is not None:
logging.debug('successful sub: %s', logmsg)
self._prefix_trie.Delete(prefix, add_entry)
# Now delete entry from the chunknum map as well.
del self._chunknum_map[addchunknum][prefix]
elif self._chunknum_map[addchunknum]:
# The prefix does not exist in this add chunk and the add chunk is not
# empty. This should never happen.
logging.warning('Unable to remove missing prefix:%s sub:%d add:%s',
util.Bin2Hex(prefix), subchunknum, addchunknum)
return False
return True
# We have not yet received the corresponding add entry. Store the
# sub entry for later.
entry = SubEntry(prefix, subchunknum, addchunknum)
self._subbed.setdefault(addchunknum, {})[prefix] = entry
self._subchunks.setdefault(subchunknum, []).append(entry)
return False
def AddEmptyAddChunk(self, addchunknum):
"""
Adds the addchunknum to the list of known chunks but without any associated
data. If data currently exists for the chunk it is removed.
"""
if self.DeleteAddChunk(addchunknum):
logging.debug("Removing data that was associated with add chunk %d" %
addchunknum)
self._chunknum_map[addchunknum] = {}
def AddEmptySubChunk(self, subchunknum):
"""
Adds the subchunknum to the list of known chunks but without any associated
data. If data currently exists for the chunk it is removed.
"""
if subchunknum in self._subchunks:
self.DeleteSubChunk(subchunknum)
self._subchunks[subchunknum] = []
def DeleteAddChunk(self, addchunknum):
# No matter what, we remove sub expressions that point to this chunk as they
# will never need to be applied.
if addchunknum in self._subbed:
for sub_entry in self._subbed[addchunknum].itervalues():
# Remove the sub entry from the subchunks map.
self._subchunks[sub_entry.SubNum()].remove(sub_entry)
del self._subbed[addchunknum]
if addchunknum not in self._chunknum_map:
# Never received or already AddDel-ed this add chunk.
return False
# Remove entries from _chunknum_map
chunknum_prefixes = self._chunknum_map[addchunknum]
del self._chunknum_map[addchunknum]
if not len(chunknum_prefixes):
# Add chunk was already empty.
return True
# Remove entries from _prefix_trie
for prefix, add_entry in chunknum_prefixes.iteritems():
self._prefix_trie.Delete(prefix, add_entry)
return True
def DeleteSubChunk(self, subchunknum):
"""Deletes the sub chunk with the given sub chunk number.
Returns:
True iff the sub chunk was removed. Note: this method returns true when
an empty sub chunk gets removed.
"""
if subchunknum not in self._subchunks:
return False
entries = self._subchunks.pop(subchunknum)
for entry in entries:
del self._subbed[entry.AddNum()][entry.Prefix()]
if not self._subbed[entry.AddNum()]:
# No more subs for that add chunk.
del self._subbed[entry.AddNum()]
return True
def DownloadRequest(self, should_mac=False):
"""
Return the state of this List as a string as required for download requests.
"""
addnums = self._chunknum_map.keys()
addnums.sort()
subnums = self._subchunks.keys()
subnums.sort()
dlreq = '%s;' % (self.Name(),)
if addnums:
dlreq = '%sa:%s' % (dlreq, self._GetRangeStr(addnums))
if subnums:
if addnums:
dlreq = '%s:' % (dlreq,)
dlreq = '%ss:%s' % (dlreq, self._GetRangeStr(subnums))
if should_mac:
if addnums or subnums:
dlreq = '%s:mac' % (dlreq,)
else:
dlreq = '%smac' % (dlreq,)
return dlreq
def _GetRangeStr(self, nums):
"""
nums: sorted list of integers.
"""
if len(nums) == 0:
return ''
output = []
i = 0
while i < len(nums):
output.append(str(nums[i]))
use_range = False
while i < len(nums) - 1 and nums[i + 1] - nums[i] == 1:
i += 1
use_range = True
if use_range:
output.append('-')
output.append(str(nums[i]))
if i < len(nums) - 1:
output.append(',')
i += 1
return ''.join(output)
class AddEntry(object):
__slots__ = ('_prefix', '_addchunknum', '_fulllength', '_gethash_timestamp')
def __init__(self, prefix, addchunknum, fullhash=None):
"""
Create an add entry with the given prefix and addchunknum. Fullhash
is set to the full-length hash, if one is present for this entry.
"""
self._prefix = prefix
self._addchunknum = addchunknum
# Full length hash associated with this AddEntry, if any.
self._fulllength = fullhash
# Timestamp associated with the most recent gethash response that set
# self._fulllength, if any.
self._gethash_timestamp = None
def __str__(self):
p = self._prefix
if p is not None:
p = util.Bin2Hex(p)
f = self._fulllength
if f is not None:
f = util.Bin2Hex(f)
return 'AddEntry(%s, %s, %d)' % (p, f, self._addchunknum)
def __eq__(self, other):
return str(self) == str(other)
def __repr__(self):
return self.__str__()
def __cmp__(self, other):
if self._addchunknum == other._addchunknum:
if self._prefix == other._prefix:
return cmp(self._fulllength, other._fulllength)
return cmp(self._prefix, other._prefix)
return cmp(self._addchunknum, other._addchunknum)
def Prefix(self):
return self._prefix
def FullHash(self):
"""
Return the full length hash if we have it. Otherwise, None.
"""
return self._fulllength
def GetHashTimestamp(self):
return self._gethash_timestamp
def SetFullHash(self, fullhash, timestamp):
self._fulllength = fullhash
self._gethash_timestamp = timestamp
def AddChunkNum(self):
return self._addchunknum
class SubEntry(object):
__slots__ = ('_prefix', '_subnum', '_addnum')
def __init__(self, hash_prefix, subchunknum, addchunknum):
"""
hash_prefix: None to sub a full domain add.
"""
self._prefix = hash_prefix
self._subnum = subchunknum
self._addnum = addchunknum
def __str__(self):
return 'SubEntry(%s, sub:%d, add:%d)' % (util.Bin2Hex(self.Prefix()),
self.SubNum(), self.AddNum())
def __cmp__(self, other):
if self._prefix == other._prefix:
if self._subnum == other._subnum:
return cmp(self._addnum, other._addnum)
return cmp(self._subnum, other._subnum)
return cmp(self._prefix, other._prefix)
def Prefix(self):
return self._prefix
def SubNum(self):
return self._subnum
def AddNum(self):
return self._addnum