blob: 81ed3deb86fd5287ce866eccc310307f37668cc9 [file] [log] [blame]
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Generate Datastore Stats over Dev mode appserver's datastore."""
import datetime
import logging
from google.appengine.api import datastore
from google.appengine.api import datastore_admin
from google.appengine.api import datastore_types
from google.appengine.api import users
from google.appengine.ext.db import stats
# Number of stale stat keys handed to a single datastore.Delete() call.
DELETE_BATCH_SIZE = 100

# Lookup key of the application-wide total inside the whole-app stats table.
# Shape: (stat model class, key name, namespace).
_GLOBAL_KEY = (stats.GlobalStat, 'total_entity_usage', '')

# Maps the Python type of a decoded property value to a pair of labels:
#   [0] the property-type name that entity bytes/counts are recorded under,
#   [1] the property-type name that built-in index bytes/counts are
#       recorded under.
_PROPERTY_TYPE_TO_DSS_NAME = {
    unicode: ('String', 'STRING'),
    bool: ('Boolean', 'BOOLEAN'),
    long: ('Integer', 'INT64'),
    type(None): ('NULL', 'NULL'),
    float: ('Float', 'DOUBLE'),
    datastore_types.Key: ('Key', 'REFERENCE'),
    datastore_types.Blob: ('Blob', 'STRING'),
    datastore_types.EmbeddedEntity: ('EmbeddedEntity', 'STRING'),
    datastore_types.ByteString: ('ShortBlob', 'STRING'),
    datastore_types.Text: ('Text', 'STRING'),
    users.User: ('User', 'USER'),
    datastore_types.Category: ('Category', 'STRING'),
    datastore_types.Link: ('Link', 'STRING'),
    datastore_types.Email: ('Email', 'STRING'),
    datetime.datetime: ('Date/Time', 'INT64'),
    datastore_types.GeoPt: ('GeoPt', 'POINT'),
    datastore_types.IM: ('IM', 'STRING'),
    datastore_types.PhoneNumber: ('PhoneNumber', 'STRING'),
    datastore_types.PostalAddress: ('PostalAddress', 'STRING'),
    datastore_types.Rating: ('Rating', 'INT64'),
    datastore_types.BlobKey: ('BlobKey', 'STRING'),
}
class DatastoreStatsProcessor(object):
  """Generates datastore stats for an app's datastore entities.

  Typical use is ``DatastoreStatsProcessor(app).Run().Report()``.

  Attributes:
    app_id: The application id resolved from the constructor argument.
    whole_app_stats: Dict of application-wide stat entities, keyed by
      (stat model class, key name, '').
    namespace_stats: Dict of per-namespace stat entities, keyed by
      (stat model class, key name, namespace).
    found_non_empty_namespace: True once an entity has been seen in a
      namespace other than ''. Per-namespace stats are only written then.
    old_stat_keys: Keys of stat entities found during the scan; they are
      deleted before the new stats are written.
    timestamp: Time stamped onto every newly created stat entity.
    written: Number of stat entities stored by the most recent Run().
  """

  def __init__(self, _app=None):
    """Initializes empty stat tables for the given application.

    Args:
      _app: Application id, passed through datastore_types.ResolveAppId.
    """
    self.app_id = datastore_types.ResolveAppId(_app)
    self.whole_app_stats = {}
    self.namespace_stats = {}
    self.found_non_empty_namespace = False
    self.old_stat_keys = []
    self.timestamp = datetime.datetime.utcnow()
    # Defined up front so Report() works even when Run() was never called.
    self.written = 0
    # Composite index definitions, fetched lazily on first use (see
    # __GetCompositeIndices) instead of once per scanned entity.
    self.__composite_indices = None

  def __ScanAllNamespaces(self):
    """Scans all the namespaces and processes each namespace."""
    namespace_query = datastore.Query('__namespace__', _app=self.app_id)
    for namespace_entity in namespace_query.Run():
      name = namespace_entity.key().name()
      if name is None:
        # A namespace entity without a key name is processed as ''.
        name = ''
      self.__ProcessNamespace(name)

  def __ProcessNamespace(self, namespace):
    """Processes all the entities in a given namespace.

    Existing stat entities are remembered for deletion and only counted
    towards the totals; every other entity gets the full per-kind and
    per-property treatment.

    Args:
      namespace: Name of the namespace to scan ('' for the default one).
    """
    stat_classes = stats._DATASTORE_STATS_CLASSES_BY_KIND
    all_query = datastore.Query(namespace=namespace, _app=self.app_id)
    for entity in all_query.Run():
      if namespace != '':
        self.found_non_empty_namespace = True
      key = entity.key()
      proto = entity.ToPb()
      proto_size = len(proto.SerializeToString())
      kind = key.kind()
      if kind in stat_classes:
        self.old_stat_keys.append(key)
        self.__AggregateTotal(proto_size, key, proto, namespace,
                              stat_classes[kind])
      else:
        self.__ProcessUserEntity(proto_size, key, proto, namespace)

  def __GetPropertyIndexStat(self, namespace, kind_name,
                             entity_key_size, prop):
    """Returns the size and count of indexes for a property of an EntityProto.

    Args:
      namespace: Namespace of the owning entity.
      kind_name: Kind of the owning entity.
      entity_key_size: Size attributed to the entity key, in bytes.
      prop: The property protocol buffer.

    Returns:
      A (size in bytes, number of index entries) tuple. The count is always
      2 (presumably one ascending and one descending entry -- confirm).
    """
    property_index_size = (len(self.app_id) + len(kind_name) +
                           len(prop.value().SerializeToString()) +
                           len(namespace) + entity_key_size)
    return (property_index_size, 2)

  def __GetTypeIndexStat(self, namespace, kind_name, entity_key_size):
    """Returns the size and count of indexes by type of an EntityProto.

    Args:
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size attributed to the entity key, in bytes.

    Returns:
      A (size in bytes, number of index entries) tuple. The count is
      always 1.
    """
    type_index_size = (len(self.app_id) + len(kind_name) + entity_key_size
                       + len(namespace))
    return (type_index_size, 1)

  def __GetBuiltinIndexStat(self, proto, namespace, kind_name,
                            entity_key_size):
    """Returns total size and count of the built-in indexes of an entity.

    This is the type index plus the property index of every property in
    both the regular and the raw property list.

    Args:
      proto: The EntityProto of the entity.
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size attributed to the entity key, in bytes.

    Returns:
      A (size in bytes, number of index entries) tuple.
    """
    total_size, total_count = self.__GetTypeIndexStat(namespace, kind_name,
                                                      entity_key_size)
    for prop_list in (proto.property_list(), proto.raw_property_list()):
      for prop in prop_list:
        index_size, index_count = self.__GetPropertyIndexStat(
            namespace, kind_name, entity_key_size, prop)
        total_size += index_size
        total_count += index_count
    return (total_size, total_count)

  def __ProcessUserEntity(self, proto_size, key, proto, namespace):
    """Increments datastore stats for a non stats record.

    Args:
      proto_size: Serialized size of the entity, in bytes.
      key: The entity's key.
      proto: The EntityProto of the entity.
      namespace: Namespace the entity lives in.
    """
    self.__AggregateTotal(proto_size, key, proto, namespace, None)

    kind_name = key.kind()
    # NOTE(review): unlike __AggregateTotal, the key size used here also
    # includes len(namespace). Kept as-is; confirm which one is intended.
    entity_key_size = (len(proto.key().app()) + len(namespace) +
                       len(proto.key().path().SerializeToString()) +
                       len(proto.entity_group().SerializeToString()))

    self.__AggregateCompositeIndices(proto, namespace, kind_name,
                                     entity_key_size)

    builtin_index_size, builtin_index_count = self.__GetBuiltinIndexStat(
        proto, namespace, kind_name, entity_key_size)

    self.__Increment(self.whole_app_stats, 1,
                     (stats.KindStat, kind_name, ''),
                     proto_size,
                     builtin_index_count=builtin_index_count,
                     builtin_index_size=builtin_index_size,
                     kind_name=kind_name)
    self.__Increment(self.namespace_stats, 1,
                     (stats.NamespaceKindStat, kind_name, namespace),
                     proto_size,
                     builtin_index_count=builtin_index_count,
                     builtin_index_size=builtin_index_size,
                     kind_name=kind_name)

    # Entities without a parent are root entities of their entity group.
    if key.parent() is None:
      whole_app_model = stats.KindRootEntityStat
      namespace_model = stats.NamespaceKindRootEntityStat
    else:
      whole_app_model = stats.KindNonRootEntityStat
      namespace_model = stats.NamespaceKindNonRootEntityStat

    self.__Increment(self.whole_app_stats, 1,
                     (whole_app_model, kind_name, ''),
                     proto_size,
                     kind_name=kind_name)
    self.__Increment(self.namespace_stats, 1,
                     (namespace_model, kind_name, namespace),
                     proto_size,
                     kind_name=kind_name)

    self.__ProcessProperties(
        kind_name,
        namespace,
        entity_key_size,
        (proto.property_list(), proto.raw_property_list()))

  def __ProcessProperties(self, kind_name, namespace, entity_key_size,
                          prop_lists):
    """Aggregates stats for every property in the given property lists.

    A property that cannot be decoded or aggregated because of an
    AssertionError, AttributeError, TypeError or ValueError is logged and
    skipped; processing continues with the next property.

    Args:
      kind_name: Kind of the owning entity.
      namespace: Namespace of the owning entity.
      entity_key_size: Size attributed to the entity key, in bytes.
      prop_lists: Iterable of property lists to walk.
    """
    for prop_list in prop_lists:
      for prop in prop_list:
        try:
          value = datastore_types.FromPropertyPb(prop)
          self.__AggregateProperty(kind_name, namespace, entity_key_size,
                                   prop, value)
        except (AssertionError, AttributeError, TypeError, ValueError) as e:
          logging.error('Cannot process property %r, exception %s', prop, e)

  def __IncrementPropertyTypeStats(self, stats_dict, stat_model, key_suffix,
                                   namespace, property_type,
                                   index_property_type, size, index_size,
                                   index_count, **fields):
    """Records one property under its entity type and under its index type.

    Entity bytes are booked on the stat named after property_type; the
    built-in index bytes/count are booked on the stat named after
    index_property_type.

    Args:
      stats_dict: The stat table to update.
      stat_model: The stat model class of both stats.
      key_suffix: Text appended to the type name to form the stat key name.
      namespace: Namespace component of the stat key.
      property_type: Type name that entity bytes are recorded under.
      index_property_type: Type name that index bytes are recorded under.
      size: Serialized size of the property, in bytes.
      index_size: Built-in index size of the property, in bytes.
      index_count: Number of built-in index entries of the property.
      **fields: Extra fields set on the stats when they are created.
    """
    self.__Increment(stats_dict, 1,
                     (stat_model, property_type + key_suffix, namespace),
                     size,
                     builtin_index_count=0,
                     builtin_index_size=0,
                     property_type=property_type, **fields)
    self.__Increment(stats_dict, 0,
                     (stat_model, index_property_type + key_suffix,
                      namespace),
                     0,
                     builtin_index_count=index_count,
                     builtin_index_size=index_size,
                     property_type=index_property_type, **fields)

  def __AggregateProperty(self, kind_name, namespace, entity_key_size,
                          prop, value):
    """Aggregates the per-type and per-name stats of a single property.

    Args:
      kind_name: Kind of the owning entity.
      namespace: Namespace of the owning entity.
      entity_key_size: Size attributed to the entity key, in bytes.
      prop: The property protocol buffer.
      value: The decoded property value; its exact type selects the
        property-type names from _PROPERTY_TYPE_TO_DSS_NAME.

    Raises:
      KeyError: If type(value) has no entry in _PROPERTY_TYPE_TO_DSS_NAME.
    """
    property_name = prop.name()
    property_type, index_property_type = (
        _PROPERTY_TYPE_TO_DSS_NAME[type(value)])
    size = len(prop.SerializeToString())

    index_size, index_count = self.__GetPropertyIndexStat(namespace, kind_name,
                                                          entity_key_size, prop)

    kind_suffix = '_' + kind_name
    name_kind_suffix = '_' + property_name + kind_suffix

    # Per property type.
    self.__IncrementPropertyTypeStats(
        self.whole_app_stats, stats.PropertyTypeStat, '', '',
        property_type, index_property_type, size, index_size, index_count)
    self.__IncrementPropertyTypeStats(
        self.namespace_stats, stats.NamespacePropertyTypeStat, '', namespace,
        property_type, index_property_type, size, index_size, index_count)

    # Per property type and kind.
    self.__IncrementPropertyTypeStats(
        self.whole_app_stats, stats.KindPropertyTypeStat, kind_suffix, '',
        property_type, index_property_type, size, index_size, index_count,
        kind_name=kind_name)
    self.__IncrementPropertyTypeStats(
        self.namespace_stats, stats.NamespaceKindPropertyTypeStat,
        kind_suffix, namespace,
        property_type, index_property_type, size, index_size, index_count,
        kind_name=kind_name)

    # Per property name and kind: entity and index usage share one stat.
    self.__Increment(self.whole_app_stats, 1,
                     (stats.KindPropertyNameStat,
                      property_name + kind_suffix, ''),
                     size,
                     builtin_index_count=index_count,
                     builtin_index_size=index_size,
                     property_name=property_name, kind_name=kind_name)
    self.__Increment(self.namespace_stats, 1,
                     (stats.NamespaceKindPropertyNameStat,
                      property_name + kind_suffix, namespace),
                     size,
                     builtin_index_count=index_count,
                     builtin_index_size=index_size,
                     property_name=property_name, kind_name=kind_name)

    # Per property type, property name and kind.
    self.__IncrementPropertyTypeStats(
        self.whole_app_stats, stats.KindPropertyNamePropertyTypeStat,
        name_kind_suffix, '',
        property_type, index_property_type, size, index_size, index_count,
        property_name=property_name, kind_name=kind_name)
    self.__IncrementPropertyTypeStats(
        self.namespace_stats, stats.NamespaceKindPropertyNamePropertyTypeStat,
        name_kind_suffix, namespace,
        property_type, index_property_type, size, index_size, index_count,
        property_name=property_name, kind_name=kind_name)

  def __GetCompositeIndexStat(self, definition, proto, namespace, kind_name,
                              entity_key_size):
    """Gets statistics of composite index for an index definition of an entity.

    The number of index entries is the product, over the indexed
    properties, of how many values the entity holds for that property.

    Args:
      definition: The composite index definition.
      proto: The EntityProto of the entity.
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size attributed to the entity key, in bytes.

    Returns:
      A (size in bytes, number of index entries) tuple; (0, 0) when the
      entity lacks a value for at least one indexed property.
    """
    entity_props = proto.property_list()
    # One (number of values, total serialized bytes) pair per indexed
    # property of the definition.
    per_property = []
    index_count = 1
    for indexed_prop in definition.property_list():
      name = indexed_prop.name()
      sizes = [len(prop.SerializeToString())
               for prop in entity_props if prop.name() == name]
      per_property.append((len(sizes), sum(sizes)))
      index_count *= len(sizes)

    if index_count == 0:
      return (0, 0)

    # Each value of a property appears in index_count / count entries.
    # The division is exact because index_count is the product of all counts.
    index_only_size = 0
    for count, prop_size in per_property:
      index_only_size += prop_size * (index_count // count)

    index_size = (index_count * (entity_key_size + len(kind_name) +
                                 len(self.app_id) + len(namespace)) +
                  index_only_size * 2)
    return (index_size, index_count)

  def __GetCompositeIndices(self):
    """Returns the app's composite indexes, fetching them on first use."""
    if self.__composite_indices is None:
      self.__composite_indices = list(
          datastore_admin.GetIndices(self.app_id))
    return self.__composite_indices

  def __AggregateCompositeIndices(self, proto, namespace, kind_name,
                                  entity_key_size):
    """Aggregates statistics of composite indexes for an entity.

    Args:
      proto: The EntityProto of the entity.
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size attributed to the entity key, in bytes.
    """
    for index in self.__GetCompositeIndices():
      definition = index.definition()
      if kind_name != definition.entity_type():
        continue

      index_size, index_count = self.__GetCompositeIndexStat(definition, proto,
                                                             namespace,
                                                             kind_name,
                                                             entity_key_size)
      if index_count == 0:
        continue

      # The stat of the default ('') namespace is keyed by the id 1.
      name_id = namespace or 1

      self.__Increment(self.whole_app_stats, 0, _GLOBAL_KEY, 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size)
      self.__Increment(self.whole_app_stats, 0,
                       (stats.NamespaceStat, name_id, ''), 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size,
                       subject_namespace=namespace)
      self.__Increment(self.namespace_stats, 0,
                       (stats.NamespaceGlobalStat, 'total_entity_usage',
                        namespace), 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size)
      self.__Increment(self.whole_app_stats, 0,
                       (stats.KindStat, kind_name, ''), 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size,
                       kind_name=kind_name)
      self.__Increment(self.namespace_stats, 0,
                       (stats.NamespaceKindStat, kind_name, namespace), 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size,
                       kind_name=kind_name)

      # For the per-index stats the "count" is the number of index entries
      # and the "size" is the index size.
      index_id = index.id()
      self.__Increment(self.whole_app_stats, index_count,
                       (stats.KindCompositeIndexStat,
                        kind_name + '_%s' % index_id, ''), index_size,
                       kind_name=kind_name, index_id=index_id)
      self.__Increment(self.namespace_stats, index_count,
                       (stats.NamespaceKindCompositeIndexStat,
                        kind_name + '_%s' % index_id, namespace), index_size,
                       kind_name=kind_name, index_id=index_id)

  def __AggregateTotal(self, size, key, proto, namespace, stat_kind):
    """Aggregates total datastore stats.

    Args:
      size: Serialized size of the entity, in bytes.
      key: The entity's key.
      proto: The EntityProto of the entity.
      namespace: Namespace the entity lives in.
      stat_kind: The stat model class if the entity is itself a stat
        record, otherwise None.
    """
    kind_name = key.kind()
    entity_key_size = (len(proto.key().app()) +
                       len(proto.key().path().SerializeToString()) +
                       len(proto.entity_group().SerializeToString()))

    builtin_index_size, builtin_index_count = self.__GetBuiltinIndexStat(
        proto, namespace, kind_name, entity_key_size)

    # A stat record still contributes its bytes but is not counted in the
    # total it represents. Once count drops to 0 it stays 0 for the
    # remaining increments below.
    if stat_kind == stats.GlobalStat:
      count = 0
    else:
      count = 1

    self.__Increment(self.whole_app_stats, count, _GLOBAL_KEY, size,
                     builtin_index_count=builtin_index_count,
                     builtin_index_size=builtin_index_size)

    # The stat of the default ('') namespace is keyed by the id 1.
    name_id = namespace or 1

    if (stat_kind == stats.NamespaceStat) and (namespace == ''):
      count = 0
    self.__Increment(self.whole_app_stats, count,
                     (stats.NamespaceStat, name_id, ''),
                     size,
                     builtin_index_count=builtin_index_count,
                     builtin_index_size=builtin_index_size,
                     subject_namespace=namespace)

    if stat_kind == stats.NamespaceGlobalStat:
      count = 0
    self.__Increment(
        self.namespace_stats, count,
        (stats.NamespaceGlobalStat, 'total_entity_usage', namespace), size,
        builtin_index_count=builtin_index_count,
        builtin_index_size=builtin_index_size)

  def __Increment(self, stats_dict, count, stat_key, size,
                  builtin_index_count=0, builtin_index_size=0,
                  composite_index_count=0, composite_index_size=0, **kwds):
    """Increments stats for a particular kind.

    Creates the stat entity on first use of stat_key, otherwise adds to
    the existing one.

    Args:
      stats_dict: The dictionary where the entities are held.
        The entities are keyed by stat_key. e.g. The
        __Stat_Total__ entity will be found in stats_dict[_GLOBAL_KEY].
      count: The amount to increment the datastore stat by.
      stat_key: A tuple of (db.Model of the stat, key value, namespace).
      size: The "bytes" to increment the size by.
      builtin_index_count: The count of builtin index to add in to a stat.
      builtin_index_size: The bytes of builtin index to add in to a stat.
      composite_index_count: The count of composite index to add in to a stat.
      composite_index_size: The bytes of composite index to add in to a stat.
      kwds: Name value pairs that are set on the created entities.
    """
    if stat_key not in stats_dict:
      stat_model = stat_key[0](
          key=datastore_types.Key.from_path(stat_key[0].STORED_KIND_NAME,
                                            stat_key[1],
                                            namespace=stat_key[2],
                                            _app=self.app_id),
          _app=self.app_id)
      stats_dict[stat_key] = stat_model
      for field, value in kwds.items():
        setattr(stat_model, field, value)
      stat_model.count = count
      # NOTE(review): fields whose increment is zero are left untouched
      # here, so later "+=" updates presumably rely on the stat model
      # providing a numeric default -- verify against ext.db.stats.
      if size:
        stat_model.entity_bytes = size
      if builtin_index_size:
        stat_model.builtin_index_bytes = builtin_index_size
        stat_model.builtin_index_count = builtin_index_count
      if composite_index_size:
        stat_model.composite_index_bytes = composite_index_size
        stat_model.composite_index_count = composite_index_count
      stat_model.bytes = size + builtin_index_size + composite_index_size
      stat_model.timestamp = self.timestamp
    else:
      stat_model = stats_dict[stat_key]
      stat_model.count += count
      if size:
        stat_model.entity_bytes += size
      if builtin_index_size:
        stat_model.builtin_index_bytes += builtin_index_size
        stat_model.builtin_index_count += builtin_index_count
      if composite_index_size:
        stat_model.composite_index_bytes += composite_index_size
        stat_model.composite_index_count += composite_index_count
      stat_model.bytes += size + builtin_index_size + composite_index_size

  def __Finalize(self):
    """Finishes processing, deletes all old stats and writes new ones."""
    for start in range(0, len(self.old_stat_keys), DELETE_BATCH_SIZE):
      datastore.Delete(self.old_stat_keys[start:start + DELETE_BATCH_SIZE])

    self.written = 0

    # Global and per-namespace totals are only stored when they counted at
    # least one entity; every other whole-app stat is always stored.
    total_models = (stats.GlobalStat, stats.NamespaceStat)
    for stat in self.whole_app_stats.values():
      if stat.count or not isinstance(stat, total_models):
        stat.put()
        self.written += 1

    # Per-namespace stats are only stored when some entity was found
    # outside the default namespace.
    if self.found_non_empty_namespace:
      for stat in self.namespace_stats.values():
        if stat.count or not isinstance(stat, stats.NamespaceGlobalStat):
          stat.put()
          self.written += 1

  def Run(self):
    """Scans the datastore, computes new stats and writes them.

    Returns:
      This processor, so that calls can be chained.
    """
    self.__ScanAllNamespaces()
    self.__Finalize()
    return self

  def Report(self):
    """Produces a small report about the result.

    Returns:
      A one-sentence summary of the application-wide totals and of the
      number of stat records written.
    """
    stat = self.whole_app_stats.get(_GLOBAL_KEY, None)
    entity_size = 0
    entity_count = 0
    builtin_index_size = 0
    builtin_index_count = 0
    composite_index_size = 0
    composite_index_count = 0

    if stat:
      entity_size = stat.entity_bytes
      entity_count = stat.count
      builtin_index_size = stat.builtin_index_bytes
      builtin_index_count = stat.builtin_index_count
      composite_index_size = stat.composite_index_bytes
      composite_index_count = stat.composite_index_count

      # A total that exists with a zero count is reported as one entity.
      if not entity_count:
        entity_count = 1

    return ('Scanned %d entities of total %d bytes, %d index entries of total '
            '%d bytes and %d composite index entries of total %d bytes. '
            'Inserted %d new records.'
            % (entity_count, entity_size, builtin_index_count,
               builtin_index_size, composite_index_count, composite_index_size,
               self.written))