blob: 638ccaca607b23885a45536aea884dffcb1ee3a8 [file] [log] [blame]
/*
* vim:noexpandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2010, The Linux Box Corporation
* Contributor : Matt Benjamin <matt@linuxbox.com>
*
* Some portions Copyright CEA/DAM/DIF (2008)
* contributeur : Philippe DENIEL philippe.deniel@cea.fr
* Thomas LEIBOVICI thomas.leibovici@cea.fr
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA
*
* -------------
*/
/**
* @addtogroup cache_inode
* @{
*/
/**
* @file cache_inode_avl.c
* @brief AVL tree for caching directory entries
*/
#include "config.h"
#include "log.h"
#include "fsal.h"
#include "cache_inode.h"
#include "cache_inode_avl.h"
#include "murmur3.h"
#include "city.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/param.h>
#include <time.h>
#include <pthread.h>
#include <assert.h>
void
cache_inode_avl_init(cache_entry_t *entry)
{
avltree_init(&entry->object.dir.avl.t, avl_dirent_hk_cmpf,
0 /* flags */);
avltree_init(&entry->object.dir.avl.c, avl_dirent_hk_cmpf,
0 /* flags */);
}
static inline struct avltree_node *
avltree_inline_lookup(
const struct avltree_node *key,
const struct avltree *tree)
{
struct avltree_node *node = tree->root;
int res = 0;
while (node) {
res = avl_dirent_hk_cmpf(node, key);
if (res == 0)
return node;
if (res > 0)
node = node->left;
else
node = node->right;
}
return NULL;
}
void
avl_dirent_set_deleted(cache_entry_t *entry, cache_inode_dir_entry_t *v)
{
struct avltree *t = &entry->object.dir.avl.t;
struct avltree_node *node;
assert(!(v->flags & DIR_ENTRY_FLAG_DELETED));
node = avltree_inline_lookup(&v->node_hk, t);
assert(node);
avltree_remove(&v->node_hk, &entry->object.dir.avl.t);
#if EXTRA_CHECK_DELETED_WORKED
node = avltree_inline_lookup(&v->node_hk, c);
assert(!node);
#endif
v->flags |= DIR_ENTRY_FLAG_DELETED;
cache_inode_key_delete(&v->ckey);
/* save cookie in deleted avl */
avltree_insert(&v->node_hk, &entry->object.dir.avl.c);
}
void
avl_dirent_clear_deleted(cache_entry_t *entry,
cache_inode_dir_entry_t *v)
{
struct avltree *t = &entry->object.dir.avl.t;
struct avltree *c = &entry->object.dir.avl.c;
struct avltree_node *node;
node = avltree_inline_lookup(&v->node_hk, c);
assert(node);
avltree_remove(&v->node_hk, c);
memset(&v->node_hk, 0, sizeof(struct avltree_node));
node = avltree_insert(&v->node_hk, t);
assert(!node);
v->flags &= ~DIR_ENTRY_FLAG_DELETED;
}
static inline int
cache_inode_avl_insert_impl(cache_entry_t *entry,
cache_inode_dir_entry_t *v,
int j, int j2)
{
int code = -1;
struct avltree_node *node;
struct avltree *t = &entry->object.dir.avl.t;
struct avltree *c = &entry->object.dir.avl.c;
/* first check for a previously-deleted entry */
node = avltree_inline_lookup(&v->node_hk, c);
/* XXX we must not allow persist-cookies to overrun resource
* management processes (ie, more coming in CIR/LRU) */
if ((!node) && (avltree_size(c) > 65535)) {
/* ie, recycle the smallest deleted entry */
node = avltree_first(c);
}
/* We can't really re-use slots safely, since filenames can
have wildly differing lengths. */
#if 0
if (node) {
/* reuse the slot */
v_exist =
avltree_container_of(node, cache_inode_dir_entry_t,
node_hk);
FSAL_namecpy(&v_exist->name, &v->name);
v_exist->entry = v->entry;
avl_dirent_clear_deleted(entry, v_exist);
v = v_exist;
code = 1; /* tell client to dispose v */
} else {
/* try to insert active */
node = avltree_insert(&v->node_hk, t);
if (!node)
code = 0;
}
#endif /* 0 */
if (node) {
avltree_remove(node, c);
node = NULL;
}
node = avltree_insert(&v->node_hk, t);
if (!node)
code = 0;
switch (code) {
case 0:
/* success, note iterations */
v->hk.p = j + j2;
if (entry->object.dir.avl.collisions < v->hk.p)
entry->object.dir.avl.collisions = v->hk.p;
LogDebug(COMPONENT_CACHE_INODE,
"inserted new dirent on entry=%p cookie=%" PRIu64
" collisions %d", entry, v->hk.k,
entry->object.dir.avl.collisions);
break;
default:
/* already inserted, or, keep trying at current j, j2 */
LogDebug(COMPONENT_CACHE_INODE,
"Already existent when inserting new dirent on entry=%p cookie=%"
PRIu64 " this should never happen.",
entry, v->hk.k);
break;
}
return code;
}
#define MIN_COOKIE_VAL 3
/*
* Insert with quadatic, linear probing. A unique k is assured for
* any k whenever size(t) < max(uint64_t).
*
* First try quadratic probing, with coeff. 2 (since m = 2^n.)
* A unique k is not assured, since the codomain is not prime.
* If this fails, fall back to linear probing from hk.k+1.
*
* On return, the stored key is in v->hk.k, the iteration
* count in v->hk.p.
**/
int
cache_inode_avl_qp_insert(cache_entry_t *entry,
cache_inode_dir_entry_t *v)
{
#if AVL_HASH_MURMUR3
uint32_t hk[4];
#endif
int j, j2, code = -1;
/* don't permit illegal cookies */
#if AVL_HASH_MURMUR3
MurmurHash3_x64_128(v->name, strlen(v->name), 67, hk);
memcpy(&v->hk.k, hk, 8);
#else
v->hk.k = CityHash64WithSeed(v->name, strlen(v->name), 67);
#endif
#ifdef _USE_9P
/* tmp hook : it seems like client running v9fs dislike "negative"
* cookies just kill the sign bit, making
* cookies 63 bits... */
v->hk.k &= ~(1L << 63);
#endif
/* XXX would we really wait for UINT64_MAX? if not, how many
* probes should we attempt? */
for (j = 0; j < UINT64_MAX; j++) {
v->hk.k = (v->hk.k + (j * 2));
/* reject values 0, 1 and 2 */
if (v->hk.k < MIN_COOKIE_VAL)
continue;
code = cache_inode_avl_insert_impl(entry, v, j, 0);
if (code >= 0)
return code;
}
LogCrit(COMPONENT_CACHE_INODE,
"cache_inode_avl_qp_insert_s: could not insert at j=%d (%s)", j,
v->name);
#ifdef _USE_9P
/* tmp hook : it seems like client running v9fs dislike "negative"
* cookies */
v->hk.k &= ~(1L << 63);
#endif
for (j2 = 1 /* tried j=0 */; j2 < UINT64_MAX; j2++) {
v->hk.k = v->hk.k + j2;
code = cache_inode_avl_insert_impl(entry, v, j, j2);
if (code >= 0)
return code;
j2++;
}
LogCrit(COMPONENT_CACHE_INODE,
"cache_inode_avl_qp_insert_s: could not insert at j=%d (%s)", j,
v->name);
return -1;
}
cache_inode_dir_entry_t *
cache_inode_avl_lookup_k(cache_entry_t *entry, uint64_t k, uint32_t flags)
{
struct avltree *t = &entry->object.dir.avl.t;
struct avltree *c = &entry->object.dir.avl.c;
cache_inode_dir_entry_t dirent_key[1], *dirent = NULL;
struct avltree_node *node, *node2;
dirent_key->hk.k = k;
node = avltree_inline_lookup(&dirent_key->node_hk, t);
if (node) {
if (flags & CACHE_INODE_FLAG_NEXT_ACTIVE)
/* client wants the cookie -after- the last we sent, and
* the Linux 3.0 and 3.1.0-rc7 clients misbehave if we
* resend the last one */
node = avltree_next(node);
if (!node) {
LogFullDebug(COMPONENT_NFS_READDIR,
"seek to cookie=%" PRIu64
" fail (no next entry)", k);
goto out;
}
}
/* Try the deleted AVL. If a node with hk.k == v->hk.k is found,
* return its least upper bound in -t-, if any. */
if (!node) {
node2 = avltree_inline_lookup(&dirent_key->node_hk, c);
if (node2)
node = avltree_sup(&dirent_key->node_hk, t);
LogDebug(COMPONENT_NFS_READDIR,
"node %p found deleted supremum %p", node2, node);
}
if (node)
dirent =
avltree_container_of(node, cache_inode_dir_entry_t,
node_hk);
out:
return dirent;
}
cache_inode_dir_entry_t *
cache_inode_avl_qp_lookup_s(cache_entry_t *entry, const char *name, int maxj)
{
struct avltree *t = &entry->object.dir.avl.t;
struct avltree_node *node;
cache_inode_dir_entry_t *v2;
#if AVL_HASH_MURMUR3
uint32_t hashbuff[4];
#endif
int j;
size_t namelen = strlen(name);
cache_inode_dir_entry_t v;
#if AVL_HASH_MURMUR3
MurmurHash3_x64_128(name, namelen, 67, hashbuff);
/* This seems to be correct. The avltree_lookup function looks
as hk.k, but does no namecmp on its own, so there's no need to
allocate space for or copy the name in the key. */
memcpy(&v.hk.k, hashbuff, 8);
#else
v.hk.k = CityHash64WithSeed(name, namelen, 67);
#endif
#ifdef _USE_9P
/* tmp hook : it seems like client running v9fs dislike "negative"
* cookies */
v.hk.k &= ~(1L << 63);
#endif
for (j = 0; j < maxj; j++) {
v.hk.k = (v.hk.k + (j * 2));
node = avltree_lookup(&v.node_hk, t);
if (node) {
/* ensure that node is related to v */
v2 = avltree_container_of(node, cache_inode_dir_entry_t,
node_hk);
if (strcmp(name, v2->name) == 0) {
assert(!(v2->flags & DIR_ENTRY_FLAG_DELETED));
return v2;
}
}
}
LogFullDebug(COMPONENT_CACHE_INODE,
"cache_inode_avl_qp_lookup_s: entry not found at j=%d (%s)",
j, name);
return NULL;
}
/** @} */