blob: 136fca167b8f373c28b6b0d072f506ac2e22f1d4 [file] [log] [blame]
/*
Copyright (C) 2015 Bert Frees
This file is free software; you can redistribute it and/or modify it under
the terms of the Lesser or Library GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option) any later
version.
This file is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. See the Library GNU General Public License for more
details.
You should have received a copy of the Library GNU General Public License
along with this program; see the file COPYING. If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA.
*/
#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include "louis.h"
#include "findTable.h"
/* =============================== LIST =================================== */
typedef struct List
{
void * head;
void (* free)(void *);
struct List * tail;
} List;
/*
* Returns a list with the element `x' added to `list'. Returns a sorted list
* if `cmp' is not NULL and if `list' is also sorted. New elements replace
* existing ones if they are equal according to `cmp'. If `cmp' is NULL,
* elements are simply prepended to the list. The function `dup' is used to
* duplicate elements before they are added to the list. If `free' function is
* used to free elements when they are removed from the list. The returned
* list must be freed by the caller.
*/
static List *
list_conj(List * list,
void * x,
int (* cmp)(void *, void *),
void * (* dup)(void *),
void (* free)(void *))
{
if (!list)
{
list = malloc(sizeof(List));
list->head = dup ? dup(x) : x;
list->free = free;
list->tail = NULL;
return list;
}
else if (!cmp)
{
List * l = malloc(sizeof(List));
l->head = dup ? dup(x) : x;
l->free = free;
l->tail = list;
return l;
}
else
{
List * l1 = list;
List * l2 = NULL;
while (l1)
{
int c = cmp(l1->head, x);
if (c > 0)
break;
else if (c < 0)
{
l2 = l1;
l1 = l2->tail;
}
else
{
if (x != l1->head && !dup && free)
free(x);
return list;
}
}
List * l3 = malloc(sizeof(List));
l3->head = dup? dup(x) : x;
l3->free = free;
l3->tail = l1;
if (!l2)
list = l3;
else
l2->tail = l3;
return list;
}
}
/*
* Free an instance of type List.
*/
static void
list_free(List * list)
{
if (list)
{
if (list->free)
list->free(list->head);
list_free(list->tail);
free(list);
}
}
/*
* Sort a list based on a comparison function.
*/
static List *
list_sort(List * list, int (* cmp)(void *, void *))
{
List * newList = NULL;
List * l;
for (l = list; l; l = l->tail)
{
newList = list_conj(newList, l->head, cmp, NULL, l->free);
l->free = NULL;
}
list_free(list);
return newList;
}
/*
* Get the size of a list.
*/
static int
list_size(List * list)
{
int len = 0;
List * l;
for (l = list; l; l = l->tail)
len++;
return len;
}
/*
* Convert a list into a NULL terminated array.
*/
static void **
list_toArray(List * list, void * (* dup)(void *))
{
void ** array;
List * l;
int i;
array = malloc((1 + list_size(list)) * sizeof(void *));
i = 0;
for (l = list; l; l = l->tail)
array[i++] = dup ? dup(l->head) : l->head;
array[i] = NULL;
return array;
}
/* ============================== FEATURE ================================= */
typedef struct
{
char * key;
char * val;
} Feature;
typedef struct
{
Feature feature;
int importance;
} FeatureWithImportance;
typedef struct
{
char * name;
List * features;
} TableMeta;
/*
* Create an instance of type Feature. The `key' and `val' strings are
* duplicated. Leaving out the `val' argument results in a value of "yes".
*/
static Feature
feature_new(const char * key, const char * val)
{
static char * YES = "yes";
Feature f;
f.key = strdup(key);
f.val = strdup(val ? val : YES);
return f;
}
/*
* Free an instance of type Feature
*/
static void
feature_free(Feature * f)
{
if (f)
{
free(f->key);
free(f->val);
free(f);
}
}
/* ======================================================================== */
/*
* Sort features based on their keys.
*/
static int
cmpKeys(Feature * f1, Feature * f2)
{
return strcmp(f1->key, f2->key);
}
/*
* Compute the match quotient of the features in a query against the features
* in a table's metadata. The features are assumed to be sorted and to have no
* duplicate keys. The query's features must be of type
* FeatureWithImportance. How a feature contributes to the match quotient
* depends on its importance, on whether the feature is undefined, defined
* with the same value (positive match), or defined with a different value
* (negative match), and on the `fuzzy' argument. If the `fuzzy' argument
* evaluates to true, negative matches and undefined features get a lower
* penalty.
*/
static int
matchFeatureLists(const List * query, const List * tableFeatures, int fuzzy)
{
static int POS_MATCH = 10;
static int NEG_MATCH = -100;
static int UNDEFINED = -20;
static int EXTRA = -1;
static int POS_MATCH_FUZZY = 10;
static int NEG_MATCH_FUZZY = -25;
static int UNDEFINED_FUZZY = -5;
static int EXTRA_FUZZY = -1;
int posMatch, negMatch, undefined, extra;
if (!fuzzy)
{
posMatch = POS_MATCH;
negMatch = NEG_MATCH;
undefined = UNDEFINED;
extra = EXTRA;
}
else
{
posMatch = POS_MATCH_FUZZY;
negMatch = NEG_MATCH_FUZZY;
undefined = UNDEFINED_FUZZY;
extra = EXTRA_FUZZY;
}
int quotient = 0;
const List * l1 = query;
const List * l2 = tableFeatures;
while (1)
{
if (!l1)
{
if (!l2)
break;
quotient += extra;
l2 = l2->tail;
}
else if (!l2)
{
quotient += undefined;
l1 = l1->tail;
}
else
{
int cmp = cmpKeys(l1->head, l2->head);
if (cmp < 0)
{
quotient += undefined;
l1 = l1->tail;
}
else if (cmp > 0)
{
quotient += extra;
l2 = l2->tail;
}
else
{
if (strcmp(((Feature *)l1->head)->val, ((Feature *)l2->head)->val) == 0)
quotient += posMatch;
else
quotient += negMatch;
l1 = l1->tail;
l2 = l2->tail;
}
}
}
return quotient;
}
/*
* Return true if a character matches [0-9A-Za-z_-]
*/
static int
isIdentChar(char c)
{
return (c >= '0' && c <= '9')
|| (c >= 'A' && c <= 'Z')
|| (c >= 'a' && c <= 'z')
|| c == '-'
|| c == '_';
}
/*
* Parse a table query into a list of features. Features defined first get a
* higher importance.
*/
static List *
parseQuery(const char * query)
{
List * features = NULL;
const char * key = NULL;
const char * val = NULL;
size_t keySize = 0;
size_t valSize = 0;
const char * c;
int pos = 0;
while (1)
{
c = &query[pos++];
if (*c == ' ' || *c == '\t' || *c == '\n' | *c == '\0')
{
if (key)
{
char * k = strndup(key, keySize);
char * v = val ? strndup(val, valSize) : NULL;
FeatureWithImportance f = { feature_new(k, v), 0 };
logMessage(LOG_DEBUG, "Query has feature '%s:%s'", f.feature.key, f.feature.val);
features = list_conj(features, memcpy(malloc(sizeof(f)), &f, sizeof(f)),
NULL, NULL, (void (*)(void *))feature_free);
free(k);
free(v);
key = val = NULL;
keySize = valSize = 0;
}
if (*c == '\0')
break;
}
else if (*c == ':')
{
if (!key || val)
goto compile_error;
else
{
c = &query[pos++];
if (isIdentChar(*c))
{
val = c;
valSize = 1;
}
else
goto compile_error;
}
}
else if (isIdentChar(*c))
{
if (val)
valSize++;
else if (key)
keySize++;
else
{
key = c;
keySize = 1;
}
}
else
goto compile_error;
}
int k = 1;
List * l;
for (l = features; l; l = l->tail)
{
FeatureWithImportance * f = l->head;
f->importance = k++;
}
return list_sort(features, (int (*)(void *, void *))cmpKeys);
compile_error:
logMessage(LOG_ERROR, "Unexpected character '%c' at position %d", c, pos);
list_free(features);
return NULL;
}
/*
* Convert a widechar string to a normal string.
*/
static char *
widestrToStr(const widechar * str, size_t n)
{
char * result = malloc((1 + n) * sizeof(char));
int k;
for (k = 0; k < n; k++)
result[k] = str[k];
result[k] = '\0';
return result;
}
/*
* Extract a list of features from a table.
*/
static List *
analyzeTable(const char * table)
{
static char fileName[MAXSTRING];
char ** resolved;
List * features = NULL;
FileInfo info;
int k;
resolved = resolveTable(table, NULL);
if (resolved == NULL)
{
logMessage(LOG_ERROR, "Cannot resolve table '%s'", table);
return NULL;
}
sprintf(fileName, "%s", *resolved);
k = 0;
for (k = 0; resolved[k]; k++)
free(resolved[k]);
free(resolved);
if (k > 1)
{
logMessage(LOG_ERROR, "Table '%s' resolves to more than one file", table);
return NULL;
}
info.fileName = fileName;
info.encoding = noEncoding;
info.status = 0;
info.lineNumber = 0;
if ((info.in = fopen(info.fileName, "rb")))
{
while (getALine(&info))
{
if (info.linelen == 0);
else if (info.line[0] == '#')
{
if (info.linelen >= 2 && info.line[1] == '+')
{
widechar * key = NULL;
widechar * val = NULL;
size_t keySize = 0;
size_t valSize = 0;
info.linepos = 2;
if (info.linepos < info.linelen && isIdentChar(info.line[info.linepos]))
{
key = &info.line[info.linepos];
keySize = 1;
info.linepos++;
while (info.linepos < info.linelen && isIdentChar(info.line[info.linepos]))
{
keySize++;
info.linepos++;
}
if (info.linepos < info.linelen && info.line[info.linepos] == ':')
{
info.linepos++;
while (info.linepos < info.linelen
&& (info.line[info.linepos] == ' ' || info.line[info.linepos] == '\t'))
info.linepos++;
if (info.linepos < info.linelen && isIdentChar(info.line[info.linepos]))
{
val = &info.line[info.linepos];
valSize = 1;
info.linepos++;
while (info.linepos < info.linelen && isIdentChar(info.line[info.linepos]))
{
valSize++;
info.linepos++;
}
}
else
goto compile_error;
}
if (info.linepos == info.linelen)
{
char * k = widestrToStr(key, keySize);
char * v = val ? widestrToStr(val, valSize) : NULL;
Feature f = feature_new(k, v);
logMessage(LOG_DEBUG, "Table has feature '%s:%s'", f.key, f.val);
features = list_conj(features, memcpy(malloc(sizeof(f)), &f, sizeof(f)),
NULL, NULL, (void (*)(void *))feature_free);
free(k);
free(v);
}
else
goto compile_error;
}
else
goto compile_error;
}
}
else
break;
}
fclose(info.in);
}
else
logMessage (LOG_ERROR, "Cannot open table '%s'", info.fileName);
return list_sort(features, (int (*)(void *, void *))cmpKeys);
compile_error:
if (info.linepos < info.linelen)
logMessage(LOG_ERROR, "Unexpected character '%c' on line %d, column %d",
info.line[info.linepos],
info.lineNumber,
info.linepos);
else
logMessage(LOG_ERROR, "Unexpected newline on line %d", info.lineNumber);
list_free(features);
return NULL;
}
static List * tableIndex = NULL;
void EXPORT_CALL
lou_indexTables(const char ** tables)
{
const char ** table;
list_free(tableIndex);
tableIndex = NULL;
for (table = tables; *table; table++)
{
logMessage(LOG_DEBUG, "Analyzing table %s", *table);
List * features = analyzeTable(*table);
if (features)
{
TableMeta m = { strdup(*table), features };
tableIndex = list_conj(tableIndex, memcpy(malloc(sizeof(m)), &m, sizeof(m)), NULL, NULL, free);
}
}
if (!tableIndex)
logMessage(LOG_WARN, "No tables were indexed");
}
#ifdef _WIN32
#define DIR_SEP '\\'
#else
#define DIR_SEP '/'
#endif
/*
* Returns the list of files found on searchPath, where searchPath is a
* comma-separated list of directories.
*/
static List *
listFiles(char * searchPath)
{
List * list = NULL;
char * dirName;
DIR * dir;
struct dirent * file;
static char fileName[MAXSTRING];
struct stat info;
int pos = 0;
int n;
while (1)
{
for (n = 0; searchPath[pos + n] != '\0' && searchPath[pos + n] != ','; n++);
dirName = strndup(&searchPath[pos], n);
if ((dir = opendir(dirName)))
{
while ((file = readdir(dir)))
{
sprintf(fileName, "%s%c%s", dirName, DIR_SEP, file->d_name);
if (stat(fileName, &info) == 0 && !(info.st_mode & S_IFDIR))
{
list = list_conj(list, strdup(fileName), NULL, NULL, free);
}
}
closedir(dir);
}
else
{
logMessage(LOG_WARN, "%s is not a directory", dirName);
}
free(dirName);
pos += n;
if (searchPath[pos] == '\0')
break;
else
pos++;
}
return list;
}
char * EXPORT_CALL
lou_findTable(const char * query)
{
if (!tableIndex)
{
char * searchPath;
List * tables;
const char ** tablesArray;
logMessage(LOG_WARN, "Tables have not been indexed yet. Indexing LOUIS_TABLEPATH.");
searchPath = getTablePath();
tables = listFiles(searchPath);
tablesArray = (const char **)list_toArray(tables, NULL);
lou_indexTables(tablesArray);
free(searchPath);
list_free(tables);
free(tablesArray);
}
List * queryFeatures = parseQuery(query);
int bestQuotient = 0;
char * bestMatch = NULL;
List * l;
for (l = tableIndex; l; l = l->tail)
{
TableMeta * table = l->head;
int q = matchFeatureLists(queryFeatures, table->features, 0);
if (q > bestQuotient)
{
bestQuotient = q;
bestMatch = strdup(table->name);
}
}
if (bestMatch)
{
logMessage(LOG_INFO, "Best match: %s (%d)", bestMatch, bestQuotient);
return bestMatch;
}
else
{
logMessage(LOG_INFO, "No table could be found for query '%s'", query);
return NULL;
}
}