blob: ae375c12f50ab29e531b42f006790d0d842755cd [file] [log] [blame]
/*************************************************************************
*
* PathFinder: finding a series of labeled nodes within a
* two-layer directed, cyclic graph.
* Copyright (2013) Sandia Corporation
*
* Copyright (2013) Sandia Corporation. Under the terms of Contract
* DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
* retains certain rights in this software.
*
* This file is part of PathFinder.
*
* PathFinder is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* PathFinder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with PathFinder. If not, see <http://www.gnu.org/licenses/>.
*
* Questions? Contact J. Brian Rigdon (jbrigdo@sandia.gov)
*
*/
/*
* searchAlgorithms.c
*
* Created on: Jun 18, 2013
* Author: jbrigdo
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <math.h>
#ifdef USE_OMP
#include <omp.h>
#endif
#include "searchAlgorithms.h"
#include "vectorUtils.h"
#include "utils.h"
#include "graphGen.h"
#include "statistics.h"
#include "yaml.h"
extern double currentTime();
static Stats *globalStats = NULL;
void doMultiSearches(Configuration *config)
{
NodePtrVec *result;
bool success;
Graph *graph;
Signature signature;
int i;
int j;
char *debug;
double tick, tock;
int hours;
int min;
double sec;
tick = currentTime();
result = NodePtrVec_new(64); /* 64 is an arbitrary size */
for ( i = 0; config->signatures[i] != NULL; ++i )
{
signature = config->signatures[i];
/* Debug --* /
printf ("\n\nSignature (");
for ( k = 0; signature[k] != NULL; ++k )
{
debug = signature[k];
printf("%s", signature[k]);
if ( signature[k+1] != NULL )
printf(" ");
else
printf("):\n");
}
/ *-- End Debug */
// printf("Signature %d:\n", i);
for ( j = 0; config->graphs[j] != NULL; ++j )
{
graph = config->graphs[j];
result->contentSize = 0; /* clear last search's result */
// printf("\t checking file %s... ", graph->fileName);
fflush(stdout);
success = findLabelPath(graph, signature, result, config->searchOptions->searchType);
if ( success )
{}// printf("Found!\n");
else
{}// printf("Not found. Bummer.\n");
}
}
tock = currentTime();
sec = tock-tick;
hours = (int)sec/3600;
sec = fmod( sec, 3600.0 );
min = (int)sec/60;
sec = fmod(sec, 60.0);
printf ("\n\nOverall Search Time: %02d:02%d:%05.2f\n", hours, min, sec);
}
/* findNextLabel is a recursive method that determines if a path exists between a
* specific node and the label that is at the start of the label array argument. The
* label array is an array of char pointers that is terminated with a NULL pointer. The
* first pointer is the "current" label.
*
* The node's edge nodes are checked against the current label. If none of them match, then
* this method recurses with each edge node and the current label.
*
* If an edge matches, then we are beginning a search for the next "leg" of the labels.
* We recurse with that specific edge and the "next" label. That is the [1] element of
* the label array. This starts the search along the next leg - at this point we have
* to start a new "result" vector, because we check that vector for loops. It MAY be the
* case that the next path segment passes through nodes already traversed in a previous
* leg. A new "result" vector will solve that.
*
* If the next element in the labels list is NULL, the matching edge signifies the end of
* our search! VICTORY!
*
* If we reach the last edge with out a match (directly or recursively) we return a false
* list.
*/
bool findNextLabel( Node *node, Signature labels, NodePtrVec *result, Bitfield *visited ) /* NodePtrVec *visited ) */
{
EdgeList *edge;
bool success = false;
NodePtrVec *nextLegResult = NULL;
/* NodePtrVec *nextLegVisited = NULL; */
Bitfield *nextLegVisited = NULL;
/* A little basic error checking */
if ( !node || !labels || !result|| !visited )
{
return( false );
}
/* If this node is already in the vector, we have found a loop. return false. */
if ( Bitfield_nodeVisited( visited, node ) )
return( false );
/* put this node on the result vector to show that we've been here */
NodePtrVec_push( result, node );
/* Check this node's edges to see if there's a match */
/* Note: We have a NodePtrVec holding the set of nodes with this label. It
* may be more optimal to see if a given edge node is in that set
* rather than doing a bunch of inefficient strcmp calls. Another approach
* would be to have unique hash values for each label, and thereby be
* able to compare those. However for the initial version of this code,
* keeping things simple and straightforward, we're doing the string
* compare.
*/
for ( edge = node->edges; edge != NULL; edge = edge->nextEdge )
{
// string based:
if ( edge->targetNode->label && strcmp( edge->targetNode->label, labels[0] ) == 0 )
// index based: if ( edge->targetNode->labelIdx == labelIdxs[0] )
{
if ( labels[1] != NULL ) /* more steps in the signature */
{
nextLegResult = NodePtrVec_new( 50 ); /* arbitrary size, malloc success checked in recursion */
nextLegVisited = Bitfield_new(visited->bitsNeeded);
success = findNextLabel( edge->targetNode, &labels[1], nextLegResult, nextLegVisited );
/* NodePtrVec_delete( nextLegVisited ); */
Bitfield_delete( nextLegVisited );
if ( success )
{
NodePtrVec_appendVectors( result, nextLegResult, true );
NodePtrVec_delete( nextLegResult );
return( true );
}
}
else /* We have exhausted the signature - ultimate victory! */
{
/* Register this edge node as being the final node */
NodePtrVec_push( result, edge->targetNode );
return( true );
}
}
}
/* IF we made it here, we need to continue through the tree, seeing if any of our
* edge nodes have a connection to a labeled node.
*/
for ( edge = node->edges; edge != NULL; edge = edge->nextEdge )
{
success = findNextLabel( edge->targetNode, labels, result, visited );
if ( success )
return( true ); /* this edge has a path to the ultimate signature path */
}
/* and, if we make it here, we have failed. */
NodePtrVec_pop( result ); /* take current node off the result vector */
return false;
}
/* A method to custom-store our results */
static void logStats(NodePtrVec *result)
{
if ( !result ) return;
if ( !globalStats )
globalStats = Stats_new();
Stats_logPath(globalStats, result);
}
static void printStats()
{
int i;
if ( globalStats )
{
Stats_calculate(globalStats);
printf ( "\nThis graph has %f average nodes between labels.\nStandard deviation: %f, total paths: %d\n\n",
globalStats->averageLength, globalStats->standardDeviation, globalStats->pathLengths->size );
printf ("\tShortest Path: %d, Longest Path: %d\n", globalStats->minLength, globalStats->maxLength);
for ( i = globalStats->minLength; i <= globalStats->maxLength; ++i )
{
if ( globalStats->histogram[i] != 0 )
printf ("\tlength %d appeared %d times\n", i, globalStats->histogram[i] );
}
}
}
/* A method to custom-store our results */
/* At some point, we will want to use the FullPath argument and pass it along to
* buildGraphFromPaths. Until then, however, we're only going to build the most
* minimal graph possible.
*/
static void logResult(NodeVecVec *storage, NodePtrVec *result, SearchOptions *options)
{
NodePtrVec *tips = NodePtrVec_new(2);
if ( !storage || !result || !tips ) return;
/* We log the stats here, because the storage method below may not hold
* the entire path.
*/
if ( !options->multiThreaded && options->doStatistics ) /* statistics are not thread-safe yet */
logStats(result);
/* Ultimately we may want to log more than the ends... */
/* Future: if ( options->buildType == endNodesOnly ) { */
NodePtrVec_push(tips, result->vector[0]);
NodePtrVec_push(tips, result->vector[result->contentSize-1]);
if ( !NodeVecVec_insert(storage, tips) ) /* makes a copy of tips, so we need to... */
{
printf("CrashAndBURN!!!\n\n");
fflush(stdout);
}
NodePtrVec_delete(tips); /* ... free the mallocs! */
}
/* findAndRecordAllPaths is almost exactly like findNextLabel. It differs only in that it
* 1) doesn't stop with the first discovery of the specified path and 2) records the discovered
* path. To do this, it doesn't create a new result vector for
*/
void findAndRecordAllPaths( Node *node, Signature labels, int *labelIdxs,
NodePtrVec *result, Bitfield *visited, NodeVecVec *storage,
SearchOptions *options )
{
EdgeList *edge;
/* NodePtrVec *nextLegVisited = NULL; */
Bitfield *nextLegVisited = NULL;
/* A little basic error checking */
if ( !node || !labels || !labelIdxs || !result|| !visited )
{
return;
}
/* If this node is already in the vector, we have found a loop. return false. */
/*
if ( NodePtrVec_findReverse( visited, node ) )
return( false );
else
NodePtrVec_push( visited, node );
*/
if ( Bitfield_nodeVisited( visited, node ) )
return;
/* put this node on the result vector to show that we've been here */
NodePtrVec_push( result, node );
/* Check this node's edges to see if there's a match */
/* Note: We have a NodePtrVec holding the set of nodes with this label. It
* may be more optimal to see if a given edge node is in that set
* rather than doing a bunch of inefficient strcmp calls. Another approach
* would be to have unique hash values for each label, and thereby be
* able to compare those. However for the initial version of this code,
* keeping things simple and straightforward, we're doing the string
* compare.
*/
for ( edge = node->edges; edge != NULL; edge = edge->nextEdge )
{
// string based:
if ( edge->targetNode->label && strcmp( edge->targetNode->label, labels[0] ) == 0 )
// index based: if ( edge->targetNode->labelIdx == labelIdxs[0] )
{
// strcmp based:
if ( labels[1] != NULL ) /* more steps in the signature */
// index based: if ( labelIdxs[1] != -1 ) /* more steps in the signature */
{
nextLegVisited = Bitfield_new(visited->bitsNeeded);
findAndRecordAllPaths(edge->targetNode, &labels[1], &labelIdxs[1], result,
nextLegVisited, storage, options );
Bitfield_delete(nextLegVisited);
}
else /* We have exhausted the signature - ultimate victory! */
{
/* Register this edge node as being the final node */
// printf ("\tFound!\n");
NodePtrVec_push(result, edge->targetNode);
Bitfield_nodeVisited(visited, edge->targetNode);/* Mark it as visited */
logResult(storage, result, options); // <<<--- here's where I record it
NodePtrVec_pop(result);
}
}
}
/* IF we made it here, we need to continue through the tree, seeing if any of our
* edge nodes have a connection to a labeled node.
*/
for ( edge = node->edges; edge != NULL; edge = edge->nextEdge )
{
findAndRecordAllPaths(edge->targetNode, labels, labelIdxs, result, visited, storage, options);
}
NodePtrVec_pop( result ); /* take current node off the result vector */
return;
}
/* findLabelPath takes an array of labels, determines if there are nodes associated
* with the labels (each label search will return a NodePtrVec*), and determine if
* paths exist passing through the set of labels. This returns the first FOUND
* path, not the optimal one. We could do that in a future iteration of the code
* by keeping track of all paths found, and going with the shortest.
*
* The labels passed in is an array of char pointers. The array is null terminated -
* i.e. the last valid entry in the array is followed by a pointer with NULL in it.
*/
bool findLabelPath( Graph *graph, Signature labels, NodePtrVec *result, SearchType searchType )
{
NodePtrVec *startNodes = NULL; /* set of nodes with the first label - potential path start nodes */
int i;
bool found = false; /* stopping as soon as we can find a path to a final node */
/* NodePtrVec *visited = NodePtrVec_new( 50 ); /-* arbitrary size *-/ */
Bitfield *visited = Bitfield_new(graph->totalNodes);
/* some basic error checking */
if ( !graph || !labels || !labels[0] || !labels[1] || !result || !visited )
return( false );
/* Un-comment to short-circuit searches for non-represented signatures. -->* /
if ( !SystemCallMap_signatureRepresented( graph->systemCallMap, labels ) )
{
//printf( "%s %s invalid signature labels for graph.\n", labels[0], labels[1] );
return( false );
}
/ *<-- end of short-circuit region */
startNodes = SystemCallMap_findLabeledNodes( graph->systemCallMap, labels[0] );
if ( !startNodes )
return( false );
/* So, if we've made it this far, we have a valid start label. Now, we need to
* iterate through the start nodes and see if they can get to the next label...
*/
for ( i = 0; i < startNodes->contentSize && !found; ++i )
{
if ( searchType == diagramSearch )
{
SearchDiagram *element = SearchDiagram_findNode( graph->searchDiagram, startNodes->vector[i] );
if ( element )
found = SearchDiagram_findSignatureAlongEdges( element->node, element->edgeReferenceArray,
&labels[1], result, visited );
}
else
found = findNextLabel( startNodes->vector[i], &labels[1], result, visited );
Bitfield_clear( visited );
if ( !found && result->contentSize != 0 ) /* If it's not found the result SHOULD be empty, however... */
result->contentSize = 0; /* effectively clear the result NodePtrVec */
}
/* NodePtrVec_delete( visited ); */
Bitfield_delete( visited );
return( found );
}
/* findAllPossibleLegs does an exhaustive search through each possible label
* pairing to determine which legs are possible. This is a brute-force nxn
* approach that builds a signature holding only the second label and then
* searches for that signature from the nodes representing the first label.
* For the time being, it simply prints out the legs that are possible in
* this graph.
*/
int findAllPossibleLegs( Graph *graph, SearchType searchType )
{
int i, j;
int found = 0;
int searches = 0;
double tick, tock;
int hours, min;
double sec;
char timeStr[50];
tick = currentTime();
fprintf ( stdout, "Immediately before parallel\n" );
//#pragma omp parallel private(i,j) shared(graph) reduction(+:found) reduction(+:searches)
//#pragma omp single
{
//#pragma omp parallel for private(i,j) default(none)
#ifdef USE_OMP
#pragma omp parallel for private(i,j) shared(graph) reduction(+:found) reduction(+:searches)
#pragma omp collapse(2)
#endif
for ( i = 0; i < graph->systemCallMap->contentSize; ++i )
{
for ( j = 0; j < graph->systemCallMap->contentSize; ++j )
{
++searches;
char *fullSignature[3] = { NULL, NULL, NULL };
int fullIntSignature[3] = { 0, 0, -1 };
fullSignature[0] = graph->systemCallMap->vector[i]->label;
fullSignature[1] = graph->systemCallMap->vector[j]->label;
fullIntSignature[0] = i;
fullIntSignature[1] = j;
// NodePtrVec *result = NodePtrVec_new(25);
/* findLabelPath does the findNextLabel in the above loops */
//#pragma omp task shared(graph, fullSignature, found)
{
NodePtrVec *result = NodePtrVec_new( 25 );
if ( findLabelPath( graph, fullSignature, result, searchType ) )
{
// printStack(result);
// logStats(result);
++ found;
}
else
{
// printf ( "\n\tPath does not exist %s --/ /-> %s. %d steps.\n", fullSignature[0],
// fullSignature[1], result->contentSize );
}
if ( result )
{
NodePtrVec_delete( result );
}
}
}
//#pragma omp taskwait
}
}
tock = currentTime();
sec = tock-tick;
hours = (int)sec/3600;
sec = fmod( sec, 3600.0 );
min = (int)sec/60;
sec = fmod( sec, 60.0 );
printf ( "\n\n%d found out of %d searches. Overall Time: %d:%d:%2.3f\n",
found, searches, hours, min, sec );
timeStr[0] = '\0'; /* just in case sprintf doesn't do what we want. */
sprintf ( timeStr, "%02d:%02d:%02.3f", hours, min, sec );
YAMLWriteInt("Signatures Found", found);
YAMLWriteString("Search Time", timeStr);
//printStats();
return( found );
}
/*
* findAndLogAllPossibleLegs has the same search algorithm as findAllPossibleLegs. However, the requirement
* that all the legs be logged requires a more sophisticated means of parallelization in order to do a
* reduction of the solutions when all the threads are done.
*/
int findAndLogAllPossibleLegs(Graph *graph, SearchOptions *options)
{
int i, j, k;
int found = 0;
int searches = 0;
double tick, tock;
int hours, min;
double sec;
int maxThreads = 1;
NodeVecVec **lastingResults;
Graph *optimizedGraph = NULL;
char timeStr[50];
/* A little bit of error checking */
if ( !graph )
return 0;
tick = currentTime();
fprintf(stdout, "Immediately before parallel\n" );
#ifdef USE_OMP
#pragma omp parallel private(i,j,k) shared(graph,maxThreads,lastingResults) reduction(+:found) reduction(+:searches)
#endif
{
#ifdef USE_OMP
int myThread = omp_get_thread_num();
#else
int myThread = 0;
#endif
NodeVecVec *myResults = NULL;
#ifdef USE_OMP
#pragma omp single
#endif
{
#ifdef USE_OMP
maxThreads = omp_get_num_threads();
#else
maxThreads = 1;
#endif
options->multiThreaded = maxThreads > 1;
/* debug * /printf( "%d total threads, this one is %d\n", maxThreads, myThread ); / * debug */
lastingResults = malloc((maxThreads+1) * sizeof(NodeVecVec*) );
lastingResults[maxThreads] = 0; /* Null terminated to avoid having to keep track of bitsNeeded */
}
#ifdef USE_OMP
#pragma omp critical
#endif
{
lastingResults[myThread] = NodeVecVec_new(64); /* Arbitrary - multiple of 8, eventually cache line aligned... */
myResults = lastingResults[myThread];
}
#ifdef USE_OMP
#pragma omp single
#endif
{
printf ("Immediately before nested for's\n");
}
#ifdef USE_OMP
#pragma omp for
#pragma omp collapse(2)
#endif
for ( i = 0; i < graph->systemCallMap->contentSize; ++i )
{
for ( j = 0; j < graph->systemCallMap->contentSize; ++j )
{
++searches;
for (k = 0; k < graph->systemCallMap->vector[i]->nodes->contentSize; ++k)
{
char *fullSignature[3] = { NULL, NULL, NULL };
int fullIntSignature[3] = { 0, 0, -1 };
fullSignature[0] = graph->systemCallMap->vector[i]->label;
fullSignature[1] = graph->systemCallMap->vector[j]->label;
fullIntSignature[0] = i;
fullIntSignature[1] = j;
NodePtrVec *result = NodePtrVec_new(16);
Bitfield *visited = Bitfield_new(graph->totalNodes);
/* debug ---> * /
printf( "Searching for %s(%d) ~~~> %s\n", fullSignature[0],
graph->systemCallMap->vector[i]->nodes->vector[k]->id,
fullSignature[1]);
/ * <-- debug */
findAndRecordAllPaths( graph->systemCallMap->vector[i]->nodes->vector[k], &fullSignature[1],
&fullIntSignature[1], result, visited, myResults, options );
Bitfield_delete(visited);
if ( result )
NodePtrVec_delete( result );
} // end of for (k)... fork? heh.
} // end of for (j)
} // end of for (i)
found = myResults->contentSize;
}
tock = currentTime();
sec = tock-tick;
hours = (int)sec/3600;
sec = fmod( sec, 3600.0 );
min = (int)sec/60;
sec = fmod( sec, 60.0 );
printf ( "\n\n%d found for %d searches. Overall Time: %d:%d:%2.3f\n",
found, searches, hours, min, sec );
timeStr[0] = '\0'; /* just in case sprintf doesn't do what we want. */
sprintf ( timeStr, "%02d:%02d:%02.3f", hours, min, sec );
YAMLWriteInt("Signatures Found", found);
YAMLWriteString("Search Time", timeStr);
/* debug --> * /
printf ("max threads still:%d\n", maxThreads);
for ( i = 0; i < maxThreads; ++i )
{
printf ( "printing out thread %d result - %d long\n", i, lastingResults[i]->contentSize );
for ( j = 0; j < lastingResults[i]->contentSize; ++j )
{
printf("\t");
printStack( lastingResults[i]->vector[j] );
printf("\n");
}
}
/ * <-- debug */
/* At some point, we will want to use the FullPath argument and pass it along to
* buildGraphFromPaths. Until then, however, we're only going to build the most
* minimal graph possible.
*/
if ( options->writeOutputFile && options->outputFile )
{
optimizedGraph = buildGraphFromPaths(lastingResults, options->buildType);
exportGraph(optimizedGraph, options->outputFile);
}
if ( options->doStatistics && !options->multiThreaded )
printStats();
return( found );
}