blob: 975d163abc8ad5ab30089db06968d9e66cc32ce4 [file] [log] [blame]
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef wasm_support_strongly_connected_components_h
#define wasm_support_strongly_connected_components_h
#include <cassert>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <iostream>
namespace wasm {
// A CRTP utility implementing Tarjan's Strongly Connected Component algorithm
// (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
// in terms of iterators. Given the beginning and end iterators over the
// elements in a graph an implementation of `pushChildren` in the CRTP subclass
// that pushes an element's children, provides an iterable over the SCCs, each
// of which is an iterable over the elements in the SCC. All implemented
// iterators are input iterators that mutate the underlying state, so this
// utility can only be used for single-pass algorithms.
template<typename It, typename Class> struct SCCs {
SCCs(It inputIt, It inputEnd) : inputIt(inputIt), inputEnd(inputEnd) {}
private:
using T = typename std::iterator_traits<It>::value_type;
// The iterators over the graph we are calculating the SCCs for.
It inputIt;
It inputEnd;
// Stack of pending elements to visit, used instead of a recursive visitor.
struct WorkItem {
T item;
std::optional<T> parent = std::nullopt;
bool processedChildren = false;
};
std::vector<WorkItem> workStack;
// The Tarjan's algorithm stack. Similar to a DFS stack, but elements are only
// popped off once they are committed to a strongly connected component.
// Elements stay on the stack after they are visited iff they have a back edge
// to an element earlier in the stack.
std::vector<T> stack;
struct ElementInfo {
// Index assigned based on element visitation order.
size_t index;
// The smallest index of the elements reachable from this element.
size_t lowlink;
// Whether this element is still on `stack`.
bool onStack;
};
std::unordered_map<T, ElementInfo> elementInfo;
// The parent to record when calling into the subclass to push children.
std::optional<T> currParent;
// The root (i.e. deepest element in the stack) for the current SCC. Empty
// whenever we have yet to find the next SCC.
std::optional<T> currRoot;
bool stepToNextGroup() {
while (inputIt != inputEnd || !workStack.empty()) {
if (workStack.empty()) {
workStack.push_back({*inputIt++});
}
while (!workStack.empty()) {
auto& work = workStack.back();
auto& item = work.item;
if (!work.processedChildren) {
auto newIndex = elementInfo.size();
auto [it, inserted] =
elementInfo.insert({item, {newIndex, newIndex, true}});
if (inserted) {
// This is a new item we have never seen before. We have already
// initialized its associated data.
stack.push_back(item);
// Leave the element on the work stack because we will have to do
// more work after we have finished processing its children.
work.processedChildren = true;
currParent = item;
static_cast<Class*>(this)->pushChildren(item);
currParent = std::nullopt;
// Process the pushed children first; we will come back to this item
// later.
continue;
}
auto& info = it->second;
if (info.onStack) {
assert(work.parent);
// Item is already in the current SCC. Update the parent's lowlink
// if this child has a smaller index than we have seen so far.
auto& parentLowlink = elementInfo[*work.parent].lowlink;
parentLowlink = std::min(parentLowlink, info.index);
} else {
// Item is in an SCC we have already processed, so ignore it
// entirely.
}
// Do not recurse for this item we have seen before. We are done with
// it.
workStack.pop_back();
continue;
}
// We have finished processing the children for the current element, so
// we know its final lowlink value. Use it to potentially update the
// parent's lowlink value.
auto& info = elementInfo[item];
if (work.parent) {
auto& parentLowlink = elementInfo[*work.parent].lowlink;
parentLowlink = std::min(parentLowlink, info.lowlink);
}
if (info.index == info.lowlink) {
// This element reaches and is reachable by all shallower elements in
// the stack (otherwise they would have already been popped) and does
// not itself reach any deeper elements, so we have found an SCC and
// the current item is its root.
currRoot = item;
workStack.pop_back();
return true;
}
workStack.pop_back();
}
}
// We are at the end.
return false;
}
void stepToNextElem() {
assert(currRoot);
if (stack.back() == *currRoot) {
// This was the last element in the current SCC. We have to find the next
// SCC now.
currRoot = std::nullopt;
}
elementInfo[stack.back()].onStack = false;
stack.pop_back();
}
void pushChildren(const T& parent) {
static_assert(&SCCs<It, Class>::pushChildren != &Class::pushChildren,
"SCCs subclass must implement `pushChildren`");
}
protected:
// Call this from `Class::pushChildren` to add a child.
void push(const T& item) {
assert(currParent);
workStack.push_back({item, currParent});
}
public:
struct SCC {
SCCs<It, Class>* parent;
// Iterate over the elements in a strongly connected component.
struct Iterator {
using value_type = T;
using difference_type = std::ptrdiff_t;
using reference = T&;
using pointer = T*;
using iterator_category = std::input_iterator_tag;
SCCs<It, Class>* parent;
std::optional<T> val = std::nullopt;
bool isEnd() const { return !parent || !parent->currRoot; }
bool operator==(const Iterator& other) const {
return isEnd() == other.isEnd();
}
bool operator!=(const Iterator& other) const { return !(*this == other); }
T operator*() { return *val; }
T* operator->() { return &*val; }
void setVal() {
if (isEnd()) {
val = std::nullopt;
} else {
val = parent->stack.back();
}
}
Iterator& operator++() {
parent->stepToNextElem();
setVal();
return *this;
}
Iterator operator++(int) {
auto it = *this;
++(*this);
return it;
}
};
Iterator begin() {
Iterator it = {parent};
it.setVal();
return it;
}
Iterator end() { return {nullptr}; }
};
// Iterate over the strongly connected components of the graph.
struct Iterator {
using value_type = SCC;
using difference_type = std::ptrdiff_t;
using reference = SCC&;
using pointer = SCC*;
using iterator_category = std::input_iterator_tag;
// scc.parent is null iff we are at the end.
SCC scc;
bool isEnd() const { return !scc.parent; }
bool operator==(const Iterator& other) const {
return isEnd() == other.isEnd();
}
bool operator!=(const Iterator& other) const { return !(*this == other); }
SCC operator*() { return scc; }
SCC* operator->() { return &scc; }
Iterator& operator++() {
// Skip the rest of the current SCC, if for some reason it was not
// consumed.
for (auto elem : *(*this)) {
(void)elem;
}
if (!scc.parent->stepToNextGroup()) {
// We are at the end, so mark ourselves as such.
scc.parent = nullptr;
}
return *this;
}
void operator++(int) { ++(*this); }
};
Iterator begin() { return ++Iterator{this}; }
Iterator end() { return {nullptr}; }
};
} // namespace wasm
#endif // wasm_support_strongly_connected_components_h