/*
 * Copyright 2018 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// DataFlow IR is an SSA representation. It can be built from the main
// Binaryen IR.
//
// THe main initial use case was an IR that could easily be converted to
// Souper IR, and the design favors that.
//

#ifndef wasm_dataflow_graph_h
#define wasm_dataflow_graph_h

#include "dataflow/node.h"
#include "ir/abstract.h"
#include "ir/iteration.h"
#include "ir/literal-utils.h"
#include "wasm.h"

namespace wasm::DataFlow {

// Main logic to generate IR for a function. This is implemented as a
// visitor on the wasm, where visitors return a Node* that either
// contains the DataFlow IR for that expression, which can be a
// Bad node if not supported, or nullptr if not relevant (we only
// use the return value for internal expressions, that is, the
// value of a local.set or the condition of an if etc).
struct Graph : public UnifiedExpressionVisitor<Graph, Node*> {
  // We only need one canonical bad node. It is never modified.
  Node bad = Node(Node::Type::Bad);

  // Connects a specific set to the data in its value.
  std::unordered_map<LocalSet*, Node*> setNodeMap;

  // Maps a control-flow expression to the conditions for it. Currently,
  // this maps an if to the conditions for its arms
  std::unordered_map<Expression*, std::vector<Node*>> expressionConditionMap;

  // Maps each expression to its control-flow parent (or null if
  // there is none). We only map expressions we need to know about,
  // which are sets, set values, and control-flow constructs.
  std::unordered_map<Expression*, Expression*> expressionParentMap;

  // The same, for nodes. Note that we currently don't know the parents
  // of nodes like phis that don't exist in wasm - we need to add more
  // stuff to handle that. But we will know the parent of anything using
  // that phi and storing to a local, which is probably enough anyhow
  // for pc generation.
  std::unordered_map<Node*, Expression*> nodeParentMap;

  // All the sets, in order of appearance.
  std::vector<LocalSet*> sets;

  // The function being processed.
  Function* func;

  // The module we are working in.
  Module* module;

  // All of our nodes
  std::vector<std::unique_ptr<Node>> nodes;

  // Tracking state during building

  // We need to track the parents of control flow nodes.
  Expression* parent = nullptr;

  // Tracks the state of locals in a control flow path:
  //   locals[i] = the node whose value it contains
  // When we are in unreachable code (i.e., a path that does not
  // need to be merged in anywhere), we set the length of this
  // vector to 0 to indicate that.
  typedef std::vector<Node*> Locals;

  // The current local state in the control flow path being emitted.
  Locals locals;

  // The local states on branches to a specific target.
  std::unordered_map<Name, std::vector<Locals>> breakStates;

  // The local state in a control flow path, including a possible
  // condition as well.
  struct FlowState {
    Locals locals;
    Node* condition;
    FlowState(Locals locals, Node* condition)
      : locals(locals), condition(condition) {}
  };

  // API

  void build(Function* funcInit, Module* moduleInit) {
    func = funcInit;
    module = moduleInit;

    auto numLocals = func->getNumLocals();
    if (numLocals == 0) {
      return; // nothing to do
    }
    // Set up initial local state IR.
    setInReachable();
    for (Index i = 0; i < numLocals; i++) {
      if (!isRelevantType(func->getLocalType(i))) {
        continue;
      }
      Node* node;
      auto type = func->getLocalType(i);
      if (func->isParam(i)) {
        node = makeVar(type);
      } else {
        node = makeZero(type);
      }
      locals[i] = node;
    }
    // Process the function body, generating the rest of the IR.
    visit(func->body);
  }

  // Makes a Var node, representing a value that could be anything.
  Node* makeVar(wasm::Type type) {
    if (isRelevantType(type)) {
      return addNode(Node::makeVar(type));
    } else {
      return &bad;
    }
  }

  // We create one node per constant value
  std::unordered_map<Literal, Node*> constantNodes;

  Node* makeConst(Literal value) {
    auto iter = constantNodes.find(value);
    if (iter != constantNodes.end()) {
      return iter->second;
    }
    // Create one for this literal.
    Builder builder(*module);
    auto* c = builder.makeConst(value);
    auto* ret = addNode(Node::makeExpr(c, c));
    constantNodes[value] = ret;
    return ret;
  }

  Node* makeZero(wasm::Type type) { return makeConst(Literal::makeZero(type)); }

  // Add a new node to our list of owned nodes.
  Node* addNode(Node* node) {
    nodes.push_back(std::unique_ptr<Node>(node));
    return node;
  }

  Node* makeZeroComp(Node* node, bool equal, Expression* origin) {
    assert(!node->isBad());
    Builder builder(*module);
    auto type = node->getWasmType();
    if (!type.isConcrete()) {
      return &bad;
    }
    auto* zero = makeZero(type);
    auto* expr = builder.makeBinary(
      Abstract::getBinary(type, equal ? Abstract::Eq : Abstract::Ne),
      makeUse(node),
      makeUse(zero));
    auto* check = addNode(Node::makeExpr(expr, origin));
    check->addValue(expandFromI1(node, origin));
    check->addValue(zero);
    return check;
  }

  void setInUnreachable() { locals.clear(); }

  void setInReachable() { locals.resize(func->getNumLocals()); }

  bool isInUnreachable() { return isInUnreachable(locals); }

  bool isInUnreachable(const Locals& state) { return state.empty(); }

  bool isInUnreachable(const FlowState& state) {
    return isInUnreachable(state.locals);
  }

  // Visiting.

  Node* visitExpression(Expression* curr) {
    // TODO Exception handling instruction support

    // Control flow and get/set etc. are special. Aside from them, we just need
    // to do something very generic.
    if (auto* block = curr->dynCast<Block>()) {
      return doVisitBlock(block);
    } else if (auto* iff = curr->dynCast<If>()) {
      return doVisitIf(iff);
    } else if (auto* loop = curr->dynCast<Loop>()) {
      return doVisitLoop(loop);
    } else if (auto* get = curr->dynCast<LocalGet>()) {
      return doVisitLocalGet(get);
    } else if (auto* set = curr->dynCast<LocalSet>()) {
      return doVisitLocalSet(set);
    } else if (auto* br = curr->dynCast<Break>()) {
      return doVisitBreak(br);
    } else if (auto* sw = curr->dynCast<Switch>()) {
      return doVisitSwitch(sw);
    } else if (auto* c = curr->dynCast<Const>()) {
      return doVisitConst(c);
    } else if (auto* unary = curr->dynCast<Unary>()) {
      return doVisitUnary(unary);
    } else if (auto* binary = curr->dynCast<Binary>()) {
      return doVisitBinary(binary);
    } else if (auto* select = curr->dynCast<Select>()) {
      return doVisitSelect(select);
    } else if (auto* unreachable = curr->dynCast<Unreachable>()) {
      return doVisitUnreachable(unreachable);
    } else if (auto* drop = curr->dynCast<Drop>()) {
      return doVisitDrop(drop);
    } else if (curr->is<Try>() || curr->is<Throw>() || curr->is<Rethrow>()) {
      Fatal() << "DataFlow does not support EH instructions yet";
    } else {
      return doVisitGeneric(curr);
    }
  }

  Node* doVisitBlock(Block* curr) {
    // TODO: handle super-deep nesting
    auto* oldParent = parent;
    expressionParentMap[curr] = oldParent;
    parent = curr;
    for (auto* child : curr->list) {
      visit(child);
    }
    // Merge the outputs
    // TODO handle conditions on these breaks
    if (curr->name.is()) {
      auto iter = breakStates.find(curr->name);
      if (iter != breakStates.end()) {
        auto& states = iter->second;
        // Add the state flowing out
        if (!isInUnreachable()) {
          states.push_back(locals);
        }
        mergeBlock(states, locals);
      }
    }
    parent = oldParent;
    return &bad;
  }
  Node* doVisitIf(If* curr) {
    auto* oldParent = parent;
    expressionParentMap[curr] = oldParent;
    parent = curr;
    // Set up the condition.
    Node* condition = visit(curr->condition);
    assert(condition);
    // Handle the contents.
    auto initialState = locals;
    visit(curr->ifTrue);
    auto afterIfTrueState = locals;
    if (curr->ifFalse) {
      locals = initialState;
      visit(curr->ifFalse);
      auto afterIfFalseState = locals; // TODO: optimize
      mergeIf(afterIfTrueState, afterIfFalseState, condition, curr, locals);
    } else {
      mergeIf(initialState, afterIfTrueState, condition, curr, locals);
    }
    parent = oldParent;
    return &bad;
  }
  Node* doVisitLoop(Loop* curr) {
    auto* oldParent = parent;
    expressionParentMap[curr] = oldParent;
    parent = curr;
    // As in Souper's LLVM extractor, we avoid loop phis, as we don't want
    // our traces to represent a value that differs across loop iterations.
    // For example,
    //   %b = block
    //   %x = phi %b, 1, %y
    //   %y = phi %b, 2, %x
    //   %z = eq %x %y
    //   infer %z
    // Here %y refers to the previous iteration's %x.
    // To do this, we set all locals to a Var at the loop entry, then process
    // the inside of the loop. When that is done, we can see if a phi was
    // actually needed for each local. If it was, we leave the Var (it
    // represents an unknown value; analysis stops there), and if not, we
    // can replace the Var with the fixed value.
    // TODO: perhaps some more general uses of DataFlow will want loop phis?
    // TODO: optimize stuff here
    if (isInUnreachable()) {
      return &bad; // none of this matters
    }
    if (!curr->name.is()) {
      visit(curr->body);
      return &bad; // no phis are possible
    }
    auto previous = locals;
    auto numLocals = func->getNumLocals();
    for (Index i = 0; i < numLocals; i++) {
      locals[i] = makeVar(func->getLocalType(i));
    }
    auto vars = locals; // all the Vars we just created
    // We may need to replace values later - only new nodes added from
    // here are relevant.
    auto firstNodeFromLoop = nodes.size();
    // Process the loop body.
    visit(curr->body);
    // Find all incoming paths.
    auto& breaks = breakStates[curr->name];
    // Phis are possible, check for them.
    for (Index i = 0; i < numLocals; i++) {
      if (!isRelevantType(func->getLocalType(i))) {
        continue;
      }
      bool needPhi = false;
      // We replaced the proper value with a Var. If it's still that
      // Var - or it's the original proper value, which can happen with
      // constants - on all incoming paths, then a phi is not needed.
      auto* var = vars[i];
      auto* proper = previous[i];
      for (auto& other : breaks) {
        assert(!isInUnreachable(other));
        auto& curr = *(other[i]);
        if (curr != *var && curr != *proper) {
          // A phi would be necessary here.
          needPhi = true;
          break;
        }
      }
      if (needPhi) {
        // Nothing to do - leave the Vars, the loop phis are
        // unknown values to us.
      } else {
        // Undo the Var for this local: In every new node added for
        // the loop body, replace references to the Var with the
        // previous value (the value that is all we need instead of a phi).
        for (auto j = firstNodeFromLoop; j < nodes.size(); j++) {
          for (auto*& value : nodes[j].get()->values) {
            if (value == var) {
              value = proper;
            }
          }
        }
        // Also undo in the current local state, which is flowing out
        // of the loop.
        for (auto*& node : locals) {
          if (node == var) {
            node = proper;
          }
        }
      }
    }
    return &bad;
  }
  Node* doVisitBreak(Break* curr) {
    if (!isInUnreachable()) {
      breakStates[curr->name].push_back(locals);
    }
    if (!curr->condition) {
      setInUnreachable();
    } else {
      visit(curr->condition);
    }
    return &bad;
  }
  Node* doVisitSwitch(Switch* curr) {
    visit(curr->condition);
    if (!isInUnreachable()) {
      std::unordered_set<Name> targets;
      for (auto target : curr->targets) {
        targets.insert(target);
      }
      targets.insert(curr->default_);
      for (auto target : targets) {
        breakStates[target].push_back(locals);
      }
    }
    setInUnreachable();
    return &bad;
  }
  Node* doVisitLocalGet(LocalGet* curr) {
    if (!isRelevantLocal(curr->index) || isInUnreachable()) {
      return &bad;
    }
    // We now know which IR node this get refers to
    auto* node = locals[curr->index];
    return node;
  }
  Node* doVisitLocalSet(LocalSet* curr) {
    if (!isRelevantLocal(curr->index) || isInUnreachable()) {
      return &bad;
    }
    assert(curr->value->type.isConcrete());
    sets.push_back(curr);
    expressionParentMap[curr] = parent;
    expressionParentMap[curr->value] = curr;
    // Set the current node in the local state.
    auto* node = visit(curr->value);
    locals[curr->index] = setNodeMap[curr] = node;
    // If we created a new node (and not just did a get of a set, which
    // passes around an existing node), mark its parent.
    if (nodeParentMap.find(node) == nodeParentMap.end()) {
      nodeParentMap[node] = curr;
    }
    return &bad;
  }
  Node* doVisitConst(Const* curr) { return makeConst(curr->value); }
  Node* doVisitUnary(Unary* curr) {
    // First, check if we support this op.
    switch (curr->op) {
      case ClzInt32:
      case ClzInt64:
      case CtzInt32:
      case CtzInt64:
      case PopcntInt32:
      case PopcntInt64: {
        // These are ok as-is.
        // Check if our child is supported.
        auto* value = expandFromI1(visit(curr->value), curr);
        if (value->isBad()) {
          return value;
        }
        // Great, we are supported!
        auto* ret = addNode(Node::makeExpr(curr, curr));
        ret->addValue(value);
        return ret;
      }
      case EqZInt32:
      case EqZInt64: {
        // These can be implemented using a binary.
        // Check if our child is supported.
        auto* value = expandFromI1(visit(curr->value), curr);
        if (value->isBad()) {
          return value;
        }
        // Great, we are supported!
        return makeZeroComp(value, true, curr);
      }
      default: {
        // Anything else is an unknown value.
        return makeVar(curr->type);
      }
    }
  }
  Node* doVisitBinary(Binary* curr) {
    // First, check if we support this op.
    switch (curr->op) {
      case AddInt32:
      case AddInt64:
      case SubInt32:
      case SubInt64:
      case MulInt32:
      case MulInt64:
      case DivSInt32:
      case DivSInt64:
      case DivUInt32:
      case DivUInt64:
      case RemSInt32:
      case RemSInt64:
      case RemUInt32:
      case RemUInt64:
      case AndInt32:
      case AndInt64:
      case OrInt32:
      case OrInt64:
      case XorInt32:
      case XorInt64:
      case ShlInt32:
      case ShlInt64:
      case ShrUInt32:
      case ShrUInt64:
      case ShrSInt32:
      case ShrSInt64:
      case RotLInt32:
      case RotLInt64:
      case RotRInt32:
      case RotRInt64:
      case EqInt32:
      case EqInt64:
      case NeInt32:
      case NeInt64:
      case LtSInt32:
      case LtSInt64:
      case LtUInt32:
      case LtUInt64:
      case LeSInt32:
      case LeSInt64:
      case LeUInt32:
      case LeUInt64: {
        // These are ok as-is.
        // Check if our children are supported.
        auto* left = expandFromI1(visit(curr->left), curr);
        if (left->isBad()) {
          return left;
        }
        auto* right = expandFromI1(visit(curr->right), curr);
        if (right->isBad()) {
          return right;
        }
        // Great, we are supported!
        auto* ret = addNode(Node::makeExpr(curr, curr));
        ret->addValue(left);
        ret->addValue(right);
        return ret;
      }
      case GtSInt32:
      case GtSInt64:
      case GeSInt32:
      case GeSInt64:
      case GtUInt32:
      case GtUInt64:
      case GeUInt32:
      case GeUInt64: {
        // These need to be flipped as Souper does not support redundant ops.
        Builder builder(*module);
        BinaryOp opposite;
        switch (curr->op) {
          case GtSInt32:
            opposite = LtSInt32;
            break;
          case GtSInt64:
            opposite = LtSInt64;
            break;
          case GeSInt32:
            opposite = LeSInt32;
            break;
          case GeSInt64:
            opposite = LeSInt64;
            break;
          case GtUInt32:
            opposite = LtUInt32;
            break;
          case GtUInt64:
            opposite = LtUInt64;
            break;
          case GeUInt32:
            opposite = LeUInt32;
            break;
          case GeUInt64:
            opposite = LeUInt64;
            break;
          default:
            WASM_UNREACHABLE("unexpected op");
        }
        auto* ret =
          visitBinary(builder.makeBinary(opposite, curr->right, curr->left));
        // We just created a new binary node, but we need to set the origin
        // properly to the original.
        ret->origin = curr;
        return ret;
      }
      default: {
        // Anything else is an unknown value.
        return makeVar(curr->type);
      }
    }
  }
  Node* doVisitSelect(Select* curr) {
    auto* ifTrue = expandFromI1(visit(curr->ifTrue), curr);
    if (ifTrue->isBad()) {
      return ifTrue;
    }
    auto* ifFalse = expandFromI1(visit(curr->ifFalse), curr);
    if (ifFalse->isBad()) {
      return ifFalse;
    }
    auto* condition = ensureI1(visit(curr->condition), curr);
    if (condition->isBad()) {
      return condition;
    }
    // Great, we are supported!
    auto* ret = addNode(Node::makeExpr(curr, curr));
    ret->addValue(condition);
    ret->addValue(ifTrue);
    ret->addValue(ifFalse);
    return ret;
  }
  Node* doVisitUnreachable(Unreachable* curr) {
    setInUnreachable();
    return &bad;
  }
  Node* doVisitDrop(Drop* curr) {
    visit(curr->value);
    // We need to know that the value's parent is a drop, indicating
    // the value is not actually used here.
    expressionParentMap[curr->value] = curr;
    return &bad;
  }
  Node* doVisitGeneric(Expression* curr) {
    // Just need to visit the nodes so we note all the gets
    for (auto* child : ChildIterator(curr)) {
      visit(child);
    }
    return makeVar(curr->type);
  }

  // Helpers.

  bool isRelevantType(wasm::Type type) { return type.isInteger(); }

  bool isRelevantLocal(Index index) {
    return isRelevantType(func->getLocalType(index));
  }

  // Merge local state for an if, also creating a block and conditions.
  void mergeIf(Locals& aState,
               Locals& bState,
               Node* condition,
               Expression* expr,
               Locals& out) {
    // Create the conditions (if we can).
    Node* ifTrue;
    Node* ifFalse;
    if (!condition->isBad()) {
      // Generate boolean (i1 returning) conditions for the two branches.
      auto& conditions = expressionConditionMap[expr];
      ifTrue = ensureI1(condition, nullptr);
      conditions.push_back(ifTrue);
      ifFalse = makeZeroComp(condition, true, nullptr);
      conditions.push_back(ifFalse);
    } else {
      ifTrue = ifFalse = &bad;
    }
    // Finally, merge the state with that block. TODO optimize
    std::vector<FlowState> states;
    if (!isInUnreachable(aState)) {
      states.emplace_back(aState, ifTrue);
    }
    if (!isInUnreachable(bState)) {
      states.emplace_back(bState, ifFalse);
    }
    merge(states, out);
  }

  // Merge local state for a block
  void mergeBlock(std::vector<Locals>& localses, Locals& out) {
    // TODO: conditions
    std::vector<FlowState> states;
    for (auto& locals : localses) {
      states.emplace_back(locals, &bad);
    }
    merge(states, out);
  }

  // Merge local state for multiple control flow paths, creating phis as needed.
  void merge(std::vector<FlowState>& states, Locals& out) {
    // We should only receive reachable states.
#ifndef NDEBUG
    for (auto& state : states) {
      assert(!isInUnreachable(state.locals));
    }
#endif
    Index numStates = states.size();
    if (numStates == 0) {
      // We were unreachable, and still are.
      assert(isInUnreachable());
      return;
    }
    // We may have just become reachable, if we were not before.
    setInReachable();
    // Just one thing to merge is trivial.
    if (numStates == 1) {
      out = states[0].locals;
      return;
    }
    // We create a block if we need one.
    Index numLocals = func->getNumLocals();
    Node* block = nullptr;
    for (Index i = 0; i < numLocals; i++) {
      if (!isRelevantType(func->getLocalType(i))) {
        continue;
      }
      // Process the inputs. If any is bad, the phi is bad.
      bool bad = false;
      for (auto& state : states) {
        auto* node = state.locals[i];
        if (node->isBad()) {
          bad = true;
          out[i] = node;
          break;
        }
      }
      if (bad) {
        continue;
      }
      // Nothing is bad, proceed.
      Node* first = nullptr;
      for (auto& state : states) {
        if (!first) {
          first = out[i] = state.locals[i];
        } else if (state.locals[i] != first) {
          // We need to actually merge some stuff.
          if (!block) {
            block = addNode(Node::makeBlock());
            for (Index index = 0; index < numStates; index++) {
              auto* condition = states[index].condition;
              if (!condition->isBad()) {
                condition = addNode(Node::makeCond(block, index, condition));
              }
              block->addValue(condition);
            }
          }
          auto* phi = addNode(Node::makePhi(block, i));
          for (auto& state : states) {
            auto* value = expandFromI1(state.locals[i], nullptr);
            phi->addValue(value);
          }
          out[i] = phi;
          break;
        }
      }
    }
  }

  // If the node returns an i1, then we are called from a context that needs
  // to use it normally as in wasm - extend it
  Node* expandFromI1(Node* node, Expression* origin) {
    if (!node->isBad() && node->returnsI1()) {
      node = addNode(Node::makeZext(node, origin));
    }
    return node;
  }

  Node* ensureI1(Node* node, Expression* origin) {
    if (!node->isBad() && !node->returnsI1()) {
      node = makeZeroComp(node, false, origin);
    }
    return node;
  }

  // Given a node representing something that is local.set'd, return
  // the set.
  LocalSet* getSet(Node* node) {
    auto iter = nodeParentMap.find(node);
    if (iter == nodeParentMap.end()) {
      return nullptr;
    }
    return iter->second->dynCast<LocalSet>();
  }

  // Given an expression, return the parent if such exists.
  Expression* getParent(Expression* curr) {
    auto iter = expressionParentMap.find(curr);
    if (iter == expressionParentMap.end()) {
      return nullptr;
    }
    return iter->second;
  }

  // Given an expression, return the set for it if such exists.
  LocalSet* getSet(Expression* curr) {
    auto* parent = getParent(curr);
    return parent ? parent->dynCast<LocalSet>() : nullptr;
  }

  // Creates an expression that uses a node. Generally, a node represents
  // a value in a local, so we create a local.get for it.
  Expression* makeUse(Node* node) {
    Builder builder(*module);
    if (node->isPhi()) {
      // The index is the wasm local that we assign to when implementing
      // the phi; get from there.
      auto index = node->index;
      return builder.makeLocalGet(index, func->getLocalType(index));
    } else if (node->isConst()) {
      return builder.makeConst(node->expr->cast<Const>()->value);
    } else if (node->isExpr()) {
      // Find the set we are a value of.
      auto index = getSet(node)->index;
      return builder.makeLocalGet(index, func->getLocalType(index));
    } else if (node->isZext()) {
      // i1 zexts are a no-op for wasm
      return makeUse(node->values[0]);
    } else if (node->isVar()) {
      // Nothing valid for us to read here. Emit a call, representing an unknown
      // variable value.
      return Builder(*module).makeCall(FAKE_CALL, {}, node->wasmType);
    } else {
      WASM_UNREACHABLE("unexpected node type"); // TODO
    }
  }

  const Name FAKE_CALL = "fake$dfo$call";
};

} // namespace wasm::DataFlow

#endif // wasm_dataflow_graph_h
