/*
* Copyright 2016 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Optimize combinations of instructions
//
#include <algorithm>
#include <cmath>
#include <type_traits>
#include <ir/abstract.h>
#include <ir/bits.h>
#include <ir/boolean.h>
#include <ir/cost.h>
#include <ir/drop.h>
#include <ir/effects.h>
#include <ir/eh-utils.h>
#include <ir/find_all.h>
#include <ir/gc-type-utils.h>
#include <ir/iteration.h>
#include <ir/literal-utils.h>
#include <ir/load-utils.h>
#include <ir/localize.h>
#include <ir/manipulation.h>
#include <ir/match.h>
#include <ir/ordering.h>
#include <ir/properties.h>
#include <ir/type-updating.h>
#include <ir/utils.h>
#include <pass.h>
#include <support/stdckdint.h>
#include <support/threads.h>
#include <wasm.h>
#include "call-utils.h"
// TODO: Use the new sign-extension opcodes where appropriate. This needs to be
// conditionalized on the availability of atomics.
namespace wasm {
static Index getBitsForType(Type type) {
if (!type.isNumber()) {
return -1;
}
return type.getByteSize() * 8;
}
static bool isSignedOp(BinaryOp op) {
switch (op) {
case LtSInt32:
case LeSInt32:
case GtSInt32:
case GeSInt32:
case LtSInt64:
case LeSInt64:
case GtSInt64:
case GeSInt64:
return true;
default:
return false;
}
}
// Useful information about locals
struct LocalInfo {
static const Index kUnknown = Index(-1);
Index maxBits = -1;
Index signExtBits = 0;
};
struct LocalScanner : PostWalker<LocalScanner> {
std::vector<LocalInfo>& localInfo;
const PassOptions& passOptions;
LocalScanner(std::vector<LocalInfo>& localInfo,
const PassOptions& passOptions)
: localInfo(localInfo), passOptions(passOptions) {}
void doWalkFunction(Function* func) {
// prepare
localInfo.resize(func->getNumLocals());
for (Index i = 0; i < func->getNumLocals(); i++) {
auto& info = localInfo[i];
if (func->isParam(i)) {
info.maxBits = getBitsForType(func->getLocalType(i)); // worst-case
info.signExtBits = LocalInfo::kUnknown; // we will never know anything
} else {
info.maxBits = info.signExtBits = 0; // we are open to learning
}
}
// walk
PostWalker<LocalScanner>::doWalkFunction(func);
// finalize
for (Index i = 0; i < func->getNumLocals(); i++) {
auto& info = localInfo[i];
if (info.signExtBits == LocalInfo::kUnknown) {
info.signExtBits = 0;
}
}
}
void visitLocalSet(LocalSet* curr) {
auto* func = getFunction();
if (func->isParam(curr->index)) {
return;
}
auto type = getFunction()->getLocalType(curr->index);
if (type != Type::i32 && type != Type::i64) {
return;
}
// an integer var, worth processing
auto* value =
Properties::getFallthrough(curr->value, passOptions, *getModule());
auto& info = localInfo[curr->index];
info.maxBits = std::max(info.maxBits, Bits::getMaxBits(value, this));
auto signExtBits = LocalInfo::kUnknown;
if (Properties::getSignExtValue(value)) {
signExtBits = Properties::getSignExtBits(value);
} else if (auto* load = value->dynCast<Load>()) {
if (LoadUtils::isSignRelevant(load) && load->signed_) {
signExtBits = load->bytes * 8;
}
}
if (info.signExtBits == 0) {
info.signExtBits = signExtBits; // first info we see
} else if (info.signExtBits != signExtBits) {
// contradictory information, give up
info.signExtBits = LocalInfo::kUnknown;
}
}
// define this for the templated getMaxBits method. we know nothing here yet
// about locals, so return the maxes
Index getMaxBitsForLocal(LocalGet* get) { return getBitsForType(get->type); }
};
namespace {
// perform some final optimizations
struct FinalOptimizer : public PostWalker<FinalOptimizer> {
const PassOptions& passOptions;
FinalOptimizer(const PassOptions& passOptions) : passOptions(passOptions) {}
void visitBinary(Binary* curr) {
if (auto* replacement = optimize(curr)) {
replaceCurrent(replacement);
}
}
Binary* optimize(Binary* curr) {
using namespace Abstract;
using namespace Match;
{
Const* c;
if (matches(curr, binary(Add, any(), ival(&c)))) {
// normalize x + (-C) ==> x - C
if (c->value.isNegative()) {
c->value = c->value.neg();
curr->op = Abstract::getBinary(c->type, Sub);
}
// Wasm binary encoding uses signed LEBs, which slightly favor negative
// numbers: -64 is more efficient than +64, and the same holds for the
// other powers of two that are 7 bits higher, and so on. We therefore
// prefer x - -64 over x + 64. In theory we could just prefer negative
// numbers over positive ones in general, but that can have bad effects on
// gzip compression (as it would mean more subtractions than the more
// common additions).
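// As a concrete illustration (these are general signed-LEB128 facts, not
// specific to this code): +64 needs two bytes (0xC0 0x00, since a lone
// 0x40 would read back as -64), while -64 fits in a single byte (0x40).
// The same pattern repeats at each boundary 7 bits higher, which is
// exactly the list of constants checked below.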
int64_t value = c->value.getInteger();
if (value == 0x40LL || value == 0x2000LL || value == 0x100000LL ||
value == 0x8000000LL || value == 0x400000000LL ||
value == 0x20000000000LL || value == 0x1000000000000LL ||
value == 0x80000000000000LL || value == 0x4000000000000000LL) {
c->value = c->value.neg();
if (curr->op == Abstract::getBinary(c->type, Add)) {
curr->op = Abstract::getBinary(c->type, Sub);
} else {
curr->op = Abstract::getBinary(c->type, Add);
}
}
return curr;
}
}
return nullptr;
}
};
} // anonymous namespace
// Create a custom matcher for checking side effects
template<class Opt> struct PureMatcherKind {};
template<class Opt>
struct Match::Internal::KindTypeRegistry<PureMatcherKind<Opt>> {
using matched_t = Expression*;
using data_t = Opt*;
};
template<class Opt> struct Match::Internal::MatchSelf<PureMatcherKind<Opt>> {
bool operator()(Expression* curr, Opt* opt) {
return !opt->effects(curr).hasSideEffects();
}
};
// Main pass class
struct OptimizeInstructions
: public WalkerPass<PostWalker<OptimizeInstructions>> {
bool isFunctionParallel() override { return true; }
std::unique_ptr<Pass> create() override {
return std::make_unique<OptimizeInstructions>();
}
bool fastMath;
// In rare cases we make a change to a type, and will do a refinalize.
bool refinalize = false;
void doWalkFunction(Function* func) {
fastMath = getPassOptions().fastMath;
// First, scan locals.
{
LocalScanner scanner(localInfo, getPassOptions());
scanner.setModule(getModule());
scanner.walkFunction(func);
}
// Main walk.
Super::doWalkFunction(func);
if (refinalize) {
ReFinalize().walkFunctionInModule(func, getModule());
}
// Final optimizations.
{
FinalOptimizer optimizer(getPassOptions());
optimizer.walkFunction(func);
}
// Some patterns create blocks that can interfere with 'catch' and 'pop',
// nesting the 'pop' into a block and making it invalid.
EHUtils::handleBlockNestedPops(func, *getModule());
}
// Set to true when one of the visitors makes a change (either replacing the
// node or modifying it).
bool changed;
// Used to avoid recursion in replaceCurrent, see below.
bool inReplaceCurrent = false;
void replaceCurrent(Expression* rep) {
if (rep->type != getCurrent()->type) {
// This operation will change the type, so refinalize.
refinalize = true;
}
WalkerPass<PostWalker<OptimizeInstructions>>::replaceCurrent(rep);
// We may be able to apply multiple patterns as one may open opportunities
// for others. NB: patterns must not have cycles
// To avoid recursion, this uses the following pattern: the initial call to
// this method comes from one of the visit*() methods. We then loop in here,
// and if we are called again we set |changed| instead of recursing, so that
// we can loop on that value.
if (inReplaceCurrent) {
// We are in the loop below so just note a change and return to there.
changed = true;
return;
}
// Loop on further changes.
inReplaceCurrent = true;
do {
changed = false;
visit(getCurrent());
} while (changed);
inReplaceCurrent = false;
}
EffectAnalyzer effects(Expression* expr) {
return EffectAnalyzer(getPassOptions(), *getModule(), expr);
}
decltype(auto) pure(Expression** binder) {
using namespace Match::Internal;
return Matcher<PureMatcherKind<OptimizeInstructions>>(binder, this);
}
bool canReorder(Expression* a, Expression* b) {
return EffectAnalyzer::canReorder(getPassOptions(), *getModule(), a, b);
}
void visitBinary(Binary* curr) {
// If this contains dead code, don't bother trying to optimize it: the type
// might change (it might not be unreachable if just one arm is, for
// example). This optimization pass focuses on actually executing code.
if (curr->type == Type::unreachable) {
return;
}
if (shouldCanonicalize(curr)) {
canonicalize(curr);
}
{
// TODO: It is an ongoing project to port more transformations to the
// match API. Once most of the transformations have been ported, the
// `using namespace Match` can be hoisted to function scope and this extra
// block scope can be removed.
using namespace Match;
using namespace Abstract;
Builder builder(*getModule());
{
// try to get rid of (0 - ..), that is, a zero only used to negate an
// int. an add of a subtract can be flipped in order to remove it:
// (ival.add
// (ival.sub
// (ival.const 0)
// X
// )
// Y
// )
// =>
// (ival.sub
// Y
// X
// )
// Note that this reorders X and Y, so we need to be careful about that.
Expression *x, *y;
Binary* sub;
if (matches(
curr,
binary(Add, binary(&sub, Sub, ival(0), any(&x)), any(&y))) &&
canReorder(x, y)) {
sub->left = y;
sub->right = x;
return replaceCurrent(sub);
}
}
{
// The flip case is even easier, as no reordering occurs:
// (ival.add
// Y
// (ival.sub
// (ival.const 0)
// X
// )
// )
// =>
// (ival.sub
// Y
// X
// )
Expression* y;
Binary* sub;
if (matches(curr,
binary(Add, any(&y), binary(&sub, Sub, ival(0), any())))) {
sub->left = y;
return replaceCurrent(sub);
}
}
{
// try De Morgan's AND law,
// (eqz X) and (eqz Y) === eqz (X or Y)
// Note that the OR and XOR laws do not work here, as these
// are not booleans (we could check if they are, but a boolean
// would already optimize with the eqz anyhow, unless propagating).
// But for AND, the left is true iff X and Y are each all zero bits,
// and the right is true if the union of their bits is zero; same.
Unary* un;
Binary* bin;
Expression *x, *y;
if (matches(curr,
binary(&bin,
AndInt32,
unary(&un, EqZInt32, any(&x)),
unary(EqZInt32, any(&y))))) {
bin->op = OrInt32;
bin->left = x;
bin->right = y;
un->value = bin;
return replaceCurrent(un);
}
}
{
// x <<>> (C & (31 | 63)) ==> x <<>> C'
// x <<>> (y & (31 | 63)) ==> x <<>> y
// x <<>> (y & (32 | 64)) ==> x
// where '<<>>' is one of:
// '<<', '>>', '>>>', 'rotl' or 'rotr'
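// For example, relying only on wasm's defined shift semantics (the shift
// count is taken modulo the bit width):
// i32.shl(x, i32.const(35)) ==> i32.shl(x, i32.const(3))
// i32.shl(x, i32.and(y, i32.const(32))) ==> x (when y has no side
// effects), since (y & 32) & 31 == 0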
BinaryOp op;
Const* c;
Expression *x, *y;
// x <<>> C
if (matches(curr, binary(&op, any(&x), ival(&c))) &&
Abstract::hasAnyShift(op)) {
// truncate the RHS constant to the effective size:
// i32(x) <<>> const(C & 31)
// i64(x) <<>> const(C & 63)
c->value = c->value.and_(
Literal::makeFromInt32(c->type.getByteSize() * 8 - 1, c->type));
// x <<>> 0 ==> x
if (c->value.isZero()) {
return replaceCurrent(x);
}
}
if (matches(curr,
binary(&op, any(&x), binary(And, any(&y), ival(&c)))) &&
Abstract::hasAnyShift(op)) {
// i32(x) <<>> (y & 31) ==> x <<>> y
// i64(x) <<>> (y & 63) ==> x <<>> y
if ((c->type == Type::i32 && (c->value.geti32() & 31) == 31) ||
(c->type == Type::i64 && (c->value.geti64() & 63LL) == 63LL)) {
curr->cast<Binary>()->right = y;
return replaceCurrent(curr);
}
// i32(x) <<>> (y & C) ==> x, where (C & 31) == 0
// i64(x) <<>> (y & C) ==> x, where (C & 63) == 0
if (((c->type == Type::i32 && (c->value.geti32() & 31) == 0) ||
(c->type == Type::i64 && (c->value.geti64() & 63LL) == 0LL)) &&
!effects(y).hasSideEffects()) {
return replaceCurrent(x);
}
}
}
{
// -x + y ==> y - x
// where x and y are floating-point values
Expression *x, *y;
if (matches(curr, binary(Add, unary(Neg, any(&x)), any(&y))) &&
canReorder(x, y)) {
curr->op = Abstract::getBinary(curr->type, Sub);
curr->left = x;
std::swap(curr->left, curr->right);
return replaceCurrent(curr);
}
}
{
// x + (-y) ==> x - y
// x - (-y) ==> x + y
// where x and y are floating-point values
Expression* y;
if (matches(curr, binary(Add, any(), unary(Neg, any(&y)))) ||
matches(curr, binary(Sub, any(), unary(Neg, any(&y))))) {
curr->op = Abstract::getBinary(
curr->type,
curr->op == Abstract::getBinary(curr->type, Add) ? Sub : Add);
curr->right = y;
return replaceCurrent(curr);
}
}
{
// -x * -y ==> x * y
// where x, y are integers
Binary* bin;
Expression *x, *y;
if (matches(curr,
binary(&bin,
Mul,
binary(Sub, ival(0), any(&x)),
binary(Sub, ival(0), any(&y))))) {
bin->left = x;
bin->right = y;
return replaceCurrent(curr);
}
}
{
// -x * y ==> -(x * y)
// x * -y ==> -(x * y)
// where x, y are integers
Expression *x, *y;
if ((matches(curr,
binary(Mul, binary(Sub, ival(0), any(&x)), any(&y))) ||
matches(curr,
binary(Mul, any(&x), binary(Sub, ival(0), any(&y))))) &&
!x->is<Const>() && !y->is<Const>()) {
Builder builder(*getModule());
return replaceCurrent(
builder.makeBinary(Abstract::getBinary(curr->type, Sub),
builder.makeConst(Literal::makeZero(curr->type)),
builder.makeBinary(curr->op, x, y)));
}
}
{
if (getModule()->features.hasSignExt()) {
Const *c1, *c2;
Expression* x;
// i64(x) << 56 >> 56 ==> i64.extend8_s(x)
// i64(x) << 48 >> 48 ==> i64.extend16_s(x)
// i64(x) << 32 >> 32 ==> i64.extend32_s(x)
if (matches(curr,
binary(ShrSInt64,
binary(ShlInt64, any(&x), i64(&c1)),
i64(&c2))) &&
Bits::getEffectiveShifts(c1) == Bits::getEffectiveShifts(c2)) {
switch (64 - Bits::getEffectiveShifts(c1)) {
case 8:
return replaceCurrent(builder.makeUnary(ExtendS8Int64, x));
case 16:
return replaceCurrent(builder.makeUnary(ExtendS16Int64, x));
case 32:
return replaceCurrent(builder.makeUnary(ExtendS32Int64, x));
default:
break;
}
}
// i32(x) << 24 >> 24 ==> i32.extend8_s(x)
// i32(x) << 16 >> 16 ==> i32.extend16_s(x)
if (matches(curr,
binary(ShrSInt32,
binary(ShlInt32, any(&x), i32(&c1)),
i32(&c2))) &&
Bits::getEffectiveShifts(c1) == Bits::getEffectiveShifts(c2)) {
switch (32 - Bits::getEffectiveShifts(c1)) {
case 8:
return replaceCurrent(builder.makeUnary(ExtendS8Int32, x));
case 16:
return replaceCurrent(builder.makeUnary(ExtendS16Int32, x));
default:
break;
}
}
}
}
{
// unsigned(x) >= 0 => i32(1)
// TODO: Use getDroppedChildrenAndAppend() here, so we can optimize even
// if x is not pure.
Const* c;
Expression* x;
if (matches(curr, binary(GeU, pure(&x), ival(&c))) &&
c->value.isZero()) {
c->value = Literal::makeOne(Type::i32);
c->type = Type::i32;
return replaceCurrent(c);
}
// unsigned(x) < 0 => i32(0)
if (matches(curr, binary(LtU, pure(&x), ival(&c))) &&
c->value.isZero()) {
c->value = Literal::makeZero(Type::i32);
c->type = Type::i32;
return replaceCurrent(c);
}
}
}
if (auto* ext = Properties::getAlmostSignExt(curr)) {
Index extraLeftShifts;
auto bits = Properties::getAlmostSignExtBits(curr, extraLeftShifts);
if (extraLeftShifts == 0) {
if (auto* load =
Properties::getFallthrough(ext, getPassOptions(), *getModule())
->dynCast<Load>()) {
// pattern match a load of 8 bits and a sign extend using a shl of
// 24 then shr_s of 24 as well, etc.
if (LoadUtils::canBeSigned(load) &&
((load->bytes == 1 && bits == 8) ||
(load->bytes == 2 && bits == 16))) {
// if the value falls through, we can't alter the load, as it
// might be captured in a tee
if (load->signed_ == true || load == ext) {
load->signed_ = true;
return replaceCurrent(ext);
}
}
}
}
// We can in some cases remove part of a sign extend, that is,
// (x << A) >> B => x << (A - B)
// If the sign-extend input cannot have a sign bit, we don't need it.
if (Bits::getMaxBits(ext, this) + extraLeftShifts < bits) {
return replaceCurrent(removeAlmostSignExt(curr));
}
// We also don't need it if it already has an identical-sized sign
// extend applied to it. That is, if it is already a sign-extended
// value, then another sign extend will do nothing. We do need to be
// careful of the extra shifts, though.
if (isSignExted(ext, bits) && extraLeftShifts == 0) {
return replaceCurrent(removeAlmostSignExt(curr));
}
} else if (curr->op == EqInt32 || curr->op == NeInt32) {
if (auto* c = curr->right->dynCast<Const>()) {
if (auto* ext = Properties::getSignExtValue(curr->left)) {
// We are comparing a sign extend to a constant, which means we can
// use a cheaper zero-extend in some cases. That is,
// (x << S) >> S ==/!= C => x & T ==/!= C
// where S and T are the matching values for sign/zero extend of the
// same size. For example, for an effective 8-bit value:
// (x << 24) >> 24 ==/!= C => x & 255 ==/!= C
//
// The key things to track here are the upper bits plus the sign bit;
// call those the "relevant bits". This is crucial because x is
// sign-extended, that is, its effective sign bit is spread to all
// the upper bits, which means that the relevant bits on the left
// side are either all 0, or all 1.
auto bits = Properties::getSignExtBits(curr->left);
uint32_t right = c->value.geti32();
uint32_t numRelevantBits = 32 - bits + 1;
uint32_t setRelevantBits =
Bits::popCount(right >> uint32_t(bits - 1));
// If all the relevant bits on C are zero
// then we can mask off the high bits instead of sign-extending x.
// This is valid because if x is negative, then the comparison was
// false before (negative vs positive), and will still be false
// as the sign bit will remain to cause a difference. And if x is
// positive then the upper bits would be zero anyhow.
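// As a hypothetical illustration for bits == 8 (so the 25 relevant bits
// of C are bits 7..31): C = 0x00000050 has them all zero, C = 0xFFFFFF80
// has them all one, and C = 0x00000180 mixes them, which makes the
// comparison result a constant.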
if (setRelevantBits == 0) {
curr->left = makeZeroExt(ext, bits);
return replaceCurrent(curr);
} else if (setRelevantBits == numRelevantBits) {
// If all those bits are one, then we can do something similar if
// we also zero-extend on the right as well. This is valid
// because, as in the previous case, the sign bit differentiates
// the two sides when they are different, and if the sign bit is
// identical, then the upper bits don't matter, so masking them
// off both sides is fine.
curr->left = makeZeroExt(ext, bits);
c->value = c->value.and_(Literal(Bits::lowBitMask(bits)));
return replaceCurrent(curr);
} else {
// Otherwise, C's relevant bits are mixed, and then the two sides
// can never be equal, as the left side's bits cannot be mixed.
Builder builder(*getModule());
// The result is either always true, or always false.
c->value = Literal::makeFromInt32(curr->op == NeInt32, c->type);
return replaceCurrent(
builder.makeSequence(builder.makeDrop(ext), c));
}
}
} else if (auto* left = Properties::getSignExtValue(curr->left)) {
if (auto* right = Properties::getSignExtValue(curr->right)) {
auto bits = Properties::getSignExtBits(curr->left);
if (Properties::getSignExtBits(curr->right) == bits) {
// we are comparing two sign-exts with the same bits, so we may as
// well replace both with cheaper zexts
curr->left = makeZeroExt(left, bits);
curr->right = makeZeroExt(right, bits);
return replaceCurrent(curr);
}
} else if (auto* load = curr->right->dynCast<Load>()) {
// we are comparing a load to a sign-ext, we may be able to switch
// to zext
auto leftBits = Properties::getSignExtBits(curr->left);
if (load->signed_ && leftBits == load->bytes * 8) {
load->signed_ = false;
curr->left = makeZeroExt(left, leftBits);
return replaceCurrent(curr);
}
}
} else if (auto* load = curr->left->dynCast<Load>()) {
if (auto* right = Properties::getSignExtValue(curr->right)) {
// we are comparing a load to a sign-ext, we may be able to switch
// to zext
auto rightBits = Properties::getSignExtBits(curr->right);
if (load->signed_ && rightBits == load->bytes * 8) {
load->signed_ = false;
curr->right = makeZeroExt(right, rightBits);
return replaceCurrent(curr);
}
}
}
// note that both left and right may be consts, but then we let
// precompute compute the constant result
} else if (curr->op == AddInt32 || curr->op == AddInt64 ||
curr->op == SubInt32 || curr->op == SubInt64) {
if (auto* ret = optimizeAddedConstants(curr)) {
return replaceCurrent(ret);
}
} else if (curr->op == MulFloat32 || curr->op == MulFloat64 ||
curr->op == DivFloat32 || curr->op == DivFloat64) {
if (curr->left->type == curr->right->type) {
if (auto* leftUnary = curr->left->dynCast<Unary>()) {
if (leftUnary->op == Abstract::getUnary(curr->type, Abstract::Abs)) {
if (auto* rightUnary = curr->right->dynCast<Unary>()) {
if (leftUnary->op == rightUnary->op) { // both are abs ops
// abs(x) * abs(y) ==> abs(x * y)
// abs(x) / abs(y) ==> abs(x / y)
curr->left = leftUnary->value;
curr->right = rightUnary->value;
leftUnary->value = curr;
return replaceCurrent(leftUnary);
}
}
}
}
}
}
// a bunch of operations on a constant right side can be simplified
if (auto* right = curr->right->dynCast<Const>()) {
if (curr->op == AndInt32) {
auto mask = right->value.geti32();
// and with -1 does nothing (common in asm.js output)
if (mask == -1) {
return replaceCurrent(curr->left);
}
// small loads do not need to be masked, the load itself masks
if (auto* load = curr->left->dynCast<Load>()) {
if ((load->bytes == 1 && mask == 0xff) ||
(load->bytes == 2 && mask == 0xffff)) {
load->signed_ = false;
return replaceCurrent(curr->left);
}
} else if (auto maskedBits = Bits::getMaskedBits(mask)) {
if (Bits::getMaxBits(curr->left, this) <= maskedBits) {
// a mask of lower bits is not needed if we are already smaller
return replaceCurrent(curr->left);
}
}
}
// some math operations have trivial results
if (auto* ret = optimizeWithConstantOnRight(curr)) {
return replaceCurrent(ret);
}
if (auto* ret = optimizeDoubletonWithConstantOnRight(curr)) {
return replaceCurrent(ret);
}
if (right->type == Type::i32) {
BinaryOp op;
int32_t c = right->value.geti32();
// First, try to lower signed operations to unsigned if that is
// possible. Some unsigned operations like div_u or rem_u are usually
// faster on VMs. Also this opens more possibilities for further
// simplifications afterwards.
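// A sketch of what this covers, assuming maxBits(x) <= 31 so that x is
// known to be non-negative as a signed i32:
// i32.div_s(x, i32.const(10)) ==> i32.div_u(x, i32.const(10))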
if (c >= 0 && (op = makeUnsignedBinaryOp(curr->op)) != InvalidBinary &&
Bits::getMaxBits(curr->left, this) <= 31) {
curr->op = op;
}
if (c < 0 && c > std::numeric_limits<int32_t>::min() &&
curr->op == DivUInt32) {
// u32(x) / C ==> u32(x) >= C iff C > 2^31
// We avoid applying this for C == 2^31 due to a conflict with
// another rule which transforms that case to the more preferable
// right-shift operation.
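// For instance, with C = 0x80000001 every u32 x gives a quotient of 0
// or 1 (since x < 2 * C), and it is 1 exactly when x >= C; with
// C = 0xFFFFFFFF the quotient is 1 only when x == C, hence the EqInt32
// below.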
curr->op = c == -1 ? EqInt32 : GeUInt32;
return replaceCurrent(curr);
}
if (Bits::isPowerOf2((uint32_t)c)) {
switch (curr->op) {
case MulInt32:
return replaceCurrent(optimizePowerOf2Mul(curr, (uint32_t)c));
case RemUInt32:
return replaceCurrent(optimizePowerOf2URem(curr, (uint32_t)c));
case DivUInt32:
return replaceCurrent(optimizePowerOf2UDiv(curr, (uint32_t)c));
default:
break;
}
}
}
if (right->type == Type::i64) {
BinaryOp op;
int64_t c = right->value.geti64();
// See description above for Type::i32
if (c >= 0 && (op = makeUnsignedBinaryOp(curr->op)) != InvalidBinary &&
Bits::getMaxBits(curr->left, this) <= 63) {
curr->op = op;
}
if (getPassOptions().shrinkLevel == 0 && c < 0 &&
c > std::numeric_limits<int64_t>::min() && curr->op == DivUInt64) {
// u64(x) / C ==> u64(u64(x) >= C) iff C > 2^63
// We avoid applying this for C == 2^63 due to a conflict with
// another rule which transforms that case to the more preferable
// right-shift operation.
// We also apply this only for shrinkLevel == 0 because it
// increases code size by one byte.
curr->op = c == -1LL ? EqInt64 : GeUInt64;
curr->type = Type::i32;
return replaceCurrent(
Builder(*getModule()).makeUnary(ExtendUInt32, curr));
}
if (Bits::isPowerOf2((uint64_t)c)) {
switch (curr->op) {
case MulInt64:
return replaceCurrent(optimizePowerOf2Mul(curr, (uint64_t)c));
case RemUInt64:
return replaceCurrent(optimizePowerOf2URem(curr, (uint64_t)c));
case DivUInt64:
return replaceCurrent(optimizePowerOf2UDiv(curr, (uint64_t)c));
default:
break;
}
}
}
if (curr->op == DivFloat32) {
float c = right->value.getf32();
if (Bits::isPowerOf2InvertibleFloat(c)) {
return replaceCurrent(optimizePowerOf2FDiv(curr, c));
}
}
if (curr->op == DivFloat64) {
double c = right->value.getf64();
if (Bits::isPowerOf2InvertibleFloat(c)) {
return replaceCurrent(optimizePowerOf2FDiv(curr, c));
}
}
}
// a bunch of operations on a constant left side can be simplified
if (curr->left->is<Const>()) {
if (auto* ret = optimizeWithConstantOnLeft(curr)) {
return replaceCurrent(ret);
}
}
if (curr->op == AndInt32 || curr->op == OrInt32) {
if (curr->op == AndInt32) {
if (auto* ret = combineAnd(curr)) {
return replaceCurrent(ret);
}
}
// for or, we can potentially combine
if (curr->op == OrInt32) {
if (auto* ret = combineOr(curr)) {
return replaceCurrent(ret);
}
}
// bitwise operations
// for 'and' and 'or', we can potentially conditionalize
if (auto* ret = conditionalizeExpensiveOnBitwise(curr)) {
return replaceCurrent(ret);
}
}
// relation/comparisons allow for math optimizations
if (curr->isRelational()) {
if (auto* ret = optimizeRelational(curr)) {
return replaceCurrent(ret);
}
}
// finally, try more expensive operations on the curr in
// the case that they have no side effects
if (!effects(curr->left).hasSideEffects()) {
if (ExpressionAnalyzer::equal(curr->left, curr->right)) {
if (auto* ret = optimizeBinaryWithEqualEffectlessChildren(curr)) {
return replaceCurrent(ret);
}
}
}
if (auto* ret = deduplicateBinary(curr)) {
return replaceCurrent(ret);
}
}
void visitUnary(Unary* curr) {
if (curr->type == Type::unreachable) {
return;
}
{
using namespace Match;
using namespace Abstract;
Builder builder(*getModule());
{
// eqz(x - y) => x == y
Binary* inner;
if (matches(curr, unary(EqZ, binary(&inner, Sub, any(), any())))) {
inner->op = Abstract::getBinary(inner->left->type, Eq);
inner->type = Type::i32;
return replaceCurrent(inner);
}
}
{
// eqz(x + C) => x == -C
Const* c;
Binary* inner;
if (matches(curr, unary(EqZ, binary(&inner, Add, any(), ival(&c))))) {
c->value = c->value.neg();
inner->op = Abstract::getBinary(c->type, Eq);
inner->type = Type::i32;
return replaceCurrent(inner);
}
}
{
// eqz((signed)x % C_pot) => eqz(x & (abs(C_pot) - 1))
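// For example, eqz(i32.rem_s(x, 8)) => eqz(i32.and(x, 7)): a signed
// remainder by a power of two is zero exactly when the low bits are zero.
// The INT_MIN divisor is special-cased below since abs() would overflow;
// its magnitude is 2^31, so the mask used is the signed max, 0x7fffffff.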
Const* c;
Binary* inner;
if (matches(curr, unary(EqZ, binary(&inner, RemS, any(), ival(&c)))) &&
(c->value.isSignedMin() ||
Bits::isPowerOf2(c->value.abs().getInteger()))) {
inner->op = Abstract::getBinary(c->type, And);
if (c->value.isSignedMin()) {
c->value = Literal::makeSignedMax(c->type);
} else {
c->value = c->value.abs().sub(Literal::makeOne(c->type));
}
return replaceCurrent(curr);
}
}
{
// i32.wrap_i64 can be removed if the operations inside it do not
// actually require 64 bits, e.g.:
//
// i32.wrap_i64(i64.extend_i32_u(x)) => x
if (matches(curr, unary(WrapInt64, any()))) {
if (auto* ret = optimizeWrappedResult(curr)) {
return replaceCurrent(ret);
}
}
}
{
// i32.eqz(i32.wrap_i64(x)) => i64.eqz(x)
// where maxBits(x) <= 32
Unary* inner;
Expression* x;
if (matches(curr, unary(EqZInt32, unary(&inner, WrapInt64, any(&x)))) &&
Bits::getMaxBits(x, this) <= 32) {
inner->op = EqZInt64;
return replaceCurrent(inner);
}
}
{
// i32.eqz(i32.eqz(x)) => i32(x) != 0
// i32.eqz(i64.eqz(x)) => i64(x) != 0
// iff shrinkLevel == 0
// (1 instruction instead of 2, but 1 more byte)
if (getPassRunner()->options.shrinkLevel == 0) {
Expression* x;
if (matches(curr, unary(EqZInt32, unary(EqZ, any(&x))))) {
Builder builder(*getModule());
return replaceCurrent(builder.makeBinary(
getBinary(x->type, Ne),
x,
builder.makeConst(Literal::makeZero(x->type))));
}
}
}
{
// i64.extend_i32_s(i32.wrap_i64(x)) => x
// where maxBits(x) <= 31
//
// i64.extend_i32_u(i32.wrap_i64(x)) => x
// where maxBits(x) <= 32
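// For example, if maxBits(x) <= 31 then bit 31 of the wrapped value is 0,
// so sign-extending it back yields zero upper bits, recovering the
// original x; with maxBits(x) <= 32 the same holds for the unsigned
// extension.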
Expression* x;
UnaryOp unaryOp;
if (matches(curr, unary(&unaryOp, unary(WrapInt64, any(&x))))) {
if (unaryOp == ExtendSInt32 || unaryOp == ExtendUInt32) {
auto maxBits = Bits::getMaxBits(x, this);
if ((unaryOp == ExtendSInt32 && maxBits <= 31) ||
(unaryOp == ExtendUInt32 && maxBits <= 32)) {
return replaceCurrent(x);
}
}
}
}
if (getModule()->features.hasSignExt()) {
// i64.extend_i32_s(i32.wrap_i64(x)) => i64.extend32_s(x)
Unary* inner;
if (matches(curr,
unary(ExtendSInt32, unary(&inner, WrapInt64, any())))) {
inner->op = ExtendS32Int64;
inner->type = Type::i64;
return replaceCurrent(inner);
}
}
}
if (curr->op == ExtendUInt32 || curr->op == ExtendSInt32) {
if (auto* load = curr->value->dynCast<Load>()) {
// i64.extend_i32_s(i32.load(_8|_16)(_u|_s)(x)) =>
// i64.load(_8|_16|_32)(_u|_s)(x)
//
// i64.extend_i32_u(i32.load(_8|_16)(_u|_s)(x)) =>
// i64.load(_8|_16|_32)(_u|_s)(x)
//
// but we can't do this in the following cases:
//
// i64.extend_i32_u(i32.load8_s(x))
// i64.extend_i32_u(i32.load16_s(x))
//
// These mixed sign/zero extensions cannot be represented as a single
// signed or unsigned 64-bit load operation. For example, if `load8_s(x)`
// returns i8(-1) (0xFF), then the sign-extended result is
// i32(-1) (0xFFFFFFFF), and zero-extending that to i64 finally gives
// 0x00000000FFFFFFFF. However, `i64.load8_s` in this situation gives
// `i64(-1)` (all ones), and `i64.load8_u` gives 0x00000000000000FF.
//
// Another limitation is atomics, which only have unsigned loads.
// So we also avoid this one case:
//
// i64.extend_i32_s(i32.atomic.load(x))
//
// A special case is a full i32.load: there the signedness depends on the
// extend operation.
bool willBeSigned = curr->op == ExtendSInt32 && load->bytes == 4;
if (!(curr->op == ExtendUInt32 && load->bytes <= 2 && load->signed_) &&
!(willBeSigned && load->isAtomic)) {
if (willBeSigned) {
load->signed_ = true;
}
load->type = Type::i64;
return replaceCurrent(load);
}
}
}
// Simple sign extends can be removed if the value is already sign-extended.
auto signExtBits = getSignExtBits(curr->value);
if (signExtBits > 0) {
// Note that we can handle the case of |curr| having a larger sign-extend:
// if we have an 8-bit value in 32-bit, then there are 24 sign bits, and
// doing a sign-extend to 16 will only affect 16 of those 24, and the
// effect is to leave them as they are.
if ((curr->op == ExtendS8Int32 && signExtBits <= 8) ||
(curr->op == ExtendS16Int32 && signExtBits <= 16) ||
(curr->op == ExtendS8Int64 && signExtBits <= 8) ||
(curr->op == ExtendS16Int64 && signExtBits <= 16) ||
(curr->op == ExtendS32Int64 && signExtBits <= 32)) {
return replaceCurrent(curr->value);
}
}
if (Abstract::hasAnyReinterpret(curr->op)) {
// i32.reinterpret_f32(f32.reinterpret_i32(x)) => x
// i64.reinterpret_f64(f64.reinterpret_i64(x)) => x
// f32.reinterpret_i32(i32.reinterpret_f32(x)) => x
// f64.reinterpret_i64(i64.reinterpret_f64(x)) => x
if (auto* inner = curr->value->dynCast<Unary>()) {
if (Abstract::hasAnyReinterpret(inner->op)) {
if (inner->value->type == curr->type) {
return replaceCurrent(inner->value);
}
}
}
// f32.reinterpret_i32(i32.load(x)) => f32.load(x)
// f64.reinterpret_i64(i64.load(x)) => f64.load(x)
// i32.reinterpret_f32(f32.load(x)) => i32.load(x)
// i64.reinterpret_f64(f64.load(x)) => i64.load(x)
if (auto* load = curr->value->dynCast<Load>()) {
if (!load->isAtomic && load->bytes == curr->type.getByteSize()) {
load->type = curr->type;
return replaceCurrent(load);
}
}
}
if (curr->op == EqZInt32) {
if (auto* inner = curr->value->dynCast<Binary>()) {
// Try to invert a relational operation using De Morgan's law
auto op = invertBinaryOp(inner->op);
if (op != InvalidBinary) {
inner->op = op;
return replaceCurrent(inner);
}
}
// an eqz of a sign extension can use a zero extension instead
if (auto* ext = Properties::getSignExtValue(curr->value)) {
// we are comparing a sign extend to zero, which means we can
// use a cheaper zext
auto bits = Properties::getSignExtBits(curr->value);
curr->value = makeZeroExt(ext, bits);
return replaceCurrent(curr);
}
} else if (curr->op == AbsFloat32 || curr->op == AbsFloat64) {
// abs(-x) ==> abs(x)
if (auto* unaryInner = curr->value->dynCast<Unary>()) {
if (unaryInner->op ==
Abstract::getUnary(unaryInner->type, Abstract::Neg)) {
curr->value = unaryInner->value;
return replaceCurrent(curr);
}
}
// abs(x * x) ==> x * x
// abs(x / x) ==> x / x
if (auto* binary = curr->value->dynCast<Binary>()) {
if ((binary->op == Abstract::getBinary(binary->type, Abstract::Mul) ||
binary->op == Abstract::getBinary(binary->type, Abstract::DivS)) &&
areConsecutiveInputsEqual(binary->left, binary->right)) {
return replaceCurrent(binary);
}
// abs(0 - x) ==> abs(x),
// only for fast math
if (fastMath &&
binary->op == Abstract::getBinary(binary->type, Abstract::Sub)) {
if (auto* c = binary->left->dynCast<Const>()) {
if (c->value.isZero()) {
curr->value = binary->right;
return replaceCurrent(curr);
}
}
}
}
}
if (auto* ret = deduplicateUnary(curr)) {
return replaceCurrent(ret);
}
if (auto* ret = simplifyRoundingsAndConversions(curr)) {
return replaceCurrent(ret);
}
}
void visitSelect(Select* curr) {
if (curr->type == Type::unreachable) {
return;
}
if (auto* ret = optimizeSelect(curr)) {
return replaceCurrent(ret);
}
optimizeTernary(curr);
}
void visitGlobalSet(GlobalSet* curr) {
if (curr->type == Type::unreachable) {
return;
}
// optimize out a set of a get
auto* get = curr->value->dynCast<GlobalGet>();
if (get && get->name == curr->name) {
ExpressionManipulator::nop(curr);
return replaceCurrent(curr);
}
}
void visitIf(If* curr) {
curr->condition = optimizeBoolean(curr->condition);
if (curr->ifFalse) {
if (auto* unary = curr->condition->dynCast<Unary>()) {
if (unary->op == EqZInt32) {
// flip if-else arms to get rid of an eqz
curr->condition = unary->value;
std::swap(curr->ifTrue, curr->ifFalse);
}
}
if (curr->condition->type != Type::unreachable &&
ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) {
// The sides are identical, so fold. If we can replace the If with one
// arm and there are no side effects in the condition, replace it. But
// make sure not to change a concrete expression to an unreachable
// expression because we want to avoid having to refinalize.
bool needCondition = effects(curr->condition).hasSideEffects();
bool wouldBecomeUnreachable =
curr->type.isConcrete() && curr->ifTrue->type == Type::unreachable;
Builder builder(*getModule());
if (!wouldBecomeUnreachable && !needCondition) {
return replaceCurrent(curr->ifTrue);
} else if (!wouldBecomeUnreachable) {
return replaceCurrent(builder.makeSequence(
builder.makeDrop(curr->condition), curr->ifTrue));
} else {
// Emit a block with the original concrete type.
auto* ret = builder.makeBlock();
if (needCondition) {
ret->list.push_back(builder.makeDrop(curr->condition));
}
ret->list.push_back(curr->ifTrue);
ret->finalize(curr->type);
return replaceCurrent(ret);
}
}
optimizeTernary(curr);
}
}
void visitLocalSet(LocalSet* curr) {
// Interactions between local.set/tee and ref.as_non_null can be optimized
// in some cases, by removing or moving the ref.as_non_null operation.
if (auto* as = curr->value->dynCast<RefAs>()) {
if (as->op == RefAsNonNull &&
getFunction()->getLocalType(curr->index).isNullable()) {
// (local.tee (ref.as_non_null ..))
// =>
// (ref.as_non_null (local.tee ..))
//
// The reordering allows the ref.as to be potentially optimized further
// based on where the value flows to.
if (curr->isTee()) {
curr->value = as->value;
curr->finalize();
as->value = curr;
as->finalize();
replaceCurrent(as);
return;
}
// Otherwise, if this is not a tee, then no value falls through. The
// ref.as_non_null acts as a null check here, basically. If we are
// ignoring such traps, we can remove it.
auto& passOptions = getPassOptions();
if (passOptions.ignoreImplicitTraps || passOptions.trapsNeverHappen) {
curr->value = as->value;
}
}
}
}
void visitBreak(Break* curr) {
if (curr->condition) {
curr->condition = optimizeBoolean(curr->condition);
}
}
void visitLoad(Load* curr) {
if (curr->type == Type::unreachable) {
return;
}
optimizeMemoryAccess(curr->ptr, curr->offset, curr->memory);
}
void visitStore(Store* curr) {
if (curr->type == Type::unreachable) {
return;
}
optimizeMemoryAccess(curr->ptr, curr->offset, curr->memory);
optimizeStoredValue(curr->value, curr->bytes);
if (auto* unary = curr->value->dynCast<Unary>()) {
if (unary->op == WrapInt64) {
// instead of wrapping to 32, just store some of the bits in the i64
curr->valueType = Type::i64;
curr->value = unary->value;
} else if (!curr->isAtomic && Abstract::hasAnyReinterpret(unary->op) &&
curr->bytes == curr->valueType.getByteSize()) {
// f32.store(y, f32.reinterpret_i32(x)) => i32.store(y, x)
// f64.store(y, f64.reinterpret_i64(x)) => i64.store(y, x)
// i32.store(y, i32.reinterpret_f32(x)) => f32.store(y, x)
// i64.store(y, i64.reinterpret_f64(x)) => f64.store(y, x)
curr->valueType = unary->value->type;
curr->value = unary->value;
}
}
}
void optimizeStoredValue(Expression*& value, Index bytes) {
if (!value->type.isInteger()) {
return;
}
// truncate constant values during stores
// (i32|i64).store(8|16|32)(p, C) ==>
// (i32|i64).store(8|16|32)(p, C & mask)
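// For example, i32.store8(p, i32.const(0x12345)) can store
// i32.const(0x45) instead, since only the low 8 bits reach memory.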
if (auto* c = value->dynCast<Const>()) {
if (value->type == Type::i64 && bytes == 4) {
c->value = c->value.and_(Literal(uint64_t(0xffffffff)));
} else {
c->value = c->value.and_(
Literal::makeFromInt32(Bits::lowBitMask(bytes * 8), value->type));
}
}
// stores of fewer bits truncate anyhow
if (auto* binary = value->dynCast<Binary>()) {
if (binary->op == AndInt32) {
if (auto* right = binary->right->dynCast<Const>()) {
if (right->type == Type::i32) {
auto mask = right->value.geti32();
if ((bytes == 1 && mask == 0xff) ||
(bytes == 2 && mask == 0xffff)) {
value = binary->left;
}
}
}
} else if (auto* ext = Properties::getSignExtValue(binary)) {
// if sign extending the exact bit size we store, we can skip the
// extension; if extending to something bigger, then we would just alter
// bits that we do not store anyhow
if (Properties::getSignExtBits(binary) >= Index(bytes) * 8) {
value = ext;
}
}
}
}
void visitMemoryCopy(MemoryCopy* curr) {
if (curr->type == Type::unreachable) {
return;
}
assert(getModule()->features.hasBulkMemoryOpt());
if (auto* ret = optimizeMemoryCopy(curr)) {
return replaceCurrent(ret);
}
}
void visitMemoryFill(MemoryFill* curr) {
if (curr->type == Type::unreachable) {
return;
}
assert(getModule()->features.hasBulkMemoryOpt());
if (auto* ret = optimizeMemoryFill(curr)) {
return replaceCurrent(ret);
}
}
void visitCallRef(CallRef* curr) {
skipNonNullCast(curr->target, curr);
if (trapOnNull(curr, curr->target)) {
return;
}
if (curr->target->type == Type::unreachable) {
// The call_ref is not reached; leave this for DCE.
return;
}
if (auto* ref = curr->target->dynCast<RefFunc>()) {
// We know the target!
replaceCurrent(
Builder(*getModule())
.makeCall(ref->func, curr->operands, curr->type, curr->isReturn));
return;
}
if (auto* get = curr->target->dynCast<TableGet>()) {
// (call_ref ..args.. (table.get $table (index)))
// =>
// (call_indirect $table ..args.. (index))
replaceCurrent(Builder(*getModule())
.makeCallIndirect(get->table,
get->index,
curr->operands,
get->type.getHeapType(),
curr->isReturn));
return;
}
auto features = getModule()->features;
// It is possible the target is not a function reference, but we can infer
// the fallthrough value there. It takes more work to optimize this case,
// but it is pretty important to allow a call_ref to become a fast direct
// call, so make the effort.
if (auto* ref = Properties::getFallthrough(
curr->target, getPassOptions(), *getModule())
->dynCast<RefFunc>()) {
// Check if the fallthrough makes sense. We may have cast it to a different
// type, which would be a problem - we'd be replacing a call_ref to one
// type with a direct call to a function of another type. That would trap
// at runtime; be careful not to emit invalid IR here.
if (curr->target->type.getHeapType() != ref->type.getHeapType()) {
return;
}
Builder builder(*getModule());
if (curr->operands.empty()) {
// No operands, so this is simple and there is nothing to reorder: just
// emit:
//
// (block
// (drop curr->target)
// (call ref.func-from-curr->target)
// )
replaceCurrent(builder.makeSequence(
builder.makeDrop(curr->target),
builder.makeCall(ref->func, {}, curr->type, curr->isReturn)));
return;
}
// In the presence of operands, we must execute the code in curr->target
// after the last operand and before the call happens. Interpose at the
// last operand:
//
// (call ref.func-from-curr->target)
// (operand1)
// (..)
// (operandN-1)
// (block
// (local.set $temp (operandN))
// (drop curr->target)
// (local.get $temp)
// )
// )
auto* lastOperand = curr->operands.back();
auto lastOperandType = lastOperand->type;
if (lastOperandType == Type::unreachable) {
// The call_ref is not reached; leave this for DCE.
return;
}
if (!TypeUpdating::canHandleAsLocal(lastOperandType)) {
// We cannot create a local, so we must give up.
return;
}
Index tempLocal = builder.addVar(
getFunction(),
TypeUpdating::getValidLocalType(lastOperandType, features));
auto* set = builder.makeLocalSet(tempLocal, lastOperand);
auto* drop = builder.makeDrop(curr->target);
auto* get = TypeUpdating::fixLocalGet(
builder.makeLocalGet(tempLocal, lastOperandType), *getModule());
curr->operands.back() = builder.makeBlock({set, drop, get});
replaceCurrent(builder.makeCall(
ref->func, curr->operands, curr->type, curr->isReturn));
return;
}
// If the target is a select of two different constants, we can emit an if
// over two direct calls.
if (auto* calls = CallUtils::convertToDirectCalls(
curr,
[](Expression* target) -> CallUtils::IndirectCallInfo {
if (auto* refFunc = target->dynCast<RefFunc>()) {
return CallUtils::Known{refFunc->func};
}
return CallUtils::Unknown{};
},
*getFunction(),
*getModule())) {
replaceCurrent(calls);
}
}
// Note on removing casts (which the following utilities, skipNonNullCast and
// skipCast do): removing a cast is potentially dangerous, as it removes
// information from the IR. For example:
//
// (ref.test (ref i31)
// (ref.cast (ref i31)
// (local.get $anyref)))
//
// The local has no useful type info here (it is anyref). The cast forces it
// to be an i31, so we know that if we do not trap then the ref.test will
// definitely be 1. But if we removed the ref.cast first (which we can do in
// traps-never-happen mode) then we'd not have the type info we need to
// optimize that way.
//
// To avoid such risks we should keep in mind the following:
//
// * Before removing a cast we should use its type information in the best
// way we can. Only after doing so should a cast be removed. In the example
// above, that means first seeing that the ref.test must return 1, and only
// then possibly removing the ref.cast.
// * Do not remove a cast if removing it might remove useful information for
// others. For example,
//
// (ref.cast (ref null $A)
// (ref.as_non_null ..))
//
// If we remove the inner cast then the outer cast becomes nullable. That
// means we'd be throwing away useful information, which we should not do,
// even in traps-never-happen mode and even if the wasm would validate
// without the cast. Only if we saw that the parents of the outer cast
// cannot benefit from non-nullability should we remove it.
// Another example:
//
// (struct.get $A 0
// (ref.cast $B ..))
//
// The cast only changes the type of the reference, which is consumed in
// this expression and so we don't have more parents to consider. But it is
// risky to remove this cast, since e.g. GUFA benefits from such info:
// it tells GUFA that we are reading from a $B here, and not the supertype
// $A. If $B may contain fewer values in field 0 than $A, then GUFA might
// be able to optimize better with this cast. Now, in traps-never-happen
// mode we can assume that only $B can arrive here, which means GUFA might
// be able to infer that even without the cast - but it might not, if we
// hit a limitation of GUFA. Some code patterns simply cannot be expected
// to be always inferred, say if a data structure has a tagged variant:
//
// {
// tag: i32,
// ref: anyref
// }
//
// Imagine that if tag == 0 then the reference always contains struct $A,
// and if tag == 1 then it always contains a struct $B, and so forth. We
// can't expect GUFA to figure out such invariants in general. But by
// having casts in the right places we can help GUFA optimize:
//
// (if
// (tag == 1)
// (struct.get $A 0
// (ref.cast $B ..))
//
// We know it must be a $B due to the tag. By keeping the cast there we can
// make sure that optimizations can benefit from that.
//
// Given the large amount of potential benefit we can get from a successful
// optimization in GUFA, any reduction there may be a bad idea, so we
// should be very careful and probably *not* remove such casts.
// If an instruction traps on a null input, there is no need for a
// ref.as_non_null on that input: we will trap either way (and the binaryen
// optimizer does not differentiate traps).
//
// See "notes on removing casts", above. However, in most cases removing a
// non-null cast is obviously safe to do, since we only remove one if another
// check will happen later.
//
// We also pass in the parent, because we need to be careful about ordering:
// if the parent has other children than |input| then we may not be able to
// remove the trap. For example,
//
// (struct.set
// (ref.as_non_null X)
// (call $foo)
// )
//
// If X is null we'd trap before the call to $foo. If we remove the
// ref.as_non_null then the struct.set will still trap, of course, but that
// will only happen *after* the call, which is wrong.
void skipNonNullCast(Expression*& input, Expression* parent) {
// Check the other children for the ordering problem only if we find a
// possible optimization, to avoid wasted work.
bool checkedSiblings = false;
auto& options = getPassOptions();
while (1) {
if (auto* as = input->dynCast<RefAs>()) {
if (as->op == RefAsNonNull) {
// The problem with effect ordering that is described above is not an
// issue if traps are assumed to never happen anyhow.
if (!checkedSiblings && !options.trapsNeverHappen) {
// We need to see if a child with side effects exists after |input|.
// If there is such a child, it is a problem as mentioned above (it
// is fine for such a child to appear *before* |input|, as then we
// wouldn't be reordering effects). Thus, all we need to do is
// accumulate the effects in children after |input|, as we want to
// move the trap across those.
bool seenInput = false;
EffectAnalyzer crossedEffects(options, *getModule());
for (auto* child : ChildIterator(parent)) {
if (child == input) {
seenInput = true;
} else if (seenInput) {
crossedEffects.walk(child);
}
}
// Check if the effects we cross interfere with the effects of the
// trap we want to move. (We use a shallow effect analyzer since we
// will only move the ref.as_non_null itself.)
ShallowEffectAnalyzer movingEffects(options, *getModule(), input);
if (crossedEffects.invalidates(movingEffects)) {
return;
}
// If we got here, we've checked the siblings and found no problem.
checkedSiblings = true;
}
input = as->value;
continue;
}
}
break;
}
}
// As skipNonNullCast, but skips all casts if we can do so. This is useful in
// cases where we don't actually care about the type but just the value, that
// is, if casts of the type do not affect our behavior (which is the case in
// ref.eq for example).
//
// |requiredType| is the required supertype of the final output. We will not
// remove a cast that would leave something that would break that. If
// |requiredType| is not provided we will accept any type there.
//
// See "notes on removing casts", above, for when this is safe to do.
void skipCast(Expression*& input, Type requiredType = Type::none) {
// Traps-never-happen mode is a requirement for us to optimize here.
if (!getPassOptions().trapsNeverHappen) {
return;
}
while (1) {
if (auto* as = input->dynCast<RefAs>()) {
if (requiredType == Type::none ||
Type::isSubType(as->value->type, requiredType)) {
input = as->value;
continue;
}
} else if (auto* cast = input->dynCast<RefCast>()) {
if (requiredType == Type::none ||
Type::isSubType(cast->ref->type, requiredType)) {
input = cast->ref;
continue;
}
}
break;
}
}
// Appends a result after the dropped children, if we need them.
Expression* getDroppedChildrenAndAppend(Expression* curr,
Expression* result) {
return wasm::getDroppedChildrenAndAppend(
curr, *getModule(), getPassOptions(), result);
}
Expression* getDroppedChildrenAndAppend(Expression* curr, Literal value) {
auto* result = Builder(*getModule()).makeConst(value);
return getDroppedChildrenAndAppend(curr, result);
}
Expression* getResultOfFirst(Expression* first, Expression* second) {
return wasm::getResultOfFirst(
first, second, getFunction(), getModule(), getPassOptions());
}
// Optimize an instruction and the reference it operates on, under the
// assumption that if the reference is a null then we will trap. Returns true
// if we replaced the expression with something simpler. Returns false if we
// found nothing to optimize, or if we just modified or replaced the ref (but
// not the expression itself).
bool trapOnNull(Expression* curr, Expression*& ref) {
Builder builder(*getModule());
if (getPassOptions().trapsNeverHappen) {
// We can ignore the possibility of the reference being an input, so
//
// (if
// (condition)
// (null)
// (other))
// =>
// (drop
// (condition))
// (other)
//
// That is, we will by assumption not read from the null, so remove that
// arm.
//
// TODO We could recurse here.
// TODO We could do similar things for casts (rule out an impossible arm).
// TODO Worth thinking about an 'assume' intrinsic of some form that
// annotates knowledge about a value, or another mechanism to allow
// that information to be passed around.
// Note that we must check that the null actually flows out, that is,
// that control flow is not transferred before:
//
// (if
// (1)
// (block (result null)
// (return)
// )
// (other))
//
// The true arm has a bottom type, but in fact it just returns out of the
// function and the null does not actually flow out. We can only optimize
// here if a null definitely flows out (as only that would cause a trap).
auto flowsOutNull = [&](Expression* child) {
return child->type.isNull() && !effects(child).transfersControlFlow();
};
if (auto* iff = ref->dynCast<If>()) {
if (iff->ifFalse) {
if (flowsOutNull(iff->ifTrue)) {
if (ref->type != iff->ifFalse->type) {
refinalize = true;
}
ref = builder.makeSequence(builder.makeDrop(iff->condition),
iff->ifFalse);
return false;
}
if (flowsOutNull(iff->ifFalse)) {
if (ref->type != iff->ifTrue->type) {
refinalize = true;
}
ref = builder.makeSequence(builder.makeDrop(iff->condition),
iff->ifTrue);
return false;
}
}
}
if (auto* select = ref->dynCast<Select>()) {
// We must check for unreachability explicitly here because a full
// refinalize only happens at the end. That is, the select may still be
// reachable after we turned one child into an unreachable, and we are
// calling getResultOfFirst which will error on unreachability.
if (flowsOutNull(select->ifTrue) &&
select->ifFalse->type != Type::unreachable) {
ref = builder.makeSequence(
builder.makeDrop(select->ifTrue),
getResultOfFirst(select->ifFalse,
builder.makeDrop(select->condition)));
return false;
}
if (flowsOutNull(select->ifFalse) &&
select->ifTrue->type != Type::unreachable) {
ref = getResultOfFirst(
select->ifTrue,
builder.makeSequence(builder.makeDrop(select->ifFalse),
builder.makeDrop(select->condition)));
return false;
}
}
}
// A nullable cast can be turned into a non-nullable one:
//
// (struct.get ;; or something else that traps on a null ref
// (ref.cast null
// =>
// (struct.get
// (ref.cast ;; now non-nullable
//
// Either way we trap here, but refining the type may have benefits later.
if (ref->type.isNullable()) {
if (auto* cast = ref->dynCast<RefCast>()) {
// Note that we must be the last child of the parent, otherwise effects
// in the middle may need to remain:
//
// (struct.set
// (ref.cast null
// (call ..
//
// The call here must execute before the trap in the struct.set. To
// avoid that problem, inspect all children after us. If there are no
// such children, then there is no problem; if there are, see below.
auto canOptimize = true;
auto seenRef = false;
for (auto* child : ChildIterator(curr)) {
if (child == ref) {
seenRef = true;
} else if (seenRef) {
// This is a child after the reference. Check it for effects. For
// simplicity, focus on the case of traps-never-happens: if we can
// assume no trap occurs in the parent, then there must not be a
// trap in the child either, unless control flow transfers and we
// might not reach the parent.
// TODO: handle more cases.
if (!getPassOptions().trapsNeverHappen ||
effects(child).transfersControlFlow()) {
canOptimize = false;
break;
}
}
}
if (canOptimize) {
cast->type = Type(cast->type.getHeapType(), NonNullable);
}
}
}
auto fallthrough =
Properties::getFallthrough(ref, getPassOptions(), *getModule());
if (fallthrough->type.isNull()) {
replaceCurrent(
getDroppedChildrenAndAppend(curr, builder.makeUnreachable()));
return true;
}
return false;
}
void visitRefEq(RefEq* curr) {
// The types may prove that the same reference cannot appear on both sides.
auto leftType = curr->left->type;
auto rightType = curr->right->type;
if (leftType == Type::unreachable || rightType == Type::unreachable) {
// Leave this for DCE.
return;
}
auto leftHeapType = leftType.getHeapType();
auto rightHeapType = rightType.getHeapType();
auto leftIsHeapSubtype = HeapType::isSubType(leftHeapType, rightHeapType);
auto rightIsHeapSubtype = HeapType::isSubType(rightHeapType, leftHeapType);
if (!leftIsHeapSubtype && !rightIsHeapSubtype &&
(leftType.isNonNullable() || rightType.isNonNullable())) {
// The heap types have no intersection, so the only thing that can
// possibly appear on both sides is null, but one of the two is non-
// nullable, which rules that out. So there is no way that the same
// reference can appear on both sides.
replaceCurrent(
getDroppedChildrenAndAppend(curr, Literal::makeZero(Type::i32)));
return;
}
// Equality does not depend on the type, so casts may be removable.
//
// This is safe to do first because nothing farther down cares about the
// type, and we consume the two input references, so removing a cast could
// not help our parents (see "notes on removing casts").
Type nullableEq = Type(HeapType::eq, Nullable);
skipCast(curr->left, nullableEq);
skipCast(curr->right, nullableEq);
// Identical references compare equal.
// (Technically we do not need to check if the inputs are also foldable into
// a single one, but we do not have utility code to handle non-foldable
// cases yet; the foldable case we do handle is the common one of the first
// child being a tee and the second a get of that tee. TODO)
if (areConsecutiveInputsEqualAndFoldable(curr->left, curr->right)) {
replaceCurrent(
getDroppedChildrenAndAppend(curr, Literal::makeOne(Type::i32)));
return;
}
// Canonicalize to the pattern of a null on the right-hand side, if there is
// one. This makes pattern matching simpler.
if (curr->left->is<RefNull>()) {
std::swap(curr->left, curr->right);
}
// RefEq of a value to Null can be replaced with RefIsNull.
if (curr->right->is<RefNull>()) {
replaceCurrent(Builder(*getModule()).makeRefIsNull(curr->left));
}
}
void visitStructNew(StructNew* curr) {
// If values are provided, but they are all the default, then we can remove
// them (in reachable code).
if (curr->type == Type::unreachable || curr->isWithDefault()) {
return;
}
auto& passOptions = getPassOptions();
const auto& fields = curr->type.getHeapType().getStruct().fields;
assert(fields.size() == curr->operands.size());
for (Index i = 0; i < fields.size(); i++) {
// The field must be defaultable.
auto type = fields[i].type;
if (!type.isDefaultable()) {
return;
}
// The value written to the field must be the default value.
auto* value = Properties::getFallthrough(
curr->operands[i], passOptions, *getModule());
if (!Properties::isSingleConstantExpression(value) ||
Properties::getLiteral(value) != Literal::makeZero(type)) {
return;
}
}
// Success! Drop the children and return a struct.new_with_default.
auto* rep = getDroppedChildrenAndAppend(curr, curr);
curr->operands.clear();
assert(curr->isWithDefault());
replaceCurrent(rep);
}
void visitStructGet(StructGet* curr) {
skipNonNullCast(curr->ref, curr);
trapOnNull(curr, curr->ref);
// Relax acquire loads of unshared fields to unordered because they cannot
// synchronize with other threads.
if (curr->order == MemoryOrder::AcqRel && curr->ref->type.isRef() &&
!curr->ref->type.getHeapType().isShared()) {
curr->order = MemoryOrder::Unordered;
}
}
void visitStructSet(StructSet* curr) {
skipNonNullCast(curr->ref, curr);
if (trapOnNull(curr, curr->ref)) {
return;
}
if (curr->ref->type != Type::unreachable && curr->value->type.isInteger()) {
// We must avoid the case of a null type.
auto heapType = curr->ref->type.getHeapType();
if (heapType.isStruct()) {
const auto& fields = heapType.getStruct().fields;
optimizeStoredValue(curr->value, fields[curr->index].getByteSize());
}
}
// Relax release stores of unshared fields to unordered because they cannot
// synchronize with other threads.
if (curr->order == MemoryOrder::AcqRel && curr->ref->type.isRef() &&
!curr->ref->type.getHeapType().isShared()) {
curr->order = MemoryOrder::Unordered;
}
}
void visitStructRMW(StructRMW* curr) {
skipNonNullCast(curr->ref, curr);
if (trapOnNull(curr, curr->ref)) {
return;
}
if (!curr->ref->type.isStruct()) {
return;
}
// We generally can't optimize seqcst RMWs on shared memory because they can
// act as both the source and sink of synchronization edges, even if they
// don't modify the in-memory value.
if (curr->ref->type.getHeapType().isShared() &&
curr->order == MemoryOrder::SeqCst) {
return;
}
Builder builder(*getModule());
// This RMW is either to non-shared memory or has acquire-release ordering.
// In the former case, it trivially does not synchronize with other threads
// and we can optimize to our heart's content. In the latter case, if we
// know the RMW does not change the value in memory, then we can consider
// all subsequent reads as reading from the previous write rather than from
// this RMW op, which means this RMW does not synchronize with later reads
// and we can optimize out the write part. This optimization wouldn't be
// valid for sequentially consistent RMW ops because the next reads from
// this location in the total order of seqcst ops would have to be
// considered to be reading from this RMW and therefore would synchronize
// with it.
auto* value =
Properties::getFallthrough(curr->value, getPassOptions(), *getModule());
if (Properties::isSingleConstantExpression(value)) {
auto val = Properties::getLiteral(value);
bool canOptimize = false;
switch (curr->op) {
case RMWAdd:
case RMWSub:
case RMWOr:
case RMWXor:
canOptimize = val.getInteger() == 0;
break;
case RMWAnd:
canOptimize = val == Literal::makeNegOne(val.type);
break;
case RMWXchg:
canOptimize = false;
break;
}
if (canOptimize) {
replaceCurrent(builder.makeStructGet(
curr->index,
getResultOfFirst(curr->ref, builder.makeDrop(curr->value)),
curr->order,
curr->type));
return;
}
}
// No further optimizations possible on RMWs to shared memory.
if (curr->ref->type.getHeapType().isShared()) {
return;
}
// Lower the RMW to its more basic operations. Breaking the atomic
// operation into several non-atomic operations is safe because no other
// thread can observe an intermediate state in the unshared memory. This
// initially increases code size, but the more basic operations may be
// more optimizable than the original RMW.
// TODO: Experiment to determine whether this is worthwhile on real code.
// Maybe we should do this optimization only when optimizing for speed over
// size.
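// Roughly, for a non-xchg op the lowering below is (using fresh locals):
//
//   ref = <ref>; val = <value>;
//   result = struct.get(ref);          // unordered
//   struct.set(ref, result OP val);    // unordered (for xchg, just val)
//   return result;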
auto ref = builder.addVar(getFunction(), curr->ref->type);
auto val = builder.addVar(getFunction(), curr->type);
auto result = builder.addVar(getFunction(), curr->type);
auto* block = builder.makeBlock(
{builder.makeLocalSet(ref, curr->ref),
builder.makeLocalSet(val, curr->value),
builder.makeLocalSet(
result,
builder.makeStructGet(curr->index,
builder.makeLocalGet(ref, curr->ref->type),
MemoryOrder::Unordered,
curr->type))});
Expression* newVal = nullptr;
if (curr->op == RMWXchg) {
newVal = builder.makeLocalGet(val, curr->type);
} else {
Abstract::Op binop = Abstract::Add;
switch (curr->op) {
case RMWAdd:
binop = Abstract::Add;
break;
case RMWSub:
binop = Abstract::Sub;
break;
case RMWAnd:
binop = Abstract::And;
break;
case RMWOr:
binop = Abstract::Or;
break;
case RMWXor:
binop = Abstract::Xor;
break;
case RMWXchg:
WASM_UNREACHABLE("unexpected op");
}
newVal = builder.makeBinary(Abstract::getBinary(curr->type, binop),
builder.makeLocalGet(result, curr->type),
builder.makeLocalGet(val, curr->type));
}
block->list.push_back(
builder.makeStructSet(curr->index,
builder.makeLocalGet(ref, curr->ref->type),
newVal,
MemoryOrder::Unordered));
block->list.push_back(builder.makeLocalGet(result, curr->type));
block->type = curr->type;
replaceCurrent(block);
}
void visitStructCmpxchg(StructCmpxchg* curr) {
skipNonNullCast(curr->ref, curr);
if (trapOnNull(curr, curr->ref)) {
return;
}
if (!curr->ref->type.isStruct()) {
return;
}
Builder builder(*getModule());
// As with other RMW operations, we cannot optimize if the RMW is
// sequentially consistent and to shared memory.
if (curr->ref->type.getHeapType().isShared() &&
curr->order == MemoryOrder::SeqCst) {
return;
}
// Just like other RMW operations, unshared or release-acquire cmpxchg can
// be optimized to just a read if it is known not to change the in-memory
// value. This is the case when `expected` and `replacement` are known to be
// the same.
if (areConsecutiveInputsEqual(curr->expected, curr->replacement)) {
auto* ref = getResultOfFirst(
curr->ref,
builder.makeSequence(builder.makeDrop(curr->expected),
builder.makeDrop(curr->replacement)));
replaceCurrent(
builder.makeStructGet(curr->index, ref, curr->order, curr->type));
return;
}
if (curr->ref->type.getHeapType().isShared()) {
return;
}
// Just like other RMW operations, lower to basic operations when operating
// on unshared memory.
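// Roughly (using fresh locals):
//
//   ref = <ref>; expected = <expected>; replacement = <replacement>;
//   result = struct.get(ref);                            // unordered
//   if (result == expected) struct.set(ref, replacement); // unordered
//   return result;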
auto ref = builder.addVar(getFunction(), curr->ref->type);
auto expected = builder.addVar(getFunction(), curr->type);
auto replacement = builder.addVar(getFunction(), curr->type);
auto result = builder.addVar(getFunction(), curr->type);
auto* block =
builder.makeBlock({builder.makeLocalSet(ref, curr->ref),
builder.makeLocalSet(expected, curr->expected),
builder.makeLocalSet(replacement, curr->replacement)});
auto* lhs = builder.makeLocalTee(
result,
builder.makeStructGet(curr->index,
builder.makeLocalGet(ref, curr->ref->type),
MemoryOrder::Unordered,
curr->type),
curr->type);
auto* rhs = builder.makeLocalGet(expected, curr->type);
Expression* pred = nullptr;
if (curr->type.isRef()) {
pred = builder.makeRefEq(lhs, rhs);
} else {
pred = builder.makeBinary(
Abstract::getBinary(curr->type, Abstract::Eq), lhs, rhs);
}
block->list.push_back(builder.makeIf(
pred,
builder.makeStructSet(curr->index,
builder.makeLocalGet(ref, curr->ref->type),
builder.makeLocalGet(replacement, curr->type),
MemoryOrder::Unordered)));
block->list.push_back(builder.makeLocalGet(result, curr->type));
block->type = curr->type;
replaceCurrent(block);
}
void visitArrayNew(ArrayNew* curr) {
// If a value is provided, we can optimize in some cases.
if (curr->type == Type::unreachable || curr->isWithDefault()) {
return;
}
Builder builder(*getModule());
// ArrayNew of size 1 is less efficient than ArrayNewFixed with one value
// (the latter avoids a Const, which ends up saving one byte).
// TODO: also look at the case with a fallthrough or effects on the size
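// For example:
//
//  (array.new $T (X) (i32.const 1))  =>  (array.new_fixed $T 1 (X))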
if (auto* c = curr->size->dynCast<Const>()) {
if (c->value.geti32() == 1) {
// Optimize to ArrayNewFixed. Note that if the value is the default
// then we may end up optimizing further in visitArrayNewFixed.
replaceCurrent(
builder.makeArrayNewFixed(curr->type.getHeapType(), {curr->init}));
return;
}
}
// If the type is defaultable then perhaps the value here is the default.
auto type = curr->type.getHeapType().getArray().element.type;
if (!type.isDefaultable()) {
return;
}
// The value must be the default/zero.
auto& passOptions = getPassOptions();
auto zero = Literal::makeZero(type);
auto* value =
Properties::getFallthrough(curr->init, passOptions, *getModule());
if (!Properties::isSingleConstantExpression(value) ||
Properties::getLiteral(value) != zero) {
return;
}
// Success! Drop the init and return an array.new_with_default.
auto* init = curr->init;
curr->init = nullptr;
assert(curr->isWithDefault());
replaceCurrent(builder.makeSequence(builder.makeDrop(init), curr));
}
void visitArrayNewFixed(ArrayNewFixed* curr) {
if (curr->type == Type::unreachable) {
return;
}
auto size = curr->values.size();
if (size == 0) {
// TODO: Consider what to do in the trivial case of an empty array: we
//       could use ArrayNew or ArrayNewFixed there. Measure which is best.
return;
}
auto& passOptions = getPassOptions();
// If all the values are equal then we can optimize, either to
// array.new_default (if they are all equal to the default) or array.new (if
// they are all equal to some other value). First, see if they are all
// equal, which we do by comparing in pairs: [0,1], then [1,2], etc.
for (Index i = 0; i < size - 1; i++) {
if (!areConsecutiveInputsEqual(curr->values[i], curr->values[i + 1])) {
return;
}
}
// Great, they are all equal!
Builder builder(*getModule());
// See if they are equal to a constant, and if that constant is the default.
auto type = curr->type.getHeapType().getArray().element.type;
if (type.isDefaultable()) {
auto* value =
Properties::getFallthrough(curr->values[0], passOptions, *getModule());
if (Properties::isSingleConstantExpression(value) &&
Properties::getLiteral(value) == Literal::makeZero(type)) {
// They are all equal to the default. Drop the children and return an
// array.new_with_default.
auto* withDefault = builder.makeArrayNew(
curr->type.getHeapType(), builder.makeConst(int32_t(size)));
replaceCurrent(getDroppedChildrenAndAppend(curr, withDefault));
return;
}
}
// They are all equal to each other, but not to the default value. If there
// are 2 or more elements here then we can save by using array.new. For
// example, with 2 elements we are doing this:
//
// (array.new_fixed
// (A)
// (A)
// )
// =>
// (array.new
// (A)
// (i32.const 2) ;; get two copies of (A)
// )
//
// However, with 1, ArrayNewFixed is actually more compact, and we optimize
// ArrayNew to it, above.
if (size == 1) {
return;
}
// Move children to locals, if we need to keep them around. We are removing
// all of them except the first, since we replace the array.new_fixed's
// list of children with a single child plus a constant for the number of
// children.
ChildLocalizer localizer(
curr, getFunction(), *getModule(), getPassOptions());
auto* block = localizer.getChildrenReplacement();
auto* arrayNew = builder.makeArrayNew(curr->type.getHeapType(),
builder.makeConst(int32_t(size)),
curr->values[0]);
block->list.push_back(arrayNew);
block->finalize();
replaceCurrent(block);
}
void visitArrayGet(ArrayGet* curr) {
skipNonNullCast(curr->ref, curr);
trapOnNull(curr, curr->ref);
}
void visitArraySet(ArraySet* curr) {
skipNonNullCast(curr->ref, curr);
if (trapOnNull(curr, curr->ref)) {
return;
}
if (curr->value->type.isInteger()) {
if (auto field = GCTypeUtils::getField(curr->ref->type)) {
optimizeStoredValue(curr->value, field->getByteSize());
}
}
}
void visitArrayLen(ArrayLen* curr) {
skipNonNullCast(curr->ref, curr);
trapOnNull(curr, curr->ref);
}
void visitArrayCopy(ArrayCopy* curr) {
skipNonNullCast(curr->destRef, curr);
skipNonNullCast(curr->srcRef, curr);
trapOnNull(curr, curr->destRef) || trapOnNull(curr, curr->srcRef);
}
void visitRefCast(RefCast* curr) {
// Note we must check the ref's type here and not our own, since we only
// refinalize at the end, which means our type may not have been updated yet
// after a change in the child.
// TODO: we could update unreachability up the stack perhaps, or just move
// all patterns that can add unreachability to a pass that does so
// already like vacuum or dce.
if (curr->ref->type == Type::unreachable) {
return;
}
if (curr->type.isNonNullable() && trapOnNull(curr, curr->ref)) {
return;
}
Builder builder(*getModule());
// Look at all the fallthrough values to get the most precise possible type
// of the value we are casting. local.tee, br_if, and blocks can all "lose"
// type information, so looking at all the fallthrough values can give us a
// more precise type than is stored in the IR.
Type refType =
Properties::getFallthroughType(curr->ref, getPassOptions(), *getModule());
// As a first step, we can tighten up the cast type to be the greatest lower
// bound of the original cast type and the type we know the cast value to
// have. We know any less specific type either cannot appear or will fail
// the cast anyways.
auto glb = Type::getGreatestLowerBound(curr->type, refType);
if (glb != Type::unreachable && glb != curr->type) {
curr->type = glb;
refinalize = true;
// Call replaceCurrent() to make us re-optimize this node, as we may have
// just unlocked further opportunities. (We could just continue down to
// the rest, but we'd need to do more work to make sure all the local
// state in this function is in sync with this change; it's easier to
// just do another clean pass on this node.)
replaceCurrent(curr);
return;
}
// Given what we know about the type of the value, determine what we know
// about the results of the cast and optimize accordingly.
switch (GCTypeUtils::evaluateCastCheck(refType, curr->type)) {
case GCTypeUtils::Unknown:
// The cast may or may not succeed, so we cannot optimize.
break;
case GCTypeUtils::Success:
case GCTypeUtils::SuccessOnlyIfNonNull: {
// We know the cast will succeed, or at most requires a null check, so
// we can try to optimize it out. Find the best-typed fallthrough value
// to propagate.
auto** refp = Properties::getMostRefinedFallthrough(
&curr->ref, getPassOptions(), *getModule());
auto* ref = *refp;
assert(ref->type.isRef());
if (HeapType::isSubType(ref->type.getHeapType(),
curr->type.getHeapType())) {
// We know ref's heap type matches, but the knowledge that the
// nullability matches might come from somewhere else or we might not
// know at all whether the nullability matches, so we might need to
// emit a null check.
bool needsNullCheck = ref->type.getNullability() == Nullable &&
curr->type.getNullability() == NonNullable;
// If the best value to propagate is the argument to the cast, we can
// simply remove the cast (or downgrade it to a null check if
// necessary).
if (ref == curr->ref) {
if (needsNullCheck) {
replaceCurrent(builder.makeRefAs(RefAsNonNull, curr->ref));
} else {
replaceCurrent(ref);
}
return;
}
// Otherwise we can't just remove the cast and replace it with `ref`
// because the intermediate expressions might have had side effects.
// We can replace the cast with a drop followed by a direct return of
// the value, though.
if (ref->type.isNull()) {
// We can materialize the resulting null value directly.
//
// The type must be nullable for us to do that, which it normally
// would be, aside from the interesting corner case of
// uninhabitable types:
//
// (ref.cast func
// (block (result (ref nofunc))
// (unreachable)
// )
// )
//
// (ref nofunc) is a subtype of (ref func), so the cast might seem
// to be successful, but since the input is uninhabitable we won't
// even reach the cast. Such casts will be evaluated as
// Unreachable, so we'll not hit this assertion.
assert(curr->type.isNullable());
auto nullType = curr->type.getHeapType().getBottom();
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
builder.makeRefNull(nullType)));
return;
}
// We need to use a tee to return the value since we can't materialize
// it directly.
auto scratch = builder.addVar(getFunction(), ref->type);
*refp = builder.makeLocalTee(scratch, ref, ref->type);
Expression* get = builder.makeLocalGet(scratch, ref->type);
if (needsNullCheck) {
get = builder.makeRefAs(RefAsNonNull, get);
}
replaceCurrent(
builder.makeSequence(builder.makeDrop(curr->ref), get));
return;
}
// If we get here, then we know that the heap type of the cast input is
// more refined than the heap type of the best available fallthrough
// expression. The only way this can happen is if we were able to infer
// that the input has bottom heap type because it was typed with
// multiple, incompatible heap types in different fallthrough
// expressions. For example:
//
// (ref.cast eqref
// (br_on_cast_fail $l anyref i31ref
// (br_on_cast_fail $l anyref structref
// ...)))
//
// In this case, the cast succeeds because the value must be null, so we
// can fall through to handle that case.
assert(Type::isSubType(refType, ref->type));
assert(refType.getHeapType().isBottom());
}
[[fallthrough]];
case GCTypeUtils::SuccessOnlyIfNull: {
auto nullType = Type(curr->type.getHeapType().getBottom(), Nullable);
// The cast either returns null or traps. In trapsNeverHappen mode
// we know the result, since by assumption it will not trap.
if (getPassOptions().trapsNeverHappen) {
replaceCurrent(builder.makeBlock(
{builder.makeDrop(curr->ref), builder.makeRefNull(nullType)},
curr->type));
return;
}
// Otherwise, we should have already refined the cast type to cast
// directly to null.
assert(curr->type == nullType);
break;
}
case GCTypeUtils::Unreachable:
case GCTypeUtils::Failure:
// This cast cannot succeed, or it cannot even be reached, so we can
// trap. Make sure to emit a block with the same type as us; leave
// updating types for other passes.
replaceCurrent(builder.makeBlock(
{builder.makeDrop(curr->ref), builder.makeUnreachable()},
curr->type));
return;
}
// If we got past the optimizations above, it must be the case that we
// cannot tell from the static types whether the cast will succeed or not,
// which means we must have a proper down cast.
assert(Type::isSubType(curr->type, curr->ref->type));
if (auto* child = curr->ref->dynCast<RefCast>()) {
// Repeated casts can be removed, leaving just the most demanding of them.
// Since we know the current cast is a downcast, it must be strictly
// stronger than its child cast and we can remove the child cast entirely.
curr->ref = child->ref;
return;
}
// Similarly, ref.cast can be combined with ref.as_non_null.
//
// (ref.cast null (ref.as_non_null ..))
// =>
// (ref.cast ..)
//
if (auto* as = curr->ref->dynCast<RefAs>(); as && as->op == RefAsNonNull) {
curr->ref = as->value;
curr->type = Type(curr->type.getHeapType(), NonNullable);
}
}
void visitRefTest(RefTest* curr) {
if (curr->type == Type::unreachable) {
return;
}
Builder builder(*getModule());
// Parallel to the code in visitRefCast: we look not just at the final type
// we are given, but at fallthrough values as well.
Type refType =
Properties::getFallthroughType(curr->ref, getPassOptions(), *getModule());
// Improve the cast type as much as we can without changing the results.
auto glb = Type::getGreatestLowerBound(curr->castType, refType);
if (glb != Type::unreachable && glb != curr->castType) {
curr->castType = glb;
}
switch (GCTypeUtils::evaluateCastCheck(refType, curr->castType)) {
case GCTypeUtils::Unknown:
break;
case GCTypeUtils::Success:
replaceCurrent(builder.makeBlock(
{builder.makeDrop(curr->ref), builder.makeConst(int32_t(1))}));
return;
case GCTypeUtils::Unreachable:
// Make sure to emit a block with the same type as us, to avoid other
// code in this pass needing to handle unexpected unreachable code
// (which is only properly propagated at the end of this pass when we
// refinalize).
replaceCurrent(builder.makeBlock(
{builder.makeDrop(curr->ref), builder.makeUnreachable()}, Type::i32));
return;
case GCTypeUtils::Failure:
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
builder.makeConst(int32_t(0))));
return;
case GCTypeUtils::SuccessOnlyIfNull:
replaceCurrent(builder.makeRefIsNull(curr->ref));
return;
case GCTypeUtils::SuccessOnlyIfNonNull:
// This adds an EqZ, but code size does not regress since ref.test
// also encodes a type, and ref.is_null does not. The EqZ may also add
// some work, but a cast is likely more expensive than a null check +
// a fast int operation.
replaceCurrent(
builder.makeUnary(EqZInt32, builder.makeRefIsNull(curr->ref)));
return;
}
}
void visitRefIsNull(RefIsNull* curr) {
if (curr->type == Type::unreachable) {
return;
}
// Optimizing RefIsNull is not that obvious, since even if we know the
// result evaluates to 0 or 1 then the replacement may not actually save
// code size, since RefIsNull is a single byte while adding a Const of 0
// would be two bytes. Other factors are that we can remove the input and
// the added drop on it if it has no side effects, and that replacing with a
// constant may allow further optimizations later. For now, replace with a
// constant, but this warrants more investigation. TODO
Builder builder(*getModule());
if (curr->value->type.isNonNullable()) {
replaceCurrent(
builder.makeSequence(builder.makeDrop(curr->value),
builder.makeConst(Literal::makeZero(Type::i32))));
} else {
skipCast(curr->value);
}
}
void visitRefAs(RefAs* curr) {
if (curr->type == Type::unreachable) {
return;
}
if (curr->op == ExternConvertAny || curr->op == AnyConvertExtern) {
// These pass nulls through, and we can reorder them with null traps:
//
// (any.convert_extern/extern.convert_any (ref.as_non_null.. ))
// =>
// (ref.as_non_null (any.convert_extern/extern.convert_any ..))
//
// By moving the RefAsNonNull outside, it may reach a position where it
// can be optimized (e.g. if the parent traps anyhow). And,
// ExternConvertAny/AnyConvertExtern cannot be folded with anything, so
// there is no harm to moving them inside.
if (auto* refAsChild = curr->value->dynCast<RefAs>()) {
if (refAsChild->op == RefAsNonNull) {
// Reorder and fix up the types.
curr->value = refAsChild->value;
curr->finalize();
refAsChild->value = curr;
refAsChild->finalize();
replaceCurrent(refAsChild);
return;
}
// We can optimize away externalizations of internalizations and vice
// versa.
if ((curr->op == ExternConvertAny &&
refAsChild->op == AnyConvertExtern) ||
(curr->op == AnyConvertExtern &&
refAsChild->op == ExternConvertAny)) {
replaceCurrent(refAsChild->value);
return;
}
}
return;
}
assert(curr->op == RefAsNonNull);
if (trapOnNull(curr, curr->value)) {
return;
}
skipNonNullCast(curr->value, curr);
if (!curr->value->type.isNullable()) {
replaceCurrent(curr->value);
return;
}
// As we do in visitRefCast, ref.cast can be combined with ref.as_non_null.
// This code handles the case where the ref.as is on the outside:
//
// (ref.as_non_null (ref.cast null ..))
// =>
// (ref.cast ..)
//
if (auto* cast = curr->value->dynCast<RefCast>()) {
// The cast cannot be non-nullable, or we would have handled this right
// above by just removing the ref.as, since it would not be needed.
assert(!cast->type.isNonNullable());
cast->type = Type(cast->type.getHeapType(), NonNullable);
replaceCurrent(cast);
}
}
void visitTupleExtract(TupleExtract* curr) {
if (curr->type == Type::unreachable) {
return;
}
if (auto* make = curr->tuple->dynCast<TupleMake>()) {
Builder builder(*getModule());
// Store the value of the lane we want in a tee, and return that after a
// drop of the tuple (which might have side effects).
auto valueType = make->type[curr->index];
Index tempLocal = builder.addVar(getFunction(), valueType);
make->operands[curr->index] =
builder.makeLocalTee(tempLocal, make->operands[curr->index], valueType);
auto* get = builder.makeLocalGet(tempLocal, valueType);
replaceCurrent(getDroppedChildrenAndAppend(make, get));
}
}
Index getMaxBitsForLocal(LocalGet* get) {
// check what we know about the local
return localInfo[get->index].maxBits;
}
private:
// Information about our locals
std::vector<LocalInfo> localInfo;
// Checks if the first is a local.tee and the second a local.get of that same
// index. This is useful in the methods right below us, as it is a common
// pattern where two consecutive inputs are equal despite being syntactically
// different.
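// For example:
//
//  (i32.add
//    (local.tee $x (..))
//    (local.get $x)
//  )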
bool areMatchingTeeAndGet(Expression* left, Expression* right) {
if (auto* set = left->dynCast<LocalSet>()) {
if (auto* get = right->dynCast<LocalGet>()) {
if (set->isTee() && get->index == set->index) {
return true;
}
}
}
return false;
}
// Check if two consecutive inputs to an instruction are equal. As they are
// consecutive, no code can execute in between them, which simplifies the
// problem here (and which is the case we care about in this pass, which does
// simple peephole optimizations - all we care about is a single instruction
// at a time, and its inputs).
bool areConsecutiveInputsEqual(Expression* left, Expression* right) {
// When we look for a tee/get pair, we can consider the fallthrough values
// for the first, as the fallthrough happens last (however, we must use
// NoTeeBrIf as we do not want to look through the tee). We cannot do this
// on the second, however, as there could be effects in the middle.
// TODO: Use effects here perhaps.
auto& passOptions = getPassOptions();
left =
Properties::getFallthrough(left,
passOptions,
*getModule(),
Properties::FallthroughBehavior::NoTeeBrIf);
if (areMatchingTeeAndGet(left, right)) {
return true;
}
// Ignore extraneous things and compare them syntactically. We can also
// look at the full fallthrough for both sides now.
left = Properties::getFallthrough(left, passOptions, *getModule());
auto* originalRight = right;
right = Properties::getFallthrough(right, passOptions, *getModule());
if (!ExpressionAnalyzer::equal(left, right)) {
return false;
}
// We must also not have non-fallthrough effects that invalidate us, such as
// this situation:
//
// (local.get $x)
// (block
// (local.set $x ..)
// (local.get $x)
// )
//
// The fallthroughs are identical, but the set may cause us to read a
// different value.
if (originalRight != right) {
// TODO: We could be more precise here and ignore right itself in
// originalRightEffects.
auto originalRightEffects = effects(originalRight);
auto rightEffects = effects(right);
if (originalRightEffects.invalidates(rightEffects)) {
return false;
}
}
// To be equal, they must also be known to return the same result
// deterministically.
return !Properties::isGenerative(left);
}
// Similar to areConsecutiveInputsEqual() but also checks if we can remove
// them (but we do not assume the caller will always remove them).
bool areConsecutiveInputsEqualAndRemovable(Expression* left,
Expression* right) {
// First, check for side effects. If there are any, then we can't even
// assume things like local.get's of the same index being identical. (It is
// also ok to have removable side effects here, see the function
// description.)
auto& passOptions = getPassOptions();
if (EffectAnalyzer(passOptions, *getModule(), left)
.hasUnremovableSideEffects() ||
EffectAnalyzer(passOptions, *getModule(), right)
.hasUnremovableSideEffects()) {
return false;
}
return areConsecutiveInputsEqual(left, right);
}
// Check if two consecutive inputs to an instruction are equal and can also be
// folded into the first of the two (but we do not assume the caller will
// always fold them). This is similar to areConsecutiveInputsEqualAndRemovable
// but also identifies reads from the same local variable when the first of
// them is a "tee" operation and the second is a get (in which case, it is
// fine to remove the get, but not the tee).
//
// The inputs here must be consecutive, but it is also ok to have code with no
// side effects at all in the middle. For example, a Const in between is ok.
bool areConsecutiveInputsEqualAndFoldable(Expression* left,
Expression* right) {
// TODO: We could probably consider fallthrough values for left, at least
// (since we fold into it).
if (areMatchingTeeAndGet(left, right)) {
return true;
}
// This checks a stronger property than we need - we can not only fold
// them, but remove them entirely.
return areConsecutiveInputsEqualAndRemovable(left, right);
}
// Canonicalizing the order of a symmetric binary helps us
// write more concise pattern matching code elsewhere.
void canonicalize(Binary* binary) {
assert(shouldCanonicalize(binary));
auto swap = [&]() {
assert(canReorder(binary->left, binary->right));
if (binary->isRelational()) {
binary->op = reverseRelationalOp(binary->op);
}
std::swap(binary->left, binary->right);
};
auto maybeSwap = [&]() {
if (canReorder(binary->left, binary->right)) {
swap();
}
};
// Prefer a const on the right.
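// For example:  C + x  =>  x + C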
if (binary->left->is<Const>() && !binary->right->is<Const>()) {
swap();
}
if (auto* c = binary->right->dynCast<Const>()) {
// x - C ==> x + (-C)
// Prefer to use addition if there is a constant on the right.
if (binary->op == Abstract::getBinary(c->type, Abstract::Sub)) {
c->value = c->value.neg();
binary->op = Abstract::getBinary(c->type, Abstract::Add);
return;
}
// Prefer to compare to 0 instead of to -1 or 1.
// (signed)x > -1 ==> x >= 0
if (binary->op == Abstract::getBinary(c->type, Abstract::GtS) &&
c->value.getInteger() == -1LL) {
binary->op = Abstract::getBinary(c->type, Abstract::GeS);
c->value = Literal::makeZero(c->type);
return;
}
// (signed)x <= -1 ==> x < 0
if (binary->op == Abstract::getBinary(c->type, Abstract::LeS) &&
c->value.getInteger() == -1LL) {
binary->op = Abstract::getBinary(c->type, Abstract::LtS);
c->value = Literal::makeZero(c->type);
return;
}
// (signed)x < 1 ==> x <= 0
if (binary->op == Abstract::getBinary(c->type, Abstract::LtS) &&
c->value.getInteger() == 1LL) {
binary->op = Abstract::getBinary(c->type, Abstract::LeS);
c->value = Literal::makeZero(c->type);
return;
}
// (signed)x >= 1 ==> x > 0
if (binary->op == Abstract::getBinary(c->type, Abstract::GeS) &&
c->value.getInteger() == 1LL) {
binary->op = Abstract::getBinary(c->type, Abstract::GtS);
c->value = Literal::makeZero(c->type);
return;
}
// (unsigned)x < 1 ==> x == 0
if (binary->op == Abstract::getBinary(c->type, Abstract::LtU) &&
c->value.getInteger() == 1LL) {
binary->op = Abstract::getBinary(c->type, Abstract::Eq);
c->value = Literal::makeZero(c->type);
return;
}
// (unsigned)x >= 1 ==> x != 0
if (binary->op == Abstract::getBinary(c->type, Abstract::GeU) &&
c->value.getInteger() == 1LL) {
binary->op = Abstract::getBinary(c->type, Abstract::Ne);
c->value = Literal::makeZero(c->type);
return;
}
// Prefer compare to signed min (s_min) instead of s_min + 1.
// (signed)x < s_min + 1 ==> x == s_min
if (binary->op == LtSInt32 && c->value.geti32() == INT32_MIN + 1) {
binary->op = EqInt32;
c->value = Literal::makeSignedMin(Type::i32);
return;
}
if (binary->op == LtSInt64 && c->value.geti64() == INT64_MIN + 1) {
binary->op = EqInt64;
c->value = Literal::makeSignedMin(Type::i64);
return;
}
// (signed)x >= s_min + 1 ==> x != s_min
if (binary->op == GeSInt32 && c->value.geti32() == INT32_MIN + 1) {
binary->op = NeInt32;
c->value = Literal::makeSignedMin(Type::i32);
return;
}
if (binary->op == GeSInt64 && c->value.geti64() == INT64_MIN + 1) {
binary->op = NeInt64;
c->value = Literal::makeSignedMin(Type::i64);
return;
}
// Prefer compare to signed max (s_max) instead of s_max - 1.
// (signed)x > s_max - 1 ==> x == s_max
if (binary->op == GtSInt32 && c->value.geti32() == INT32_MAX - 1) {
binary->op = EqInt32;
c->value = Literal::makeSignedMax(Type::i32);
return;
}
if (binary->op == GtSInt64 && c->value.geti64() == INT64_MAX - 1) {
binary->op = EqInt64;
c->value = Literal::makeSignedMax(Type::i64);
return;
}
// (signed)x <= s_max - 1 ==> x != s_max
if (binary->op == LeSInt32 && c->value.geti32() == INT32_MAX - 1) {
binary->op = NeInt32;
c->value = Literal::makeSignedMax(Type::i32);
return;
}
if (binary->op == LeSInt64 && c->value.geti64() == INT64_MAX - 1) {
binary->op = NeInt64;
c->value = Literal::makeSignedMax(Type::i64);
return;
}
// Prefer compare to unsigned max (u_max) instead of u_max - 1.
// (unsigned)x <= u_max - 1 ==> x != u_max
if (binary->op == Abstract::getBinary(c->type, Abstract::LeU) &&
c->value.getInteger() == (int64_t)(UINT64_MAX - 1)) {
binary->op = Abstract::getBinary(c->type, Abstract::Ne);
c->value = Literal::makeUnsignedMax(c->type);
return;
}
// (unsigned)x > u_max - 1 ==> x == u_max
if (binary->op == Abstract::getBinary(c->type, Abstract::GtU) &&
c->value.getInteger() == (int64_t)(UINT64_MAX - 1)) {
binary->op = Abstract::getBinary(c->type, Abstract::Eq);
c->value = Literal::makeUnsignedMax(c->type);
return;
}
return;
}
// Prefer a get on the right.
if (binary->left->is<LocalGet>() && !binary->right->is<LocalGet>()) {
return maybeSwap();
}
// Sort by the node id type, if different.
if (binary->left->_id != binary->right->_id) {
if (binary->left->_id > binary->right->_id) {
return maybeSwap();
}
return;
}
// If the children have the same node id, we have to go deeper.
if (auto* left = binary->left->dynCast<Unary>()) {
auto* right = binary->right->cast<Unary>();
if (left->op > right->op) {
return maybeSwap();
}
}
if (auto* left = binary->left->dynCast<Binary>()) {
auto* right = binary->right->cast<Binary>();
if (left->op > right->op) {
return maybeSwap();
}
}
if (auto* left = binary->left->dynCast<LocalGet>()) {
auto* right = binary->right->cast<LocalGet>();
if (left->index > right->index) {
return maybeSwap();
}
}
}
// Optimize given that the expression is flowing into a boolean context
Expression* optimizeBoolean(Expression* boolean) {
// TODO use a general getFallthroughs
if (auto* unary = boolean->dynCast<Unary>()) {
if (unary) {
if (unary->op == EqZInt32) {
auto* unary2 = unary->value->dynCast<Unary>();
if (unary2 && unary2->op == EqZInt32) {
// double eqz
return unary2->value;
}
if (auto* binary = unary->value->dynCast<Binary>()) {
// !(x <=> y) ==> x <!=> y
auto op = invertBinaryOp(binary->op);
if (op != InvalidBinary) {
binary->op = op;
return binary;
}
}
}
}
} else if (auto* binary = boolean->dynCast<Binary>()) {
if (binary->op == SubInt32) {
if (auto* c = binary->left->dynCast<Const>()) {
if (c->value.geti32() == 0) {
// bool(0 - x) ==> bool(x)
return binary->right;
}
}
} else if (binary->op == OrInt32) {
// an or flowing into a boolean context can consider each input as
// boolean
binary->left = optimizeBoolean(binary->left);
binary->right = optimizeBoolean(binary->right);
} else if (binary->op == NeInt32) {
if (auto* c = binary->right->dynCast<Const>()) {
// x != 0 is just x if it's used as a bool
if (c->value.geti32() == 0) {
return binary->left;
}
// TODO: Perhaps use it for separate final pass???
// x != -1 ==> x ^ -1
// if (num->value.geti32() == -1) {
// binary->op = XorInt32;
// return binary;
// }
}
} else if (binary->op == RemSInt32) {
// bool(i32(x) % C_pot) ==> bool(x & (C_pot - 1))
// bool(i32(x) % min_s) ==> bool(x & max_s)
if (auto* c = binary->right->dynCast<Const>()) {
if (c->value.isSignedMin() ||
Bits::isPowerOf2(c->value.abs().geti32())) {
binary->op = AndInt32;
if (c->value.isSignedMin()) {
c->value = Literal::makeSignedMax(Type::i32);
} else {
c->value = c->value.abs().sub(Literal::makeOne(Type::i32));
}
return binary;
}
}
}
if (auto* ext = Properties::getSignExtValue(binary)) {
// use a cheaper zero-extend, as we just care about the boolean value
// anyhow
return makeZeroExt(ext, Properties::getSignExtBits(binary));
}
} else if (auto* block = boolean->dynCast<Block>()) {
if (block->type == Type::i32 && block->list.size() > 0) {
block->list.back() = optimizeBoolean(block->list.back());
}
} else if (auto* iff = boolean->dynCast<If>()) {
if (iff->type == Type::i32) {
iff->ifTrue = optimizeBoolean(iff->ifTrue);
iff->ifFalse = optimizeBoolean(iff->ifFalse);
}
} else if (auto* select = boolean->dynCast<Select>()) {
select->ifTrue = optimizeBoolean(select->ifTrue);
select->ifFalse = optimizeBoolean(select->ifFalse);
} else if (auto* tryy = boolean->dynCast<Try>()) {
if (tryy->type == Type::i32) {
tryy->body = optimizeBoolean(tryy->body);
for (Index i = 0; i < tryy->catchBodies.size(); i++) {
tryy->catchBodies[i] = optimizeBoolean(tryy->catchBodies[i]);
}
}
}
// TODO: recurse into br values?
return boolean;
}
Expression* optimizeSelect(Select* curr) {
using namespace Match;
using namespace Abstract;
Builder builder(*getModule());
curr->condition = optimizeBoolean(curr->condition);
{
// Constant condition, we can just pick the correct side (barring side
// effects)
Expression *ifTrue, *ifFalse;
if (matches(curr, select(pure(&ifTrue), any(&ifFalse), i32(0)))) {
return ifFalse;
}
if (matches(curr, select(any(&ifTrue), any(&ifFalse), i32(0)))) {
return builder.makeSequence(builder.makeDrop(ifTrue), ifFalse);
}
int32_t cond;
if (matches(curr, select(any(&ifTrue), pure(&ifFalse), i32(&cond)))) {
// The condition must be non-zero because a zero would have matched one
// of the previous patterns.
assert(cond != 0);
return ifTrue;
}
// Don't bother when `ifFalse` isn't pure - we would need to reverse the
// order using a temp local, which would be bad
}
{
// TODO: Remove this after landing SCCP pass. See: #4161
// i32(x) ? i32(x) : 0 ==> x
Expression *x, *y;
if (matches(curr, select(any(&x), i32(0), any(&y))) &&
areConsecutiveInputsEqualAndFoldable(x, y)) {
return curr->ifTrue;
}
// i32(x) ? 0 : i32(x) ==> { x, 0 }
if (matches(curr, select(i32(0), any(&x), any(&y))) &&
areConsecutiveInputsEqualAndFoldable(x, y)) {
return builder.makeSequence(builder.makeDrop(x), curr->ifTrue);
}
// i64(x) == 0 ? 0 : i64(x) ==> x
// i64(x) != 0 ? i64(x) : 0 ==> x
if ((matches(curr, select(i64(0), any(&x), unary(EqZInt64, any(&y)))) ||
matches(
curr,
select(any(&x), i64(0), binary(NeInt64, any(&y), i64(0))))) &&
areConsecutiveInputsEqualAndFoldable(x, y)) {
return curr->condition->is<Unary>() ? curr->ifFalse : curr->ifTrue;
}
// i64(x) == 0 ? i64(x) : 0 ==> { x, 0 }
// i64(x) != 0 ? 0 : i64(x) ==> { x, 0 }
if ((matches(curr, select(any(&x), i64(0), unary(EqZInt64, any(&y)))) ||
matches(
curr,
select(i64(0), any(&x), binary(NeInt64, any(&y), i64(0))))) &&
areConsecutiveInputsEqualAndFoldable(x, y)) {
return builder.makeSequence(
builder.makeDrop(x),
curr->condition->is<Unary>() ? curr->ifFalse : curr->ifTrue);
}
}
{
// Simplify selects between 0 and 1
Expression* c;
bool reversed = matches(curr, select(ival(0), ival(1), any(&c)));
if (reversed || matches(curr, select(ival(1), ival(0), any(&c)))) {
if (reversed) {
c = optimizeBoolean(builder.makeUnary(EqZInt32, c));
}
if (!Properties::emitsBoolean(c)) {
// cond ? 1 : 0 ==> !!cond
c = builder.makeUnary(EqZInt32, builder.makeUnary(EqZInt32, c));
}
return curr->type == Type::i64 ? builder.makeUnary(ExtendUInt32, c) : c;
}
}
// Flip the arms if doing so might help later optimizations here.
if (auto* binary = curr->condition->dynCast<Binary>()) {
auto inv = invertBinaryOp(binary->op);
if (inv != InvalidBinary) {
// For invertible binary operations, we prefer to have non-zero values
// in the ifTrue, and zero values in the ifFalse, due to the
// optimization right after us. Even if this does not help there, it is
// a nice canonicalization. (To ensure convergence - that we don't keep
// doing work each time we get here - do nothing if both are zero, or
// if both are nonzero.)
Const* c;
if ((matches(curr->ifTrue, ival(0)) &&
!matches(curr->ifFalse, ival(0))) ||
(!matches(curr->ifTrue, ival()) &&
matches(curr->ifFalse, ival(&c)) && !c->value.isZero())) {
binary->op = inv;
std::swap(curr->ifTrue, curr->ifFalse);
}
}
}
if (curr->type == Type::i32 &&
Bits::getMaxBits(curr->condition, this) <= 1 &&
Bits::getMaxBits(curr->ifTrue, this) <= 1 &&
Bits::getMaxBits(curr->ifFalse, this) <= 1) {
// The condition and both arms are i32 booleans, which allows us to do
// boolean optimizations.
Expression* x;
Expression* y;
// x ? y : 0 ==> x & y
if (matches(curr, select(any(&y), ival(0), any(&x)))) {
return builder.makeBinary(AndInt32, y, x);
}
// x ? 1 : y ==> x | y
if (matches(curr, select(ival(1), any(&y), any(&x)))) {
return builder.makeBinary(OrInt32, y, x);
}
}
{
// Simplify x < 0 ? -1 : 1 or x >= 0 ? 1 : -1 to
// i32(x) >> 31 | 1
// i64(x) >> 63 | 1
Binary* bin;
if (matches(
curr,
select(ival(-1), ival(1), binary(&bin, LtS, any(), ival(0)))) ||
matches(
curr,
select(ival(1), ival(-1), binary(&bin, GeS, any(), ival(0))))) {
auto c = bin->right->cast<Const>();
auto type = curr->ifTrue->type;
if (type == c->type) {
bin->type = type;
bin->op = Abstract::getBinary(type, ShrS);
c->value = Literal::makeFromInt32(type.getByteSize() * 8 - 1, type);
curr->ifTrue->cast<Const>()->value = Literal::makeOne(type);
return builder.makeBinary(
Abstract::getBinary(type, Or), bin, curr->ifTrue);
}
}
}
{
// Flip select to remove eqz if we can reorder
Select* s;
Expression *ifTrue, *ifFalse, *c;
if (matches(
curr,
select(
&s, any(&ifTrue), any(&ifFalse), unary(EqZInt32, any(&c)))) &&
canReorder(ifTrue, ifFalse)) {
s->ifTrue = ifFalse;
s->ifFalse = ifTrue;
s->condition = c;
return s;
}
}
{
// Sides are identical, fold
Expression *ifTrue, *ifFalse, *c;
if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c))) &&
ExpressionAnalyzer::equal(ifTrue, ifFalse)) {
auto value = effects(ifTrue);
if (value.hasSideEffects()) {
// At best we don't need the condition, but need to execute the
// value twice. A block is larger than a select by 2 bytes, and we
// must drop one value, so 3 bytes, while we only save the condition,
// so it's not clear this is worth it. TODO
} else {
// The value has no side effects, so we can replace ourselves with one
// of the two identical values in the arms.
auto condition = effects(c);
if (!condition.hasSideEffects()) {
return ifTrue;
} else {
// The condition is last, so we need a new local, and it may be a
// bad idea to use a block like we do for an if. Do it only if we
// can reorder
if (!condition.invalidates(value)) {
return builder.makeSequence(builder.makeDrop(c), ifTrue);
}
}
}
}
}
return nullptr;
}
// Find added constants in an expression tree, including multiplied/shifted
// ones, and combine them. Note that we ignore division/shift-right, as
// rounding makes this nonlinear, so it is not a valid optimization.
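// For example, conceptually:
//   (x + 10) + (y + 20)   =>  (x + y) + 30
//   ((x + 1) << 2) + 3    =>  (x << 2) + 7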
Expression* optimizeAddedConstants(Binary* binary) {
assert(binary->type.isInteger());
uint64_t constant = 0;
std::vector<Const*> constants;
struct SeekState {
Expression* curr;
uint64_t mul;
SeekState(Expression* curr, uint64_t mul) : curr(curr), mul(mul) {}
};
std::vector<SeekState> seekStack;
seekStack.emplace_back(binary, 1);
while (!seekStack.empty()) {
auto state = seekStack.back();
seekStack.pop_back();
auto curr = state.curr;
auto mul = state.mul;
if (auto* c = curr->dynCast<Const>()) {
uint64_t value = c->value.getInteger();
if (value != 0ULL) {
constant += value * mul;
constants.push_back(c);
}
continue;
} else if (auto* binary = curr->dynCast<Binary>()) {
if (binary->op == Abstract::getBinary(binary->type, Abstract::Add)) {
seekStack.emplace_back(binary->right, mul);
seekStack.emplace_back(binary->left, mul);
continue;
} else if (binary->op ==
Abstract::getBinary(binary->type, Abstract::Sub)) {
// if the left is a zero, ignore it, it's how we negate ints
auto* left = binary->left->dynCast<Const>();
seekStack.emplace_back(binary->right, -mul);
if (!left || !left->value.isZero()) {
seekStack.emplace_back(binary->left, mul);
}
continue;
} else if (binary->op ==
Abstract::getBinary(binary->type, Abstract::Shl)) {
if (auto* c = binary->right->dynCast<Const>()) {
seekStack.emplace_back(binary->left,
mul << Bits::getEffectiveShifts(c));
continue;
}
} else if (binary->op ==
Abstract::getBinary(binary->type, Abstract::Mul)) {
if (auto* c = binary->left->dynCast<Const>()) {
seekStack.emplace_back(binary->right,
mul * (uint64_t)c->value.getInteger());
continue;
} else if (auto* c = binary->right->dynCast<Const>()) {
seekStack.emplace_back(binary->left,
mul * (uint64_t)c->value.getInteger());
continue;
}
}
}
}
// find all factors
if (constants.size() <= 1) {
// nothing much to do, except for the trivial case of adding/subbing a
// zero
if (auto* c = binary->right->dynCast<Const>()) {
if (c->value.isZero()) {
return binary->left;
}
}
return nullptr;
}
// wipe out all constants, we'll replace with a single added one
for (auto* c : constants) {
c->value = Literal::makeZero(c->type);
}
// remove added/subbed zeros
struct ZeroRemover : public PostWalker<ZeroRemover> {
// TODO: we could save the binaries and costs we drop, and reuse them later
PassOptions& passOptions;
ZeroRemover(PassOptions& passOptions) : passOptions(passOptions) {}
void visitBinary(Binary* curr) {
if (!curr->type.isInteger()) {
return;
}
auto type = curr->type;
auto* left = curr->left->dynCast<Const>();
auto* right = curr->right->dynCast<Const>();
// Canonicalization prefers an add instead of a subtract wherever
// possible. That prevents a subtracted constant on the right,
// as it would be added. And for a zero on the left, it can't be
// removed (it is how we negate ints).
if (curr->op == Abstract::getBinary(type, Abstract::Add)) {
if (left && left->value.isZero()) {
replaceCurrent(curr->right);
return;
}
if (right && right->value.isZero()) {
replaceCurrent(curr->left);
return;
}
} else if (curr->op == Abstract::getBinary(type, Abstract::Shl)) {
// Shifting a zero gives zero, and shifting anything by zero has no
// effect, unless the shift amount has side effects.
if (((left && left->value.isZero()) ||
(right && Bits::getEffectiveShifts(right) == 0)) &&
!EffectAnalyzer(passOptions, *getModule(), curr->right)
.hasSideEffects()) {
replaceCurrent(curr->left);
return;
}
} else if (curr->op == Abstract::getBinary(type, Abstract::Mul)) {
// multiplying by zero is a zero, unless the other side has side
// effects
if (left && left->value.isZero() &&
!EffectAnalyzer(passOptions, *getModule(), curr->right)
.hasSideEffects()) {
replaceCurrent(left);
return;
}
if (right && right->value.isZero() &&
!EffectAnalyzer(passOptions, *getModule(), curr->left)
.hasSideEffects()) {
replaceCurrent(right);
return;
}
}
}
};
// Noting the type here not only simplifies the code below, but is also
// necessary to avoid an error: if we look at walked->type then it may
// actually differ from the original type, say if the walk ended up turning
// |binary| into a simpler unreachable expression.
auto type = binary->type;
Expression* walked = binary;
ZeroRemover remover(getPassOptions());
remover.setModule(getModule());
remover.walk(walked);
if (constant == 0ULL) {
return walked; // nothing more to do
}
// Add the total constant value we computed to the value remaining here.
// Note that if the value is 32 bits then |makeFromInt64| will wrap to 32
// bits for us; as all the operations before us and the add below us are
// adds and subtracts, any overflow is not a problem.
auto toAdd = Literal::makeFromInt64(constant, type);
if (auto* c = walked->dynCast<Const>()) {
// This is a constant, so just add it immediately (we could also leave
// this for Precompute, in principle).
c->value = c->value.add(toAdd);
return c;
}
Builder builder(*getModule());
return builder.makeBinary(Abstract::getBinary(type, Abstract::Add),
walked,
builder.makeConst(toAdd));
}
// Given an i64.wrap operation, see if we can remove it. If all the things
// being operated on behave the same with or without wrapping, then we don't
// need to go to 64 bits at all, e.g.:
//
// int32_t(int64_t(x)) => x (extend, then wrap)
// int32_t(int64_t(x) + int64_t(10)) => x + int32_t(10) (also add)
//
Expression* optimizeWrappedResult(Unary* wrap) {
assert(wrap->op == WrapInt64);
// Core processing logic. This goes through the children, in one of two
// modes:
// * Scan: Find if there is anything we can't handle. Sets |canOptimize|
// with what it finds.
// * Optimize: Given we can handle everything, update things.
enum Mode { Scan, Optimize };
bool canOptimize = true;
auto processChildren = [&](Mode mode) {
// Use a simple stack as we go through the children. We use ** as we need
// to replace children for some optimizations.
SmallVector<Expression**, 2> stack;
stack.emplace_back(&wrap->value);
while (!stack.empty() && canOptimize) {
auto* currp = stack.back();
stack.pop_back();
auto* curr = *currp;
if (curr->type == Type::unreachable) {
// Leave unreachability for other passes.
canOptimize = false;
return;
} else if (auto* c = curr->dynCast<Const>()) {
// An i64 const can be handled by just turning it into an i32.
if (mode == Optimize) {
c->value = Literal(int32_t(c->value.getInteger()));
c->type = Type::i32;
}
} else if (auto* unary = curr->dynCast<Unary>()) {
switch (unary->op) {
case ExtendSInt32:
case ExtendUInt32: {
// Note that there is nothing to push to the stack here: the child
// is 32-bit already, so we can stop looking. We just need to skip
// the extend operation.
if (mode == Optimize) {
*currp = unary->value;
}
break;
}
default: {
// TODO: handle more cases here and below,
// https://github.com/WebAssembly/binaryen/issues/5004
canOptimize = false;
return;
}
}
} else if (auto* binary = curr->dynCast<Binary>()) {
// Turn the binary into a 32-bit one, if we can.
switch (binary->op) {
case AddInt64:
case SubInt64:
case MulInt64: {
// We can optimize these.
break;
}
default: {
canOptimize = false;
return;
}
}
if (mode == Optimize) {
switch (binary->op) {
case AddInt64: {
binary->op = AddInt32;
break;
}
case SubInt64: {
binary->op = SubInt32;
break;
}
case MulInt64: {
binary->op = MulInt32;
break;
}
default: {
WASM_UNREACHABLE("bad op");
}
}
// All things we can optimize change the type to i32.
binary->type = Type::i32;
}
stack.push_back(&binary->left);
stack.push_back(&binary->right);
} else {
// Anything else makes us give up.
canOptimize = false;
return;
}
}
};
processChildren(Scan);
if (!canOptimize) {
return nullptr;
}
// Optimize, and return the optimized results (in which we no longer need
// the wrap operation itself).
processChildren(Optimize);
return wrap->value;
}
// expensive1 | expensive2 can be turned into expensive1 ? 1 : expensive2,
// and expensive | cheap can be turned into cheap ? 1 : expensive,
// so that we can avoid one expensive computation, if it has no side effects.
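// For example, in pseudo-wat (when neither side has side effects):
//
//  (i32.or (cheap) (expensive))   =>  (if (cheap) (i32.const 1) (expensive))
//  (i32.and (cheap) (expensive))  =>  (if (cheap) (expensive) (i32.const 0))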
Expression* conditionalizeExpensiveOnBitwise(Binary* binary) {
// this operation can increase code size, so don't always do it
auto& options = getPassRunner()->options;
if (options.optimizeLevel < 2 || options.shrinkLevel > 0) {
return nullptr;
}
const auto MIN_COST = 7;
assert(binary->op == AndInt32 || binary->op == OrInt32);
if (binary->right->is<Const>()) {
return nullptr; // trivial
}
// A bitwise logical operator whose operands are not constants; check if
// they are booleans.
auto* left = binary->left;
auto* right = binary->right;
if (!Properties::emitsBoolean(left) || !Properties::emitsBoolean(right)) {
return nullptr;
}
auto leftEffects = effects(left);
auto rightEffects = effects(right);
auto leftHasSideEffects = leftEffects.hasSideEffects();
auto rightHasSideEffects = rightEffects.hasSideEffects();
if (leftHasSideEffects && rightHasSideEffects) {
return nullptr; // both must execute
}
// canonicalize with side effects, if any, happening on the left
if (rightHasSideEffects) {
if (CostAnalyzer(left).cost < MIN_COST) {
return nullptr; // avoidable code is too cheap
}
if (leftEffects.invalidates(rightEffects)) {
return nullptr; // cannot reorder
}
std::swap(left, right);
} else if (leftHasSideEffects) {
if (CostAnalyzer(right).cost < MIN_COST) {
return nullptr; // avoidable code is too cheap
}
} else {
// no side effects, reorder based on cost estimation
auto leftCost = CostAnalyzer(left).cost;
auto rightCost = CostAnalyzer(right).cost;
if (std::max(leftCost, rightCost) < MIN_COST) {
return nullptr; // avoidable code is too cheap
}
// canonicalize with expensive code on the right
if (leftCost > rightCost) {
std::swap(left, right);
}
}
// worth it! perform conditionalization
Builder builder(*getModule());
if (binary->op == OrInt32) {
return builder.makeIf(
left, builder.makeConst(Literal(int32_t(1))), right);
} else { // &
return builder.makeIf(
left, right, builder.makeConst(Literal(int32_t(0))));
}
}
// We can combine `and` operations, e.g.
// (x == 0) & (y == 0) ==> (x | y) == 0
Expression* combineAnd(Binary* curr) {
assert(curr->op == AndInt32);
using namespace Abstract;
using namespace Match;
{
// (i32(x) == 0) & (i32(y) == 0) ==> i32(x | y) == 0
// (i64(x) == 0) & (i64(y) == 0) ==> i64(x | y) == 0
Expression *x, *y;
if (matches(curr->left, unary(EqZ, any(&x))) &&
matches(curr->right, unary(EqZ, any(&y))) && x->type == y->type) {
auto* inner = curr->left->cast<Unary>();
inner->value =
Builder(*getModule()).makeBinary(getBinary(x->type, Or), x, y);
return inner;
}
}
{
// Binary operations that inverse a bitwise AND can be
// reordered. If F(x) = binary(x, c), and F(x) inverses AND,
// that is,
//
// F(x) & F(y) == F(x | y)
//
// Then also
//
// binary(x, c) & binary(y, c) => binary(x | y, c)
Binary *bx, *by;
Expression *x, *y;
Const *cx, *cy;
if (matches(curr->left, binary(&bx, any(&x), ival(&cx))) &&
matches(curr->right, binary(&by, any(&y), ival(&cy))) &&
bx->op == by->op && x->type == y->type && cx->value == cy->value &&
inversesAnd(bx)) {
by->op = getBinary(x->type, Or);
by->type = x->type;
by->left = x;
by->right = y;
bx->left = by;
return bx;
}
}
{
// Binary operations that preserve a bitwise AND can be
// reordered. If F(x) = binary(x, c), and F(x) preserves AND,
// that is,
//
// F(x) & F(y) == F(x & y)
//
// Then also
//
// binary(x, c) & binary(y, c) => binary(x & y, c)
Binary *bx, *by;
Expression *x, *y;
Const *cx, *cy;
if (matches(curr->left, binary(&bx, any(&x), ival(&cx))) &&
matches(curr->right, binary(&by, any(&y), ival(&cy))) &&
bx->op == by->op && x->type == y->type && cx->value == cy->value &&
preserveAnd(bx)) {
by->op = getBinary(x->type, And);
by->type = x->type;
by->left = x;
by->right = y;
bx->left = by;
return bx;
}
}
return nullptr;
}
// We can combine `or` operations, e.g.
// (x > y) | (x == y) ==> x >= y
// (x != 0) | (y != 0) ==> (x | y) != 0
Expression* combineOr(Binary* curr) {
assert(curr->op == OrInt32);
using namespace Abstract;
using namespace Match;
if (auto* left = curr->left->dynCast<Binary>()) {
if (auto* right = curr->right->dynCast<Binary>()) {
if (left->op != right->op &&
ExpressionAnalyzer::equal(left->left, right->left) &&
ExpressionAnalyzer::equal(left->right, right->right) &&
!effects(left->left).hasSideEffects() &&
!effects(left->right).hasSideEffects()) {
switch (left->op) {
// (x > y) | (x == y) ==> x >= y
case EqInt32: {
if (right->op == GtSInt32) {
left->op = GeSInt32;
return left;
}
break;
}
default: {
}
}
}
}
}
{
// Binary operations that inverse a bitwise OR into an AND can be
// reordered. If F(x) = binary(x, c), and F(x) inverses OR,
// that is,
//
// F(x) | F(y) == F(x & y)
//
// Then also
//
// binary(x, c) | binary(y, c) => binary(x & y, c)
Binary *bx, *by;
Expression *x, *y;
Const *cx, *cy;
if (matches(curr->left, binary(&bx, any(&x), ival(&cx))) &&
matches(curr->right, binary(&by, any(&y), ival(&cy))) &&
bx->op == by->op && x->type == y->type && cx->value == cy->value &&
inversesOr(bx)) {
by->op = getBinary(x->type, And);
by->type = x->type;
by->left = x;
by->right = y;
bx->left = by;
return bx;
}
}
{
// Binary operations that preserve a bitwise OR can be
// reordered. If F(x) = binary(x, c), and F(x) preserves OR,
// that is,
//
// F(x) | F(y) == F(x | y)
//
// Then also
//
// binary(x, c) | binary(y, c) => binary(x | y, c)
Binary *bx, *by;
Expression *x, *y;
Const *cx, *cy;
if (matches(curr->left, binary(&bx, any(&x), ival(&cx))) &&
matches(curr->right, binary(&by, any(&y), ival(&cy))) &&
bx->op == by->op && x->type == y->type && cx->value == cy->value &&
preserveOr(bx)) {
by->op = getBinary(x->type, Or);
by->type = x->type;
by->left = x;
by->right = y;
bx->left = by;
return bx;
}
}
return nullptr;
}
// Check whether an operation preserves the Or operation through it, that is,
//
// F(x | y) = F(x) | F(y)
//
// Mathematically that means F is homomorphic with respect to the | operation.
//
// F(x) is seen as taking a single parameter of its first child. That is, the
// first child is |x|, and the rest is constant. For example, if we are given
// a binary with operation != and the right child is a constant 0, then
// F(x) = (x != 0).
bool preserveOr(Binary* curr) {
using namespace Abstract;
using namespace Match;
// (x != 0) | (y != 0) ==> (x | y) != 0
// This effectively checks if any bits are set in x or y.
if (matches(curr, binary(Ne, any(), ival(0)))) {
return true;
}
// (x < 0) | (y < 0) ==> (x | y) < 0
// This effectively checks if x or y have the sign bit set.
if (matches(curr, binary(LtS, any(), ival(0)))) {
return true;
}
return false;
}
// Check whether an operation inverses the Or operation to And, that is,
//
// F(x | y) = F(x) & F(y)
//
// Mathematically that means F maps the | operation to the & operation.
//
// F(x) is seen as taking a single parameter of its first child. That is, the
// first child is |x|, and the rest is constant. For example, if we are given
// a binary with operation != and the right child is a constant 0, then
// F(x) = (x != 0).
bool inversesOr(Binary* curr) {
using namespace Abstract;
using namespace Match;
// (x >= 0) | (y >= 0) ==> (x & y) >= 0
if (matches(curr, binary(GeS, any(), ival(0)))) {
return true;
}
// (x != -1) | (y != -1) ==> (x & y) != -1
if (matches(curr, binary(Ne, any(), ival(-1)))) {
return true;
}
return false;
}
// Check whether an operation preserves the And operation through it, that is,
//
// F(x & y) = F(x) & F(y)
//
// Mathematically that means F is homomorphic with respect to the & operation.
//
// F(x) is seen as taking a single parameter of its first child. That is, the
// first child is |x|, and the rest is constant. For example, if we are given
// a binary with operation != and the right child is a constant 0, then
// F(x) = (x != 0).
bool preserveAnd(Binary* curr) {
using namespace Abstract;
using namespace Match;
// (x < 0) & (y < 0) ==> (x & y) < 0
if (matches(curr, binary(LtS, any(), ival(0)))) {
return true;
}
// (x == -1) & (y == -1) ==> (x & y) == -1
if (matches(curr, binary(Eq, any(), ival(-1)))) {
return true;
}
return false;
}
// Check whether an operation inverses the And operation to Or, that is,
//
// F(x & y) = F(x) | F(y)
//
// Mathematically that means F maps the & operation to the | operation.
//
// F(x) is seen as taking a single parameter of its first child. That is, the
// first child is |x|, and the rest is constant. For example, if we are given
// a binary with operation != and the right child is a constant 0, then
// F(x) = (x != 0).
bool inversesAnd(Binary* curr) {
using namespace Abstract;
using namespace Match;
// (x >= 0) & (y >= 0) ==> (x | y) >= 0
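// For example, with x = 5 and y = -1: (5 >= 0) & (-1 >= 0) is 1 & 0 = 0, and
// likewise (5 | -1) >= 0 is (-1 >= 0) = 0.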
if (matches(curr, binary(GeS, any(), ival(0)))) {
return true;
}
return false;
}
// fold constant factors into the offset
void optimizeMemoryAccess(Expression*& ptr, Address& offset, Name memory) {
// ptr may be a const, but it isn't worth folding that in (we still have a
// const); in fact, it's better to do the opposite for gzip purposes as well
// as for readability.
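// For example, with a 32-bit memory, ptr (i32.const 16) with offset 8
// becomes ptr (i32.const 24) with offset 0, when the sum cannot overflow.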
auto* last = ptr->dynCast<Const>();
if (last) {
uint64_t value64 = last->value.getInteger();
uint64_t offset64 = offset;
auto mem = getModule()->getMemory(memory);
if (mem->is64()) {
// Check for a 64-bit overflow.
uint64_t sum;
if (!std::ckd_add(&sum, value64, offset64)) {
last->value = Literal(int64_t(sum));
offset = 0;
}
} else {
// don't do this if it would wrap the pointer
if (value64 <= uint64_t(std::numeric_limits<int32_t>::max()) &&
offset64 <= uint64_t(std::numeric_limits<int32_t>::max()) &&
value64 + offset64 <=
uint64_t(std::numeric_limits<int32_t>::max())) {
last->value = Literal(int32_t(value64 + offset64));
offset = 0;
}
}
}
}
// Optimize a multiply by a power of two on the right, which
// can be a shift.
// This doesn't shrink code size, and VMs likely optimize it anyhow,
// but it's still worth doing since
// * Often shifts are more common than muls.
// * The constant is smaller.
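// For example, x * 8 becomes x << 3 (8 has 3 trailing zero bits).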
template<typename T> Expression* optimizePowerOf2Mul(Binary* binary, T c) {
static_assert(std::is_same<T, uint32_t>::value ||
std::is_same<T, uint64_t>::value,
"type mismatch");
auto shifts = Bits::countTrailingZeroes(c);
binary->op = std::is_same<T, uint32_t>::value ? ShlInt32 : ShlInt64;
binary->right->cast<Const>()->value = Literal(static_cast<T>(shifts));
return binary;
}
// Optimize an unsigned divide / remainder by a power of two on the right
// This doesn't shrink code size, and VMs likely optimize it anyhow,
// but it's still worth doing since
// * Usually ands are more common than urems.
// * The constant is slightly smaller.
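// For example, (unsigned)x % 8 becomes x & 7.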
template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) {
static_assert(std::is_same<T, uint32_t>::value ||
std::is_same<T, uint64_t>::value,
"type mismatch");
binary->op = std::is_same<T, uint32_t>::value ? AndInt32 : AndInt64;
binary->right->cast<Const>()->value = Literal(c - 1);
return binary;
}
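// Optimize an unsigned divide by a power of two on the right into a shift,
// e.g. (unsigned)x / 8 becomes x >>> 3.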
template<typename T> Expression* optimizePowerOf2UDiv(Binary* binary, T c) {
static_assert(std::is_same<T, uint32_t>::value ||
std::is_same<T, uint64_t>::value,
"type mismatch");
auto shifts = Bits::countTrailingZeroes(c);
binary->op = std::is_same<T, uint32_t>::value ? ShrUInt32 : ShrUInt64;
binary->right->cast<Const>()->value = Literal(static_cast<T>(shifts));
return binary;
}
template<typename T> Expression* optimizePowerOf2FDiv(Binary* binary, T c) {
//
// x / C_pot => x * (C_pot ^ -1)
//
// Explanation:
// Floating point numbers are represented as:
// ((-1) ^ sign) * (2 ^ (exp - bias)) * (1 + significand)
//
// If we have power of two numbers, then the mantissa (significand)
// is all zeros. Let's focus on the exponent, ignoring the sign part:
// (2 ^ (exp - bias))
//
// and for inverted power of two floating point:
// 1.0 / (2 ^ (exp - bias)) -> 2 ^ -(exp - bias)
//
// So inversion of C_pot is valid because it changes only the sign
// of the exponent part and doesn't touch the significand part,
// which remains the same (zeros).
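// For example, x / 8.0 becomes x * 0.125; 0.125 = 2^-3 is represented
// exactly, so no precision is lost.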
static_assert(std::is_same<T, float>::value ||
std::is_same<T, double>::value,
"type mismatch");
double invDivisor = 1.0 / (double)c;
binary->op = std::is_same<T, float>::value ? MulFloat32 : MulFloat64;
binary->right->cast<Const>()->value = Literal(static_cast<T>(invDivisor));
return binary;
}
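// Zero-extend a value by masking it down to its low |bits| bits.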
Expression* makeZeroExt(Expression* curr, int32_t bits) {
Builder builder(*getModule());
return builder.makeBinary(
AndInt32, curr, builder.makeConst(Literal(Bits::lowBitMask(bits))));
}
// given an "almost" sign extend - either a proper one, or one that has too
// many left shifts - remove the sign extend. If there are too many shifts,
// we split the shifts first, so this removes the two sign-extend shifts and
// adds one smaller shift.
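// For example, (x << 27) >> 24, an 8-bit sign extend with 3 extra left
// shifts, becomes x << 3 once the matching 24-bit shift pair is removed.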
Expression* removeAlmostSignExt(Binary* outer) {
auto* inner = outer->left->cast<Binary>();
auto* outerConst = outer->right->cast<Const>();
auto* innerConst = inner->right->cast<Const>();
auto* value = inner->left;
if (outerConst->value == innerConst->value) {
return value;
}
// add a shift, by reusing the existing node
innerConst->value = innerConst->value.sub(outerConst->value);
return inner;
}
// Check if an expression is already sign-extended to an exact number of bits.
bool isSignExted(Expression* curr, Index bits) {
return getSignExtBits(curr) == bits;
}
// Returns the number of bits an expression is sign-extended (or 0 if it is
// not).
Index getSignExtBits(Expression* curr) {
if (Properties::getSignExtValue(curr)) {
return Properties::getSignExtBits(curr);
}
if (auto* get = curr->dynCast<LocalGet>()) {
// Check what we know about the local.
return localInfo[get->index].signExtBits;
}
return 0;
}
// optimize trivial math operations, given that the right side of a binary
// is a constant
Expression* optimizeWithConstantOnRight(Binary* curr) {
using namespace Match;
using namespace Abstract;
Builder builder(*getModule());
Expression* left;
auto* right = curr->right->cast<Const>();
auto type = curr->right->type;
// Operations on zero
if (matches(curr, binary(Shl, any(&left), ival(0))) ||
matches(curr, binary(ShrU, any(&left), ival(0))) ||
matches(curr, binary(ShrS, any(&left), ival(0))) ||
matches(curr, binary(Or, any(&left), ival(0))) ||
matches(curr, binary(Xor, any(&left), ival(0)))) {
return left;
}
if (matches(curr, binary(Mul, pure(&left), ival(0))) ||
matches(curr, binary(And, pure(&left), ival(0)))) {
return right;
}
// -x * C ==> x * -C, if shrinkLevel != 0 or C != C_pot
// -x * C ==> -(x * C), otherwise
// where x, C are integers
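// For example, -x * 3 becomes x * -3, while at shrink level 0 the
// power-of-two case -x * 4 becomes -(x * 4), likely so that the
// multiplication can still become a shift.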
Binary* inner;
if (matches(
curr,
binary(Mul, binary(&inner, Sub, ival(0), any(&left)), ival()))) {
if (getPassOptions().shrinkLevel != 0 ||
!Bits::isPowerOf2(right->value.getInteger())) {
right->value = right->value.neg();
curr->left = left;
return curr;
} else {
curr->left = left;
Const* zero = inner->left->cast<Const>();
return builder.makeBinary(inner->op, zero, curr);
}
}
// x == 0 ==> eqz x
if (matches(curr, binary(Eq, any(&left), ival(0)))) {
return builder.makeUnary(Abstract::getUnary(type, EqZ), left);
}
// Operations on one
// (signed)x % 1 ==> 0
if (matches(curr, binary(RemS, pure(&left), ival(1)))) {
right->value = Literal::makeZero(type);
return right;
}
// (signed)x % C_pot != 0 ==> (x & (abs(C_pot) - 1)) != 0
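// For example, (x % 8) != 0 becomes (x & 7) != 0; for C equal to the minimum
// signed value the mask used is the maximum signed value.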
{
Const* c;
Binary* inner;
if (matches(curr,
binary(Ne, binary(&inner, RemS, any(), ival(&c)), ival(0))) &&
(c->value.isSignedMin() ||
Bits::isPowerOf2(c->value.abs().getInteger()))) {
inner->op = Abstract::getBinary(c->type, And);
if (c->value.isSignedMin()) {
c->value = Literal::makeSignedMax(c->type);
} else {
c->value = c->value.abs().sub(Literal::makeOne(c->type));
}
return curr;
}
}
// i32(bool(x)) == 1 ==> i32(bool(x))
// i32(bool(x)) != 0 ==> i32(bool(x))
// i32(bool(x)) & 1 ==> i32(bool(x))
// i64(bool(x)) & 1 ==> i64(bool(x))
if ((matches(curr, binary(EqInt32, any(&left), i32(1))) ||
matches(curr, binary(NeInt32, any(&left), i32(0))) ||
matches(curr, binary(And, any(&left), ival(1)))) &&
Bits::getMaxBits(left, this) == 1) {
return left;
}
// i64(bool(x)) == 1 ==> i32(bool(x))
// i64(bool(x)) != 0 ==> i32(bool(x))
if ((matches(curr, binary(EqInt64, any(&left), i64(1))) ||
matches(curr, binary(NeInt64, any(&left), i64(0)))) &&
Bits::getMaxBits(left, this) == 1) {
return builder.makeUnary(WrapInt64, left);
}
// bool(x) != 1 ==> !bool(x)
if (matches(curr, binary(Ne, any(&left), ival(1))) &&
Bits::getMaxBits(left, this) == 1) {
return builder.makeUnary(Abstract::getUnary(type, EqZ), left);
}
// bool(x) ^ 1 ==> !bool(x)
if (matches(curr, binary(Xor, any(&left), ival(1))) &&
Bits::getMaxBits(left, this) == 1) {
auto* result = builder.makeUnary(Abstract::getUnary(type, EqZ), left);
if (left->type == Type::i64) {
// Xor's result is also an i64 in this case, but EqZ returns i32, so we
// must expand it so that we keep returning the same value as before.
// This means we replace a xor and a const with a xor and an extend,
// which is still smaller (the const is 2 bytes, the extend just 1), and
// also the extend may be removed by further work.
result = builder.makeUnary(ExtendUInt32, result);
}
return result;
}
// bool(x) | 1 ==> 1
if (matches(curr, binary(Or, pure(&left), ival(1))) &&
Bits::getMaxBits(left, this) == 1) {
return right;
}
// Operations on all 1s
// x & -1 ==> x
if (matches(curr, binary(And, any(&left), ival(-1)))) {
return left;
}
// x | -1 ==> -1
if (matches(curr, binary(Or, pure(&left), ival(-1)))) {
return right;
}
// (signed)x % -1 ==> 0
if (matches(curr, binary(RemS, pure(&left), ival(-1)))) {
right->value = Literal::makeZero(type);
return right;
}
// i32(x) / i32.min_s ==> x == i32.min_s
if (matches(
curr,
binary(DivSInt32, any(), i32(std::numeric_limits<int32_t>::min())))) {
curr->op = EqInt32;
return curr;
}
// i64(x) / i64.min_s ==> i64(x == i64.min_s)
// only for zero shrink level
if (getPassOptions().shrinkLevel == 0 &&
matches(
curr,
binary(DivSInt64, any(), i64(std::numeric_limits<int64_t>::min())))) {
curr->op = EqInt64;
curr->type = Type::i32;
return builder.makeUnary(ExtendUInt32, curr);
}
// (unsigned)x < 0 ==> i32(0)
if (matches(curr, binary(LtU, pure(&left), ival(0)))) {
right->value = Literal::makeZero(Type::i32);
right->type = Type::i32;
return right;
}
// (unsigned)x <= -1 ==> i32(1)
if (matches(curr, binary(LeU, pure(&left), ival(-1)))) {
right->value = Literal::makeOne(Type::i32);
right->type = Type::i32;
return right;
}
// (unsigned)x > -1 ==> i32(0)
if (matches(curr, binary(GtU, pure(&left), ival(-1)))) {
right->value = Literal::makeZero(Type::i32);
right->type = Type::i32;
return right;
}
// (unsigned)x >= 0 ==> i32(1)
if (matches(curr, binary(GeU, pure(&left), ival(0)))) {
right->value = Literal::makeOne(Type::i32);
right->type = Type::i32;
return right;
}
// (unsigned)x < -1 ==> x != -1
// Friendlier when emitting JS, as we don't need to write an unsigned -1
// value, which is large.
if (matches(curr, binary(LtU, any(), ival(-1)))) {
curr->op = Abstract::getBinary(type, Ne);
return curr;
}
// (unsigned)x <= 0 ==> x == 0
if (matches(curr, binary(LeU, any(), ival(0)))) {
curr->op = Abstract::getBinary(type, Eq);
return curr;
}
// (unsigned)x > 0 ==> x != 0
if (matches(curr, binary(GtU, any(), ival(0)))) {
curr->op = Abstract::getBinary(type, Ne);
return curr;
}
// (unsigned)x >= -1 ==> x == -1
if (matches(curr, binary(GeU, any(), ival(-1)))) {
curr->op = Abstract::getBinary(type, Eq);
return curr;
}
{
Const* c;
// (signed)x < (i32|i64).min_s ==> i32(0)
if (matches(curr, binary(LtS, pure(&left), ival(&c))) &&
c->value.isSignedMin()) {
right->value = Literal::makeZero(Type::i32);
right->type = Type::i32;
return right;
}
// (signed)x <= (i32|i64).max_s ==> i32(1)
if (matches(curr, binary(LeS, pure(&left), ival(&c))) &&
c->value.isSignedMax()) {
right->value = Literal::makeOne(Type::i32);
right->type = Type::i32;
return right;
}
// (signed)x > (i32|i64).max_s ==> i32(0)
if (matches(curr, binary(GtS, pure(&left), ival(&c))) &&
c->value.isSignedMax()) {
right->value = Literal::makeZero(Type::i32);
right->type = Type::i32;
return right;
}
// (signed)x >= (i32|i64).min_s ==> i32(1)
if (matches(curr, binary(GeS, pure(&left), ival(&c))) &&
c->value.isSignedMin()) {
right->value = Literal::makeOne(Type::i32);
right->type = Type::i32;
return right;
}
// (signed)x < (i32|i64).max_s ==> x != (i32|i64).max_s
if (matches(curr, binary(LtS, any(), ival(&c))) &&
c->value.isSignedMax()) {
curr->op = Abstract::getBinary(type, Ne);
return curr;
}
// (signed)x <= (i32|i64).min_s ==> x == (i32|i64).min_s
if (matches(curr, binary(LeS, any(), ival(&c))) &&
c->value.isSignedMin()) {
curr->op = Abstract::getBinary(type, Eq);
return curr;
}
// (signed)x > (i32|i64).min_s ==> x != (i32|i64).min_s
if (matches(curr, binary(GtS, any(), ival(&c))) &&
c->value.isSignedMin()) {
curr->op = Abstract::getBinary(type, Ne);
return curr;
}
// (signed)x >= (i32|i64).max_s ==> x == (i32|i64).max_s
if (matches(curr, binary(GeS, any(), ival(&c))) &&
c->value.isSignedMax()) {
curr->op = Abstract::getBinary(type, Eq);
return curr;
}
}
// x * -1 ==> 0 - x
if (matches(curr, binary(Mul, any(&left), ival(-1)))) {
right->value = Literal::makeZero(type);
curr->op = Abstract::getBinary(type, Sub);
curr->left = right;
curr->right = left;
return curr;
}
{
// ~(1 << x) aka (1 << x) ^ -1 ==> rotl(-2, x)
Expression* x;
// Note that we avoid this in JS mode, as emitting a rotation would
// require lowering that rotation for JS in another cycle of work.
if (matches(curr, binary(Xor, binary(Shl, ival(1), any(&x)), ival(-1))) &&
!getPassOptions().targetJS) {
curr->op = Abstract::getBinary(type, RotL);
right->value = Literal::makeFromInt32(-2, type);
curr->left = right;
curr->right = x;
return curr;
}
}
{
// x * 2.0 ==> x + x
// but we apply this only for simple expressions like
// local.get and global.get, to avoid using an extra local
// variable.
Expression* x;
if (matches(curr, binary(Mul, any(&x), fval(2.0))) &&
(x->is<LocalGet>() || x->is<GlobalGet>())) {
curr->op = Abstract::getBinary(type, Abstract::Add);
curr->right = ExpressionManipulator::copy(x, *getModule());
return curr;
}
}
{
// x + (-0.0) ==> x
double value;
if (fastMath && matches(curr, binary(Add, any(), fval(&value))) &&
value == 0.0 && std::signbit(value)) {
return curr->left;
}
}
// -x * fval(C) ==> x * -C
// -x / fval(C) ==> x / -C
if (matches(curr, binary(Mul, unary(Neg, any(&left)), fval())) ||
matches(curr, binary(DivS, unary(Neg, any(&left)), fval()))) {
right->value = right->value.neg();
curr->left = left;
return curr;
}
// x * -1.0 ==>
// -x, if fastMath == true
// -0.0 - x, if fastMath == false
if (matches(curr, binary(Mul, any(), fval(-1.0)))) {
if (fastMath) {
return builder.makeUnary(Abstract::getUnary(type, Neg), left);
}
// x * -1.0 ==> -0.0 - x
curr->op = Abstract::getBinary(type, Sub);
right->value = Literal::makeZero(type).neg();
std::swap(curr->left, curr->right);
return curr;
}
if (matches(curr, binary(Mul, any(&left), constant(1))) ||
matches(curr, binary(DivS, any(&left), constant(1))) ||
matches(curr, binary(DivU, any(&left), constant(1)))) {
if (curr->type.isInteger() || fastMath) {
return left;
}
}
{
// x != NaN ==> 1
// x <=> NaN ==> 0
// x op NaN' ==> NaN', iff `op` != `copysign` and `x` != C
Const* c;
Binary* bin;
Expression* x;
if (matches(curr, binary(&bin, pure(&x), fval(&c))) &&
std::isnan(c->value.getFloat()) &&
bin->op != getBinary(x->type, CopySign)) {
if (bin->isRelational()) {
// reuse "c" (nan) constant
c->type = Type::i32;
if (bin->op == getBinary(x->type, Ne)) {
// x != NaN ==> 1
c->value = Literal::makeOne(Type::i32);
} else {
// x == NaN,
// x > NaN,
// x <= NaN
// x .. NaN ==> 0
c->value = Literal::makeZero(Type::i32);
}
return c;
}
// propagate NaN of RHS but canonicalize it
c->value = Literal::standardizeNaN(c->value);
return c;
}
}
return nullptr;
}
// Returns true if the given binary operation can overflow. If we can't be
// sure either way, we return true, assuming the worst.
//
// We can check for an unsigned overflow (more than the max number of bits) or
// a signed one (where even reaching the sign bit is an overflow, as that
// would turn us from positive to negative).
bool canOverflow(Binary* binary, bool signed_) {
using namespace Abstract;
// If we know nothing about a limit on the amount of bits on either side,
// give up.
auto typeMaxBits = getBitsForType(binary->type);
auto leftMaxBits = Bits::getMaxBits(binary->left, this);
auto rightMaxBits = Bits::getMaxBits(binary->right, this);
if (std::max(leftMaxBits, rightMaxBits) == typeMaxBits) {
return true;
}
if (binary->op == getBinary(binary->type, Add)) {
if (!signed_) {
// Proof this cannot overflow:
//
// left + right < 2^leftMaxBits + 2^rightMaxBits (1)
// <= 2^(typeMaxBits-1) + 2^(typeMaxBits-1) (2)
// = 2^typeMaxBits (3)
//
// (1) By the definition of the max bits (e.g. an int32 has 32 max bits,
// and its max value is 2^32 - 1, which is < 2^32).
// (2) By the above checks and early returns.
// (3) 2^x + 2^x === 2*2^x === 2^(x+1)
return false;
}
// For a signed comparison, check that the total cannot reach the sign
// bit.
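// For example, on i32 with leftMaxBits = 10 and rightMaxBits = 12 the sum is
// below 2^13, so it cannot reach the sign bit, and indeed 10 + 12 < 32.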
return leftMaxBits + rightMaxBits >= typeMaxBits;
}
// TODO subtraction etc.
return true;
}
// Folding two expressions into one with similar operations and
// constants on RHSs
Expression* optimizeDoubletonWithConstantOnRight(Binary* curr) {
using namespace Match;
using namespace Abstract;
{
Binary* inner;
Const *c1, *c2 = curr->right->cast<Const>();
if (matches(curr->left, binary(&inner, any(), ival(&c1))) &&
inner->op == curr->op) {
Type type = inner->type;
BinaryOp op = inner->op;
// (x & C1) & C2 => x & (C1 & C2)
if (op == getBinary(type, And)) {
c1->value = c1->value.and_(c2->value);
return inner;
}
// (x | C1) | C2 => x | (C1 | C2)
if (op == getBinary(type, Or)) {
c1->value = c1->value.or_(c2->value);
return inner;
}
// (x ^ C1) ^ C2 => x ^ (C1 ^ C2)
if (op == getBinary(type, Xor)) {
c1->value = c1->value.xor_(c2->value);
return inner;
}
// (x * C1) * C2 => x * (C1 * C2)
if (op == getBinary(type, Mul)) {
c1->value = c1->value.mul(c2->value);
return inner;
}
// TODO:
// handle signed / unsigned divisions. They are more complex
// (x <<>> C1) <<>> C2 => x <<>> (C1 + C2)
if (hasAnyShift(op)) {
// shifts only use an effective amount from the constant, so
// adding must be done carefully
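// For example, on i32 (x << 20) << 20 is not x << 40: the effective amount
// of a 40-bit shift is 40 & 31 = 8, so this is treated as an overflow below.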
auto total =
Bits::getEffectiveShifts(c1) + Bits::getEffectiveShifts(c2);
auto effectiveTotal = Bits::getEffectiveShifts(total, c1->type);
if (total == effectiveTotal) {
// no overflow, we can do this
c1->value = Literal::makeFromInt32(total, c1->type);
return inner;
} else {
// overflow. Handle the different scenarios.
if (hasAnyRotateShift(op)) {
// overflow is always accepted in rotation shifts
c1->value = Literal::makeFromInt32(effectiveTotal, c1->type);
return inner;
}
// handle overflows for general shifts
// x << C1 << C2 => 0 or { drop(x), 0 }
// x >>> C1 >>> C2 => 0 or { drop(x), 0 }
// iff `C1 + C2` overflows
if ((op == getBinary(type, Shl) || op == getBinary(type, ShrU))) {
auto* x = inner->left;
c1->value = Literal::makeZero(c1->type);
if (!effects(x).hasSideEffects()) {
// => 0
return c1;
} else {
// => { drop(x), 0 }
Builder builder(*getModule());
return builder.makeBlock({builder.makeDrop(x), c1});
}
}
// i32(x) >> C1 >> C2 => x >> 31
// i64(x) >> C1 >> C2 => x >> 63
// iff `C1 + C2` overflows
if (op == getBinary(type, ShrS)) {
c1->value = Literal::makeFromInt32(c1->type.getByteSize() * 8 - 1,
c1->type);
return inner;
}
}
}
}
}
{
// (x << C1) * C2 => x * (C2 << C1)
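// For example, (x << 2) * 5 becomes x * 20.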
Binary* inner;
Const *c1, *c2;
if (matches(
curr,
binary(Mul, binary(&inner, Shl, any(), ival(&c1)), ival(&c2)))) {
inner->op = getBinary(inner->type, Mul);
c1->value = c2->value.shl(c1->value);
return inner;
}
}
{
// (x * C1) << C2 => x * (C1 << C2)
Binary* inner;
Const *c1, *c2;
if (matches(
curr,
binary(Shl, binary(&inner, Mul, any(), ival(&c1)), ival(&c2)))) {
c1->value = c1->value.shl(c2->value);
return inner;
}
}
{
// TODO: Add cancelation for some large constants when shrinkLevel > 0
// in FinalOptimizer.
// (x >> C) << C => x & -(1 << C)
// (x >>> C) << C => x & -(1 << C)
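// For example, on i32 (x >> 4) << 4 becomes x & 0xfffffff0 (that is, -16).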
Binary* inner;
Const *c1, *c2;
if (matches(curr,
binary(Shl, binary(&inner, any(), ival(&c1)), ival(&c2))) &&
(inner->op == getBinary(inner->type, ShrS) ||
inner->op == getBinary(inner->type, ShrU)) &&
Bits::getEffectiveShifts(c1) == Bits::getEffectiveShifts(c2)) {
auto type = c1->type;
if (type == Type::i32) {
c1->value = Literal::makeFromInt32(
-(1U << Bits::getEffectiveShifts(c1)), Type::i32);
} else {
c1->value = Literal::makeFromInt64(
-(1ULL << Bits::getEffectiveShifts(c1)), Type::i64);
}
inner->op = getBinary(type, And);
return inner;
}
}
{
// TODO: Add cancelation for some large constants when shrinkLevel > 0
// in FinalOptimizer.
// (x << C) >>> C => x & (-1 >>> C)
// (x << C) >> C => skip
Binary* inner;
Const *c1, *c2;
if (matches(
curr,
binary(ShrU, binary(&inner, Shl, any(), ival(&c1)), ival(&c2))) &&
Bits::getEffectiveShifts(c1) == Bits::getEffectiveShifts(c2)) {
auto type = c1->type;
if (type == Type::i32) {
c1->value = Literal::makeFromInt32(
-1U >> Bits::getEffectiveShifts(c1), Type::i32);
} else {
c1->value = Literal::makeFromInt64(
-1ULL >> Bits::getEffectiveShifts(c1), Type::i64);
}
inner->op = getBinary(type, And);
return inner;
}
}
{
// TODO: Add canonicalization of rotr to rotl and remove these rules.
// rotl(rotr(x, C1), C2) => rotr(x, C1 - C2)
// rotr(rotl(x, C1), C2) => rotl(x, C1 - C2)
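// For example, rotl(rotr(x, 5), 2) becomes rotr(x, 3).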
Binary* inner;
Const *c1, *c2;
if (matches(
curr,
binary(RotL, binary(&inner, RotR, any(), ival(&c1)), ival(&c2))) ||
matches(
curr,
binary(RotR, binary(&inner, RotL, any(), ival(&c1)), ival(&c2)))) {
auto diff = Bits::getEffectiveShifts(c1) - Bits::getEffectiveShifts(c2);
c1->value = Literal::makeFromInt32(
Bits::getEffectiveShifts(diff, c2->type), c2->type);
return inner;
}
}
return nullptr;
}
// optimize trivial math operations, given that the left side of a binary
// is a constant. since we canonicalize constants to the right for symmetrical
// operations, we only need to handle asymmetrical ones here
// TODO: templatize on type?
Expression* optimizeWithConstantOnLeft(Binary* curr) {
using namespace Match;
using namespace Abstract;
auto type = curr->left->type;
auto* left = curr->left->cast<Const>();
// 0 <<>> x ==> 0
if (Abstract::hasAnyShift(curr->op) && left->value.isZero() &&
!effects(curr->right).hasSideEffects()) {
return curr->left;
}
// (signed)-1 >> x ==> -1
// rotl(-1, x) ==> -1
// rotr(-1, x) ==> -1
if ((curr->op == Abstract::getBinary(type, ShrS) ||
curr->op == Abstract::getBinary(type, RotL) ||
curr->op == Abstract::getBinary(type, RotR)) &&
left->value.getInteger() == -1LL &&
!effects(curr->right).hasSideEffects()) {
return curr->left;
}
{
// C1 - (x + C2) ==> (C1 - C2) - x
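// For example, 10 - (x + 3) becomes 7 - x.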
Const *c1, *c2;
Expression* x;
if (matches(curr,
binary(Sub, ival(&c1), binary(Add, any(&x), ival(&c2))))) {
left->value = c1->value.sub(c2->value);
curr->right = x;
return curr;
}
// C1 - (C2 - x) ==> x + (C1 - C2)
if (matches(curr,
binary(Sub, ival(&c1), binary(Sub, ival(&c2), any(&x))))) {
left->value = c1->value.sub(c2->value);
curr->op = Abstract::getBinary(type, Add);
curr->right = x;
std::swap(curr->left, curr->right);
return curr;
}
}
{
// fval(C) / -x ==> -C / x
Expression* right;
if (matches(curr, binary(DivS, fval(), unary(Neg, any(&right))))) {
left->value = left->value.neg();
curr->right = right;
return curr;
}
}
return nullptr;
}
// TODO: templatize on type?
Expression* optimizeRelational(Binary* curr) {
using namespace Abstract;
using namespace Match;
auto type = curr->right->type;
if (curr->left->type.isInteger()) {
if (curr->op == Abstract::getBinary(type, Abstract::Eq) ||
curr->op == Abstract::getBinary(type, Abstract::Ne)) {
if (auto* left = curr->left->dynCast<Binary>()) {
// TODO: inequalities can also work, if the constants do not overflow
// integer math, even on 2s complement, allows stuff like
// x + 5 == 7
// =>
// x == 2
if (left->op == Abstract::getBinary(type, Abstract::Add)) {
if (auto* leftConst = left->right->dynCast<Const>()) {
if (auto* rightConst = curr->right->dynCast<Const>()) {
return combineRelationalConstants(
curr, left, leftConst, nullptr, rightConst);
} else if (auto* rightBinary = curr->right->dynCast<Binary>()) {
if (rightBinary->op ==
Abstract::getBinary(type, Abstract::Add)) {
if (auto* rightConst = rightBinary->right->dynCast<Const>()) {
return combineRelationalConstants(
curr, left, leftConst, rightBinary, rightConst);
}
}
}
}
}
}
}
// x - y == 0 => x == y
// x - y != 0 => x != y
// unsigned(x - y) > 0 => x != y
// unsigned(x - y) <= 0 => x == y
{
Binary* inner;
// unsigned(x - y) > 0 => x != y
if (matches(curr,
binary(GtU, binary(&inner, Sub, any(), any()), ival(0)))) {
curr->op = Abstract::getBinary(type, Ne);
curr->right = inner->right;
curr->left = inner->left;
return curr;
}
// unsigned(x - y) <= 0 => x == y
if (matches(curr,
binary(LeU, binary(&inner, Sub, any(), any()), ival(0)))) {
curr->op = Abstract::getBinary(type, Eq);
curr->right = inner->right;
curr->left = inner->left;
return curr;
}
// x - y == 0 => x == y
// x - y != 0 => x != y
// This is not true for signed comparisons like x - y < 0 due to overflow
// effects (e.g. 8 - 0x80000000 < 0 is not the same as 8 < 0x80000000).
if (matches(curr,
binary(Eq, binary(&inner, Sub, any(), any()), ival(0))) ||
matches(curr,
binary(Ne, binary(&inner, Sub, any(), any()), ival(0)))) {
curr->right = inner->right;
curr->left = inner->left;
return curr;
}
}
// x + C1 > C2 ==> x > (C2-C1) if there is no overflow and C2 >= C1
// x + C1 > C2 ==> x + (C1-C2) > 0 if there is no overflow and C2 < C1
// And similarly for other relational operations on integers with a "+"
// on the left.
// TODO: support - and not just +
{
Binary* add;
Const* c1;
Const* c2;
if (matches(curr,
binary(binary(&add, Add, any(), ival(&c1)), ival(&c2))) &&
!canOverflow(add, isSignedOp(curr->op))) {
// We want to subtract C2-C1 or C1-C2. When doing so, we must avoid an
// overflow in that subtraction (so that we keep all the math here
// properly linear in the mathematical sense). Overflows that concern
// us include an underflow with unsigned values (e.g. 10 - 20, which
// flips the result to a large positive number), and a sign bit
// overflow for signed values (e.g. 0x80000000 - 1 = 0x7fffffff flips
// from a negative number, -1, to a positive one). We also need to be
// careful of signed handling of 0x80000000, for whom 0 - 0x80000000
// is equal to 0x80000000, leading to
// x + 0x80000000 > 0 ;; always false
// (apply the rule)
// x > 0 - 0x80000000 = 0x80000000 ;; depends on x
// The general principle in all of this is that when we go from
// (a) x + C1 > C2
// to
// (b) x > (C2-C1)
// then we want to adjust both sides in the same (linear) manner. That
// is, we can write the latter as
// (b') x + 0 > (C2-C1)
// Comparing (a) and (b'), we want the constants to change in a
// consistent way: C1 changes to 0, and C2 changes to C2-C1. Both
// transformations should decrease the value, which is violated in all
// the overflows described above:
// * Unsigned overflow: C1=20, C2=10, then C1 decreases but C2-C1
// is larger than C2.
// * Sign flip: C1=1, C2=0x80000000, then C1 decreases but C2-C1 is
// larger than C2.
// * C1=0x80000000, C2=0, then C1 increases while C2-C1 stays the
// same.
// In the first and second case we can apply the other rule using
// C1-C2 rather than C2-C1. The third case, however, doesn't even work
// that way.
auto C1 = c1->value;
auto C2 = c2->value;
auto C1SubC2 = C1.sub(C2);
auto C2SubC1 = C2.sub(C1);
auto zero = Literal::makeZero(add->type);
auto doC1SubC2 = false;
auto doC2SubC1 = false;
// Ignore the case of C1 or C2 being zero, as then C2-C1 or C1-C2
// does not change anything (and we don't want the optimizer to think
// we improved anything, or we could infinite loop on the mirage of
// progress).
if (C1 != zero && C2 != zero) {
if (isSignedOp(curr->op)) {
if (C2SubC1.leS(C2).getInteger() && zero.leS(C1).getInteger()) {
// C2=>C2-C1 and C1=>0 both decrease, which means we can do the
// rule
// (a) x + C1 > C2
// (b') x (+ 0) > (C2-C1)
// That is, subtracting C1 from both sides is ok; the constants
// on both sides change in the same manner.
doC2SubC1 = true;
} else if (C1SubC2.leS(C1).getInteger() &&
zero.leS(C2).getInteger()) {
// N.B. this code path is not tested atm as other optimizations
// will canonicalize x + C into x - C, and so we would need to
// implement the TODO above on subtraction and not only support
// addition here.
doC1SubC2 = true;
}
} else {
// Unsigned.
if (C2SubC1.leU(C2).getInteger() && zero.leU(C1).getInteger()) {
doC2SubC1 = true;
} else if (C1SubC2.leU(C1).getInteger() &&
zero.leU(C2).getInteger()) {
doC1SubC2 = true;
}
// For unsigned, one of the cases must work out, as there are no
// corner cases with the sign bit.
assert(doC2SubC1 || doC1SubC2);
}
}
if (doC2SubC1) {
// This is the first line above, we turn into x > (C2-C1).
c2->value = C2SubC1;
curr->left = add->left;
return curr;
}
// This is the second line above, we turn into x + (C1-C2) > 0.
if (doC1SubC2) {
c1->value = C1SubC2;
c2->value = zero;
return curr;
}
}
}
// Comparisons can sometimes be simplified depending on the number of
// bits, e.g. (unsigned)x > y must be true if x has strictly more bits.
// A common case is a constant on the right, e.g. (x & 255) < 256 must be
// true.
// TODO: use getMinBits in more places, see ideas in
// https://github.com/WebAssembly/binaryen/issues/2898
{
// Check if there is a nontrivial amount of bits on the left, which may
// provide enough to optimize.
auto leftMaxBits = Bits::getMaxBits(curr->left, this);
auto type = curr->left->type;
if (leftMaxBits < getBitsForType(type)) {
using namespace Abstract;
auto rightMinBits = Bits::getMinBits(curr->right);
auto rightIsNegative = rightMinBits == getBitsForType(type);
if (leftMaxBits < rightMinBits) {
// There are not enough bits on the left for it to be equal to the
// right, making various comparisons obviously false:
// x == y
// (unsigned)x > y
// (unsigned)x >= y
// and the same for signed, if y does not have the sign bit set
// (in that case, the comparison is effectively unsigned).
//
// TODO: In addition to leftMaxBits < rightMinBits, we could
// handle the reverse, and also special cases like all bits
// being 1 on the right, things like (x & 255) <= 255 -> 1
if (curr->op == Abstract::getBinary(type, Eq) ||
curr->op == Abstract::getBinary(type, GtU) ||
curr->op == Abstract::getBinary(type, GeU) ||
(!rightIsNegative &&
(curr->op == Abstract::getBinary(type, GtS) ||
curr->op == Abstract::getBinary(type, GeS)))) {
return getDroppedChildrenAndAppend(curr,
Literal::makeZero(Type::i32));
}
// And some are obviously true:
// x != y
// (unsigned)x < y
// (unsigned)x <= y
// and likewise for signed, as above.
if (curr->op == Abstract::getBinary(type, Ne) ||
curr->op == Abstract::getBinary(type, LtU) ||
curr->op == Abstract::getBinary(type, LeU) ||
(!rightIsNegative &&
(curr->op == Abstract::getBinary(type, LtS) ||
curr->op == Abstract::getBinary(type, LeS)))) {
return getDroppedChildrenAndAppend(curr,
Literal::makeOne(Type::i32));
}
// For truly signed comparisons, where y's sign bit is set, we can
// also infer some things, since we know y is signed but x is not
// (since x does not have enough bits for the sign bit to be set).
if (rightIsNegative) {
// (signed, non-negative)x > (negative)y => 1
// (signed, non-negative)x >= (negative)y => 1
if (curr->op == Abstract::getBinary(type, GtS) ||
curr->op == Abstract::getBinary(type, GeS)) {
return getDroppedChildrenAndAppend(curr,
Literal::makeOne(Type::i32));
}
// (signed, non-negative)x < (negative)y => 0
// (signed, non-negative)x <= (negative)y => 0
if (curr->op == Abstract::getBinary(type, LtS) ||
curr->op == Abstract::getBinary(type, LeS)) {
return getDroppedChildrenAndAppend(
curr, Literal::makeZero(Type::i32));
}
}
}
}
}
}
return nullptr;
}
Expression* simplifyRoundingsAndConversions(Unary* curr) {
using namespace Abstract;
using namespace Match;
switch (curr->op) {
case TruncSFloat64ToInt32:
case TruncSatSFloat64ToInt32: {
// i32 -> f64 -> i32 roundtripping optimization:
// i32.trunc(_sat)_f64_s(f64.convert_i32_s(x)) ==> x
Expression* x;
if (matches(curr->value, unary(ConvertSInt32ToFloat64, any(&x)))) {
return x;
}
break;
}
case TruncUFloat64ToInt32:
case TruncSatUFloat64ToInt32: {
// u32 -> f64 -> u32 roundtripping optimization:
// i32.trunc(_sat)_f64_u(f64.convert_i32_u(x)) ==> x
Expression* x;
if (matches(curr->value, unary(ConvertUInt32ToFloat64, any(&x)))) {
return x;
}
break;
}
case CeilFloat32:
case CeilFloat64:
case FloorFloat32:
case FloorFloat64:
case TruncFloat32:
case TruncFloat64:
case NearestFloat32:
case NearestFloat64: {
// Rounding after integer to float conversion may be skipped
// ceil(float(int(x))) ==> float(int(x))
// floor(float(int(x))) ==> float(int(x))
// trunc(float(int(x))) ==> float(int(x))
// nearest(float(int(x))) ==> float(int(x))
Unary* inner;
if (matches(curr->value, unary(&inner, any()))) {
switch (inner->op) {
case ConvertSInt32ToFloat32:
case ConvertSInt32ToFloat64:
case ConvertUInt32ToFloat32:
case ConvertUInt32ToFloat64:
case ConvertSInt64ToFloat32:
case ConvertSInt64ToFloat64:
case ConvertUInt64ToFloat32:
case ConvertUInt64ToFloat64: {
return inner;
}
default: {
}
}
}
break;
}
default: {
}
}
return nullptr;
}
Expression* deduplicateUnary(Unary* unaryOuter) {
if (auto* unaryInner = unaryOuter->value->dynCast<Unary>()) {
if (unaryInner->op == unaryOuter->op) {
switch (unaryInner->op) {
case NegFloat32:
case NegFloat64: {
// neg(neg(x)) ==> x
return unaryInner->value;
}
case AbsFloat32:
case CeilFloat32:
case FloorFloat32:
case TruncFloat32:
case NearestFloat32:
case AbsFloat64:
case CeilFloat64:
case FloorFloat64:
case TruncFloat64:
case NearestFloat64: {
// unaryOp(unaryOp(x)) ==> unaryOp(x)
return unaryInner;
}
case ExtendS8Int32:
case ExtendS16Int32: {
assert(getModule()->features.hasSignExt());
return unaryInner;
}
case EqZInt32: {
// eqz(eqz(bool(x))) ==> bool(x)
if (Bits::getMaxBits(unaryInner->value, this) == 1) {
return unaryInner->value;
}
break;
}
default: {
}
}
}
}
return nullptr;
}
Expression* deduplicateBinary(Binary* outer) {
Type type = outer->type;
if (type.isInteger()) {
if (auto* inner = outer->right->dynCast<Binary>()) {
if (outer->op == inner->op) {
if (!EffectAnalyzer(getPassOptions(), *getModule(), outer->left)
.hasSideEffects()) {
if (ExpressionAnalyzer::equal(inner->left, outer->left)) {
// x - (x - y) ==> y
// x ^ (x ^ y) ==> y
if (outer->op == Abstract::getBinary(type, Abstract::Sub) ||
outer->op == Abstract::getBinary(type, Abstract::Xor)) {
return inner->right;
}
// x & (x & y) ==> x & y
// x | (x | y) ==> x | y
if (outer->op == Abstract::getBinary(type, Abstract::And) ||
outer->op == Abstract::getBinary(type, Abstract::Or)) {
return inner;
}
}
if (ExpressionAnalyzer::equal(inner->right, outer->left) &&
canReorder(outer->left, inner->left)) {
// x ^ (y ^ x) ==> y
// (note that we need the check for reordering here because if
// e.g. y writes to a local that x reads, the second appearance
// of x would be different from the first)
if (outer->op == Abstract::getBinary(type, Abstract::Xor)) {
return inner->left;
}
// x & (y & x) ==> y & x
// x | (y | x) ==> y | x
// (here we need the check for reordering for the more obvious
// reason that previously x appeared before y, and now y appears
// first; or, if we tried to emit x [&|] y here, reversing the
// order, we'd be in the same situation as the previous comment)
if (outer->op == Abstract::getBinary(type, Abstract::And) ||
outer->op == Abstract::getBinary(type, Abstract::Or)) {
return inner;
}
}
}
}
}
if (auto* inner = outer->left->dynCast<Binary>()) {
if (outer->op == inner->op) {
if (!EffectAnalyzer(getPassOptions(), *getModule(), outer->right)
.hasSideEffects()) {
if (ExpressionAnalyzer::equal(inner->right, outer->right)) {
// (x ^ y) ^ y ==> x
if (outer->op == Abstract::getBinary(type, Abstract::Xor)) {
return inner->left;
}
// (x % y) % y ==> x % y
// (x & y) & y ==> x & y
// (x | y) | y ==> x | y
if (outer->op == Abstract::getBinary(type, Abstract::RemS) ||
outer->op == Abstract::getBinary(type, Abstract::RemU) ||
outer->op == Abstract::getBinary(type, Abstract::And) ||
outer->op == Abstract::getBinary(type, Abstract::Or)) {
return inner;
}
}
// See comments in the parallel code earlier about ordering here.
if (ExpressionAnalyzer::equal(inner->left, outer->right) &&
canReorder(inner->left, inner->right)) {
// (x ^ y) ^ x ==> y
if (outer->op == Abstract::getBinary(type, Abstract::Xor)) {
return inner->right;
}
// (x & y) & x ==> x & y
// (x | y) | x ==> x | y
if (outer->op == Abstract::getBinary(type, Abstract::And) ||
outer->op == Abstract::getBinary(type, Abstract::Or)) {
return inner;
}
}
}
}
}
}
return nullptr;
}
// given a relational binary with a const on both sides, combine the constants
// left is also a binary, and has a constant; right may be just a constant, in
// which case right is nullptr
Expression* combineRelationalConstants(Binary* binary,
Binary* left,
Const* leftConst,
Binary* right,
Const* rightConst) {
auto type = binary->right->type;
// we fold constants to the right
Literal extra = leftConst->value;
if (left->op == Abstract::getBinary(type, Abstract::Sub)) {
extra = extra.neg();
}
if (right && right->op == Abstract::getBinary(type, Abstract::Sub)) {
extra = extra.neg();
}
rightConst->value = rightConst->value.sub(extra);
binary->left = left->left;
return binary;
}
Expression* optimizeMemoryCopy(MemoryCopy* memCopy) {
auto& options = getPassOptions();
if (options.ignoreImplicitTraps || options.trapsNeverHappen) {
if (areConsecutiveInputsEqual(memCopy->dest, memCopy->source)) {
// memory.copy(x, x, sz) ==> {drop(x), drop(x), drop(sz)}
Builder builder(*getModule());
return builder.makeBlock({builder.makeDrop(memCopy->dest),
builder.makeDrop(memCopy->source),
builder.makeDrop(memCopy->size)});
}
}
// memory.copy(dst, src, C) ==> store(dst, load(src))
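// For example, a 4-byte copy becomes i32.store(dst, i32.load(src)), using
// align=1 for both since memory.copy implies no alignment.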
if (auto* csize = memCopy->size->dynCast<Const>()) {
auto bytes = csize->value.getInteger();
Builder builder(*getModule());
switch (bytes) {
case 0: {
if (options.ignoreImplicitTraps || options.trapsNeverHappen) {
// memory.copy(dst, src, 0) ==> {drop(dst), drop(src)}
return builder.makeBlock({builder.makeDrop(memCopy->dest),
builder.makeDrop(memCopy->source)});
}
break;
}
case 1:
case 2:
case 4: {
return builder.makeStore(bytes, // bytes
0, // offset
1, // align
memCopy->dest,
builder.makeLoad(bytes,
false,
0,
1,
memCopy->source,
Type::i32,
memCopy->sourceMemory),
Type::i32,
memCopy->destMemory);
}
case 8: {
return builder.makeStore(bytes, // bytes
0, // offset
1, // align
memCopy->dest,
builder.makeLoad(bytes,
false,
0,
1,
memCopy->source,
Type::i64,
memCopy->sourceMemory),
Type::i64,
memCopy->destMemory);
}
case 16: {
if (options.shrinkLevel == 0) {
// This adds an extra 2 bytes, so apply it only when not shrinking
// code size (shrink level 0).
if (getModule()->features.hasSIMD()) {
return builder.makeStore(bytes, // bytes
0, // offset
1, // align
memCopy->dest,
builder.makeLoad(bytes,
false,
0,
1,
memCopy->source,
Type::v128,
memCopy->sourceMemory),
Type::v128,
memCopy->destMemory);
}
}
break;
}
default: {
}
}
}
return nullptr;
}
Expression* optimizeMemoryFill(MemoryFill* memFill) {
if (memFill->type == Type::unreachable) {
return nullptr;
}
if (!memFill->size->is<Const>()) {
return nullptr;
}
auto& options = getPassOptions();
Builder builder(*getModule());
auto* csize = memFill->size->cast<Const>();
auto bytes = csize->value.getInteger();
if (bytes == 0LL &&
(options.ignoreImplicitTraps || options.trapsNeverHappen)) {
// memory.fill(d, v, 0) ==> { drop(d), drop(v) }
return builder.makeBlock(
{builder.makeDrop(memFill->dest), builder.makeDrop(memFill->value)});
}
const uint32_t offset = 0, align = 1;
if (auto* cvalue = memFill->value->dynCast<Const>()) {
uint32_t value = cvalue->value.geti32() & 0xFF;
// memory.fill(d, C1, C2) ==>
// store(d, (C1 & 0xFF) * (-1U / max(bytes)))
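// For example, at shrink level 0, memory.fill(d, 0xAB, 4) becomes
// i32.store(d, 0xABABABAB).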
switch (bytes) {
case 1: {
return builder.makeStore(1, // bytes
offset,
align,
memFill->dest,
builder.makeConst<uint32_t>(value),
Type::i32,
memFill->memory);
}
case 2: {
return builder.makeStore(2,
offset,
align,
memFill->dest,
builder.makeConst<uint32_t>(value * 0x0101U),
Type::i32,
memFill->memory);
}
case 4: {
// transform only when "value" or shrinkLevel is zero, as otherwise
// it could increase code size by several bytes
if (value == 0 || options.shrinkLevel == 0) {
return builder.makeStore(
4,
offset,
align,
memFill->dest,
builder.makeConst<uint32_t>(value * 0x01010101U),
Type::i32,
memFill->memory);
}
break;
}
case 8: {
// transform only when "value" or shrinkLevel is zero, as otherwise
// it could increase code size by several bytes
if (value == 0 || options.shrinkLevel == 0) {
return builder.makeStore(
8,
offset,
align,
memFill->dest,
builder.makeConst<uint64_t>(value * 0x0101010101010101ULL),
Type::i64,
memFill->memory);
}
break;
}
case 16: {
if (options.shrinkLevel == 0) {
if (getModule()->features.hasSIMD()) {
uint8_t values[16];
std::fill_n(values, 16, (uint8_t)value);
return builder.makeStore(16,
offset,
align,
memFill->dest,
builder.makeConst<uint8_t[16]>(values),
Type::v128,
memFill->memory);
} else {
// { i64.store(d, C', 0), i64.store(d, C', 8) }
auto destType = memFill->dest->type;
Index tempLocal = builder.addVar(getFunction(), destType);
return builder.makeBlock({
builder.makeStore(
8,
offset,
align,
builder.makeLocalTee(tempLocal, memFill->dest, destType),
builder.makeConst<uint64_t>(value * 0x0101010101010101ULL),
Type::i64,
memFill->memory),
builder.makeStore(
8,
offset + 8,
align,
builder.makeLocalGet(tempLocal, destType),
builder.makeConst<uint64_t>(value * 0x0101010101010101ULL),
Type::i64,
memFill->memory),
});
}
}
break;
}
default: {
}
}
}
// memory.fill(d, v, 1) ==> store8(d, v)
if (bytes == 1LL) {
return builder.makeStore(1,
offset,
align,
memFill->dest,
memFill->value,
Type::i32,
memFill->memory);
}
return nullptr;
}
// given a binary expression with equal children and no side effects in
// either, we can fold various things
Expression* optimizeBinaryWithEqualEffectlessChildren(Binary* binary) {
// TODO add: perhaps worth doing 2*x if x is quite large?
switch (binary->op) {
case SubInt32:
case XorInt32:
case SubInt64:
case XorInt64:
return LiteralUtils::makeZero(binary->left->type, *getModule());
case NeInt32:
case LtSInt32:
case LtUInt32:
case GtSInt32:
case GtUInt32:
case NeInt64:
case LtSInt64:
case LtUInt64:
case GtSInt64:
case GtUInt64:
return LiteralUtils::makeZero(Type::i32, *getModule());
case AndInt32:
case OrInt32:
case AndInt64:
case OrInt64:
return binary->left;
case EqInt32:
case LeSInt32:
case LeUInt32:
case GeSInt32:
case GeUInt32:
case EqInt64:
case LeSInt64:
case LeUInt64:
case GeSInt64:
case GeUInt64:
return LiteralUtils::makeFromInt32(1, Type::i32, *getModule());
default:
return nullptr;
}
}
// Invert (negate) the opcode, so that it means the exact opposite of what it
// meant before.
BinaryOp invertBinaryOp(BinaryOp op) {
switch (op) {
case EqInt32:
return NeInt32;
case NeInt32:
return EqInt32;
case LtSInt32:
return GeSInt32;
case LtUInt32:
return GeUInt32;
case LeSInt32:
return GtSInt32;
case LeUInt32:
return GtUInt32;
case GtSInt32:
return LeSInt32;
case GtUInt32:
return LeUInt32;
case GeSInt32:
return LtSInt32;
case GeUInt32:
return LtUInt32;
case EqInt64:
return NeInt64;
case NeInt64:
return EqInt64;
case LtSInt64:
return GeSInt64;
case LtUInt64:
return GeUInt64;
case LeSInt64:
return GtSInt64;
case LeUInt64:
return GtUInt64;
case GtSInt64:
return LeSInt64;
case GtUInt64:
return LeUInt64;
case GeSInt64:
return LtSInt64;
case GeUInt64:
return LtUInt64;
case EqFloat32:
return NeFloat32;
case NeFloat32:
return EqFloat32;
case EqFloat64:
return NeFloat64;
case NeFloat64:
return EqFloat64;
default:
return InvalidBinary;
}
}
// Change the opcode so it is correct after reversing the operands. That is,
// we had X OP Y and we need OP' so that this is equivalent to that:
// Y OP' X
BinaryOp reverseRelationalOp(BinaryOp op) {
switch (op) {
case EqInt32:
return EqInt32;
case NeInt32:
return NeInt32;
case LtSInt32:
return GtSInt32;
case LtUInt32:
return GtUInt32;
case LeSInt32:
return GeSInt32;
case LeUInt32:
return GeUInt32;
case GtSInt32:
return LtSInt32;
case GtUInt32:
return LtUInt32;
case GeSInt32:
return LeSInt32;
case GeUInt32:
return LeUInt32;
case EqInt64:
return EqInt64;
case NeInt64:
return NeInt64;
case LtSInt64:
return GtSInt64;
case LtUInt64:
return GtUInt64;
case LeSInt64:
return GeSInt64;
case LeUInt64:
return GeUInt64;
case GtSInt64:
return LtSInt64;
case GtUInt64:
return LtUInt64;
case GeSInt64:
return LeSInt64;
case GeUInt64:
return LeUInt64;
case EqFloat32:
return EqFloat32;
case NeFloat32:
return NeFloat32;
case LtFloat32:
return GtFloat32;
case LeFloat32:
return GeFloat32;
case GtFloat32:
return LtFloat32;
case GeFloat32:
return LeFloat32;
case EqFloat64:
return EqFloat64;
case NeFloat64:
return NeFloat64;
case LtFloat64:
return GtFloat64;
case LeFloat64:
return GeFloat64;
case GtFloat64:
return LtFloat64;
case GeFloat64:
return LeFloat64;
default:
return InvalidBinary;
}
}
BinaryOp makeUnsignedBinaryOp(BinaryOp op) {
switch (op) {
case DivSInt32:
return DivUInt32;
case RemSInt32:
return RemUInt32;
case ShrSInt32:
return ShrUInt32;
case LtSInt32:
return LtUInt32;
case LeSInt32:
return LeUInt32;
case GtSInt32:
return GtUInt32;
case GeSInt32:
return GeUInt32;
case DivSInt64:
return DivUInt64;
case RemSInt64:
return RemUInt64;
case ShrSInt64:
return ShrUInt64;
case LtSInt64:
return LtUInt64;
case LeSInt64:
return LeUInt64;
case GtSInt64:
return GtUInt64;
case GeSInt64:
return GeUInt64;
default:
return InvalidBinary;
}
}
bool shouldCanonicalize(Binary* binary) {
if ((binary->op == SubInt32 || binary->op == SubInt64) &&
binary->right->is<Const>() && !binary->left->is<Const>()) {
return true;
}
if (Properties::isSymmetric(binary) || binary->isRelational()) {
return true;
}
switch (binary->op) {
case SubFloat32:
case SubFloat64: {
// Should apply x - C -> x + (-C)
return binary->right->is<Const>();
}
case AddFloat32:
case MulFloat32:
case AddFloat64:
case MulFloat64: {
// If the LHS is known to be non-NaN, the operands can commute.
// We don't care about the RHS because right now we only know if
// an expression is non-NaN if it is constant, but if the RHS is
// constant, then this expression is already canonicalized.
if (auto* c = binary->left->dynCast<Const>()) {
return !c->value.isNaN();
}
return false;
}
default:
return false;
}
}
// Optimize an if-else or a select, something with a condition and two
// arms with outputs.
template<typename T> void optimizeTernary(T* curr) {
using namespace Abstract;
using namespace Match;
Builder builder(*getModule());
// If one arm is an operation and the other is an appropriate constant, we
// can move the operation outside (where it may be further optimized), e.g.
//
// (select
// (i32.eqz (X))
// (i32.const 0|1)
// (Y)
// )
// =>
// (i32.eqz
// (select
// (X)
// (i32.const 1|0)
// (Y)
// )
// )
//
// Ignore unreachable code here; leave that for DCE.
if (curr->type != Type::unreachable &&
curr->ifTrue->type != Type::unreachable &&
curr->ifFalse->type != Type::unreachable) {
Unary* un;
Const* c;
auto check = [&](Expression* a, Expression* b) {
return matches(b, bval(&c)) && matches(a, unary(&un, EqZ, any()));
};
if (check(curr->ifTrue, curr->ifFalse) ||
check(curr->ifFalse, curr->ifTrue)) {
// The new type of curr will be that of the value of the unary, as after
// we move the unary out, its value is curr's direct child.
auto newType = un->value->type;
auto updateArm = [&](Expression* arm) -> Expression* {
if (arm == un) {
// This is the arm that had the eqz, which we need to remove.
return un->value;
} else {
// This is the arm with the constant, which we need to flip.
// Note that we also need to set the type to match the other arm.
c->value =
Literal::makeFromInt32(1 - c->value.getInteger(), newType);
c->type = newType;
return c;
}
};
curr->ifTrue = updateArm(curr->ifTrue);
curr->ifFalse = updateArm(curr->ifFalse);
un->value = curr;
curr->finalize();
return replaceCurrent(un);
}
}
{
// Identical code on both arms can be folded out, e.g.
//
// (select
// (i32.eqz (X))
// (i32.eqz (Y))
// (Z)
// )
// =>
// (i32.eqz
// (select
// (X)
// (Y)
// (Z)
// )
// )
//
// Continue doing this while we can, noting the chain of moved expressions
// as we go, then do a single replaceCurrent() at the end.
SmallVector<Expression*, 1> chain;
while (1) {
// Ignore control flow structures (which are handled in MergeBlocks).
if (!Properties::isControlFlowStructure(curr->ifTrue) &&
ExpressionAnalyzer::shallowEqual(curr->ifTrue, curr->ifFalse)) {
// TODO: consider the case with more than one child.
ChildIterator ifTrueChildren(curr->ifTrue);
if (ifTrueChildren.children.size() == 1) {
// ifTrue and ifFalse's children will become the direct children of
// curr, and so they must be compatible to allow for a proper new
// type after the transformation.
//
// At minimum an LUB is required, as shown here:
//
// (if
// (condition)
// (drop (i32.const 1))
// (drop (f64.const 2.0))
// )
//
// However, that may not be enough, as with nominal types we can
// have things like this:
//
// (if
// (condition)
// (struct.get $A 1 (..))
// (struct.get $B 1 (..))
// )
//
// It is possible that the LUB of $A and $B does not contain field
// "1". With structural types this specific problem is not possible,
// and it appears to be the case that with the GC MVP there is no
// instruction that poses a problem, but in principle it can happen
// there as well, if we add an instruction that returns the number
// of fields in a type, for example. For that reason, and to avoid
// a difference between structural and nominal typing here, disallow
// subtyping in both. (Note: In that example, the problem only
// happens because the type is not part of the struct.get - we infer
// it from the reference. That is why after hoisting the struct.get
// out, and computing a new type for the if that is now the child of
// the single struct.get, we get a struct.get of a supertype. So in
// principle we could fix this by modifying the IR as well, but the
// problem is more general, so avoid that.)
ChildIterator ifFalseChildren(curr->ifFalse);
auto* ifTrueChild = *ifTrueChildren.begin();
auto* ifFalseChild = *ifFalseChildren.begin();
bool validTypes = ifTrueChild->type == ifFalseChild->type;
// In addition, after we move code outside of curr then we need to
// not change unreachability - if we did, we'd need to propagate
// that further, and we leave such work to DCE and Vacuum anyhow.
// This can happen in something like this for example, where the
// outer type changes from i32 to unreachable if we move the
// returns outside:
//
// (if (result i32)
// (local.get $x)
// (return
// (local.get $y)
// )
// (return
// (local.get $z)
// )
// )
assert(curr->ifTrue->type == curr->ifFalse->type);
auto newOuterType = curr->ifTrue->type;
if ((newOuterType == Type::unreachable) !=
(curr->type == Type::unreachable)) {
validTypes = false;
}
// If the expression we are about to move outside has side effects,
// then we cannot do so in general with a select: we'd be reducing
// the amount of the effects as well as moving them. For an if,
// the side effects execute once, so there is no problem.
// TODO: handle certain side effects when possible in select
bool validEffects = std::is_same<T, If>::value ||
!ShallowEffectAnalyzer(
getPassOptions(), *getModule(), curr->ifTrue)
.hasSideEffects();
// In addition, check for specific limitations of select.
bool validChildren =
!std::is_same<T, Select>::value ||
Properties::canEmitSelectWithArms(ifTrueChild, ifFalseChild);
if (validTypes && validEffects && validChildren) {
// Replace ifTrue with its child.
curr->ifTrue = ifTrueChild;
// Replace ifFalse with its child, and reuse that node outside.
auto* reuse = curr->ifFalse;
curr->ifFalse = ifFalseChild;
// curr's type may have changed, if the instructions we moved out
// had different input types than output types.
curr->finalize();
// Point to curr from the code that is now outside of it.
*ChildIterator(reuse).begin() = curr;
if (!chain.empty()) {
// We've already moved things out, so chain them to there. That
// is, the end of the chain should now point to reuse (which
// in turn already points to curr).
*ChildIterator(chain.back()).begin() = reuse;
}
chain.push_back(reuse);
continue;
}
}
}
break;
}
if (!chain.empty()) {
// The beginning of the chain is the new top parent.
return replaceCurrent(chain[0]);
}
}
}
};
Pass* createOptimizeInstructionsPass() { return new OptimizeInstructions; }
} // namespace wasm