| /* |
| * Copyright 2016 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Optimize combinations of instructions |
| // |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <type_traits> |
| |
| #include <ir/abstract.h> |
| #include <ir/bits.h> |
| #include <ir/cost.h> |
| #include <ir/effects.h> |
| #include <ir/find_all.h> |
| #include <ir/gc-type-utils.h> |
| #include <ir/iteration.h> |
| #include <ir/literal-utils.h> |
| #include <ir/load-utils.h> |
| #include <ir/manipulation.h> |
| #include <ir/match.h> |
| #include <ir/properties.h> |
| #include <ir/utils.h> |
| #include <pass.h> |
| #include <support/threads.h> |
| #include <wasm.h> |
| |
| // TODO: Use the new sign-extension opcodes where appropriate. This needs to be |
| // conditionalized on the availability of atomics. |
| |
| namespace wasm { |
| |
| Name I32_EXPR = "i32.expr"; |
| Name I64_EXPR = "i64.expr"; |
| Name F32_EXPR = "f32.expr"; |
| Name F64_EXPR = "f64.expr"; |
| Name ANY_EXPR = "any.expr"; |
| |
| // Useful information about locals |
| struct LocalInfo { |
| static const Index kUnknown = Index(-1); |
| |
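  // The maximum number of bits a value stored in this local may use.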
| Index maxBits; |
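  // The number of bits values in this local are known to be sign-extended to:
  // 0 if we have not seen anything yet, or kUnknown if we saw contradictory
  // information.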
| Index signExtedBits; |
| }; |
| |
| struct LocalScanner : PostWalker<LocalScanner> { |
| std::vector<LocalInfo>& localInfo; |
| const PassOptions& passOptions; |
| |
| LocalScanner(std::vector<LocalInfo>& localInfo, |
| const PassOptions& passOptions) |
| : localInfo(localInfo), passOptions(passOptions) {} |
| |
| void doWalkFunction(Function* func) { |
| // prepare |
| localInfo.resize(func->getNumLocals()); |
| for (Index i = 0; i < func->getNumLocals(); i++) { |
| auto& info = localInfo[i]; |
| if (func->isParam(i)) { |
| info.maxBits = getBitsForType(func->getLocalType(i)); // worst-case |
| info.signExtedBits = LocalInfo::kUnknown; // we will never know anything |
| } else { |
| info.maxBits = info.signExtedBits = 0; // we are open to learning |
| } |
| } |
| // walk |
| PostWalker<LocalScanner>::doWalkFunction(func); |
| // finalize |
| for (Index i = 0; i < func->getNumLocals(); i++) { |
| auto& info = localInfo[i]; |
| if (info.signExtedBits == LocalInfo::kUnknown) { |
| info.signExtedBits = 0; |
| } |
| } |
| } |
| |
| void visitLocalSet(LocalSet* curr) { |
| auto* func = getFunction(); |
| if (func->isParam(curr->index)) { |
| return; |
| } |
| auto type = getFunction()->getLocalType(curr->index); |
| if (type != Type::i32 && type != Type::i64) { |
| return; |
| } |
| // an integer var, worth processing |
| auto* value = Properties::getFallthrough( |
| curr->value, passOptions, getModule()->features); |
| auto& info = localInfo[curr->index]; |
| info.maxBits = std::max(info.maxBits, Bits::getMaxBits(value, this)); |
| auto signExtBits = LocalInfo::kUnknown; |
| if (Properties::getSignExtValue(value)) { |
| signExtBits = Properties::getSignExtBits(value); |
| } else if (auto* load = value->dynCast<Load>()) { |
| if (LoadUtils::isSignRelevant(load) && load->signed_) { |
| signExtBits = load->bytes * 8; |
| } |
| } |
| if (info.signExtedBits == 0) { |
| info.signExtedBits = signExtBits; // first info we see |
| } else if (info.signExtedBits != signExtBits) { |
| // contradictory information, give up |
| info.signExtedBits = LocalInfo::kUnknown; |
| } |
| } |
| |
| // define this for the templated getMaxBits method. we know nothing here yet |
| // about locals, so return the maxes |
| Index getMaxBitsForLocal(LocalGet* get) { return getBitsForType(get->type); } |
| |
| Index getBitsForType(Type type) { |
| if (!type.isBasic()) { |
| return -1; |
| } |
| switch (type.getBasic()) { |
| case Type::i32: |
| return 32; |
| case Type::i64: |
| return 64; |
| default: |
| return -1; |
| } |
| } |
| }; |
| |
| namespace { |
| // perform some final optimizations |
| struct FinalOptimizer : public PostWalker<FinalOptimizer> { |
| const PassOptions& passOptions; |
| |
| FinalOptimizer(const PassOptions& passOptions) : passOptions(passOptions) {} |
| |
| void visitBinary(Binary* curr) { |
| if (auto* replacement = optimize(curr)) { |
| replaceCurrent(replacement); |
| } |
| } |
| |
| Binary* optimize(Binary* curr) { |
| using namespace Abstract; |
| using namespace Match; |
| { |
| Const* c; |
| if (matches(curr, binary(Add, any(), ival(&c)))) { |
| // normalize x + (-C) ==> x - C |
| if (c->value.isNegative()) { |
| c->value = c->value.neg(); |
| curr->op = Abstract::getBinary(c->type, Sub); |
| } |
        // Wasm binary encoding uses signed LEBs, which slightly favor negative
        // numbers: -64 encodes in one byte while +64 needs two, and the same
        // holds at each power of two that is 7 bits higher. We therefore
        // prefer x - (-64) over x + 64. In theory we could just prefer
        // negative numbers over positive ones, but that can have bad effects
        // on gzip compression (as it would mean more subtractions than the
        // more common additions).
| int64_t value = c->value.getInteger(); |
| if (value == 0x40LL || value == 0x2000LL || value == 0x100000LL || |
| value == 0x8000000LL || value == 0x400000000LL || |
| value == 0x20000000000LL || value == 0x1000000000000LL || |
| value == 0x80000000000000LL || value == 0x4000000000000000LL) { |
| c->value = c->value.neg(); |
| if (curr->op == Abstract::getBinary(c->type, Add)) { |
| curr->op = Abstract::getBinary(c->type, Sub); |
| } else { |
| curr->op = Abstract::getBinary(c->type, Add); |
| } |
| } |
| return curr; |
| } |
| } |
| return nullptr; |
| } |
| }; |
| |
| } // anonymous namespace |
| |
| // Create a custom matcher for checking side effects |
| template<class Opt> struct PureMatcherKind {}; |
| template<class Opt> |
| struct Match::Internal::KindTypeRegistry<PureMatcherKind<Opt>> { |
| using matched_t = Expression*; |
| using data_t = Opt*; |
| }; |
| template<class Opt> struct Match::Internal::MatchSelf<PureMatcherKind<Opt>> { |
| bool operator()(Expression* curr, Opt* opt) { |
| return !opt->effects(curr).hasSideEffects(); |
| } |
| }; |
| |
| // Main pass class |
| struct OptimizeInstructions |
| : public WalkerPass<PostWalker<OptimizeInstructions>> { |
| bool isFunctionParallel() override { return true; } |
| |
| Pass* create() override { return new OptimizeInstructions; } |
| |
| bool fastMath; |
| |
| void doWalkFunction(Function* func) { |
| fastMath = getPassOptions().fastMath; |
| // first, scan locals |
| { |
| LocalScanner scanner(localInfo, getPassOptions()); |
| scanner.setModule(getModule()); |
| scanner.walkFunction(func); |
| } |
| // main walk |
| super::doWalkFunction(func); |
| { |
| FinalOptimizer optimizer(getPassOptions()); |
| optimizer.walkFunction(func); |
| } |
| } |
| |
| // Set to true when one of the visitors makes a change (either replacing the |
| // node or modifying it). |
| bool changed; |
| |
| // Used to avoid recursion in replaceCurrent, see below. |
| bool inReplaceCurrent = false; |
| |
| void replaceCurrent(Expression* rep) { |
| WalkerPass<PostWalker<OptimizeInstructions>>::replaceCurrent(rep); |
| // We may be able to apply multiple patterns as one may open opportunities |
| // for others. NB: patterns must not have cycles |
| |
| // To avoid recursion, this uses the following pattern: the initial call to |
| // this method comes from one of the visit*() methods. We then loop in here, |
| // and if we are called again we set |changed| instead of recursing, so that |
| // we can loop on that value. |
| if (inReplaceCurrent) { |
| // We are in the loop below so just note a change and return to there. |
| changed = true; |
| return; |
| } |
| // Loop on further changes. |
| inReplaceCurrent = true; |
| do { |
| changed = false; |
| visit(getCurrent()); |
| } while (changed); |
| inReplaceCurrent = false; |
| } |
| |
| EffectAnalyzer effects(Expression* expr) { |
| return EffectAnalyzer(getPassOptions(), getModule()->features, expr); |
| } |
| |
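  // Match an expression while requiring that it have no side effects, binding
  // it to *binder.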
| decltype(auto) pure(Expression** binder) { |
| using namespace Match::Internal; |
| return Matcher<PureMatcherKind<OptimizeInstructions>>(binder, this); |
| } |
| |
| bool canReorder(Expression* a, Expression* b) { |
| return EffectAnalyzer::canReorder( |
| getPassOptions(), getModule()->features, a, b); |
| } |
| |
| void visitBinary(Binary* curr) { |
    // If this contains dead code, don't bother trying to optimize it: the type
    // might change (an if might not be unreachable if just one arm is, for
    // example). This optimization pass focuses on actually executing code.
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| |
| if (shouldCanonicalize(curr)) { |
| canonicalize(curr); |
| } |
| |
| { |
| // TODO: It is an ongoing project to port more transformations to the |
| // match API. Once most of the transformations have been ported, the |
| // `using namespace Match` can be hoisted to function scope and this extra |
| // block scope can be removed. |
| using namespace Match; |
| using namespace Abstract; |
| Builder builder(*getModule()); |
| { |
| // try to get rid of (0 - ..), that is, a zero only used to negate an |
| // int. an add of a subtract can be flipped in order to remove it: |
| // (ival.add |
| // (ival.sub |
| // (ival.const 0) |
| // X |
| // ) |
| // Y |
| // ) |
| // => |
| // (ival.sub |
| // Y |
| // X |
| // ) |
| // Note that this reorders X and Y, so we need to be careful about that. |
| Expression *x, *y; |
| Binary* sub; |
| if (matches( |
| curr, |
| binary(Add, binary(&sub, Sub, ival(0), any(&x)), any(&y))) && |
| canReorder(x, y)) { |
| sub->left = y; |
| sub->right = x; |
| return replaceCurrent(sub); |
| } |
| } |
| { |
| // The flip case is even easier, as no reordering occurs: |
| // (ival.add |
| // Y |
| // (ival.sub |
| // (ival.const 0) |
| // X |
| // ) |
| // ) |
| // => |
| // (ival.sub |
| // Y |
| // X |
| // ) |
| Expression* y; |
| Binary* sub; |
| if (matches(curr, |
| binary(Add, any(&y), binary(&sub, Sub, ival(0), any())))) { |
| sub->left = y; |
| return replaceCurrent(sub); |
| } |
| } |
| { |
      // try De Morgan's AND law,
| // (eqz X) and (eqz Y) === eqz (X or Y) |
| // Note that the OR and XOR laws do not work here, as these |
| // are not booleans (we could check if they are, but a boolean |
| // would already optimize with the eqz anyhow, unless propagating). |
| // But for AND, the left is true iff X and Y are each all zero bits, |
| // and the right is true if the union of their bits is zero; same. |
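      // For example:
      //   (i32.and (i32.eqz (local.get $x)) (i32.eqz (local.get $y)))
      // becomes
      //   (i32.eqz (i32.or (local.get $x) (local.get $y)))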
| Unary* un; |
| Binary* bin; |
| Expression *x, *y; |
| if (matches(curr, |
| binary(&bin, |
| AndInt32, |
| unary(&un, EqZInt32, any(&x)), |
| unary(EqZInt32, any(&y))))) { |
| bin->op = OrInt32; |
| bin->left = x; |
| bin->right = y; |
| un->value = bin; |
| return replaceCurrent(un); |
| } |
| } |
| { |
| // x <<>> (C & (31 | 63)) ==> x <<>> C' |
| // x <<>> (y & (31 | 63)) ==> x <<>> y |
| // x <<>> (y & (32 | 64)) ==> x |
      // where '<<>>' is one of:
      //   '<<', '>>', '>>>', 'rotl' or 'rotr'
| BinaryOp op; |
| Const* c; |
| Expression *x, *y; |
| |
| // x <<>> C |
| if (matches(curr, binary(&op, any(&x), ival(&c))) && |
| Abstract::hasAnyShift(op)) { |
        // truncate the RHS constant to the effective size:
        //   i32(x) <<>> const(C & 31)
        //   i64(x) <<>> const(C & 63)
| c->value = c->value.and_( |
| Literal::makeFromInt32(c->type.getByteSize() * 8 - 1, c->type)); |
| // x <<>> 0 ==> x |
| if (c->value.isZero()) { |
| return replaceCurrent(x); |
| } |
| } |
| if (matches(curr, |
| binary(&op, any(&x), binary(And, any(&y), ival(&c)))) && |
| Abstract::hasAnyShift(op)) { |
| // i32(x) <<>> (y & 31) ==> x <<>> y |
| // i64(x) <<>> (y & 63) ==> x <<>> y |
| if ((c->type == Type::i32 && (c->value.geti32() & 31) == 31) || |
| (c->type == Type::i64 && (c->value.geti64() & 63LL) == 63LL)) { |
| curr->cast<Binary>()->right = y; |
| return replaceCurrent(curr); |
| } |
| // i32(x) <<>> (y & C) ==> x, where (C & 31) == 0 |
| // i64(x) <<>> (y & C) ==> x, where (C & 63) == 0 |
| if (((c->type == Type::i32 && (c->value.geti32() & 31) == 0) || |
| (c->type == Type::i64 && (c->value.geti64() & 63LL) == 0LL)) && |
| !effects(y).hasSideEffects()) { |
| return replaceCurrent(x); |
| } |
| } |
| } |
| { |
| // unsigned(x) >= 0 => i32(1) |
| Const* c; |
| Expression* x; |
| if (matches(curr, binary(GeU, pure(&x), ival(&c))) && |
| c->value.isZero()) { |
| c->value = Literal::makeOne(Type::i32); |
| c->type = Type::i32; |
| return replaceCurrent(c); |
| } |
| // unsigned(x) < 0 => i32(0) |
| if (matches(curr, binary(LtU, pure(&x), ival(&c))) && |
| c->value.isZero()) { |
| c->value = Literal::makeZero(Type::i32); |
| c->type = Type::i32; |
| return replaceCurrent(c); |
| } |
| } |
| } |
| if (auto* ext = Properties::getAlmostSignExt(curr)) { |
| Index extraLeftShifts; |
| auto bits = Properties::getAlmostSignExtBits(curr, extraLeftShifts); |
| if (extraLeftShifts == 0) { |
| if (auto* load = Properties::getFallthrough( |
| ext, getPassOptions(), getModule()->features) |
| ->dynCast<Load>()) { |
| // pattern match a load of 8 bits and a sign extend using a shl of |
| // 24 then shr_s of 24 as well, etc. |
| if (LoadUtils::canBeSigned(load) && |
| ((load->bytes == 1 && bits == 8) || |
| (load->bytes == 2 && bits == 16))) { |
| // if the value falls through, we can't alter the load, as it |
| // might be captured in a tee |
| if (load->signed_ == true || load == ext) { |
| load->signed_ = true; |
| return replaceCurrent(ext); |
| } |
| } |
| } |
| } |
| // We can in some cases remove part of a sign extend, that is, |
| // (x << A) >> B => x << (A - B) |
| // If the sign-extend input cannot have a sign bit, we don't need it. |
| if (Bits::getMaxBits(ext, this) + extraLeftShifts < bits) { |
| return replaceCurrent(removeAlmostSignExt(curr)); |
| } |
| // We also don't need it if it already has an identical-sized sign |
| // extend applied to it. That is, if it is already a sign-extended |
| // value, then another sign extend will do nothing. We do need to be |
| // careful of the extra shifts, though. |
| if (isSignExted(ext, bits) && extraLeftShifts == 0) { |
| return replaceCurrent(removeAlmostSignExt(curr)); |
| } |
| } else if (curr->op == EqInt32 || curr->op == NeInt32) { |
| if (auto* c = curr->right->dynCast<Const>()) { |
| if (auto* ext = Properties::getSignExtValue(curr->left)) { |
| // We are comparing a sign extend to a constant, which means we can |
| // use a cheaper zero-extend in some cases. That is, |
| // (x << S) >> S ==/!= C => x & T ==/!= C |
| // where S and T are the matching values for sign/zero extend of the |
| // same size. For example, for an effective 8-bit value: |
| // (x << 24) >> 24 ==/!= C => x & 255 ==/!= C |
| // |
            // The key things to track here are the upper bits plus the sign
            // bit;
| // call those the "relevant bits". This is crucial because x is |
| // sign-extended, that is, its effective sign bit is spread to all |
| // the upper bits, which means that the relevant bits on the left |
| // side are either all 0, or all 1. |
| auto bits = Properties::getSignExtBits(curr->left); |
| uint32_t right = c->value.geti32(); |
| uint32_t numRelevantBits = 32 - bits + 1; |
| uint32_t setRelevantBits = |
| Bits::popCount(right >> uint32_t(bits - 1)); |
| // If all the relevant bits on C are zero |
| // then we can mask off the high bits instead of sign-extending x. |
| // This is valid because if x is negative, then the comparison was |
| // false before (negative vs positive), and will still be false |
| // as the sign bit will remain to cause a difference. And if x is |
| // positive then the upper bits would be zero anyhow. |
| if (setRelevantBits == 0) { |
| curr->left = makeZeroExt(ext, bits); |
| return replaceCurrent(curr); |
| } else if (setRelevantBits == numRelevantBits) { |
| // If all those bits are one, then we can do something similar if |
| // we also zero-extend on the right as well. This is valid |
| // because, as in the previous case, the sign bit differentiates |
| // the two sides when they are different, and if the sign bit is |
| // identical, then the upper bits don't matter, so masking them |
| // off both sides is fine. |
| curr->left = makeZeroExt(ext, bits); |
| c->value = c->value.and_(Literal(Bits::lowBitMask(bits))); |
| return replaceCurrent(curr); |
| } else { |
| // Otherwise, C's relevant bits are mixed, and then the two sides |
| // can never be equal, as the left side's bits cannot be mixed. |
| Builder builder(*getModule()); |
| // The result is either always true, or always false. |
| c->value = Literal::makeFromInt32(curr->op == NeInt32, c->type); |
| return replaceCurrent( |
| builder.makeSequence(builder.makeDrop(ext), c)); |
| } |
| } |
| } else if (auto* left = Properties::getSignExtValue(curr->left)) { |
| if (auto* right = Properties::getSignExtValue(curr->right)) { |
| auto bits = Properties::getSignExtBits(curr->left); |
| if (Properties::getSignExtBits(curr->right) == bits) { |
| // we are comparing two sign-exts with the same bits, so we may as |
| // well replace both with cheaper zexts |
| curr->left = makeZeroExt(left, bits); |
| curr->right = makeZeroExt(right, bits); |
| return replaceCurrent(curr); |
| } |
| } else if (auto* load = curr->right->dynCast<Load>()) { |
| // we are comparing a load to a sign-ext, we may be able to switch |
| // to zext |
| auto leftBits = Properties::getSignExtBits(curr->left); |
| if (load->signed_ && leftBits == load->bytes * 8) { |
| load->signed_ = false; |
| curr->left = makeZeroExt(left, leftBits); |
| return replaceCurrent(curr); |
| } |
| } |
| } else if (auto* load = curr->left->dynCast<Load>()) { |
| if (auto* right = Properties::getSignExtValue(curr->right)) { |
| // we are comparing a load to a sign-ext, we may be able to switch |
| // to zext |
| auto rightBits = Properties::getSignExtBits(curr->right); |
| if (load->signed_ && rightBits == load->bytes * 8) { |
| load->signed_ = false; |
| curr->right = makeZeroExt(right, rightBits); |
| return replaceCurrent(curr); |
| } |
| } |
| } |
| // note that both left and right may be consts, but then we let |
| // precompute compute the constant result |
| } else if (curr->op == AddInt32 || curr->op == AddInt64 || |
| curr->op == SubInt32 || curr->op == SubInt64) { |
| if (auto* ret = optimizeAddedConstants(curr)) { |
| return replaceCurrent(ret); |
| } |
| } else if (curr->op == MulFloat32 || curr->op == MulFloat64 || |
| curr->op == DivFloat32 || curr->op == DivFloat64) { |
| if (curr->left->type == curr->right->type) { |
| if (auto* leftUnary = curr->left->dynCast<Unary>()) { |
| if (leftUnary->op == Abstract::getUnary(curr->type, Abstract::Abs)) { |
| if (auto* rightUnary = curr->right->dynCast<Unary>()) { |
| if (leftUnary->op == rightUnary->op) { // both are abs ops |
| // abs(x) * abs(y) ==> abs(x * y) |
| // abs(x) / abs(y) ==> abs(x / y) |
| curr->left = leftUnary->value; |
| curr->right = rightUnary->value; |
| leftUnary->value = curr; |
| return replaceCurrent(leftUnary); |
| } |
| } |
| } |
| } |
| } |
| } |
| // a bunch of operations on a constant right side can be simplified |
| if (auto* right = curr->right->dynCast<Const>()) { |
| if (curr->op == AndInt32) { |
| auto mask = right->value.geti32(); |
| // and with -1 does nothing (common in asm.js output) |
| if (mask == -1) { |
| return replaceCurrent(curr->left); |
| } |
| // small loads do not need to be masked, the load itself masks |
| if (auto* load = curr->left->dynCast<Load>()) { |
| if ((load->bytes == 1 && mask == 0xff) || |
| (load->bytes == 2 && mask == 0xffff)) { |
| load->signed_ = false; |
| return replaceCurrent(curr->left); |
| } |
| } else if (auto maskedBits = Bits::getMaskedBits(mask)) { |
| if (Bits::getMaxBits(curr->left, this) <= maskedBits) { |
| // a mask of lower bits is not needed if we are already smaller |
| return replaceCurrent(curr->left); |
| } |
| } |
| } |
| // some math operations have trivial results |
| if (auto* ret = optimizeWithConstantOnRight(curr)) { |
| return replaceCurrent(ret); |
| } |
      // repeated applications of the same operation with constants can be
      // merged
| if (auto* left = curr->left->dynCast<Binary>()) { |
| if (left->op == curr->op) { |
| if (auto* leftRight = left->right->dynCast<Const>()) { |
| if (left->op == AndInt32 || left->op == AndInt64) { |
| leftRight->value = leftRight->value.and_(right->value); |
| return replaceCurrent(left); |
| } else if (left->op == OrInt32 || left->op == OrInt64) { |
| leftRight->value = leftRight->value.or_(right->value); |
| return replaceCurrent(left); |
| } else if (left->op == XorInt32 || left->op == XorInt64) { |
| leftRight->value = leftRight->value.xor_(right->value); |
| return replaceCurrent(left); |
| } else if (left->op == MulInt32 || left->op == MulInt64) { |
| leftRight->value = leftRight->value.mul(right->value); |
| return replaceCurrent(left); |
| |
| // TODO: |
| // handle signed / unsigned divisions. They are more complex |
| } else if (left->op == ShlInt32 || left->op == ShrUInt32 || |
| left->op == ShrSInt32 || left->op == ShlInt64 || |
| left->op == ShrUInt64 || left->op == ShrSInt64) { |
| // shifts only use an effective amount from the constant, so |
| // adding must be done carefully |
| auto total = Bits::getEffectiveShifts(leftRight) + |
| Bits::getEffectiveShifts(right); |
| if (total == Bits::getEffectiveShifts(total, right->type)) { |
| // no overflow, we can do this |
| leftRight->value = Literal::makeFromInt32(total, right->type); |
| return replaceCurrent(left); |
| } // TODO: handle overflows |
| } |
| } |
| } |
| } |
| if (right->type == Type::i32) { |
| BinaryOp op; |
| int32_t c = right->value.geti32(); |
| // First, try to lower signed operations to unsigned if that is |
| // possible. Some unsigned operations like div_u or rem_u are usually |
| // faster on VMs. Also this opens more possibilities for further |
| // simplifications afterwards. |
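        // For example, if we know x uses at most 31 bits (so its sign bit is
        // zero), then
        //   (i32.div_s (local.get $x) (i32.const 7))
        // can become
        //   (i32.div_u (local.get $x) (i32.const 7))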
| if (c >= 0 && (op = makeUnsignedBinaryOp(curr->op)) != InvalidBinary && |
| Bits::getMaxBits(curr->left, this) <= 31) { |
| curr->op = op; |
| } |
| if (c < 0 && c > std::numeric_limits<int32_t>::min() && |
| curr->op == DivUInt32) { |
          // u32(x) / C ==> u32(x) >= C iff C > 2^31
          // We avoid applying this for C == 2^31 due to a conflict with
          // another rule which transforms it to a more preferable right
          // shift operation.
| curr->op = c == -1 ? EqInt32 : GeUInt32; |
| return replaceCurrent(curr); |
| } |
| if (Bits::isPowerOf2((uint32_t)c)) { |
| switch (curr->op) { |
| case MulInt32: |
| return replaceCurrent(optimizePowerOf2Mul(curr, (uint32_t)c)); |
| case RemUInt32: |
| return replaceCurrent(optimizePowerOf2URem(curr, (uint32_t)c)); |
| case DivUInt32: |
| return replaceCurrent(optimizePowerOf2UDiv(curr, (uint32_t)c)); |
| default: |
| break; |
| } |
| } |
| } |
| if (right->type == Type::i64) { |
| BinaryOp op; |
| int64_t c = right->value.geti64(); |
| // See description above for Type::i32 |
| if (c >= 0 && (op = makeUnsignedBinaryOp(curr->op)) != InvalidBinary && |
| Bits::getMaxBits(curr->left, this) <= 63) { |
| curr->op = op; |
| } |
| if (getPassOptions().shrinkLevel == 0 && c < 0 && |
| c > std::numeric_limits<int64_t>::min() && curr->op == DivUInt64) { |
          // u64(x) / C ==> u64(u64(x) >= C) iff C > 2^63
          // We avoid applying this for C == 2^63 due to a conflict with
          // another rule which transforms it to a more preferable right
          // shift operation. And we apply this only for shrinkLevel == 0,
          // as it increases size by one byte.
| curr->op = c == -1LL ? EqInt64 : GeUInt64; |
| curr->type = Type::i32; |
| return replaceCurrent( |
| Builder(*getModule()).makeUnary(ExtendUInt32, curr)); |
| } |
| if (Bits::isPowerOf2((uint64_t)c)) { |
| switch (curr->op) { |
| case MulInt64: |
| return replaceCurrent(optimizePowerOf2Mul(curr, (uint64_t)c)); |
| case RemUInt64: |
| return replaceCurrent(optimizePowerOf2URem(curr, (uint64_t)c)); |
| case DivUInt64: |
| return replaceCurrent(optimizePowerOf2UDiv(curr, (uint64_t)c)); |
| default: |
| break; |
| } |
| } |
| } |
| if (curr->op == DivFloat32) { |
| float c = right->value.getf32(); |
| if (Bits::isPowerOf2InvertibleFloat(c)) { |
| return replaceCurrent(optimizePowerOf2FDiv(curr, c)); |
| } |
| } |
| if (curr->op == DivFloat64) { |
| double c = right->value.getf64(); |
| if (Bits::isPowerOf2InvertibleFloat(c)) { |
| return replaceCurrent(optimizePowerOf2FDiv(curr, c)); |
| } |
| } |
| } |
| // a bunch of operations on a constant left side can be simplified |
| if (curr->left->is<Const>()) { |
| if (auto* ret = optimizeWithConstantOnLeft(curr)) { |
| return replaceCurrent(ret); |
| } |
| } |
| // bitwise operations |
| // for and and or, we can potentially conditionalize |
| if (curr->op == AndInt32 || curr->op == OrInt32) { |
| if (auto* ret = conditionalizeExpensiveOnBitwise(curr)) { |
| return replaceCurrent(ret); |
| } |
| } |
| // for or, we can potentially combine |
| if (curr->op == OrInt32) { |
| if (auto* ret = combineOr(curr)) { |
| return replaceCurrent(ret); |
| } |
| } |
| // relation/comparisons allow for math optimizations |
| if (curr->isRelational()) { |
| if (auto* ret = optimizeRelational(curr)) { |
| return replaceCurrent(ret); |
| } |
| } |
| // finally, try more expensive operations on the curr in |
| // the case that they have no side effects |
| if (!effects(curr->left).hasSideEffects()) { |
| if (ExpressionAnalyzer::equal(curr->left, curr->right)) { |
| if (auto* ret = optimizeBinaryWithEqualEffectlessChildren(curr)) { |
| return replaceCurrent(ret); |
| } |
| } |
| } |
| |
| if (auto* ret = deduplicateBinary(curr)) { |
| return replaceCurrent(ret); |
| } |
| } |
| |
| void visitUnary(Unary* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| |
| { |
| using namespace Match; |
| using namespace Abstract; |
| Builder builder(*getModule()); |
| { |
| // eqz(x - y) => x == y |
| Binary* inner; |
| if (matches(curr, unary(EqZ, binary(&inner, Sub, any(), any())))) { |
| inner->op = Abstract::getBinary(inner->left->type, Eq); |
| inner->type = Type::i32; |
| return replaceCurrent(inner); |
| } |
| } |
| { |
| // eqz(x + C) => x == -C |
| Const* c; |
| Binary* inner; |
| if (matches(curr, unary(EqZ, binary(&inner, Add, any(), ival(&c))))) { |
| c->value = c->value.neg(); |
| inner->op = Abstract::getBinary(c->type, Eq); |
| inner->type = Type::i32; |
| return replaceCurrent(inner); |
| } |
| } |
| { |
| // eqz((signed)x % C_pot) => eqz(x & (abs(C_pot) - 1)) |
| Const* c; |
| Binary* inner; |
| if (matches(curr, unary(EqZ, binary(&inner, RemS, any(), ival(&c)))) && |
| (c->value.isSignedMin() || |
| Bits::isPowerOf2(c->value.abs().getInteger()))) { |
| inner->op = Abstract::getBinary(c->type, And); |
| if (c->value.isSignedMin()) { |
| c->value = Literal::makeSignedMax(c->type); |
| } else { |
| c->value = c->value.abs().sub(Literal::makeOne(c->type)); |
| } |
| return replaceCurrent(curr); |
| } |
| } |
| { |
| // i32.eqz(i32.wrap_i64(x)) => i64.eqz(x) |
| // where maxBits(x) <= 32 |
| Unary* inner; |
| Expression* x; |
| if (matches(curr, unary(EqZInt32, unary(&inner, WrapInt64, any(&x)))) && |
| Bits::getMaxBits(x, this) <= 32) { |
| inner->op = EqZInt64; |
| inner->value = x; |
| return replaceCurrent(inner); |
| } |
| } |
| } |
| |
| if (curr->op == EqZInt32) { |
| if (auto* inner = curr->value->dynCast<Binary>()) { |
| // Try to invert a relational operation using De Morgan's law |
| auto op = invertBinaryOp(inner->op); |
| if (op != InvalidBinary) { |
| inner->op = op; |
| return replaceCurrent(inner); |
| } |
| } |
| // eqz of a sign extension can be of zero-extension |
| if (auto* ext = Properties::getSignExtValue(curr->value)) { |
| // we are comparing a sign extend to a constant, which means we can |
| // use a cheaper zext |
| auto bits = Properties::getSignExtBits(curr->value); |
| curr->value = makeZeroExt(ext, bits); |
| return replaceCurrent(curr); |
| } |
| } else if (curr->op == AbsFloat32 || curr->op == AbsFloat64) { |
| // abs(-x) ==> abs(x) |
| if (auto* unaryInner = curr->value->dynCast<Unary>()) { |
| if (unaryInner->op == |
| Abstract::getUnary(unaryInner->type, Abstract::Neg)) { |
| curr->value = unaryInner->value; |
| return replaceCurrent(curr); |
| } |
| } |
| // abs(x * x) ==> x * x |
| // abs(x / x) ==> x / x |
| if (auto* binary = curr->value->dynCast<Binary>()) { |
| if ((binary->op == Abstract::getBinary(binary->type, Abstract::Mul) || |
| binary->op == Abstract::getBinary(binary->type, Abstract::DivS)) && |
| ExpressionAnalyzer::equal(binary->left, binary->right)) { |
| return replaceCurrent(binary); |
| } |
| // abs(0 - x) ==> abs(x), |
| // only for fast math |
| if (fastMath && |
| binary->op == Abstract::getBinary(binary->type, Abstract::Sub)) { |
| if (auto* c = binary->left->dynCast<Const>()) { |
| if (c->value.isZero()) { |
| curr->value = binary->right; |
| return replaceCurrent(curr); |
| } |
| } |
| } |
| } |
| } |
| |
| if (auto* ret = deduplicateUnary(curr)) { |
| return replaceCurrent(ret); |
| } |
| } |
| |
| void visitSelect(Select* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| if (auto* ret = optimizeSelect(curr)) { |
| return replaceCurrent(ret); |
| } |
| optimizeTernary(curr); |
| } |
| |
| void visitGlobalSet(GlobalSet* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| // optimize out a set of a get |
| auto* get = curr->value->dynCast<GlobalGet>(); |
| if (get && get->name == curr->name) { |
| ExpressionManipulator::nop(curr); |
| return replaceCurrent(curr); |
| } |
| } |
| |
| void visitIf(If* curr) { |
| curr->condition = optimizeBoolean(curr->condition); |
| if (curr->ifFalse) { |
| if (auto* unary = curr->condition->dynCast<Unary>()) { |
| if (unary->op == EqZInt32) { |
| // flip if-else arms to get rid of an eqz |
| curr->condition = unary->value; |
| std::swap(curr->ifTrue, curr->ifFalse); |
| } |
| } |
| if (curr->condition->type != Type::unreachable && |
| ExpressionAnalyzer::equal(curr->ifTrue, curr->ifFalse)) { |
| // The sides are identical, so fold. If we can replace the If with one |
| // arm and there are no side effects in the condition, replace it. But |
| // make sure not to change a concrete expression to an unreachable |
| // expression because we want to avoid having to refinalize. |
| bool needCondition = effects(curr->condition).hasSideEffects(); |
| bool wouldBecomeUnreachable = |
| curr->type.isConcrete() && curr->ifTrue->type == Type::unreachable; |
| Builder builder(*getModule()); |
| if (!wouldBecomeUnreachable && !needCondition) { |
| return replaceCurrent(curr->ifTrue); |
| } else if (!wouldBecomeUnreachable) { |
| return replaceCurrent(builder.makeSequence( |
| builder.makeDrop(curr->condition), curr->ifTrue)); |
| } else { |
| // Emit a block with the original concrete type. |
| auto* ret = builder.makeBlock(); |
| if (needCondition) { |
| ret->list.push_back(builder.makeDrop(curr->condition)); |
| } |
| ret->list.push_back(curr->ifTrue); |
| ret->finalize(curr->type); |
| return replaceCurrent(ret); |
| } |
| } |
| optimizeTernary(curr); |
| } |
| } |
| |
| void visitLocalSet(LocalSet* curr) { |
| // (local.tee (ref.as_non_null ..)) |
| // can be reordered to |
| // (ref.as_non_null (local.tee ..)) |
| // if the local is nullable (which it must be until some form of let is |
| // added). The reordering allows the ref.as to be potentially optimized |
| // further based on where the value flows to. |
| if (curr->isTee()) { |
| if (auto* as = curr->value->dynCast<RefAs>()) { |
| if (as->op == RefAsNonNull && |
| getFunction()->getLocalType(curr->index).isNullable()) { |
| curr->value = as->value; |
| curr->finalize(); |
| as->value = curr; |
| as->finalize(); |
| replaceCurrent(as); |
| } |
| } |
| } |
| } |
| |
| void visitBreak(Break* curr) { |
| if (curr->condition) { |
| curr->condition = optimizeBoolean(curr->condition); |
| } |
| } |
| |
| void visitLoad(Load* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| optimizeMemoryAccess(curr->ptr, curr->offset); |
| } |
| |
| void visitStore(Store* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| optimizeMemoryAccess(curr->ptr, curr->offset); |
| optimizeStoredValue(curr->value, curr->bytes); |
| if (auto* unary = curr->value->dynCast<Unary>()) { |
| if (unary->op == WrapInt64) { |
| // instead of wrapping to 32, just store some of the bits in the i64 |
| curr->valueType = Type::i64; |
| curr->value = unary->value; |
| } |
| } |
| } |
| |
| void optimizeStoredValue(Expression*& value, Index bytes) { |
| if (!value->type.isInteger()) { |
| return; |
| } |
    // truncate constant values during stores
| // (i32|i64).store(8|16|32)(p, C) ==> |
| // (i32|i64).store(8|16|32)(p, C & mask) |
| if (auto* c = value->dynCast<Const>()) { |
| if (value->type == Type::i64 && bytes == 4) { |
| c->value = c->value.and_(Literal(uint64_t(0xffffffff))); |
| } else { |
| c->value = c->value.and_( |
| Literal::makeFromInt32(Bits::lowBitMask(bytes * 8), value->type)); |
| } |
| } |
    // stores of fewer bits truncate anyhow
| if (auto* binary = value->dynCast<Binary>()) { |
| if (binary->op == AndInt32) { |
| if (auto* right = binary->right->dynCast<Const>()) { |
| if (right->type == Type::i32) { |
| auto mask = right->value.geti32(); |
| if ((bytes == 1 && mask == 0xff) || |
| (bytes == 2 && mask == 0xffff)) { |
| value = binary->left; |
| } |
| } |
| } |
| } else if (auto* ext = Properties::getSignExtValue(binary)) { |
      // If we sign-extend to the exact bit size we store, we can skip the
      // extension. If we extend to something bigger, then we just alter bits
      // that we don't store anyhow.
| if (Properties::getSignExtBits(binary) >= Index(bytes) * 8) { |
| value = ext; |
| } |
| } |
| } |
| } |
| |
| void visitMemoryCopy(MemoryCopy* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| assert(getModule()->features.hasBulkMemory()); |
| if (auto* ret = optimizeMemoryCopy(curr)) { |
| return replaceCurrent(ret); |
| } |
| } |
| |
| void visitRefEq(RefEq* curr) { |
| // Identical references compare equal. |
| if (areConsecutiveInputsEqual(curr->left, curr->right)) { |
| replaceCurrent( |
| Builder(*getModule()).makeConst(Literal::makeOne(Type::i32))); |
| } |
| } |
| |
| // If an instruction traps on a null input, there is no need for a |
| // ref.as_non_null on that input: we will trap either way (and the binaryen |
| // optimizer does not differentiate traps). |
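  // For example, a struct.get traps on a null reference anyhow, so
  //   (struct.get $T 0 (ref.as_non_null (..)))
  // can become
  //   (struct.get $T 0 (..))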
| void skipNonNullCast(Expression*& input) { |
| while (1) { |
| if (auto* as = input->dynCast<RefAs>()) { |
| if (as->op == RefAsNonNull) { |
| input = as->value; |
| continue; |
| } |
| } |
| break; |
| } |
| } |
| |
| void visitStructGet(StructGet* curr) { skipNonNullCast(curr->ref); } |
| |
| void visitStructSet(StructSet* curr) { |
| skipNonNullCast(curr->ref); |
| |
| if (curr->ref->type != Type::unreachable && curr->value->type.isInteger()) { |
| const auto& fields = curr->ref->type.getHeapType().getStruct().fields; |
| optimizeStoredValue(curr->value, fields[curr->index].getByteSize()); |
| } |
| } |
| |
| void visitArrayGet(ArrayGet* curr) { skipNonNullCast(curr->ref); } |
| |
| void visitArraySet(ArraySet* curr) { |
| skipNonNullCast(curr->ref); |
| |
| if (curr->ref->type != Type::unreachable && curr->value->type.isInteger()) { |
| auto element = curr->ref->type.getHeapType().getArray().element; |
| optimizeStoredValue(curr->value, element.getByteSize()); |
| } |
| } |
| |
| void visitArrayLen(ArrayLen* curr) { skipNonNullCast(curr->ref); } |
| |
| void visitArrayCopy(ArrayCopy* curr) { |
| skipNonNullCast(curr->destRef); |
| skipNonNullCast(curr->srcRef); |
| } |
| |
| void visitRefCast(RefCast* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| if (getPassOptions().ignoreImplicitTraps) { |
      // A ref.cast traps when the RTTs do not line up, which can be due to
      // one of two sorts of issues:
| // 1. The value being cast is not even a subtype of the cast type. In |
| // that case the RTTs trivially cannot indicate subtyping, because |
| // RTT subtyping is a subset of static subtyping. For example, maybe |
| // we are trying to cast a {i32} struct to an [f64] array. |
| // 2. The value is a subtype of the cast type, but the RTTs still do not |
| // fit. That indicates a difference between RTT subtyping and static |
| // subtyping. That is, the type may be right but the chain of rtt.subs |
| // is not. |
| // If we ignore implicit traps then we would like to assume that neither |
| // of those two situations can happen. However, we still cannot do |
| // anything if point 1 is a problem, that is, if the value is not a |
| // subtype of the cast type, as we can't remove the cast in that case - |
| // the wasm would not validate. But if the type *is* a subtype, then we |
| // can ignore a possible trap on 2 and remove it. |
| // |
| // We also do not do this if the arguments cannot be reordered. If we |
| // can't do that then we need to add a drop, at minimum (which may still |
| // be worthwhile, but depends on other optimizations kicking in, so it's |
| // not clearly worthwhile). |
| if (HeapType::isSubType(curr->ref->type.getHeapType(), |
| curr->rtt->type.getHeapType()) && |
| canReorder(curr->ref, curr->rtt)) { |
| Builder builder(*getModule()); |
| replaceCurrent( |
| builder.makeSequence(builder.makeDrop(curr->rtt), curr->ref)); |
| return; |
| } |
| } |
| |
| // ref.cast can be reordered with ref.as_non_null, |
| // |
| // (ref.cast (ref.as_non_null ..)) |
| // => |
| // (ref.as_non_null (ref.cast ..)) |
| // |
| // This is valid because both pass through the value if they do not trap, |
| // and so reordering does not change whether a trap happens (and reordering |
| // traps is allowed), and does not change the value flowing out at the end. |
| // It is better to have the ref.as_non_null on the outside since it allows |
| // outer instructions to potentially optimize it away (should we find |
| // optimizations that can fold away a ref.cast on an outer instruction, that |
| // might motivate changing this). |
| // |
| // Note that other ref.as* methods, like ref.as_func, are not obviously |
| // worth reordering with ref.cast. For example, the type of ref.as_data is |
| // (ref data), which is less specific than what ref.cast would have. |
| // TODO optimize ref.cast of ref.as_[func|data|i31] in other ways. |
| if (auto* as = curr->ref->dynCast<RefAs>()) { |
| if (as->op == RefAsNonNull) { |
| curr->ref = as->value; |
| curr->finalize(); |
| as->value = curr; |
| as->finalize(); |
| replaceCurrent(as); |
| return; |
| } |
| } |
| } |
| |
| void visitRefIs(RefIs* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| |
    // Optimizing RefIs is not that obvious, since even if we know the result
    // evaluates to 0 or 1, the replacement may not actually save code size:
    // RefIsNull is a single byte (the others are 2), while adding a Const
    // of 0 would be two bytes. Other factors are that we can remove the input
    // and the added drop on it if it has no side effects, and that replacing
    // with a constant may allow further optimizations later. For now, replace
    // with a constant, but this warrants more investigation. TODO
| |
| Builder builder(*getModule()); |
| |
| auto nonNull = !curr->value->type.isNullable(); |
| |
| if (curr->op == RefIsNull) { |
| if (nonNull) { |
| replaceCurrent(builder.makeSequence( |
| builder.makeDrop(curr->value), |
| builder.makeConst(Literal::makeZero(Type::i32)))); |
| } |
| return; |
| } |
| |
| // Check if the type is the kind we are checking for. |
| auto result = GCTypeUtils::evaluateKindCheck(curr); |
| |
| if (result != GCTypeUtils::Unknown) { |
| // We know the kind. Now we must also take into account nullability. |
| if (nonNull) { |
| // We know the entire result. |
| replaceCurrent( |
| builder.makeSequence(builder.makeDrop(curr->value), |
| builder.makeConst(Literal::makeFromInt32( |
| result == GCTypeUtils::Success, Type::i32)))); |
| } else { |
| // The value may be null. Leave only a check for that. |
| curr->op = RefIsNull; |
| if (result == GCTypeUtils::Success) { |
| // The input is of the right kind. If it is not null then the result |
| // is 1, and otherwise it is 0, so we need to flip the result of |
| // RefIsNull. |
| // Note that even after adding an eqz here we do not regress code size |
| // as RefIsNull is a single byte while the others are two. So we keep |
| // code size identical. However, in theory this may be more work, if |
| // a VM considers ref.is_X to be as fast as ref.is_null, and if eqz is |
| // not free, so this is worth more investigation. TODO |
| replaceCurrent(builder.makeUnary(EqZInt32, curr)); |
| } else { |
| // The input is of the wrong kind. In this case if it is null we |
| // return zero because of that, and if it is not then we return zero |
| // because of the kind, so the result is always the same. |
| assert(result == GCTypeUtils::Failure); |
| replaceCurrent(builder.makeSequence( |
| builder.makeDrop(curr->value), |
| builder.makeConst(Literal::makeZero(Type::i32)))); |
| } |
| } |
| } |
| } |
| |
| void visitRefAs(RefAs* curr) { |
| if (curr->type == Type::unreachable) { |
| return; |
| } |
| |
| skipNonNullCast(curr->value); |
| |
| // Check if the type is the kind we are checking for. |
| auto result = GCTypeUtils::evaluateKindCheck(curr); |
| |
| if (result == GCTypeUtils::Success) { |
| // We know the kind is correct, so all that is left is a check for |
| // non-nullability, which we do lower down. |
| curr->op = RefAsNonNull; |
| } else if (result == GCTypeUtils::Failure) { |
| // This is the wrong kind, so it will trap. The binaryen optimizer does |
| // not differentiate traps, so we can perform a replacement here. We |
| // replace 2 bytes of ref.as_* with one byte of unreachable and one of a |
| // drop, which is no worse, and the value and the drop can be optimized |
| // out later if the value has no side effects. |
| Builder builder(*getModule()); |
| replaceCurrent(builder.makeSequence(builder.makeDrop(curr->value), |
| builder.makeUnreachable())); |
| return; |
| } |
| |
| if (curr->op == RefAsNonNull && !curr->value->type.isNullable()) { |
| replaceCurrent(curr->value); |
| } |
| } |
| |
| Index getMaxBitsForLocal(LocalGet* get) { |
| // check what we know about the local |
| return localInfo[get->index].maxBits; |
| } |
| |
| private: |
| // Information about our locals |
| std::vector<LocalInfo> localInfo; |
| |
| // Check if two consecutive inputs to an instruction are equal. As they are |
  // consecutive, no code can execute in between them, which simplifies the
| // problem here (and which is the case we care about in this pass, which does |
| // simple peephole optimizations - all we care about is a single instruction |
| // at a time, and its inputs). |
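  // For example, in
  //   (ref.eq (local.get $x) (local.get $x))
  // the two inputs must be equal, as no code runs between the two gets.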
| bool areConsecutiveInputsEqual(Expression* left, Expression* right) { |
| // First, check for side effects. If there are any, then we can't even |
| // assume things like local.get's of the same index being identical. |
| PassOptions passOptions = getPassOptions(); |
| if (EffectAnalyzer(passOptions, getModule()->features, left) |
| .hasSideEffects() || |
| EffectAnalyzer(passOptions, getModule()->features, right) |
| .hasSideEffects()) { |
| return false; |
| } |
| // Ignore extraneous things and compare them structurally. |
| left = Properties::getFallthrough(left, passOptions, getModule()->features); |
| right = |
| Properties::getFallthrough(right, passOptions, getModule()->features); |
| if (!ExpressionAnalyzer::equal(left, right)) { |
| return false; |
| } |
    // Reference equality also needs to check for allocation, which is *not* a
    // side effect, but which results in different references: each execution
    // of (struct.new $foo) returns a new, different reference (while
    // i32.const etc. of course always returns the same value).
| // Note that allocations may have appeared in the original inputs to this |
| // function, and skipped when we focused on what falls through; since there |
| // are no side effects, any allocations there cannot reach the fallthrough. |
| if (left->type.isRef()) { |
| if (FindAll<StructNew>(left).has() || FindAll<ArrayNew>(left).has()) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // Canonicalizing the order of a symmetric binary helps us |
| // write more concise pattern matching code elsewhere. |
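  // For example, a constant is canonicalized to the right:
  //   (i32.add (i32.const 1) (local.get $x))
  //     => (i32.add (local.get $x) (i32.const 1))
  // and a relational op is reversed to make that possible:
  //   (i32.lt_s (i32.const 1) (local.get $x))
  //     => (i32.gt_s (local.get $x) (i32.const 1))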
| void canonicalize(Binary* binary) { |
| assert(shouldCanonicalize(binary)); |
| auto swap = [&]() { |
| assert(canReorder(binary->left, binary->right)); |
| if (binary->isRelational()) { |
| binary->op = reverseRelationalOp(binary->op); |
| } |
| std::swap(binary->left, binary->right); |
| }; |
| auto maybeSwap = [&]() { |
| if (canReorder(binary->left, binary->right)) { |
| swap(); |
| } |
| }; |
| // Prefer a const on the right. |
| if (binary->left->is<Const>() && !binary->right->is<Const>()) { |
| return swap(); |
| } |
| if (auto* c = binary->right->dynCast<Const>()) { |
| // x - C ==> x + (-C) |
      // Prefer to use addition if there is a constant on the right.
| if (binary->op == Abstract::getBinary(c->type, Abstract::Sub)) { |
| c->value = c->value.neg(); |
| binary->op = Abstract::getBinary(c->type, Abstract::Add); |
| } |
| return; |
| } |
| // Prefer a get on the right. |
| if (binary->left->is<LocalGet>() && !binary->right->is<LocalGet>()) { |
| return maybeSwap(); |
| } |
| // Sort by the node id type, if different. |
| if (binary->left->_id != binary->right->_id) { |
| if (binary->left->_id > binary->right->_id) { |
| return maybeSwap(); |
| } |
| return; |
| } |
| // If the children have the same node id, we have to go deeper. |
| if (auto* left = binary->left->dynCast<Unary>()) { |
| auto* right = binary->right->cast<Unary>(); |
| if (left->op > right->op) { |
| return maybeSwap(); |
| } |
| } |
| if (auto* left = binary->left->dynCast<Binary>()) { |
| auto* right = binary->right->cast<Binary>(); |
| if (left->op > right->op) { |
| return maybeSwap(); |
| } |
| } |
| if (auto* left = binary->left->dynCast<LocalGet>()) { |
| auto* right = binary->right->cast<LocalGet>(); |
| if (left->index > right->index) { |
| return maybeSwap(); |
| } |
| } |
| } |
| |
| // Optimize given that the expression is flowing into a boolean context |
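  // For example, a comparison against zero is unnecessary in such a context:
  //   (if (i32.ne (local.get $x) (i32.const 0)) ..)
  //     => (if (local.get $x) ..)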
| Expression* optimizeBoolean(Expression* boolean) { |
| // TODO use a general getFallthroughs |
    if (auto* unary = boolean->dynCast<Unary>()) {
      if (unary->op == EqZInt32) {
        auto* unary2 = unary->value->dynCast<Unary>();
        if (unary2 && unary2->op == EqZInt32) {
          // double eqz
          return unary2->value;
        }
        if (auto* binary = unary->value->dynCast<Binary>()) {
          // !(x <=> y) ==> x <!=> y
          auto op = invertBinaryOp(binary->op);
          if (op != InvalidBinary) {
            binary->op = op;
            return binary;
          }
        }
      }
| } else if (auto* binary = boolean->dynCast<Binary>()) { |
| if (binary->op == SubInt32) { |
| if (auto* c = binary->left->dynCast<Const>()) { |
| if (c->value.geti32() == 0) { |
| // bool(0 - x) ==> bool(x) |
| return binary->right; |
| } |
| } |
| } else if (binary->op == OrInt32) { |
| // an or flowing into a boolean context can consider each input as |
| // boolean |
| binary->left = optimizeBoolean(binary->left); |
| binary->right = optimizeBoolean(binary->right); |
| } else if (binary->op == NeInt32) { |
| if (auto* c = binary->right->dynCast<Const>()) { |
| // x != 0 is just x if it's used as a bool |
| if (c->value.geti32() == 0) { |
| return binary->left; |
| } |
          // TODO: Perhaps use this in a separate final pass?
| // x != -1 ==> x ^ -1 |
| // if (num->value.geti32() == -1) { |
| // binary->op = XorInt32; |
| // return binary; |
| // } |
| } |
| } else if (binary->op == RemSInt32) { |
| // bool(i32(x) % C_pot) ==> bool(x & (C_pot - 1)) |
| // bool(i32(x) % min_s) ==> bool(x & max_s) |
| if (auto* c = binary->right->dynCast<Const>()) { |
| if (c->value.isSignedMin() || |
| Bits::isPowerOf2(c->value.abs().geti32())) { |
| binary->op = AndInt32; |
| if (c->value.isSignedMin()) { |
| c->value = Literal::makeSignedMax(Type::i32); |
| } else { |
| c->value = c->value.abs().sub(Literal::makeOne(Type::i32)); |
| } |
| return binary; |
| } |
| } |
| } |
| if (auto* ext = Properties::getSignExtValue(binary)) { |
        // use a cheaper zero-extend, we just care about the boolean value
| // anyhow |
| return makeZeroExt(ext, Properties::getSignExtBits(binary)); |
| } |
| } else if (auto* block = boolean->dynCast<Block>()) { |
| if (block->type == Type::i32 && block->list.size() > 0) { |
| block->list.back() = optimizeBoolean(block->list.back()); |
| } |
| } else if (auto* iff = boolean->dynCast<If>()) { |
| if (iff->type == Type::i32) { |
| iff->ifTrue = optimizeBoolean(iff->ifTrue); |
| iff->ifFalse = optimizeBoolean(iff->ifFalse); |
| } |
| } else if (auto* select = boolean->dynCast<Select>()) { |
| select->ifTrue = optimizeBoolean(select->ifTrue); |
| select->ifFalse = optimizeBoolean(select->ifFalse); |
| } else if (auto* tryy = boolean->dynCast<Try>()) { |
| if (tryy->type == Type::i32) { |
| tryy->body = optimizeBoolean(tryy->body); |
| for (Index i = 0; i < tryy->catchBodies.size(); i++) { |
| tryy->catchBodies[i] = optimizeBoolean(tryy->catchBodies[i]); |
| } |
| } |
| } |
| // TODO: recurse into br values? |
| return boolean; |
| } |
| |
| Expression* optimizeSelect(Select* curr) { |
| using namespace Match; |
| Builder builder(*getModule()); |
| curr->condition = optimizeBoolean(curr->condition); |
| { |
| // Constant condition, we can just pick the correct side (barring side |
| // effects) |
| Expression *ifTrue, *ifFalse; |
| if (matches(curr, select(pure(&ifTrue), any(&ifFalse), i32(0)))) { |
| return ifFalse; |
| } |
| if (matches(curr, select(any(&ifTrue), any(&ifFalse), i32(0)))) { |
| return builder.makeSequence(builder.makeDrop(ifTrue), ifFalse); |
| } |
| int32_t cond; |
| if (matches(curr, select(any(&ifTrue), pure(&ifFalse), i32(&cond)))) { |
| // The condition must be non-zero because a zero would have matched one |
| // of the previous patterns. |
| assert(cond != 0); |
| return ifTrue; |
| } |
| // Don't bother when `ifFalse` isn't pure - we would need to reverse the |
| // order using a temp local, which would be bad |
| } |
| { |
| // Flip select to remove eqz if we can reorder |
| Select* s; |
| Expression *ifTrue, *ifFalse, *c; |
| if (matches( |
| curr, |
| select( |
| &s, any(&ifTrue), any(&ifFalse), unary(EqZInt32, any(&c)))) && |
| canReorder(ifTrue, ifFalse)) { |
| s->ifTrue = ifFalse; |
| s->ifFalse = ifTrue; |
| s->condition = c; |
| } |
| } |
| { |
| // Simplify selects between 0 and 1 |
| Expression* c; |
| bool reversed = matches(curr, select(ival(0), ival(1), any(&c))); |
| if (reversed || matches(curr, select(ival(1), ival(0), any(&c)))) { |
| if (reversed) { |
| c = optimizeBoolean(builder.makeUnary(EqZInt32, c)); |
| } |
| if (!Properties::emitsBoolean(c)) { |
| // cond ? 1 : 0 ==> !!cond |
| c = builder.makeUnary(EqZInt32, builder.makeUnary(EqZInt32, c)); |
| } |
| return curr->type == Type::i64 ? builder.makeUnary(ExtendUInt32, c) : c; |
| } |
| } |
| { |
| // Sides are identical, fold |
| Expression *ifTrue, *ifFalse, *c; |
| if (matches(curr, select(any(&ifTrue), any(&ifFalse), any(&c))) && |
| ExpressionAnalyzer::equal(ifTrue, ifFalse)) { |
| auto value = effects(ifTrue); |
| if (value.hasSideEffects()) { |
          // At best we don't need the condition, but we would need to execute
          // the value twice. A block is larger than a select by 2 bytes, and
          // we must drop one value, so 3 bytes in total, while we only save
          // the condition, so it's not clear this is worth it. TODO
| } else { |
| // value has no side effects |
| auto condition = effects(c); |
| if (!condition.hasSideEffects()) { |
| return ifTrue; |
| } else { |
| // The condition is last, so we need a new local, and it may be a |
| // bad idea to use a block like we do for an if. Do it only if we |
| // can reorder |
| if (!condition.invalidates(value)) { |
| return builder.makeSequence(builder.makeDrop(c), ifTrue); |
| } |
| } |
| } |
| } |
| } |
| return nullptr; |
| } |
| |
  // Find added constants in an expression tree, including multiplied and
  // shifted ones, and combine them. Note that we ignore division and
  // shift-right, as rounding makes those nonlinear, so combining would not be
  // a valid optimization there.
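  // For example (in pseudocode),
  //   ((x + 4) << 1) + 2   ==>   (x << 1) + 10
  // as the inner constant 4 is doubled by the shift.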
| Expression* optimizeAddedConstants(Binary* binary) { |
| assert(binary->type.isInteger()); |
| |
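    // The total constant value we accumulate (with multipliers applied), and
    // the Const nodes it came from, which we will zero out.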
| uint64_t constant = 0; |
| std::vector<Const*> constants; |
| |
| struct SeekState { |
| Expression* curr; |
| uint64_t mul; |
| SeekState(Expression* curr, uint64_t mul) : curr(curr), mul(mul) {} |
| }; |
| std::vector<SeekState> seekStack; |
| seekStack.emplace_back(binary, 1); |
| while (!seekStack.empty()) { |
| auto state = seekStack.back(); |
| seekStack.pop_back(); |
| auto curr = state.curr; |
| auto mul = state.mul; |
| if (auto* c = curr->dynCast<Const>()) { |
| uint64_t value = c->value.getInteger(); |
| if (value != 0ULL) { |
| constant += value * mul; |
| constants.push_back(c); |
| } |
| continue; |
| } else if (auto* binary = curr->dynCast<Binary>()) { |
| if (binary->op == Abstract::getBinary(binary->type, Abstract::Add)) { |
| seekStack.emplace_back(binary->right, mul); |
| seekStack.emplace_back(binary->left, mul); |
| continue; |
| } else if (binary->op == |
| Abstract::getBinary(binary->type, Abstract::Sub)) { |
| // if the left is a zero, ignore it, it's how we negate ints |
| auto* left = binary->left->dynCast<Const>(); |
| seekStack.emplace_back(binary->right, -mul); |
| if (!left || !left->value.isZero()) { |
| seekStack.emplace_back(binary->left, mul); |
| } |
| continue; |
| } else if (binary->op == |
| Abstract::getBinary(binary->type, Abstract::Shl)) { |
| if (auto* c = binary->right->dynCast<Const>()) { |
| seekStack.emplace_back(binary->left, |
| mul << Bits::getEffectiveShifts(c)); |
| continue; |
| } |
| } else if (binary->op == |
| Abstract::getBinary(binary->type, Abstract::Mul)) { |
| if (auto* c = binary->left->dynCast<Const>()) { |
| seekStack.emplace_back(binary->right, |
| mul * (uint64_t)c->value.getInteger()); |
| continue; |
| } else if (auto* c = binary->right->dynCast<Const>()) { |
| seekStack.emplace_back(binary->left, |
| mul * (uint64_t)c->value.getInteger()); |
| continue; |
| } |
| } |
| } |
| }; |
| // find all factors |
| if (constants.size() <= 1) { |
| // nothing much to do, except for the trivial case of adding/subbing a |
| // zero |
| if (auto* c = binary->right->dynCast<Const>()) { |
| if (c->value.isZero()) { |
| return binary->left; |
| } |
| } |
| return nullptr; |
| } |
| // wipe out all constants, we'll replace with a single added one |
| for (auto* c : constants) { |
| c->value = Literal::makeZero(c->type); |
| } |
| // remove added/subbed zeros |
| struct ZeroRemover : public PostWalker<ZeroRemover> { |
      // TODO: we could save the binaries and consts we drop, and reuse them
      // later
| |
| PassOptions& passOptions; |
| |
| ZeroRemover(PassOptions& passOptions) : passOptions(passOptions) {} |
| |
| void visitBinary(Binary* curr) { |
| if (!curr->type.isInteger()) { |
| return; |
| } |
| FeatureSet features = getModule()->features; |
| auto type = curr->type; |
| auto* left = curr->left->dynCast<Const>(); |
| auto* right = curr->right->dynCast<Const>(); |
| // Canonicalization prefers an add instead of a subtract wherever |
| // possible. That prevents a subtracted constant on the right, |
| // as it would be added. And for a zero on the left, it can't be |
| // removed (it is how we negate ints). |
| if (curr->op == Abstract::getBinary(type, Abstract::Add)) { |
| if (left && left->value.isZero()) { |
| replaceCurrent(curr->right); |
| return; |
| } |
| if (right && right->value.isZero()) { |
| replaceCurrent(curr->left); |
| return; |
| } |
| } else if (curr->op == Abstract::getBinary(type, Abstract::Shl)) { |
          // Shifting a 0 yields 0, and shifting anything by 0 bits has no
          // effect; either way we can remove the shift, unless the shift
          // amount has side effects
| if (((left && left->value.isZero()) || |
| (right && Bits::getEffectiveShifts(right) == 0)) && |
| !EffectAnalyzer(passOptions, features, curr->right) |
| .hasSideEffects()) { |
| replaceCurrent(curr->left); |
| return; |
| } |
| } else if (curr->op == Abstract::getBinary(type, Abstract::Mul)) { |
| // multiplying by zero is a zero, unless the other side has side |
| // effects |
| if (left && left->value.isZero() && |
| !EffectAnalyzer(passOptions, features, curr->right) |
| .hasSideEffects()) { |
| replaceCurrent(left); |
| return; |
| } |
| if (right && right->value.isZero() && |
| !EffectAnalyzer(passOptions, features, curr->left) |
| .hasSideEffects()) { |
| replaceCurrent(right); |
| return; |
| } |
| } |
| } |
| }; |
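| // Continuing the example above: after the constants in |
| // (x + 4) + ((y + 8) << 1) have been zeroed, the remover turns |
| // (x + 0) + ((y + 0) << 1) into x + (y << 1); the accumulated constant |
| // is then re-added below. |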
| Expression* walked = binary; |
| ZeroRemover remover(getPassOptions()); |
| remover.setModule(getModule()); |
| remover.walk(walked); |
| if (constant == 0ULL) { |
| return walked; // nothing more to do |
| } |
| if (auto* c = walked->dynCast<Const>()) { |
| assert(c->value.isZero()); |
| // An accumulated 64-bit constant will be wrapped when downcast into a |
| // 32-bit context, so this single code path correctly unifies the 32-bit |
| // and 64-bit cases. |
| c->value = Literal::makeFromInt64(constant, c->type); |
| return c; |
| } |
| Builder builder(*getModule()); |
| return builder.makeBinary( |
| Abstract::getBinary(walked->type, Abstract::Add), |
| walked, |
| builder.makeConst(Literal::makeFromInt64(constant, walked->type))); |
| } |
| |
| // expensive1 | expensive2 can be turned into expensive1 ? 1 : expensive2, |
| // and expensive | cheap can be turned into cheap ? 1 : expensive, |
| // so that we can avoid one expensive computation, if it has no side effects. |
| Expression* conditionalizeExpensiveOnBitwise(Binary* binary) { |
| // this operation can increase code size, so don't always do it |
| auto& options = getPassRunner()->options; |
| if (options.optimizeLevel < 2 || options.shrinkLevel > 0) { |
| return nullptr; |
| } |
| const auto MIN_COST = 7; |
| assert(binary->op == AndInt32 || binary->op == OrInt32); |
| if (binary->right->is<Const>()) { |
| return nullptr; // trivial |
| } |
| // a bitwise logical operator on two non-constant values; check whether |
| // they are boolean |
| auto* left = binary->left; |
| auto* right = binary->right; |
| if (!Properties::emitsBoolean(left) || !Properties::emitsBoolean(right)) { |
| return nullptr; |
| } |
| auto leftEffects = effects(left); |
| auto rightEffects = effects(right); |
| auto leftHasSideEffects = leftEffects.hasSideEffects(); |
| auto rightHasSideEffects = rightEffects.hasSideEffects(); |
| if (leftHasSideEffects && rightHasSideEffects) { |
| return nullptr; // both must execute |
| } |
| // canonicalize with side effects, if any, happening on the left |
| if (rightHasSideEffects) { |
| if (CostAnalyzer(left).cost < MIN_COST) { |
| return nullptr; // avoidable code is too cheap |
| } |
| if (leftEffects.invalidates(rightEffects)) { |
| return nullptr; // cannot reorder |
| } |
| std::swap(left, right); |
| } else if (leftHasSideEffects) { |
| if (CostAnalyzer(right).cost < MIN_COST) { |
| return nullptr; // avoidable code is too cheap |
| } |
| } else { |
| // no side effects, reorder based on cost estimation |
| auto leftCost = CostAnalyzer(left).cost; |
| auto rightCost = CostAnalyzer(right).cost; |
| if (std::max(leftCost, rightCost) < MIN_COST) { |
| return nullptr; // avoidable code is too cheap |
| } |
| // canonicalize with expensive code on the right |
| if (leftCost > rightCost) { |
| std::swap(left, right); |
| } |
| } |
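| // For example, cheap | expensive becomes if (cheap) 1 else expensive, |
| // and cheap & expensive becomes if (cheap) expensive else 0, so the |
| // expensive side is only computed when actually needed. |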
| // worth it! perform conditionalization |
| Builder builder(*getModule()); |
| if (binary->op == OrInt32) { |
| return builder.makeIf( |
| left, builder.makeConst(Literal(int32_t(1))), right); |
| } else { // & |
| return builder.makeIf( |
| left, right, builder.makeConst(Literal(int32_t(0)))); |
| } |
| } |
| |
| // We can combine `or` operations, e.g. |
| // (x > y) | (x == y) ==> x >= y |
| Expression* combineOr(Binary* binary) { |
| assert(binary->op == OrInt32); |
| if (auto* left = binary->left->dynCast<Binary>()) { |
| if (auto* right = binary->right->dynCast<Binary>()) { |
| if (left->op != right->op && |
| ExpressionAnalyzer::equal(left->left, right->left) && |
| ExpressionAnalyzer::equal(left->right, right->right) && |
| !effects(left->left).hasSideEffects() && |
| !effects(left->right).hasSideEffects()) { |
| switch (left->op) { |
| // (x > y) | (x == y) ==> x >= y |
| case EqInt32: { |
| if (right->op == GtSInt32) { |
| left->op = GeSInt32; |
| return left; |
| } |
| break; |
| } |
| default: { |
| } |
| } |
| } |
| } |
| } |
| return nullptr; |
| } |
| |
| // fold a constant pointer and the offset together, into the pointer |
| void optimizeMemoryAccess(Expression*& ptr, Address& offset) { |
| // ptr may be a const, but it isn't worth folding that in (we still have a |
| // const); in fact, it's better to do the opposite for gzip purposes as well |
| // as for readability. |
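| // For example, in a 32-bit memory a pointer of i32.const 16 with offset 8 |
| // becomes a pointer of i32.const 24 with offset 0 (when no overflow can |
| // occur). |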
| auto* last = ptr->dynCast<Const>(); |
| if (last) { |
| uint64_t value64 = last->value.getInteger(); |
| uint64_t offset64 = offset; |
| if (getModule()->memory.is64()) { |
| last->value = Literal(int64_t(value64 + offset64)); |
| offset = 0; |
| } else { |
| // don't do this if it would wrap the pointer |
| if (value64 <= uint64_t(std::numeric_limits<int32_t>::max()) && |
| offset64 <= uint64_t(std::numeric_limits<int32_t>::max()) && |
| value64 + offset64 <= |
| uint64_t(std::numeric_limits<int32_t>::max())) { |
| last->value = Literal(int32_t(value64 + offset64)); |
| offset = 0; |
| } |
| } |
| } |
| } |
| |
| // Optimize a multiply by a power of two on the right, which |
| // can be a shift. |
| // This doesn't shrink code size, and VMs likely optimize it anyhow, |
| // but it's still worth doing since |
| // * Shifts are more common than muls. |
| // * The constant is smaller. |
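| // For example, x * 8 ==> x << 3, since 8 has three trailing zeros. |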
| template<typename T> Expression* optimizePowerOf2Mul(Binary* binary, T c) { |
| static_assert(std::is_same<T, uint32_t>::value || |
| std::is_same<T, uint64_t>::value, |
| "type mismatch"); |
| auto shifts = Bits::countTrailingZeroes(c); |
| binary->op = std::is_same<T, uint32_t>::value ? ShlInt32 : ShlInt64; |
| binary->right->cast<Const>()->value = Literal(static_cast<T>(shifts)); |
| return binary; |
| } |
| |
| // Optimize an unsigned divide / remainder by a power of two on the right |
| // This doesn't shrink code size, and VMs likely optimize it anyhow, |
| // but it's still worth doing since |
| // * Ands are more common than urems. |
| // * The constant is slightly smaller. |
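| // For example, (unsigned)x % 8 ==> x & 7. |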
| template<typename T> Expression* optimizePowerOf2URem(Binary* binary, T c) { |
| static_assert(std::is_same<T, uint32_t>::value || |
| std::is_same<T, uint64_t>::value, |
| "type mismatch"); |
| binary->op = std::is_same<T, uint32_t>::value ? AndInt32 : AndInt64; |
| binary->right->cast<Const>()->value = Literal(c - 1); |
| return binary; |
| } |
| |
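| // Optimize an unsigned divide by a power of two on the right, which |
| // can be a shift right, e.g. (unsigned)x / 8 ==> x >> 3. |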
| template<typename T> Expression* optimizePowerOf2UDiv(Binary* binary, T c) { |
| static_assert(std::is_same<T, uint32_t>::value || |
| std::is_same<T, uint64_t>::value, |
| "type mismatch"); |
| auto shifts = Bits::countTrailingZeroes(c); |
| binary->op = std::is_same<T, uint32_t>::value ? ShrUInt32 : ShrUInt64; |
| binary->right->cast<Const>()->value = Literal(static_cast<T>(shifts)); |
| return binary; |
| } |
| |
| template<typename T> Expression* optimizePowerOf2FDiv(Binary* binary, T c) { |
| // |
| // x / C_pot => x * (C_pot ^ -1) |
| // |
| // Explanation: |
| // Floating point numbers are represented as: |
| // ((-1) ^ sign) * (2 ^ (exp - bias)) * (1 + significand) |
| // |
| // If the number is a power of two, then the mantissa (significand) |
| // is all zeros. Let's focus on the exponent, ignoring the sign part: |
| // (2 ^ (exp - bias)) |
| // |
| // and for inverted power of two floating point: |
| // 1.0 / (2 ^ (exp - bias)) -> 2 ^ -(exp - bias) |
| // |
| // So inversion of C_pot is valid because it changes only the sign |
| // of the exponent part and doesn't touch the significand part, |
| // which remains the same (zeros). |
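| // For example, x / 4.0 ==> x * 0.25, which is exact since 0.25 is itself |
| // a power of two. |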
| static_assert(std::is_same<T, float>::value || |
| std::is_same<T, double>::value, |
| "type mismatch"); |
| double invDivisor = 1.0 / (double)c; |
| binary->op = std::is_same<T, float>::value ? MulFloat32 : MulFloat64; |
| binary->right->cast<Const>()->value = Literal(static_cast<T>(invDivisor)); |
| return binary; |
| } |
| |
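| // Make a zero-extension of the given number of low bits, e.g. for 8 bits |
| // this emits x & 0xff. |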
| Expression* makeZeroExt(Expression* curr, int32_t bits) { |
| Builder builder(*getModule()); |
| return builder.makeBinary( |
| AndInt32, curr, builder.makeConst(Literal(Bits::lowBitMask(bits)))); |
| } |
| |
| // given an "almost" sign extend - either a proper one, or one with too |
| // many left shifts - we remove the sign extend. If there are too many |
| // shifts, we split the shifts first, so this removes the two sign-extend |
| // shifts and adds one smaller shift |
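| // For example, when the caller has shown the extension is unneeded, |
| // (x << 24) >> 24 ==> x, while (x << 27) >> 24 ==> x << 3. |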
| Expression* removeAlmostSignExt(Binary* outer) { |
| auto* inner = outer->left->cast<Binary>(); |
| auto* outerConst = outer->right->cast<Const>(); |
| auto* innerConst = inner->right->cast<Const>(); |
| auto* value = inner->left; |
| if (outerConst->value == innerConst->value) { |
| return value; |
| } |
| // add a shift, by reusing the existing node |
| innerConst->value = innerConst->value.sub(outerConst->value); |
| return inner; |
| } |
| |
| // check if an expression is already sign-extended |
| bool isSignExted(Expression* curr, Index bits) { |
| if (Properties::getSignExtValue(curr)) { |
| return Properties::getSignExtBits(curr) == bits; |
| } |
| if (auto* get = curr->dynCast<LocalGet>()) { |
| // check what we know about the local |
| return localInfo[get->index].signExtedBits == bits; |
| } |
| return false; |
| } |
| |
| // optimize trivial math operations, given that the right side of a binary |
| // is a constant |
| Expression* optimizeWithConstantOnRight(Binary* curr) { |
| using namespace Match; |
| using namespace Abstract; |
| Builder builder(*getModule()); |
| Expression* left; |
| auto* right = curr->right->cast<Const>(); |
| auto type = curr->right->type; |
| |
| // Operations on zero |
| if (matches(curr, binary(Shl, any(&left), ival(0))) || |
| matches(curr, binary(ShrU, any(&left), ival(0))) || |
| matches(curr, binary(ShrS, any(&left), ival(0))) || |
| matches(curr, binary(Or, any(&left), ival(0))) || |
| matches(curr, binary(Xor, any(&left), ival(0)))) { |
| return left; |
| } |
| if (matches(curr, binary(Mul, pure(&left), ival(0))) || |
| matches(curr, binary(And, pure(&left), ival(0)))) { |
| return right; |
| } |
| // x == 0 ==> eqz x |
| if (matches(curr, binary(Eq, any(&left), ival(0)))) { |
| return builder.makeUnary(Abstract::getUnary(type, EqZ), left); |
| } |
| // Operations on one |
| // (signed)x % 1 ==> 0 |
| if (matches(curr, binary(RemS, pure(&left), ival(1)))) { |
| right->value = Literal::makeZero(type); |
| return right; |
| } |
| // (signed)x % C_pot != 0 ==> (x & (abs(C_pot) - 1)) != 0 |
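| // e.g. (x % 8) != 0 ==> (x & 7) != 0, and likewise for a divisor of -8 |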
| { |
| Const* c; |
| Binary* inner; |
| if (matches(curr, |
| binary(Ne, binary(&inner, RemS, any(), ival(&c)), ival(0))) && |
| (c->value.isSignedMin() || |
| Bits::isPowerOf2(c->value.abs().getInteger()))) { |
| inner->op = Abstract::getBinary(c->type, And); |
| if (c->value.isSignedMin()) { |
| c->value = Literal::makeSignedMax(c->type); |
| } else { |
| c->value = c->value.abs().sub(Literal::makeOne(c->type)); |
| } |
| return curr; |
| } |
| } |
| // i32(bool(x)) == 1 ==> i32(bool(x)) |
| // i32(bool(x)) != 0 ==> i32(bool(x)) |
| // i32(bool(x)) & 1 ==> i32(bool(x)) |
| // i64(bool(x)) & 1 ==> i64(bool(x)) |
| if ((matches(curr, binary(EqInt32, any(&left), i32(1))) || |
| matches(curr, binary(NeInt32, any(&left), i32(0))) || |
| matches(curr, binary(And, any(&left), ival(1)))) && |
| Bits::getMaxBits(left, this) == 1) { |
| return left; |
| } |
| // i64(bool(x)) == 1 ==> i32(bool(x)) |
| // i64(bool(x)) != 0 ==> i32(bool(x)) |
| if ((matches(curr, binary(EqInt64, any(&left), i64(1))) || |
| matches(curr, binary(NeInt64, any(&left), i64(0)))) && |
| Bits::getMaxBits(left, this) == 1) { |
| return builder.makeUnary(WrapInt64, left); |
| } |
| // bool(x) != 1 ==> !bool(x) |
| if (matches(curr, binary(Ne, any(&left), ival(1))) && |
| Bits::getMaxBits(left, this) == 1) { |
| return builder.makeUnary(Abstract::getUnary(type, EqZ), left); |
| } |
| // bool(x) ^ 1 ==> !bool(x) |
| if (matches(curr, binary(Xor, any(&left), ival(1))) && |
| Bits::getMaxBits(left, this) == 1) { |
| auto* result = builder.makeUnary(Abstract::getUnary(type, EqZ), left); |
| if (left->type == Type::i64) { |
| // Xor's result is also an i64 in this case, but EqZ returns i32, so we |
| // must expand it so that we keep returning the same value as before. |
| // This means we replace a xor and a const with a xor and an extend, |
| // which is still smaller (the const is 2 bytes, the extend just 1), and |
| // also the extend may be removed by further work. |
| result = builder.makeUnary(ExtendUInt32, result); |
| } |
| return result; |
| } |
| // bool(x) | 1 ==> 1 |
| if (matches(curr, binary(Or, pure(&left), ival(1))) && |
| Bits::getMaxBits(left, this) == 1) { |
| return right; |
| } |
| |
| // Operations on all 1s |
| // x & -1 ==> x |
| if (matches(curr, binary(And, any(&left), ival(-1)))) { |
| return left; |
| } |
| // x | -1 ==> -1 |
| if (matches(curr, binary(Or, pure(&left), ival(-1)))) { |
| return right; |
| } |
| // (signed)x % -1 ==> 0 |
| if (matches(curr, binary(RemS, pure(&left), ival(-1)))) { |
| right->value = Literal::makeZero(type); |
| return right; |
| } |
| // i32(x) / i32.min_s ==> x == i32.min_s |
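| // (the quotient is 1 exactly when x equals i32.min_s, and 0 otherwise) |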
| if (matches( |
| curr, |
| binary(DivSInt32, any(), i32(std::numeric_limits<int32_t>::min())))) { |
| curr->op = EqInt32; |
| return curr; |
| } |
| // i64(x) / i64.min_s ==> i64(x == i64.min_s) |
| // only for zero shrink level |
| if (getPassOptions().shrinkLevel == 0 && |
| matches( |
| curr, |
| binary(DivSInt64, any(), i64(std::numeric_limits<int64_t>::min())))) { |
| curr->op = EqInt64; |
| curr->type = Type::i32; |
| return Builder(*getModule()).makeUnary(ExtendUInt32, curr); |
| } |
| // (unsigned)x < 0 ==> i32(0) |
| if (matches(curr, binary(LtU, pure(&left), ival(0)))) { |
| right->value = Literal::makeZero(Type::i32); |
| right->type = Type::i32; |
| return right; |
| } |
| // (unsigned)x <= -1 ==> i32(1) |
| if (matches(curr, binary(LeU, pure(&left), ival(-1)))) { |
| right->value = Literal::makeOne(Type::i32); |
| right->type = Type::i32; |
| return right; |
| } |
| // (unsigned)x > -1 ==> i32(0) |
| if (matches(curr, binary(GtU, pure(&left), ival(-1)))) { |
| right->value = Literal::makeZero(Type::i32); |
| right->type = Type::i32; |
| return right; |
| } |
| // (unsigned)x >= 0 ==> i32(1) |
| if (matches(curr, binary(GeU, pure(&left), ival(0)))) { |
| right->value = Literal::makeOne(Type::i32); |
| right->type = Type::i32; |
| return right; |
| } |
| // (unsigned)x < -1 ==> x != -1 |
| // This is friendlier for JS emitting, as we don't need to write out an |
| // unsigned -1 value, which is large. |
| if (matches(curr, binary(LtU, any(), ival(-1)))) { |
| curr->op = Abstract::getBinary(type, Ne); |
| return curr; |
| } |
| // (unsigned)x <= 0 ==> x == 0 |
| if (matches(curr, binary(LeU, any(), ival(0)))) { |
| curr->op = Abstract::getBinary(type, Eq); |
| return curr; |
| } |
| // (unsigned)x > 0 ==> x != 0 |
| if (matches(curr, binary(GtU, any(), ival(0)))) { |
| curr->op = Abstract::getBinary(type, Ne); |
| return curr; |
| } |
| // (unsigned)x >= -1 ==> x == -1 |
| if (matches(curr, binary(GeU, any(), ival(-1)))) { |
| curr->op = Abstract::getBinary(type, Eq); |
| return curr; |
| } |
| { |
| Const* c; |
| // (signed)x < (i32|i64).min_s ==> i32(0) |
| if (matches(curr, binary(LtS, pure(&left), ival(&c))) && |
| c->value.isSignedMin()) { |
| right->value = Literal::makeZero(Type::i32); |
| right->type = Type::i32; |
| return right; |
| } |
| // (signed)x <= (i32|i64).max_s ==> i32(1) |
| if (matches(curr, binary(LeS, pure(&left), ival(&c))) && |
| c->value.isSignedMax()) { |
| right->value = Literal::makeOne(Type::i32); |
| right->type = Type::i32; |
| return right; |
| } |
| // (signed)x > (i32|i64).max_s ==> i32(0) |
| if (matches(curr, binary(GtS, pure(&left), ival(&c))) && |
| c->value.isSignedMax()) { |
| right->value = Literal::makeZero(Type::i32); |
| right->type = Type::i32; |
| return right; |
| } |
| // (signed)x >= (i32|i64).min_s ==> i32(1) |
| if (matches(curr, binary(GeS, pure(&left), ival(&c))) && |
| c->value.isSignedMin()) { |
| right->value = Literal::makeOne(Type::i32); |
| right->type = Type::i32; |
| return right; |
| } |
| // (signed)x < (i32|i64).max_s ==> x != (i32|i64).max_s |
| if (matches(curr, binary(LtS, any(), ival(&c))) && |
| c->value.isSignedMax()) { |
| curr->op = Abstract::getBinary(type, Ne); |
| return curr; |
| } |
| // (signed)x <= (i32|i64).min_s ==> x == (i32|i64).min_s |
| if (matches(curr, binary(LeS, any(), ival(&c))) && |
| c->value.isSignedMin()) { |
| curr->op = Abstract::getBinary(type, Eq); |
| return curr; |
| } |
| // (signed)x > (i32|i64).min_s ==> x != (i32|i64).min_s |
| if (matches(curr, binary(GtS, any(), ival(&c))) && |
| c->value.isSignedMin()) { |
| curr->op = Abstract::getBinary(type, Ne); |
| return curr; |
| } |
| // (signed)x >= (i32|i64).max_s ==> x == (i32|i64).max_s |
| if (matches(curr, binary(GeS, any(), ival(&c))) && |
| c->value.isSignedMax()) { |
| curr->op = Abstract::getBinary(type, Eq); |
| return curr; |
| } |
| } |
| // x * -1 ==> 0 - x |
| if (matches(curr, binary(Mul, any(&left), ival(-1)))) { |
| right->value = Literal::makeZero(type); |
| curr->op = Abstract::getBinary(type, Sub); |
| curr->left = right; |
| curr->right = left; |
| return curr; |
| } |
| { |
| // ~(1 << x) aka (1 << x) ^ -1 ==> rotl(-2, x) |
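| // (-2 is all ones except for a zero in the lowest bit; rotating it left |
| // by x moves that zero to bit x, which is exactly ~(1 << x)) |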
| Expression* x; |
| if (matches(curr, binary(Xor, binary(Shl, ival(1), any(&x)), ival(-1)))) { |
| curr->op = Abstract::getBinary(type, RotL); |
| right->value = Literal::makeFromInt32(-2, type); |
| curr->left = right; |
| curr->right = x; |
| return curr; |
| } |
| } |
| { |
| double value; |
| if (matches(curr, binary(Sub, any(), fval(&value))) && value == 0.0) { |
| // x - (-0.0) ==> x + 0.0 |
| if (std::signbit(value)) { |
| curr->op = Abstract::getBinary(type, Add); |
| right->value = right->value.neg(); |
| return curr; |
| } else if (fastMath) { |
| // x - 0.0 ==> x |
| return curr->left; |
| } |
| } |
| } |
| { |
| // x * 2.0 ==> x + x |
| // but we apply this only to simple expressions like |
| // local.get and global.get, to avoid needing an extra local |
| // variable. |
| Expression* x; |
| if (matches(curr, binary(Mul, any(&x), fval(2.0))) && |
| (x->is<LocalGet>() || x->is<GlobalGet>())) { |
| curr->op = Abstract::getBinary(type, Abstract::Add); |
| curr->right = ExpressionManipulator::copy(x, *getModule()); |
| return curr; |
| } |
| } |
| { |
| // x + (-0.0) ==> x |
| double value; |
| if (fastMath && matches(curr, binary(Add, any(), fval(&value))) && |
| value == 0.0 && std::signbit(value)) { |
| return curr->left; |
| } |
| } |
| // -x * fval(C) ==> x * -C |
| // -x / fval(C) ==> x / -C |
| if (matches(curr, binary(Mul, unary(Neg, any(&left)), fval())) || |
| matches(curr, binary(DivS, unary(Neg, any(&left)), fval()))) { |
| right->value = right->value.neg(); |
| curr->left = left; |
| return curr; |
| } |
| // x * -1.0 ==> |
| // -x, if fastMath == true |
| // -0.0 - x, if fastMath == false |
| if (matches(curr, binary(Mul, any(), fval(-1.0)))) { |
| if (fastMath) { |
| return builder.makeUnary(Abstract::getUnary(type, Neg), left); |
| } |
| // x * -1.0 ==> -0.0 - x |
| curr->op = Abstract::getBinary(type, Sub); |
| right->value = Literal::makeZero(type).neg(); |
| std::swap(curr->left, curr->right); |
| return curr; |
| } |
| if (matches(curr, binary(Mul, any(&left), constant(1))) || |
| matches(curr, binary(DivS, any(&left), constant(1))) || |
| matches(curr, binary(DivU, any(&left), constant(1)))) { |
| if (curr->type.isInteger() || fastMath) { |
| return left; |
| } |
| } |
| return nullptr; |
| } |
| |
| // optimize trivial math operations, given that the left side of a binary |
| // is a constant. since we canonicalize constants to the right for symmetrical |
| // operations, we only need to handle asymmetrical ones here |
| // TODO: templatize on type? |
| Expression* optimizeWithConstantOnLeft(Binary* curr) { |
| using namespace Match; |
| using namespace Abstract; |
| |
| auto type = curr->left->type; |
| auto* left = curr->left->cast<Const>(); |
| // 0 <<>> x ==> 0 |
| if (Abstract::hasAnyShift(curr->op) && left->value.isZero() && |
| !effects(curr->right).hasSideEffects()) { |
| return curr->left; |
| } |
| // (signed)-1 >> x ==> -1 |
| // rotl(-1, x) ==> -1 |
| // rotr(-1, x) ==> -1 |
| if ((curr->op == Abstract::getBinary(type, ShrS) || |
| curr->op == Abstract::getBinary(type, RotL) || |
| curr->op == Abstract::getBinary(type, RotR)) && |
| left->value.getInteger() == -1LL && |
| !effects(curr->right).hasSideEffects()) { |
| return curr->left; |
| } |
| { |
| // C1 - (x + C2) ==> (C1 - C2) - x |
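| // e.g. 10 - (x + 3) ==> 7 - x |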
| Const *c1, *c2; |
| Expression* x; |
| if (matches(curr, |
| binary(Sub, ival(&c1), binary(Add, any(&x), ival(&c2))))) { |
| left->value = c1->value.sub(c2->value); |
| curr->right = x; |
| return curr; |
| } |
| // C1 - (C2 - x) ==> x + (C1 - C2) |
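| // e.g. 10 - (3 - x) ==> x + 7 |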
| if (matches(curr, |
| binary(Sub, ival(&c1), binary(Sub, ival(&c2), any(&x))))) { |
| left->value = c1->value.sub(c2->value); |
| curr->op = Abstract::getBinary(type, Add); |
| curr->right = x; |
| std::swap(curr->left, curr->right); |
| return curr; |
| } |
| } |
| { |
| // fval(C) / -x ==> -C / x |
| Expression* right; |
| if (matches(curr, binary(DivS, fval(), unary(Neg, any(&right))))) { |
| left->value = left->value.neg(); |
| curr->right = right; |
| return curr; |
| } |
| } |
| return nullptr; |
| } |
| |
| // TODO: templatize on type? |
| Expression* optimizeRelational(Binary* curr) { |
| auto type = curr->right->type; |
| if (curr->left->type.isInteger()) { |
| if (curr->op == Abstract::getBinary(type, Abstract::Eq) || |
| curr->op == Abstract::getBinary(type, Abstract::Ne)) { |
| if (auto* left = curr->left->dynCast<Binary>()) { |
| // Integer math, even on 2's complement, allows folding like |
| // x + 5 == 7 |
| // => |
| // x == 2 |
| // TODO: inequalities can also work, if the constants do not overflow. |
| if (left->op == Abstract::getBinary(type, Abstract::Add)) { |
| if (auto* leftConst = left->right->dynCast<Const>()) { |
| if (auto* rightConst = curr->right->dynCast<Const>()) { |
| return combineRelationalConstants( |
| curr, left, leftConst, nullptr, rightConst); |
| } else if (auto* rightBinary = curr->right->dynCast<Binary>()) { |
| if (rightBinary->op == |
| Abstract::getBinary(type, Abstract::Add)) { |
| if (auto* rightConst = rightBinary->right->dynCast<Const>()) { |
| return combineRelationalConstants( |
| curr, left, leftConst, rightBinary, rightConst); |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| // x - y == 0 => x == y |
| // x - y != 0 => x != y |
| // unsigned(x - y) > 0 => x != y |
| // unsigned(x - y) <= 0 => x == y |
| { |
| using namespace Abstract; |
| using namespace Match; |
| |
| Binary* inner; |
| // unsigned(x - y) > 0 => x != y |
| if (matches(curr, |
| binary(GtU, binary(&inner, Sub, any(), any()), ival(0)))) { |
| curr->op = Abstract::getBinary(type, Ne); |
| curr->right = inner->right; |
| curr->left = inner->left; |
| return curr; |
| } |
| // unsigned(x - y) <= 0 => x == y |
| if (matches(curr, |
| binary(LeU, binary(&inner, Sub, any(), any()), ival(0)))) { |
| curr->op = Abstract::getBinary(type, Eq); |
| curr->right = inner->right; |
| curr->left = inner->left; |
| return curr; |
| } |
| // x - y == 0 => x == y |
| // x - y != 0 => x != y |
| // This is not true for signed comparisons like x - y < 0 due to overflow |
| // effects (e.g. 8 - 0x80000000 < 0 is not the same as 8 < 0x80000000). |
| if (matches(curr, |
| binary(Eq, binary(&inner, Sub, any(), any()), ival(0))) || |
| matches(curr, |
| binary(Ne, binary(&inner, Sub, any(), any()), ival(0)))) { |
| curr->right = inner->right; |
| curr->left = inner->left; |
| return curr; |
| } |
| } |
| } |
| return nullptr; |
| } |
| |
| Expression* deduplicateUnary(Unary* unaryOuter) { |
| if (auto* unaryInner = unaryOuter->value->dynCast<Unary>()) { |
| if (unaryInner->op == unaryOuter->op) { |
| switch (unaryInner->op) { |
| case NegFloat32: |
| case NegFloat64: { |
| // neg(neg(x)) ==> x |
| return unaryInner->value; |
| } |
| case AbsFloat32: |
| case CeilFloat32: |
| case FloorFloat32: |
| case TruncFloat32: |
| case NearestFloat32: |
| case AbsFloat64: |
| case CeilFloat64: |
| case FloorFloat64: |
| case TruncFloat64: |
| case NearestFloat64: { |
| // unaryOp(unaryOp(x)) ==> unaryOp(x) |
| return unaryInner; |
| } |
| case ExtendS8Int32: |
| case ExtendS16Int32: { |
| assert(getModule()->features.hasSignExt()); |
| return unaryInner; |
| } |
| case EqZInt32: { |
| // eqz(eqz(bool(x))) ==> bool(x) |
| if (Bits::getMaxBits(unaryInner->value, this) == 1) { |
| return unaryInner->value; |
| } |
| break; |
| } |
| default: { |
| } |
| } |
| } |
| } |
| return nullptr; |
| } |
| |
| Expression* deduplicateBinary(Binary* outer) { |
| Type type = outer->type; |
| if (type.isInteger()) { |
| if (auto* inner = outer->right->dynCast<Binary>()) { |
| if (outer->op == inner->op) { |
| if (!EffectAnalyzer( |
| getPassOptions(), getModule()->features, outer->left) |
| .hasSideEffects()) { |
| if (ExpressionAnalyzer::equal(inner->left, outer->left)) { |
| // x - (x - y) ==> y |
| // x ^ (x ^ y) ==> y |
| if (outer->op == Abstract::getBinary(type, Abstract::Sub) || |
| outer->op == Abstract::getBinary(type, Abstract::Xor)) { |
| return inner->right; |
| } |
| // x & (x & y) ==> x & y |
| // x | (x | y) ==> x | y |
| if (outer->op == Abstract::getBinary(type, Abstract::And) || |
| outer->op == Abstract::getBinary(type, Abstract::Or)) { |
| return inner; |
| } |
| } |
| if (ExpressionAnalyzer::equal(inner->right, outer->left) && |
| canReorder(outer->left, inner->left)) { |
| // x ^ (y ^ x) ==> y |
| // (note that we need the check for reordering here because if |
| // e.g. y writes to a local that x reads, the second appearance |
| // of x would be different from the first) |
| if (outer->op == Abstract::getBinary(type, Abstract::Xor)) { |
| return inner->left; |
| } |
| |
| // x & (y & x) ==> y & x |
| // x | (y | x) ==> y | x |
| // (here we need the check for reordering for the more obvious |
| // reason that previously x appeared before y, and now y appears |
| // first; or, if we tried to emit x [&|] y here, reversing the |
| // order, we'd be in the same situation as the previous comment) |
| if (outer->op == Abstract::getBinary(type, Abstract::And) || |
| outer->op == Abstract::getBinary(type, Abstract::Or)) { |
| return inner; |
| } |
| } |
| } |
| } |
| } |
| if (auto* inner = outer->left->dynCast<Binary>()) { |
| if (outer->op == inner->op) { |
| if (!EffectAnalyzer( |
| getPassOptions(), getModule()->features, outer->right) |
| .hasSideEffects()) { |
| if (ExpressionAnalyzer::equal(inner->right, outer->right)) { |
| // (x ^ y) ^ y ==> x |
| if (outer->op == Abstract::getBinary(type, Abstract::Xor)) { |
| return inner->left; |
| } |
| // (x % y) % y ==> x % y |
| // (x & y) & y ==> x & y |
| // (x | y) | y ==> x | y |
| if (outer->op == Abstract::getBinary(type, Abstract::RemS) || |
| outer->op == Abstract::getBinary(type, Abstract::RemU) || |
| outer->op == Abstract::getBinary(type, Abstract::And) || |
| outer->op == Abstract::getBinary(type, Abstract::Or)) { |
| return inner; |
| } |
| } |
| // See comments in the parallel code earlier about ordering here. |
| if (ExpressionAnalyzer::equal(inner->left, outer->right) && |
| canReorder(inner->left, inner->right)) { |
| // (x ^ y) ^ x ==> y |
| if (outer->op == Abstract::getBinary(type, Abstract::Xor)) { |
| return inner->right; |
| } |
| // (x & y) & x ==> x & y |
| // (x | y) | x ==> x | y |
| if (outer->op == Abstract::getBinary(type, Abstract::And) || |
| outer->op == Abstract::getBinary(type, Abstract::Or)) { |
| return inner; |
| } |
| } |
| } |
| } |
| } |
| } |
| return nullptr; |
| } |
| |
| // given a relational binary with a const on both sides, combine the |
| // constants. left is also a binary, and has a constant; right may be just |
| // a constant, in which case the right binary is nullptr |
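| // For example, (x + 5) == 7 ==> x == 2, and (x - 5) == 7 ==> x == 12. |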
| Expression* combineRelationalConstants(Binary* binary, |
| Binary* left, |
| Const* leftConst, |
| Binary* right, |
| Const* rightConst) { |
| auto type = binary->right->type; |
| // we fold constants to the right |
| Literal extra = leftConst->value; |
| if (left->op == Abstract::getBinary(type, Abstract::Sub)) { |
| extra = extra.neg(); |
| } |
| if (right && right->op == Abstract::getBinary(type, Abstract::Sub)) { |
| extra = extra.neg(); |
| } |
| rightConst->value = rightConst->value.sub(extra); |
| binary->left = left->left; |
| return binary; |
| } |
| |
| Expression* optimizeMemoryCopy(MemoryCopy* memCopy) { |
| PassOptions options = getPassOptions(); |
| |
| if (options.ignoreImplicitTraps) { |
| if (ExpressionAnalyzer::equal(memCopy->dest, memCopy->source)) { |
| // memory.copy(x, x, sz) ==> {drop(x), drop(x), drop(sz)} |
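| // (the drops preserve any side effects in the operands) |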
| Builder builder(*getModule()); |
| return builder.makeBlock({builder.makeDrop(memCopy->dest), |
| builder.makeDrop(memCopy->source), |
| builder.makeDrop(memCopy->size)}); |
| } |
| } |
| |
| // memory.copy(dst, src, C) ==> store(dst, load(src)) |
| if (auto* csize = memCopy->size->dynCast<Const>()) { |
| auto bytes = csize->value.geti32(); |
| Builder builder(*getModule()); |
| |
| switch (bytes) { |
| case 0: { |
| if (options.ignoreImplicitTraps) { |
| // memory.copy(dst, src, 0) ==> {drop(dst), drop(src)} |
| return builder.makeBlock({builder.makeDrop(memCopy->dest), |
| builder.makeDrop(memCopy->source)}); |
| } |
| break; |
| } |
| case 1: |
| case 2: |
| case 4: { |
| return builder.makeStore( |
| bytes, // bytes |
| 0, // offset |
| 1, // align |
| memCopy->dest, |
| builder.makeLoad(bytes, false, 0, 1, memCopy->source, Type::i32), |
| Type::i32); |
| } |
| case 8: { |
| return builder.makeStore( |
| bytes, // bytes |
| 0, // offset |
| 1, // align |
| memCopy->dest, |
| builder.makeLoad(bytes, false, 0, 1, memCopy->source, Type::i64), |
| Type::i64); |
| } |
| case 16: { |
| if (options.shrinkLevel == 0) { |
| // This adds an extra 2 bytes, so apply it only when we are |
| // not optimizing for size (shrink level 0) |
| if (getModule()->features.hasSIMD()) { |
| return builder.makeStore( |
| bytes, // bytes |
| 0, // offset |
| 1, // align |
| memCopy->dest, |
| builder.makeLoad( |
| bytes, false, 0, 1, memCopy->source, Type::v128), |
| Type::v128); |
| } |
| } |
| break; |
| } |
| default: { |
| } |
| } |
| } |
| return nullptr; |
| } |
| |
| // given a binary expression with equal children and no side effects in |
| // either, we can fold various things |
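| // For example, x - x ==> 0, x < x ==> 0, x & x ==> x, and x == x ==> 1. |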
| Expression* optimizeBinaryWithEqualEffectlessChildren(Binary* binary) { |
| // TODO: for adds, perhaps worth emitting 2 * x if x is quite large? |
| switch (binary->op) { |
| case SubInt32: |
| case XorInt32: |
| case SubInt64: |
| case XorInt64: |
| return LiteralUtils::makeZero(binary->left->type, *getModule()); |
| case NeInt32: |
| case LtSInt32: |
| case LtUInt32: |
| case GtSInt32: |
| case GtUInt32: |
| case NeInt64: |
| case LtSInt64: |
| case LtUInt64: |
| case GtSInt64: |
| case GtUInt64: |
| return LiteralUtils::makeZero(Type::i32, *getModule()); |
| case AndInt32: |
| case OrInt32: |
| case AndInt64: |
| case OrInt64: |
| return binary->left; |
| case EqInt32: |
| case LeSInt32: |
| case LeUInt32: |
| case GeSInt32: |
| case GeUInt32: |
| case EqInt64: |
| case LeSInt64: |
| case LeUInt64: |
| case GeSInt64: |
| case GeUInt64: |
| return LiteralUtils::makeFromInt32(1, Type::i32, *getModule()); |
| default: |
| return nullptr; |
| } |
| } |
| |
| BinaryOp invertBinaryOp(BinaryOp op) { |
| // get the logically-inverted comparison, e.g. !(x < y) ==> x >= y. Note |
| // that ordered float comparisons are not handled, as NaN makes them |
| // non-invertible (only Eq/Ne invert cleanly for floats). |
| switch (op) { |
| case EqInt32: |
| return NeInt32; |
| case NeInt32: |
| return EqInt32; |
| case LtSInt32: |
| return GeSInt32; |
| case LtUInt32: |
| return GeUInt32; |
| case LeSInt32: |
| return GtSInt32; |
| case LeUInt32: |
| return GtUInt32; |
| case GtSInt32: |
| return LeSInt32; |
| case GtUInt32: |
| return LeUInt32; |
| case GeSInt32: |
| return LtSInt32; |
| case GeUInt32: |
| return LtUInt32; |
| |
| case EqInt64: |
| return NeInt64; |
| case NeInt64: |
| return EqInt64; |
| case LtSInt64: |
| return GeSInt64; |
| case LtUInt64: |
| return GeUInt64; |
| case LeSInt64: |
| return GtSInt64; |
| case LeUInt64: |
| return GtUInt64; |
| case GtSInt64: |
| return LeSInt64; |
| case GtUInt64: |
| return LeUInt64; |
| case GeSInt64: |
| return LtSInt64; |
| case GeUInt64: |
| return LtUInt64; |
| |
| case EqFloat32: |
| return NeFloat32; |
| case NeFloat32: |
| return EqFloat32; |
| |
| case EqFloat64: |
| return NeFloat64; |
| case NeFloat64: |
| return EqFloat64; |
| |
| default: |
| return InvalidBinary; |
| } |
| } |
| |
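| // Flip the direction of a relational op so that its operands can be |
| // swapped, e.g. x < y has the same value as y > x. Unlike inversion, |
| // this is also safe for floats. |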
| BinaryOp reverseRelationalOp(BinaryOp op) { |
| switch (op) { |
| case EqInt32: |
| return EqInt32; |
| case NeInt32: |
| return NeInt32; |
| case LtSInt32: |
| return GtSInt32; |
| case LtUInt32: |
| return GtUInt32; |
| case LeSInt32: |
| return GeSInt32; |
| case LeUInt32: |
| return GeUInt32; |
| case GtSInt32: |
| return LtSInt32; |
| case GtUInt32: |
| return LtUInt32; |
| case GeSInt32: |
| return LeSInt32; |
| case GeUInt32: |
| return LeUInt32; |
| |
| case EqInt64: |
| return EqInt64; |
| case NeInt64: |
| return NeInt64; |
| case LtSInt64: |
| return GtSInt64; |
| case LtUInt64: |
| return GtUInt64; |
| case LeSInt64: |
| return GeSInt64; |
| case LeUInt64: |
| return GeUInt64; |
| case GtSInt64: |
| return LtSInt64; |
| case GtUInt64: |
| return LtUInt64; |
| case GeSInt64: |
| return LeSInt64; |
| case GeUInt64: |
| return LeUInt64; |
| |
| case EqFloat32: |
| return EqFloat32; |
| case NeFloat32: |
| return NeFloat32; |
| case LtFloat32: |
| return GtFloat32; |
| case LeFloat32: |
| return GeFloat32; |
| case GtFloat32: |
| return LtFloat32; |
| case GeFloat32: |
| return LeFloat32; |
| |
| case EqFloat64: |
| return EqFloat64; |
| case NeFloat64: |
| return NeFloat64; |
| case LtFloat64: |
| return GtFloat64; |
| case LeFloat64: |
| return GeFloat64; |
| case GtFloat64: |
| return LtFloat64; |
| case GeFloat64: |
| return LeFloat64; |
| |
| default: |
| return InvalidBinary; |
| } |
| } |
| |
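| // Get the unsigned counterpart of a signed operation, e.g. signed |
| // division becomes unsigned division, or InvalidBinary if there is none. |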
| BinaryOp makeUnsignedBinaryOp(BinaryOp op) { |
| switch (op) { |
| case DivSInt32: |
| return DivUInt32; |
| case RemSInt32: |
| return RemUInt32; |
| case ShrSInt32: |
| return ShrUInt32; |
| case LtSInt32: |
| return LtUInt32; |
| case LeSInt32: |
| return LeUInt32; |
| case GtSInt32: |
| return GtUInt32; |
| case GeSInt32: |
| return GeUInt32; |
| |
| case DivSInt64: |
| return DivUInt64; |
| case RemSInt64: |
| return RemUInt64; |
| case ShrSInt64: |
| return ShrUInt64; |
| case LtSInt64: |
| return LtUInt64; |
| case LeSInt64: |
| return LeUInt64; |
| case GtSInt64: |
| return GtUInt64; |
| case GeSInt64: |
| return GeUInt64; |
| |
| default: |
| return InvalidBinary; |
| } |
| } |
| |
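| // Whether we can safely reorder this binary's operands into canonical |
| // form (e.g. a constant on the right), which later patterns rely on. |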
| bool shouldCanonicalize(Binary* binary) { |
| if ((binary->op == SubInt32 || binary->op == SubInt64) && |
| binary->right->is<Const>() && !binary->left->is<Const>()) { |
| return true; |
| } |
| if (Properties::isSymmetric(binary) || binary->isRelational()) { |
| return true; |
| } |
| switch (binary->op) { |
| case AddFloat32: |
| case MulFloat32: |
| case AddFloat64: |
| case MulFloat64: { |
| // If the LHS is known to be non-NaN, the operands can commute. |
| // We don't care about the RHS because right now we only know if |
| // an expression is non-NaN if it is constant, but if the RHS is |
| // constant, then this expression is already canonicalized. |
| if (auto* c = binary->left->dynCast<Const>()) { |
| return !c->value.isNaN(); |
| } |
| return false; |
| } |
| default: |
| return false; |
| } |
| } |
| |
| // Optimize an if-else or a select, something with a condition and two |
| // arms with outputs. |
| template<typename T> void optimizeTernary(T* curr) { |
| using namespace Abstract; |
| using namespace Match; |
| Builder builder(*getModule()); |
| |
| // If one arm is an operation and the other is an appropriate constant, we |
| // can move the operation outside (where it may be further optimized), e.g. |
| // |
| // (select |
| // (i32.eqz (X)) |
| // (i32.const 0|1) |
| // (Y) |
| // ) |
| // => |
| // (i32.eqz |
| // (select |
| // (X) |
| // (i32.const 1|0) |
| // (Y) |
| // ) |
| // ) |
| // |
| // Ignore unreachable code here; leave that for DCE. |
| if (curr->type != Type::unreachable && |
| curr->ifTrue->type != Type::unreachable && |
| curr->ifFalse->type != Type::unreachable) { |
| Unary* un; |
| Expression* x; |
| Const* c; |
| auto check = [&](Expression* a, Expression* b) { |
| if (matches(a, unary(&un, EqZ, any(&x))) && matches(b, ival(&c))) { |
| auto value = c->value.getInteger(); |
| return value == 0 || value == 1; |
| } |
| return false; |
| }; |
| if (check(curr->ifTrue, curr->ifFalse) || |
| check(curr->ifFalse, curr->ifTrue)) { |
| // The new type of curr will be that of the value of the unary, as after |
| // we move the unary out, its value is curr's direct child. |
| auto newType = un->value->type; |
| auto updateArm = [&](Expression* arm) -> Expression* { |
| if (arm == un) { |
| // This is the arm that had the eqz, which we need to remove. |
| return un->value; |
| } else { |
| // This is the arm with the constant, which we need to flip. |
| // Note that we also need to set the type to match the other arm. |
| c->value = |
| Literal::makeFromInt32(1 - c->value.getInteger(), newType); |
| c->type = newType; |
| return c; |
| } |
| }; |
| curr->ifTrue = updateArm(curr->ifTrue); |
| curr->ifFalse = updateArm(curr->ifFalse); |
| un->value = curr; |
| curr->finalize(newType); |
| return replaceCurrent(un); |
| } |
| } |
| |
| { |
| // Identical code on both arms can be folded out, e.g. |
| // |
| // (select |
| // (i32.eqz (X)) |
| // (i32.eqz (Y)) |
| // (Z) |
| // ) |
| // => |
| // (i32.eqz |
| // (select |
| // (X) |
| // (Y) |
| // (Z) |
| // ) |
| // ) |
| // |
| // Continue doing this while we can, noting the chain of moved expressions |
| // as we go, then do a single replaceCurrent() at the end. |
| SmallVector<Expression*, 1> chain; |
| while (1) { |
| // Ignore control flow structures (which are handled in MergeBlocks). |
| if (!Properties::isControlFlowStructure(curr->ifTrue) && |
| ExpressionAnalyzer::shallowEqual(curr->ifTrue, curr->ifFalse)) { |
| // TODO: consider the case with more than one child. |
| ChildIterator ifTrueChildren(curr->ifTrue); |
| if (ifTrueChildren.children.size() == 1) { |
| // ifTrue and ifFalse's children will become the direct children of |
| // curr, and so there must be an LUB for curr to have a proper new |
| // type after the transformation. |
| // |
| // An example where that does not happen is this: |
| // |
| // (if |
| // (condition) |
| // (drop (i32.const 1)) |
| // (drop (f64.const 2.0)) |
| // ) |
| ChildIterator ifFalseChildren(curr->ifFalse); |
| auto* ifTrueChild = *ifTrueChildren.begin(); |
| auto* ifFalseChild = *ifFalseChildren.begin(); |
| bool validTypes = |
| Type::hasLeastUpperBound(ifTrueChild->type, ifFalseChild->type); |
| |
| // In addition, after we move code outside of curr, we must not change |
| // unreachability - if we did, we'd need to propagate that further, and |
| // we leave such work to DCE and Vacuum anyhow. |
| // This can happen in something like this for example, where the |
| // outer type changes from i32 to unreachable if we move the |
| // returns outside: |
| // |
| // (if (result i32) |
| // (local.get $x) |
| // (return |
| // (local.get $y) |
| // ) |
| // (return |
| // (local.get $z) |
| // ) |
| // ) |
| assert(curr->ifTrue->type == curr->ifFalse->type); |
| auto newOuterType = curr->ifTrue->type; |
| if ((newOuterType == Type::unreachable) != |
| (curr->type == Type::unreachable)) { |
| validTypes = false; |
| } |
| |
| // If the expression we are about to move outside has side effects, |
| // then we cannot do so in general with a select: we'd be reducing |
| // the amount of the effects as well as moving them. For an if, |
| // the side effects execute once, so there is no problem. |
| // TODO: handle certain side effects when possible in select |
| bool validEffects = std::is_same<T, If>::value || |
| !ShallowEffectAnalyzer(getPassOptions(), |
| getModule()->features, |
| curr->ifTrue) |
| .hasSideEffects(); |
| |
| if (validTypes && validEffects) { |
| // Replace ifTrue with its child. |
| curr->ifTrue = ifTrueChild; |
| // Replace ifFalse with its child, and reuse that node outside. |
| auto* reuse = curr->ifFalse; |
| curr->ifFalse = ifFalseChild; |
| // curr's type may have changed, if the instructions we moved out |
| // had different input types than output types. |
| curr->finalize(); |
| // Point to curr from the code that is now outside of it. |
| *ChildIterator(reuse).begin() = curr; |
| if (!chain.empty()) { |
| // We've already moved things out, so chain them to there. That |
| // is, the end of the chain should now point to reuse (which |
| // in turn already points to curr). |
| *ChildIterator(chain.back()).begin() = reuse; |
| } |
| chain.push_back(reuse); |
| continue; |
| } |
| } |
| } |
| break; |
| } |
| if (!chain.empty()) { |
| // The beginning of the chain is the new top parent. |
| return replaceCurrent(chain[0]); |
| } |
| } |
| } |
| }; |
| |
| Pass* createOptimizeInstructionsPass() { return new OptimizeInstructions; } |
| |
| } // namespace wasm |