| /* |
| * Copyright 2016 WebAssembly Community Group participants |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // |
| // Memory Packing. |
| // |
| // Reduces binary size by splitting data segments around ranges of zeros. This |
| // pass assumes that memory initialized by active segments is zero on |
| // instantiation and therefore simply drops the zero ranges from the active |
| // segments. For passive segments, we perform the same splitting, but we also |
| // record how each segment was split and update all instructions that use it |
| // accordingly. To preserve trapping semantics for memory.init instructions, it |
| // is sometimes necessary to explicitly track, in globals, whether the input |
| // segments would have been dropped. We are careful to emit only as many of |
| // these globals as necessary. |
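| // |
| // For example (an illustrative sketch): an active segment whose data is |
| // "abc", followed by a long run of zero bytes, followed by "xyz", can be |
| // packed into two segments, one holding "abc" and one holding "xyz" at the |
| // appropriate offset, since the dropped middle bytes are already guaranteed |
| // to be zero in the initial memory. |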
| // |
| |
| #include "ir/manipulation.h" |
| #include "ir/module-utils.h" |
| #include "ir/names.h" |
| #include "ir/utils.h" |
| #include "pass.h" |
| #include "support/space.h" |
| #include "support/stdckdint.h" |
| #include "wasm-binary.h" |
| #include "wasm-builder.h" |
| #include "wasm-limits.h" |
| #include "wasm.h" |
| |
| namespace wasm { |
| |
| namespace { |
| |
| // A subsection of an original memory segment. If `isZero` is true, memory.fill |
| // will be used instead of memory.init for this range. |
| struct Range { |
| bool isZero; |
| // The range [start, end) - that is, start is included while end is not. |
| size_t start; |
| size_t end; |
| }; |
| |
| // A function that produces the transformed instruction. We need to use a |
| // function here instead of simple data because the replacement code sequence |
| // may require allocating new locals, which in turn requires the enclosing |
| // Function, which is only available in the parallelized instruction replacement |
| // phase. However, we can't move the entire calculation of replacement code |
| // sequences into the parallel phase because the lowering of data.drops depends |
| // on the lowering of memory.inits to determine whether a drop state global is |
| // necessary. The solution is that we calculate the shape of the replacement |
| // code sequence up front and use a closure just to allocate and insert new |
| // locals as necessary. |
| using Replacement = std::function<Expression*(Function*)>; |
| |
| // Maps each instruction to the replacement that must be applied to it. |
| using Replacements = std::unordered_map<Expression*, Replacement>; |
| |
| // A collection of instructions referring to a particular segment. |
| using Referrers = std::vector<Expression*>; |
| |
| // Map segment indices to referrers. |
| using ReferrersMap = std::unordered_map<Name, Referrers>; |
| |
| // memory.init: 2 byte opcode + 1 byte segment index + 1 byte memory index + |
| // 3 x 2 byte operands |
| const size_t MEMORY_INIT_SIZE = 10; |
| |
| // memory.fill: 2 byte opcode + 1 byte memory index + 3 x 2 byte operands |
| const size_t MEMORY_FILL_SIZE = 9; |
| |
| // data.drop: 2 byte opcode + ~1 byte index immediate |
| const size_t DATA_DROP_SIZE = 3; |
| |
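| // Build the expression `dest > (memory.size << 16)`, i.e. a check of whether |
| // the destination address lies beyond the current memory size in bytes |
| // (pages are 64 KiB, hence the shift by 16). |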
| Expression* |
| makeGtShiftedMemorySize(Builder& builder, Module& module, MemoryInit* curr) { |
| auto mem = module.getMemory(curr->memory); |
| return builder.makeBinary( |
| mem->is64() ? GtUInt64 : GtUInt32, |
| curr->dest, |
| builder.makeBinary(mem->is64() ? ShlInt64 : ShlInt32, |
| builder.makeMemorySize(mem->name), |
| builder.makeConstPtr(16, mem->indexType))); |
| } |
| |
| } // anonymous namespace |
| |
| struct MemoryPacking : public Pass { |
| // This pass operates on linear memory, and does not affect reference locals. |
| // TODO: don't run at all if the module has no memories |
| bool requiresNonNullableLocalFixups() override { return false; } |
| |
| void run(Module* module) override; |
| bool canOptimize(std::vector<std::unique_ptr<Memory>>& memories, |
| std::vector<std::unique_ptr<DataSegment>>& dataSegments); |
| void optimizeSegmentOps(Module* module); |
| void getSegmentReferrers(Module* module, ReferrersMap& referrers); |
| void dropUnusedSegments(Module* module, |
| std::vector<std::unique_ptr<DataSegment>>& segments, |
| ReferrersMap& referrers); |
| bool canSplit(const std::unique_ptr<DataSegment>& segment, |
| const Referrers& referrers); |
| void calculateRanges(Module* module, |
| const std::unique_ptr<DataSegment>& segment, |
| const Referrers& referrers, |
| std::vector<Range>& ranges); |
| void createSplitSegments(Builder& builder, |
| const DataSegment* segment, |
| std::vector<Range>& ranges, |
| std::vector<std::unique_ptr<DataSegment>>& packed, |
| size_t segmentsRemaining); |
| void createReplacements(Module* module, |
| const std::vector<Range>& ranges, |
| const std::vector<Name>& segments, |
| const Referrers& referrers, |
| Replacements& replacements); |
| void replaceSegmentOps(Module* module, Replacements& replacements); |
| }; |
| |
| void MemoryPacking::run(Module* module) { |
| // Does not have multimemory support |
| if (!canOptimize(module->memories, module->dataSegments)) { |
| return; |
| } |
| |
| bool canHaveSegmentReferrers = |
| module->features.hasBulkMemory() || module->features.hasGC(); |
| |
| auto& segments = module->dataSegments; |
| |
| // For each segment, a list of instructions that refer to it |
| ReferrersMap referrers; |
| |
| if (canHaveSegmentReferrers) { |
| // Optimize out memory.inits and data.drops that can be entirely replaced |
| // with other instruction sequences. This can increase the number of unused |
| // segments that can be dropped entirely and allows later replacement |
| // creation to make more assumptions about what these instructions will look |
| // like, such as memory.inits not having both zero offset and size. |
| optimizeSegmentOps(module); |
| getSegmentReferrers(module, referrers); |
| dropUnusedSegments(module, segments, referrers); |
| } |
| |
| // The new, split memory segments |
| std::vector<std::unique_ptr<DataSegment>> packed; |
| |
| Replacements replacements; |
| Builder builder(*module); |
| for (size_t index = 0; index < segments.size(); ++index) { |
| auto& segment = segments[index]; |
| auto& currReferrers = referrers[segment->name]; |
| |
| std::vector<Range> ranges; |
| |
| if (canSplit(segment, currReferrers)) { |
| calculateRanges(module, segment, currReferrers, ranges); |
| } else { |
| // A single range covers the entire segment. Set isZero to false so the |
| // original memory.init will be used even if the segment is all zeroes. |
| ranges.push_back({false, 0, segment->data.size()}); |
| } |
| |
| size_t segmentsRemaining = segments.size() - index; |
| size_t currSegmentsStart = packed.size(); |
| createSplitSegments( |
| builder, segment.get(), ranges, packed, segmentsRemaining); |
| std::vector<Name> currSegmentNames; |
| for (size_t i = currSegmentsStart; i < packed.size(); ++i) { |
| currSegmentNames.push_back(packed[i]->name); |
| } |
| createReplacements( |
| module, ranges, currSegmentNames, currReferrers, replacements); |
| } |
| |
| segments.swap(packed); |
| module->updateDataSegmentsMap(); |
| |
| if (canHaveSegmentReferrers) { |
| replaceSegmentOps(module, replacements); |
| } |
| } |
| |
| bool MemoryPacking::canOptimize( |
| std::vector<std::unique_ptr<Memory>>& memories, |
| std::vector<std::unique_ptr<DataSegment>>& dataSegments) { |
| if (memories.empty() || memories.size() > 1) { |
| return false; |
| } |
| auto& memory = memories[0]; |
| // We must optimize under the assumption that memory has been initialized to |
| // zero. That is the case for a memory declared in the module, but for a |
| // memory that is imported, we must be told that it is zero-initialized. |
| if (memory->imported() && !getPassOptions().zeroFilledMemory) { |
| return false; |
| } |
| |
| // One segment is always ok to optimize, as it does not have the potential |
| // problems handled below. |
| if (dataSegments.size() <= 1) { |
| return true; |
| } |
| // Check if it is ok for us to optimize. |
| Address maxAddress = 0; |
| for (auto& segment : dataSegments) { |
| if (!segment->isPassive) { |
| auto* c = segment->offset->dynCast<Const>(); |
| // If an active segment has a non-constant offset, then what gets written |
| // cannot be known until runtime. That is, the active segments are written |
| // out at startup, in order, and one may trample the data of another, like |
| // |
| // (data (i32.const 100) "a") |
| // (data (i32.const 100) "\00") |
| // |
| // It is *not* ok to optimize out the zero in the last segment, as it is |
| // actually needed: it will zero out the "a" that was written earlier. And |
| // if a segment has a non-constant offset, such as a global.get, |
| // |
| // (data (i32.const 100) "a") |
| // (data (global.get $x) "\00") |
| // |
| // then we can't tell if that last segment will end up overwriting or not. |
| // The only case we can easily handle is if there is just a single |
| // segment, which we handled earlier. (Note that this includes the main |
| // case of a non-constant offset, dynamic linking, in which there is just |
| // a single segment.) |
| if (!c) { |
| return false; |
| } |
| // Note the maximum address so far. |
| maxAddress = std::max( |
| maxAddress, Address(c->value.getUnsigned() + segment->data.size())); |
| } |
| } |
| // All active segments have constant offsets, known at this time, so we may be |
| // able to optimize, but must still check for the trampling problem mentioned |
| // earlier. |
| // TODO: optimize in the trampling case |
| DisjointSpans space; |
| for (auto& segment : dataSegments) { |
| if (!segment->isPassive) { |
| auto* c = segment->offset->cast<Const>(); |
| Address start = c->value.getUnsigned(); |
| DisjointSpans::Span span{start, start + segment->data.size()}; |
| if (space.addAndCheckOverlap(span)) { |
| std::cerr << "warning: active memory segments have overlap, which " |
| << "prevents some optimizations.\n"; |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| |
| bool MemoryPacking::canSplit(const std::unique_ptr<DataSegment>& segment, |
| const Referrers& referrers) { |
| // Don't mess with segments related to llvm coverage tools such as |
| // __llvm_covfun. These segments are expected/parsed by external downstream |
| // tools (llvm-cov) so they need to be left intact. |
| // See https://clang.llvm.org/docs/SourceBasedCodeCoverage.html |
| if (segment->name.is() && segment->name.startsWith("__llvm")) { |
| return false; |
| } |
| |
| if (segment->data.empty()) { |
| // Ignore empty segments, leaving them in place. We may not need them, but |
| // leave that for RemoveUnusedModuleElements to decide (as they may trap |
| // during startup if out of bounds, which is an effect). |
| return false; |
| } |
| |
| for (auto* referrer : referrers) { |
| if (auto* curr = referrer->dynCast<MemoryInit>()) { |
| if (segment->isPassive) { |
| // Do not try to split if there is a nonconstant offset or size |
| if (!curr->offset->is<Const>() || !curr->size->is<Const>()) { |
| return false; |
| } |
| } |
| } else if (referrer->is<ArrayNewData>() || referrer->is<ArrayInitData>()) { |
| // TODO: Split segments referenced by GC instructions. |
| return false; |
| } |
| } |
| |
| // Active segments can only be split if they have constant offsets |
| return segment->isPassive || segment->offset->is<Const>(); |
| } |
| |
| void MemoryPacking::calculateRanges(Module* module, |
| const std::unique_ptr<DataSegment>& segment, |
| const Referrers& referrers, |
| std::vector<Range>& ranges) { |
| auto& data = segment->data; |
| if (data.size() == 0) { |
| return; |
| } |
| |
| // A segment that might trap during startup must be handled carefully, as we |
| // do not want to remove that trap (unless we are allowed to by TNH). |
| auto preserveTrap = !getPassOptions().trapsNeverHappen && segment->offset; |
| if (preserveTrap) { |
| // Check if we can rule out a trap by it being in bounds. |
| if (auto* c = segment->offset->dynCast<Const>()) { |
| auto* memory = module->getMemory(segment->memory); |
| auto memorySize = memory->initial * Memory::kPageSize; |
| Index start = c->value.getUnsigned(); |
| Index size = segment->data.size(); |
| Index end; |
| if (!std::ckd_add(&end, start, size) && end <= memorySize) { |
| // This is in bounds. |
| preserveTrap = false; |
| } |
| } |
| } |
| |
| // Calculate initial zero and nonzero ranges |
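| // (For example, the data {'a', 'b', 0, 0, 0, 'c'} produces the ranges |
| // {false, 0, 2}, {true, 2, 5}, {false, 5, 6}.) |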
| size_t start = 0; |
| while (start < data.size()) { |
| size_t end = start; |
| while (end < data.size() && data[end] == 0) { |
| end++; |
| } |
| if (end > start) { |
| ranges.push_back({true, start, end}); |
| start = end; |
| } |
| while (end < data.size() && data[end] != 0) { |
| end++; |
| } |
| if (end > start) { |
| ranges.push_back({false, start, end}); |
| start = end; |
| } |
| } |
| |
| // Calculate the number of consecutive zeroes for which splitting is |
| // beneficial. This is an approximation that assumes all memory.inits cover an |
| // entire segment and that all their arguments are constants. These assumptions |
| // are true of all memory.inits generated by the tools. |
| size_t threshold = 0; |
| if (segment->isPassive) { |
| // Passive segment metadata size |
| threshold += 2; |
| // Zeroes on the edge do not increase the number of segments or data.drops, |
| // so their threshold is lower. The threshold for interior zeroes depends on |
| // an estimate of the number of new memory.fill and data.drop instructions |
| // splitting would introduce. |
| size_t edgeThreshold = 0; |
| for (auto* referrer : referrers) { |
| if (referrer->is<MemoryInit>()) { |
| // Splitting adds a new memory.fill and a new memory.init |
| threshold += MEMORY_FILL_SIZE + MEMORY_INIT_SIZE; |
| edgeThreshold += MEMORY_FILL_SIZE; |
| } else { |
| threshold += DATA_DROP_SIZE; |
| } |
| } |
| |
| // Merge edge zeroes if they are not worth splitting. Note that there cannot |
| // be a trap to preserve here, since passive segments have no offset (if |
| // there were, we'd need to handle it in the code below). |
| assert(!preserveTrap); |
| if (ranges.size() >= 2) { |
| auto last = ranges.end() - 1; |
| auto penultimate = ranges.end() - 2; |
| if (last->isZero && last->end - last->start <= edgeThreshold) { |
| penultimate->end = last->end; |
| ranges.erase(last); |
| } |
| } |
| if (ranges.size() >= 2) { |
| auto first = ranges.begin(); |
| auto second = ranges.begin() + 1; |
| if (first->isZero && first->end - first->start <= edgeThreshold) { |
| second->start = first->start; |
| ranges.erase(first); |
| } |
| } |
| } else { |
| // Legacy ballpark overhead of active segment with offset. |
| // TODO: Tune this |
| threshold = 8; |
| } |
| |
| // Merge ranges across small spans of zeroes. |
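| // (For example, with a threshold of 10, the ranges {false, 0, 4}, |
| // {true, 4, 8}, {false, 8, 20} collapse into the single range {false, 0, 20}, |
| // since keeping the 4-byte zero gap would cost more than it saves.) |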
| std::vector<Range> mergedRanges = {ranges.front()}; |
| size_t i; |
| for (i = 1; i < ranges.size() - 1; ++i) { |
| auto left = mergedRanges.end() - 1; |
| auto curr = ranges.begin() + i; |
| auto right = ranges.begin() + i + 1; |
| if (curr->isZero && curr->end - curr->start <= threshold) { |
| left->end = right->end; |
| ++i; |
| } else { |
| mergedRanges.push_back(*curr); |
| } |
| } |
| // Add the final range if it hasn't already been merged in. |
| if (i < ranges.size()) { |
| mergedRanges.push_back(ranges.back()); |
| } |
| // If we need to preserve a trap then we must keep the last byte of the |
| // segment, which is enough to cause a trap if the original segment would |
| // have trapped. |
| if (preserveTrap) { |
| // Check if the last byte is in a zero range. Such a range will be dropped |
| // later, so add a non-zero range with that byte. (It is slightly odd to |
| // add a range with a zero marked as non-zero, but that is how we ensure it |
| // is preserved later in the output.) |
| auto& back = mergedRanges.back(); |
| if (back.isZero) { |
| // Remove the last byte from |back|. Decrementing this prevents it from |
| // overlapping with the new segment we are about to add. Note that this |
| // might make |back| have size 0, but that is not a problem as it will be |
| // dropped later anyhow, since it contains zeroes. |
| back.end--; |
| auto lastByte = data.size() - 1; |
| mergedRanges.push_back({false, lastByte, lastByte + 1}); |
| } |
| } |
| std::swap(ranges, mergedRanges); |
| } |
| |
| void MemoryPacking::optimizeSegmentOps(Module* module) { |
| struct Optimizer : WalkerPass<PostWalker<Optimizer>> { |
| bool isFunctionParallel() override { return true; } |
| |
| // This operates on linear memory, and does not affect reference locals. |
| bool requiresNonNullableLocalFixups() override { return false; } |
| |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<Optimizer>(); |
| } |
| |
| bool needsRefinalizing; |
| |
| void visitMemoryInit(MemoryInit* curr) { |
| Builder builder(*getModule()); |
| auto* segment = getModule()->getDataSegment(curr->segment); |
| size_t maxRuntimeSize = segment->isPassive ? segment->data.size() : 0; |
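| // Active segments are implicitly dropped after instantiation, so at runtime |
| // a memory.init from one of them has an effective maximum size of zero. |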
| bool mustNop = false; |
| bool mustTrap = false; |
| auto* offset = curr->offset->dynCast<Const>(); |
| auto* size = curr->size->dynCast<Const>(); |
| if (offset && uint32_t(offset->value.geti32()) > maxRuntimeSize) { |
| mustTrap = true; |
| } |
| if (size && uint32_t(size->value.geti32()) > maxRuntimeSize) { |
| mustTrap = true; |
| } |
| if (offset && size) { |
| uint64_t offsetVal(offset->value.geti32()); |
| uint64_t sizeVal(size->value.geti32()); |
| if (offsetVal + sizeVal > maxRuntimeSize) { |
| mustTrap = true; |
| } else if (offsetVal == 0 && sizeVal == 0) { |
| mustNop = true; |
| } |
| } |
| assert(!mustNop || !mustTrap); |
| if (mustNop) { |
| // Offset and size are 0, so just trap if dest > memory.size |
| replaceCurrent( |
| builder.makeIf(makeGtShiftedMemorySize(builder, *getModule(), curr), |
| builder.makeUnreachable())); |
| } else if (mustTrap) { |
| // Drop dest, offset, and size then trap |
| replaceCurrent(builder.blockify(builder.makeDrop(curr->dest), |
| builder.makeDrop(curr->offset), |
| builder.makeDrop(curr->size), |
| builder.makeUnreachable())); |
| needsRefinalizing = true; |
| } else if (!segment->isPassive) { |
| // Trap if dest > memory.size or if offset or size is nonzero |
| replaceCurrent(builder.makeIf( |
| builder.makeBinary( |
| OrInt32, |
| makeGtShiftedMemorySize(builder, *getModule(), curr), |
| builder.makeBinary(OrInt32, curr->offset, curr->size)), |
| builder.makeUnreachable())); |
| } |
| } |
| void visitDataDrop(DataDrop* curr) { |
| if (!getModule()->getDataSegment(curr->segment)->isPassive) { |
| ExpressionManipulator::nop(curr); |
| } |
| } |
| void doWalkFunction(Function* func) { |
| needsRefinalizing = false; |
| super::doWalkFunction(func); |
| if (needsRefinalizing) { |
| ReFinalize().walkFunctionInModule(func, getModule()); |
| } |
| } |
| } optimizer; |
| optimizer.run(getPassRunner(), module); |
| } |
| |
| void MemoryPacking::getSegmentReferrers(Module* module, |
| ReferrersMap& referrers) { |
| auto collectReferrers = [&](Function* func, ReferrersMap& referrers) { |
| if (func->imported()) { |
| return; |
| } |
| struct Collector |
| : WalkerPass<PostWalker<Collector, UnifiedExpressionVisitor<Collector>>> { |
| ReferrersMap& referrers; |
| Collector(ReferrersMap& referrers) : referrers(referrers) {} |
| |
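| // Visit every expression and, via the field-delegation macros below, record |
| // it as a referrer of any data segment it names. |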
| void visitExpression(Expression* curr) { |
| #define DELEGATE_ID curr->_id |
| |
| #define DELEGATE_START(id) [[maybe_unused]] auto* cast = curr->cast<id>(); |
| |
| #define DELEGATE_GET_FIELD(id, field) cast->field |
| |
| #define DELEGATE_FIELD_TYPE(id, field) |
| #define DELEGATE_FIELD_HEAPTYPE(id, field) |
| #define DELEGATE_FIELD_CHILD(id, field) |
| #define DELEGATE_FIELD_OPTIONAL_CHILD(id, field) |
| #define DELEGATE_FIELD_INT(id, field) |
| #define DELEGATE_FIELD_LITERAL(id, field) |
| #define DELEGATE_FIELD_NAME(id, field) |
| #define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field) |
| #define DELEGATE_FIELD_SCOPE_NAME_USE(id, field) |
| #define DELEGATE_FIELD_ADDRESS(id, field) |
| |
| #define DELEGATE_FIELD_NAME_KIND(id, field, kind) \ |
| if (kind == ModuleItemKind::DataSegment) { \ |
| referrers[cast->field].push_back(curr); \ |
| } |
| |
| #include "wasm-delegations-fields.def" |
| } |
| } collector(referrers); |
| collector.walkFunctionInModule(func, module); |
| }; |
| ModuleUtils::ParallelFunctionAnalysis<ReferrersMap> analysis( |
| *module, collectReferrers); |
| for (auto& [_, funcReferrersMap] : analysis.map) { |
| for (auto& [i, segReferrers] : funcReferrersMap) { |
| referrers[i].insert( |
| referrers[i].end(), segReferrers.begin(), segReferrers.end()); |
| } |
| } |
| } |
| |
| void MemoryPacking::dropUnusedSegments( |
| Module* module, |
| std::vector<std::unique_ptr<DataSegment>>& segments, |
| ReferrersMap& referrers) { |
| std::vector<std::unique_ptr<DataSegment>> usedSegments; |
| // Remove segments that are never used |
| // TODO: remove unused portions of partially used segments as well |
| for (size_t i = 0; i < segments.size(); ++i) { |
| bool used = false; |
| auto referrersIt = referrers.find(segments[i]->name); |
| bool hasReferrers = referrersIt != referrers.end(); |
| if (segments[i]->isPassive) { |
| if (hasReferrers) { |
| for (auto* referrer : referrersIt->second) { |
| if (!referrer->is<DataDrop>()) { |
| used = true; |
| break; |
| } |
| } |
| } |
| } else { |
| // Active segment. |
| used = true; |
| } |
| if (used) { |
| usedSegments.push_back(std::move(segments[i])); |
| } else if (hasReferrers) { |
| // All referrers are data.drops. Make them nops. |
| for (auto* referrer : referrersIt->second) { |
| ExpressionManipulator::nop(referrer); |
| } |
| } |
| } |
| std::swap(segments, usedSegments); |
| module->updateDataSegmentsMap(); |
| } |
| |
| // Given the start of a segment and an offset into it, compute the sum of the |
| // two to get the absolute address the data should be at. This takes into |
| // account overflow by saturating to the type's maximum value: we do not want |
| // an overflow to wrap around to a small address; in the invalid case of an |
| // overflow we stay at the largest possible unsigned value, which keeps us |
| // trapping. |
| template<typename T> |
| Expression* addStartAndOffset(T start, T offset, Builder& builder) { |
| T total; |
| if (std::ckd_add(&total, start, offset)) { |
| total = std::numeric_limits<T>::max(); |
| } |
| return builder.makeConst(T(total)); |
| } |
| |
| void MemoryPacking::createSplitSegments( |
| Builder& builder, |
| const DataSegment* segment, |
| std::vector<Range>& ranges, |
| std::vector<std::unique_ptr<DataSegment>>& packed, |
| size_t segmentsRemaining) { |
| size_t segmentCount = 0; |
| bool hasExplicitName = false; |
| for (size_t i = 0; i < ranges.size(); ++i) { |
| Range& range = ranges[i]; |
| if (range.isZero) { |
| continue; |
| } |
| Expression* offset = nullptr; |
| if (!segment->isPassive) { |
| if (auto* c = segment->offset->dynCast<Const>()) { |
| if (c->value.type == Type::i32) { |
| offset = addStartAndOffset<uint32_t>( |
| range.start, c->value.geti32(), builder); |
| } else { |
| assert(c->value.type == Type::i64); |
| offset = addStartAndOffset<uint64_t>( |
| range.start, c->value.geti64(), builder); |
| } |
| } else { |
| assert(ranges.size() == 1); |
| offset = segment->offset; |
| } |
| } |
| if (WebLimitations::MaxDataSegments <= packed.size() + segmentsRemaining) { |
| // Give up splitting and merge all remaining ranges except end zeroes |
| auto lastNonzero = ranges.end() - 1; |
| if (lastNonzero->isZero) { |
| --lastNonzero; |
| } |
| range.end = lastNonzero->end; |
| ranges.erase(ranges.begin() + i + 1, lastNonzero + 1); |
| } |
| Name name; |
| if (segment->name.is()) { |
| // Name the first range after the original segment and all following |
| // ranges get numbered accordingly. This means that for segments that |
| // cannot be split (segments that contain a single range) the input and |
| // output segment have the same name. |
| if (!segmentCount) { |
| name = segment->name; |
| hasExplicitName = segment->hasExplicitName; |
| } else { |
| name = segment->name.toString() + "." + std::to_string(segmentCount); |
| } |
| segmentCount++; |
| } |
| auto curr = Builder::makeDataSegment(name, |
| segment->memory, |
| segment->isPassive, |
| offset, |
| segment->data.data() + range.start, |
| range.end - range.start); |
| curr->hasExplicitName = hasExplicitName; |
| packed.push_back(std::move(curr)); |
| } |
| } |
| |
| void MemoryPacking::createReplacements(Module* module, |
| const std::vector<Range>& ranges, |
| const std::vector<Name>& segments, |
| const Referrers& referrers, |
| Replacements& replacements) { |
| // If there was no transformation, we do not need to do anything. |
| if (ranges.size() == 1 && !ranges.front().isZero) { |
| return; |
| } |
| |
| Builder builder(*module); |
| |
| Name dropStateGlobal; |
| |
| // Return the drop state global, initializing it if it does not exist. This |
| // may change module-global state and has the important side effect of setting |
| // dropStateGlobal, so it must be evaluated eagerly, not in the replacements. |
| auto getDropStateGlobal = [&]() { |
| if (dropStateGlobal != Name()) { |
| return dropStateGlobal; |
| } |
| dropStateGlobal = |
| Names::getValidGlobalName(*module, "__mem_segment_drop_state"); |
| module->addGlobal(builder.makeGlobal(dropStateGlobal, |
| Type::i32, |
| builder.makeConst(int32_t(0)), |
| Builder::Mutable)); |
| return dropStateGlobal; |
| }; |
| |
| // Create replacements for memory.init instructions first |
| for (auto referrer : referrers) { |
| auto* init = referrer->dynCast<MemoryInit>(); |
| if (init == nullptr) { |
| continue; |
| } |
| |
| // Nonconstant offsets or sizes will have inhibited splitting |
| size_t start = init->offset->cast<Const>()->value.geti32(); |
| size_t end = start + init->size->cast<Const>()->value.geti32(); |
| |
| // Index in `segments` of the segment used in emitted memory.init |
| // instructions |
| size_t initIndex = 0; |
| |
| // Index of the range from which this memory.init starts reading |
| size_t firstRangeIdx = 0; |
| while (firstRangeIdx < ranges.size() && |
| ranges[firstRangeIdx].end <= start) { |
| if (!ranges[firstRangeIdx].isZero) { |
| ++initIndex; |
| } |
| ++firstRangeIdx; |
| } |
| |
| // Handle zero-length memory.inits separately so we can later assume that |
| // start is in bounds and that some range will be intersected. |
| if (start == end) { |
| // Offset is nonzero because init would otherwise have previously been |
| // optimized out, so trap if the dest is out of bounds or the segment is |
| // dropped |
| Expression* result = builder.makeIf( |
| builder.makeBinary( |
| OrInt32, |
| makeGtShiftedMemorySize(builder, *module, init), |
| builder.makeGlobalGet(getDropStateGlobal(), Type::i32)), |
| builder.makeUnreachable()); |
| replacements[init] = [result](Function*) { return result; }; |
| continue; |
| } |
| |
| assert(firstRangeIdx < ranges.size()); |
| |
| // Split init into multiple memory.inits and memory.fills, storing the |
| // original base destination in a local if it is not a constant. If the |
| // first access is a memory.fill, explicitly check the drop status first to |
| // avoid writing zeroes when we should have trapped. |
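| // For example (illustrative): a memory.init that reads across the ranges |
| // [nonzero][zero][nonzero] becomes a memory.init from the first packed |
| // segment, a memory.fill of zeroes, and a memory.init from the second packed |
| // segment, with the destination advanced by the bytes already written. |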
| Expression* result = nullptr; |
| auto appendResult = [&](Expression* expr) { |
| result = result ? builder.blockify(result, expr) : expr; |
| }; |
| |
| // The local var holding the dest is not known until replacement time. Keep |
| // track of the locations where it will need to be patched in. |
| Index* setVar = nullptr; |
| std::vector<Index*> getVars; |
| if (!init->dest->is<Const>()) { |
| auto set = builder.makeLocalSet(-1, init->dest); |
| setVar = &set->index; |
| appendResult(set); |
| } |
| |
| // We only need to explicitly check the drop state when we will emit |
| // memory.fill first, since memory.init will implicitly do the check for us. |
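| // (A memory.init from a dropped segment traps on its own, so when the first |
| // emitted instruction is a memory.init, the trap is already preserved.) |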
| if (ranges[firstRangeIdx].isZero) { |
| appendResult( |
| builder.makeIf(builder.makeGlobalGet(getDropStateGlobal(), Type::i32), |
| builder.makeUnreachable())); |
| } |
| |
| size_t bytesWritten = 0; |
| |
| auto is64 = module->getMemory(init->memory)->is64(); |
| for (size_t i = firstRangeIdx; i < ranges.size() && ranges[i].start < end; |
| ++i) { |
| auto& range = ranges[i]; |
| |
| // Calculate dest, either as a const or as an addition to the dest local |
| Expression* dest; |
| Type ptrType = module->getMemory(init->memory)->indexType; |
| if (auto* c = init->dest->dynCast<Const>()) { |
| dest = |
| builder.makeConstPtr(c->value.getInteger() + bytesWritten, ptrType); |
| } else { |
| auto* get = builder.makeLocalGet(-1, ptrType); |
| getVars.push_back(&get->index); |
| dest = get; |
| if (bytesWritten > 0) { |
| Const* addend = builder.makeConstPtr(bytesWritten, ptrType); |
| dest = builder.makeBinary(is64 ? AddInt64 : AddInt32, dest, addend); |
| } |
| } |
| |
| // How many bytes are read from this range |
| size_t bytes = std::min(range.end, end) - std::max(range.start, start); |
| bytesWritten += bytes; |
| |
| // Create new memory.init or memory.fill |
| if (range.isZero) { |
| Expression* value = builder.makeConst(Literal::makeZero(Type::i32)); |
| Expression* size = builder.makeConstPtr(bytes, ptrType); |
| appendResult(builder.makeMemoryFill(dest, value, size, init->memory)); |
| } else { |
| size_t offsetBytes = std::max(start, range.start) - range.start; |
| Expression* offset = builder.makeConst(int32_t(offsetBytes)); |
| Expression* size = builder.makeConst(int32_t(bytes)); |
| appendResult(builder.makeMemoryInit( |
| segments[initIndex], dest, offset, size, init->memory)); |
| initIndex++; |
| } |
| } |
| |
| // Non-zero length memory.inits must have intersected some range |
| assert(result); |
| replacements[init] = |
| [module, init, setVar, getVars, result](Function* function) { |
| if (setVar != nullptr) { |
| auto indexType = module->getMemory(init->memory)->indexType; |
| Index destVar = Builder(*module).addVar(function, indexType); |
| *setVar = destVar; |
| for (auto* getVar : getVars) { |
| *getVar = destVar; |
| } |
| } |
| return result; |
| }; |
| } |
| |
| // Create replacements for data.drop instructions now that we know whether we |
| // need a drop state global |
| for (auto drop : referrers) { |
| if (!drop->is<DataDrop>()) { |
| continue; |
| } |
| |
| Expression* result = nullptr; |
| auto appendResult = [&](Expression* expr) { |
| result = result ? builder.blockify(result, expr) : expr; |
| }; |
| |
| // Track drop state in a global only if some memory.init required it |
| if (dropStateGlobal != Name()) { |
| appendResult( |
| builder.makeGlobalSet(dropStateGlobal, builder.makeConst(int32_t(1)))); |
| } |
| size_t dropIndex = 0; |
| for (auto range : ranges) { |
| if (!range.isZero) { |
| appendResult(builder.makeDataDrop(segments[dropIndex++])); |
| } |
| } |
| replacements[drop] = [result, module](Function*) { |
| return result ? result : Builder(*module).makeNop(); |
| }; |
| } |
| } |
| |
| void MemoryPacking::replaceSegmentOps(Module* module, |
| Replacements& replacements) { |
| struct Replacer : WalkerPass<PostWalker<Replacer>> { |
| bool isFunctionParallel() override { return true; } |
| |
| // This operates on linear memory, and does not affect reference locals. |
| bool requiresNonNullableLocalFixups() override { return false; } |
| |
| Replacements& replacements; |
| |
| Replacer(Replacements& replacements) : replacements(replacements) {} |
| std::unique_ptr<Pass> create() override { |
| return std::make_unique<Replacer>(replacements); |
| } |
| |
| void visitMemoryInit(MemoryInit* curr) { |
| if (auto replacement = replacements.find(curr); |
| replacement != replacements.end()) { |
| replaceCurrent(replacement->second(getFunction())); |
| } |
| } |
| |
| void visitDataDrop(DataDrop* curr) { |
| if (auto replacement = replacements.find(curr); |
| replacement != replacements.end()) { |
| replaceCurrent(replacement->second(getFunction())); |
| } |
| } |
| |
| void visitArrayNewData(ArrayNewData* curr) { |
| if (auto replacement = replacements.find(curr); |
| replacement != replacements.end()) { |
| replaceCurrent(replacement->second(getFunction())); |
| } |
| } |
| } replacer(replacements); |
| replacer.run(getPassRunner(), module); |
| } |
| |
| Pass* createMemoryPackingPass() { return new MemoryPacking(); } |
| |
| } // namespace wasm |