lib/Parser/pnodediff.h - external/github.com/Microsoft/ChakraCore - Git at Google

 //-------------------------------------------------------------------------------------------------------
 // Copyright (C) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 #pragma once

 //-----------------------------------------------------------------------------
 enum EditKind
 {
     // No change.
     None = 0,

     // Node value was updated.
     Update,

     // Node was inserted.
     Insert,

     // Node was deleted.
     Delete,

     // Node changed parent.
     Move,

     // Node changed position within its parent. The parent nodes of the old node and the new node are matching.
     Reorder,
 };

 //-----------------------------------------------------------------------------
 // Calculates Longest Common Subsequence.
 // This uses the basic version in
 //
 //      EUGENE W. MYERS: An O(ND) Difference Algorithm and Its Variations
 //
 // The idea is that LCS is a dual problem of shortest path in an edit graph. The edit graph is a grid of lengthA
 // columns and lengthB rows. A path starts from (0,0) and moves toward (lengthA, lengthB).
 //  - A horizontal move (i,j) -> (i+1,j) represents deleting A[i].
 //  - A vertical move (i,j) -> (i,j+1) represents inserting B[j].
 //  - A diagonal move (i,j) -> (i+1,j+1) represents a match, A[i] == B[j].
 // Each diagonal move represents a match. We want more diagonal moves. Let diagonal move cost 0, horizontal or
 // vertical move each costs 1. The basic algorithm is a greedy algorithm to find a shortest path from (0,0) to
 // (lengthA, lengthB).
 //
 // Terms:
 //  diagonal k: The diagonal where x-y==k.
 //  d-path: A path starting from (0,0) with d number of horizontal or vertical moves. Or, its length is d (note
 //          that each horizontal/vertical move costs 1, diagonal move costs 0).
 //
 //          0-path can only move along and end on diagonal 0.
 //          1-path can only end on diagonal -1 or 1.
 //          d-path can end on diagonal [-d, -d+2, ..., d-2, d].
 //
 // The basic algorithm tries to find the smallest d, where there is a d-path reaches (lengthA, lengthB).
 //-----------------------------------------------------------------------------
 template <class Allocator>
 class LongestCommonSubsequence
 {
 private:
     // Stores d-path furthest reaching endpoints. They can be on diagonal [-d, -d+2, ..., d-2, d].
     class EndPoints
     {
     private:
         int d;
         int x[]; // Stores x for endpoints on the (d+1) diagonals. y == x - k.

         EndPoints(int d) : d(d)
         {
         }

     public:
         int getd() const
         {
             return d;
         }

         // Get x of furthest reaching endpoint on diagonal k.
         int operator[](int k) const
         {
             Assert(k >= -d && k <= d && (d - k) % 2 == 0); // k must be in [-d, -d+2, ..., d-2, d]
             int i = (k + d) / 2;
             return x[i];
         }

         // Get x reference of furthest reaching endpoint on diagonal k.
         int& operator[](int k)
         {
             Assert(k >= -d && k <= d && (d - k) % 2 == 0); // k must be in [-d, -d+2, ..., d-2, d]
             int i = (k + d) / 2;
             return x[i];
         }

         static EndPoints* New(Allocator* alloc, int d)
         {
             Assert(d >= 0);
             return AllocatorNewPlusLeaf(Allocator, alloc, sizeof(int) * (d + 1), EndPoints, d);
         }

         void Destroy(Allocator* alloc)
         {
             AllocatorDeletePlusLeaf(Allocator, alloc, sizeof(int) * (d + 1), this);
         }
     };

     // Represents an EditGraph for finding LCS
     class EditGraph
     {
     private:
         typedef JsUtil::List<EndPoints*, Allocator> EndPointsList;

         EndPointsList m_endPoints; // Stores endPoints for paths: -1, 0, 1, ..., d
         int m_diagonal;             // The final diagonal found on d-path that reaches destination

         // Add EndPoints storage for d-path
         EndPoints* AddEndPoints(int d)
         {
             int i = m_endPoints.Add(nullptr);
             EndPoints* e = EndPoints::New(m_endPoints.GetAllocator(), d);
             m_endPoints.Item(i, e);
             return e;
         }

     public:
         EditGraph(Allocator* alloc) : m_endPoints(alloc) {}

         ~EditGraph()
         {
             Allocator* alloc = m_endPoints.GetAllocator();
             m_endPoints.Map([=](int, EndPoints* e)
             {
                 if (e)
                 {
                     e->Destroy(alloc);
                 }
             });
         }

         //
         // This is the basic algorithm to find a shortest path in the edit graph from (0,0) to (lengthA, lengthB).
         // We iterate through d=0,1,2,... to find smallest d, where one d-path reaches (lengthA, lengthB).
         //  - d-path must end on diagonal [-d, -d+2, ..., d-2, d]
         //  - A furthest reaching d-path on diagonal k is composed of a furthest reaching d-1 path on diagonal k-1 or k+1,
         //    followed by a vertical or horizontal move, followed by moving along diagonal k.
         //
         template <class ItemEquals>
         void FindPath(int lengthA, int lengthB, const ItemEquals& equals)
         {
             Assert(m_endPoints.Empty()); // Only support one FindPath

             int maxD;
             if (Int32Math::Add(lengthA, lengthB, &maxD) || maxD > INT_MAX / 2) // Limits maxD to simplify overflow handling
             {
                 Math::DefaultOverflowPolicy();
             }

             // Pre-add virtual path -1
             {
                 EndPoints& pre = *AddEndPoints(1);
                 pre[1] = 0;
             }

             bool found = false;
             for (int d = 0; d <= maxD && !found; d++)
             {
                 const EndPoints& v = *m_endPoints.Item(d);      // d-1 path
                 EndPoints& cur = *AddEndPoints(d);              // d path

                 for (int k = -d; k <= d; k += 2)
                 {
                     const bool verticalMove = (k == -d || (k != d && v[k - 1] < v[k + 1]));
                     int x = verticalMove ? v[k + 1] : v[k - 1] + 1;
                     int y = x - k;

                     while (x < lengthA && y < lengthB && equals(x, y))
                     {
                         x++;
                         y++;
                     }
                     cur[k] = x; // furthest reaching end point

                     if (x == lengthA && y == lengthB)
                     {
                         m_diagonal = k;
                         found = true;
                         break;
                     }
                 }
             }

             Assert(found);
         }

         template <class Func>
         void MapEdits(const Func& map) const
         {
             // m_endPoints contains endPoints for paths: -1, 0, 1, ..., d
             int d = m_endPoints.Count() - 2;
             int k = m_diagonal;

             for (; d >= 0; d--)
             {
                 const EndPoints& v = *m_endPoints.Item(d);          // d-1 path
                 const EndPoints& cur = *m_endPoints.Item(d + 1);    // d path
                 Assert(cur.getd() == d);

                 const bool verticalMove = (k == -d || (k != d && v[k - 1] < v[k + 1]));
                 int x0 = verticalMove ? v[k + 1] : v[k - 1] + 1;

                 int x = cur[k];
                 int y = x - k;
                 while (x > x0)
                 {
                     map(EditKind::Update, --x, --y);
                 }

                 if (verticalMove)
                 {
                     if (d > 0) // Don't emit virtual initial move from path -1 to 0
                     {
                         map(EditKind::Insert, -1, --y);
                     }
                     k++;
                 }
                 else
                 {
                     map(EditKind::Delete, --x, -1);
                     k--;
                 }
             }
         }
     };

     struct Edit
     {
         EditKind kind;
         int indexA;
         int indexB;

         Edit() {}
         Edit(EditKind kind, int indexA, int indexB) :
             kind(kind), indexA(indexA), indexB(indexB)
         {
             Assert((kind == EditKind::Insert && indexA == -1 && indexB >= 0)
                 || (kind == EditKind::Delete && indexA >= 0 && indexB == -1)
                 || (kind == EditKind::Update && indexA >= 0 && indexB >= 0));
         }
     };

     typedef JsUtil::List<Edit, Allocator, /*isLeaf*/true> EditList;

     EditList m_edits;

 public:
     template <class ItemEquals>
     LongestCommonSubsequence(Allocator* alloc, int lengthA, int lengthB, const ItemEquals& equals) :
         m_edits(alloc)
     {
         EditGraph graph(alloc);
         graph.FindPath(lengthA, lengthB, equals);
         graph.MapEdits([this](EditKind kind, int indexA, int indexB)
         {
             m_edits.Add(Edit(kind, indexA, indexB));
         });
     }

     template <class Func>
     void MapEdits(const Func& map) const
     {
         for (int i = m_edits.Count() - 1; i >= 0; i--)
         {
             const Edit& e = m_edits.Item(i);
             map(e.kind, e.indexA, e.indexB);
         }
     }

     template <class Func>
     void MapMatches(const Func& map) const
     {
         MapEdits([&](EditKind kind, int indexA, int indexB)
         {
             if (kind == EditKind::Update)
             {
                 map(indexA, indexB);
             }
         });
     }
 };

 //
 // Returns a distance [0..1] of the specified sequences. The smaller distance the more of their elements match.
 //
 template <class Allocator, class ItemEquals>
 double ComputeLongestCommonSubsequenceDistance(Allocator* alloc, int lengthA, int lengthB, const ItemEquals& equals)
 {
     Assert(lengthA >= 0 && lengthB >= 0);
     if (lengthA == 0 || lengthB == 0)
     {
         return (lengthA == lengthB) ? 0.0 : 1.0;
     }

     int lcsLength = 0;
     LongestCommonSubsequence<Allocator> lcs(alloc, lengthA, lengthB, equals);
     lcs.MapMatches([&](int, int)
     {
         ++lcsLength;
     });

     return 1.0 - (double)lcsLength / (double)max(lengthA, lengthB);
 }

 //-----------------------------------------------------------------------------
 // Base class for TreeComparers, used with TreeMatch. TreeComparers specify parse node details.
 //-----------------------------------------------------------------------------
 template <class SubClass, class Node>
 struct TreeComparerBase
 {
     typedef Node Node;
     typedef Node* PNode;

     static const double ExactMatchDistance;
     static const double EpsilonDistance;

     const SubClass* pThis() const { return static_cast<const SubClass*>(this); }
     SubClass* pThis() { return static_cast<SubClass*>(this); }

     // The number of distinct labels used in the tree.
     int LabelCount() const { return 0; }

     // Returns an integer label corresponding to the given node.
     // Returned value must be within [0, LabelCount).
     int GetLabel(PNode x) const { return 0; }

     // Returns N > 0 if the node with specified label can't change its N-th ancestor node, zero otherwise.
     // 1st ancestor is the node's parent node.
     // 2nd ancestor is the node's grandparent node.
     // etc.
     int TiedToAncestor(int label) { return 0; }

     // Calculates the distance [0..1] of two nodes.
     // The more similar the nodes the smaller the distance.
     //
     // Used to determine whether two nodes of the same label match.
     // Even if 0 is returned the nodes might be slightly different.
     double GetDistance(PNode x, PNode y) const { return 0; }

     // Returns true if the specified nodes have equal values.
     // Called with matching nodes (oldNode, newNode).
     // Return true if the values of the nodes are the same, or their difference is not important.
     bool ValuesEqual(PNode oldNode, PNode newNode) const { return true; }

     PNode GetParent(PNode x) const { return nullptr; }

     bool TryGetParent(PNode x, _Out_ PNode* p) const
     {
         *p = pThis()->GetParent(x);
         return *p != nullptr;
     }

     PNode GetAncestor(PNode node, int level) const
     {
         while (level > 0)
         {
             node = pThis()->GetParent(node);
             level--;
         }

         return node;
     }

     // Map children nodes of x
     template <class Func>
     void MapChildren(PNode x, const Func& func) const {}

     // Map all descendant nodes of x (not including x itself)
     template <class Func>
     void MapDescendants(PNode x, const Func& func) const
     {
         pThis()->MapChildren(x, [&](PNode child)
         {
             func(child);
             MapDescendants(child, func);
         });
     }

     // Map every node in the (sub)tree x.
     template <class Func>
     void MapTree(PNode x, const Func& func) const
     {
         func(x);
         pThis()->MapDescendants(x, func);
     }

     // Return true if specified nodes belong to the same tree. For debug only.
     bool TreesEqual(PNode left, PNode right) const { return true; }
 };

 template <class SubClass, class Node> const double TreeComparerBase<SubClass, Node>::ExactMatchDistance = 0.0;
 template <class SubClass, class Node> const double TreeComparerBase<SubClass, Node>::EpsilonDistance = 0.00001;

 //-----------------------------------------------------------------------------
 // Tree match algorithm, based on general algorithm described in
 //      Change Detection in Hierarchically Structured Information
 //      by Sudarshan S. Chawathe, Anand Rajaraman, Hector Garcia-Molina, and Jennifer Widom
 //
 // Derived from Roslyn implementation.
 //-----------------------------------------------------------------------------
 template <class TreeComparer, class Allocator>
 class TreeMatch
 {
 public:
     // ParseNodes are owned by Parser arena. Considered leaf here.
     typedef typename TreeComparer::PNode PNode;
     typedef JsUtil::List<PNode, Allocator, /*isLeaf*/true> NodeList;
     typedef JsUtil::BaseDictionary<PNode, PNode, typename ForceLeafAllocator<Allocator>::AllocatorType> NodeMap;

 private:
     static const double ExactMatchDistance;
     static const double EpsilonDistance;
     static const double MatchingDistance1;
     static const double MatchingDistance2;
     static const double MatchingDistance3;
     static const double MaxDistance;

     Allocator* alloc;
     const PNode root1;
     const PNode root2;
     TreeComparer comparer;

     NodeMap* oneToTwo;
     NodeMap* twoToOne;

 public:
     TreeMatch(Allocator* alloc, PNode root1, PNode root2, const TreeComparer& comparer = TreeComparer()) :
         alloc(alloc), root1(root1), root2(root2), comparer(comparer)
     {
         const int labelCount = comparer.LabelCount();

         // calculate chains (not including root node)
         AutoAllocatorObjectArrayPtr<NodeList, Allocator> nodes1(AllocatorNewArrayZ(Allocator, alloc, NodeList*, labelCount), labelCount, alloc);
         AutoAllocatorObjectArrayPtr<NodeList, Allocator> nodes2(AllocatorNewArrayZ(Allocator, alloc, NodeList*, labelCount), labelCount, alloc);
         int count1 = CategorizeNodesByLabels(root1, labelCount, nodes1);
         int count2 = CategorizeNodesByLabels(root2, labelCount, nodes2);

         AutoAllocatorObjectPtr<NodeMap, Allocator> map1(AllocatorNew(Allocator, alloc, NodeMap, alloc, count1), alloc);
         AutoAllocatorObjectPtr<NodeMap, Allocator> map2(AllocatorNew(Allocator, alloc, NodeMap, alloc, count2), alloc);
         this->oneToTwo = map1;
         this->twoToOne = map2;

         ComputeMatch(nodes1, nodes2, labelCount);

         // Succeeded. Detach local objects that are now owned by this instance.
         map1.Detach();
         map2.Detach();
     }

     ~TreeMatch()
     {
         DeleteObject<Allocator>(alloc, oneToTwo);
         DeleteObject<Allocator>(alloc, twoToOne);
     }

     const TreeComparer& Comparer() const { return comparer; }
     PNode OldRoot() const { return root1;  }
     PNode NewRoot() const { return root2; }

     bool HasPartnerInTree1(PNode node2) const
     {
         Assert(comparer.TreesEqual(node2, root2));
         return twoToOne->ContainsKey(node2);
     }

     bool HasPartnerInTree2(PNode node1) const
     {
         Assert(comparer.TreesEqual(node1, root1));
         return oneToTwo->ContainsKey(node1);
     }

     bool TryGetPartnerInTree1(PNode node2, PNode* partner1) const
     {
         Assert(comparer.TreesEqual(node2, root2));
         return twoToOne->TryGetValue(node2, partner1);
     }

     bool TryGetPartnerInTree2(PNode node1, PNode* partner2) const
     {
         Assert(comparer.TreesEqual(node1, root1));
         return oneToTwo->TryGetValue(node1, partner2);
     }

     bool Contains(PNode node1, PNode node2) const
     {
         Assert(comparer.TreesEqual(node2, root2));

         PNode partner2;
         return TryGetPartnerInTree2(node1, &partner2) && node2 == partner2;
     }

 private:
     int CategorizeNodesByLabels(PNode root, int labelCount, _Out_writes_(labelCount) NodeList* nodes[])
     {
         int count = 0;
         comparer.MapDescendants(root, [&](PNode node)
         {
             int label = comparer.GetLabel(node);
             Assert(label >= 0 && label < labelCount);

             NodeList* list = nodes[label];
             if (!list)
             {
                 list = NodeList::New(alloc);
                 nodes[label] = list;
             }

             list->Add(node);
             count++;
         });

         return count;
     }

     void ComputeMatch(_In_reads_(labelCount) NodeList* nodes1[], _In_reads_(labelCount) NodeList* nodes2[], int labelCount)
     {
         // Root nodes always match but they might have been added as knownMatches
         if (!HasPartnerInTree2(root1))
         {
             Add(root1, root2);
         }

         // --- The original FastMatch algorithm ---
         //
         // For each leaf label l, and then for each internal node label l do:
         // a) S1 := chain T1(l)
         // b) S2 := chain T2(l)
         // c) lcs := LCS(S1, S2, Equal)
         // d) For each pair of nodes (x,y) in lcs add (x,y) to M.
         // e) Pair unmatched nodes with label l as in Algorithm Match, adding matches to M:
         //    For each unmatched node x in T1, if there is an unmatched node y in T2 such that equal(x,y)
         //    then add (x,y) to M.
         //
         // equal(x,y) is defined as follows:
         //   x, y are leafs => equal(x,y) := label(x) == label(y) && compare(value(x), value(y)) <= f
         //   x, y are nodes => equal(x,y) := label(x) == label(y) && |common(x,y)| / max(|x|, |y|) > t
         // where f, t are constants.
         //
         // --- Actual implementation ---
         //
         // We also categorize nodes by their labels, but then we proceed differently:
         //
         // 1) A label may be marked "tied to parent". Let x, y have both label l and l is "tied to parent".
         //    Then (x,y) can be in M only if (parent(x), parent(y)) in M.
         //    Thus we require labels of children tied to a parent to be preceded by all their possible parent labels.
         //
         // 2) Rather than defining function equal in terms of constants f and t, which are hard to get right,
         //    we try to match multiple times with different thresholds for node distance.
         //    The comparer defines the distance [0..1] between two nodes and it can do so by analyzing
         //    the node structure and value. The comparer can tune the distance specifically for each node kind.
         //    We first try to match nodes of the same labels to the exactly matching or almost matching counterparts.
         //    Then we keep increasing the threshold and keep adding matches.
         for (int label = 0; label < labelCount; label++)
         {
             if (nodes1[label] && nodes2[label])
             {
                 ComputeMatchForLabel(label, *nodes1[label], *nodes2[label]);
             }
         }
     }

     void ComputeMatchForLabel(int label, NodeList& s1, NodeList& s2)
     {
         int tiedToAncestor = comparer.TiedToAncestor(label);

         ComputeMatchForLabel(s1, s2, tiedToAncestor, EpsilonDistance);     // almost exact match
         ComputeMatchForLabel(s1, s2, tiedToAncestor, MatchingDistance1);   // ok match
         ComputeMatchForLabel(s1, s2, tiedToAncestor, MatchingDistance2);   // ok match
         ComputeMatchForLabel(s1, s2, tiedToAncestor, MatchingDistance3);   // ok match
         ComputeMatchForLabel(s1, s2, tiedToAncestor, MaxDistance);         // any match
     }

     void ComputeMatchForLabel(NodeList& s1, NodeList& s2, int tiedToAncestor, double maxAcceptableDistance)
     {
         // Obviously, the algorithm below is O(n^2). However, in the common case, the 2 lists will
         // be sequences that exactly match. The purpose of "firstNonMatch2" is to reduce the complexity
         // to O(n) in this case. Basically, the pointer is the 1st non-matched node in the list of nodes of tree2
         // with the given label.
         // Whenever we match to firstNonMatch2 we set firstNonMatch2 to the subsequent node.
         // So in the case of totally matching sequences, we process them in O(n) -
         // both node1 and firstNonMatch2 will be advanced simultaneously.

         UnmatchedIterator i1(s1);
         for (;;)
         {
             PNode node1 = i1.GetNextUnmatched();
             if (!node1) break;
             Assert(!HasPartnerInTree2(node1));

             // Find node2 that matches node1 the best, i.e. has minimal distance.

             double bestDistance = MaxDistance;
             PNode bestMatch = nullptr;
             int bestMatchIndex = -1; // node1's best match index in list2
             bool matched = false;
             UnmatchedIterator i2(s2);

             for (;;)
             {
                 PNode node2 = i2.GetNextUnmatched();
                 if (!node2) break;
                 Assert(!HasPartnerInTree1(node2));

                 // this requires parents to be processed before their children:
                 if (tiedToAncestor > 0)
                 {
                     // TODO: For nodes tied to their parents,
                     // consider avoiding matching them to all other nodes of the same label.
                     // Rather we should only match them with their siblings that share the same parent.

                     PNode ancestor1 = comparer.GetAncestor(node1, tiedToAncestor);
                     PNode ancestor2 = comparer.GetAncestor(node2, tiedToAncestor);
                     Assert(comparer.GetLabel(ancestor1) < comparer.GetLabel(node1));

                     if (!Contains(ancestor1, ancestor2))
                     {
                         continue;
                     }
                 }

                 // We know that
                 // 1. (node1, node2) not in M
                 // 2. Both of their parents are matched to the same parent (or are not matched)
                 //
                 // Now, we have no other choice than comparing the node "values"
                 // and looking for the one with the smaller distance.
                 //
                 double distance = comparer.GetDistance(node1, node2);
                 if (distance < bestDistance)
                 {
                     matched = true;
                     bestMatch = node2;
                     bestMatchIndex = i2.CurIndex();
                     bestDistance = distance;

                     // We only stop if we've got an exact match. This is to resolve the problem
                     // of entities with identical names(name is often used as the "value" of a
                     // node) but with different "sub-values" (e.g. two locals may have the same name
                     // but different types. Since the type is not part of the value, we don't want
                     // to stop looking for the best match if we don't have an exact match).
                     if (distance == ExactMatchDistance)
                     {
                         break;
                     }
                 }
             }

             if (matched && bestDistance <= maxAcceptableDistance)
             {
                 Add(node1, bestMatch);

                 i1.MarkCurrentMatched();        // i1's match is current node1
                 i2.MarkMatched(bestMatchIndex); // i2's match is one of the nodes examined in the above for(;;) pass
             }
         }
     }

     void Add(PNode node1, PNode node2)
     {
         Assert(comparer.TreesEqual(node1, root1));
         Assert(comparer.TreesEqual(node2, root2));

         oneToTwo->Add(node1, node2);
         twoToOne->Add(node2, node1);
     }

     // The customized Match algorithm iterates over the 2 node lists, compares every unmatched node pair to match nodes.
     // To find the next unmatched node, original algorithm iterates over every node in each list, use a dictionary lookup
     // to test if the node has been matched or not, until it sees next unmatched node. This could be very expensive if the
     // lists are huge. E.g., assume the only diff is inserting a new node at the beginning of list2. Then for each node in
     // list1, it checks every node starting from the beginning new node in list2 for next unmatched node. This results in
     // O(N^2) dictionary lookups. And we do 5 passes of these.
     //
     // To improve on this, we can try to record every match span and directly jump to next unmatched position. Note that
     // in both lists once a node is matched, the list entry is no longer used. We can reuse that space to record extra info.
     //      * Original PNode pointer value must be at even address. The list item must have 0 at bit0 (lowest bit).
     //      * Once a node is matched, mark 1 at bit0. With this we can get rid of dictionary lookup.
     //      * Next, for each matched entry, use the upper bits to record "next" unmatched index. Try to maintain match span,
     //        so that from a matched node we can directly jump to next unmatched index.
     //
     // This class is for above purpose. Expected call pattern:
     //      * GetNextUnmatched, [MarkCurrentMatched], GetNextUnmatched, [MarkCurrentMatched], ...
     //          -- (A) With first MarkCurrentMatched we know the start of a match span.
     //          -- (B) Subsequent MarkCurrentMatched indicates continuous match span.
     //          -- (C) When MarkCurrentMatched is not called for an entry, we know the end of a match span. Record the whole
     //             span (A)->(C). If walked again we would directly jump from (A) to (C).
     //      * Random MarkMatched(i)
     //          -- We don't know the exact match span. Just mark this entry "i" as matched, but set its "next" (upper bits) to 0.
     //          -- During next pass, we can merge all adjacent match spans and individual matched entries to bigger match spans.
     //             This would help next pass (we have 5).
     //
     class UnmatchedIterator
     {
     private:
         NodeList& list;
         int lastMatched;    // last matched node index. -1 means no known last matched index.
         int index;          // current examining index. Only moved by GetNextUnmatched().

     public:
         UnmatchedIterator(NodeList& list) :
             list(list),
             lastMatched(-1),
             index(-1)
         {
             VerifySize(list);
         }

         ~UnmatchedIterator()
         {
             // If we have lastMatched, we could have one of following:
             //      * index is matched by MarkCurrentMatched(). Link lastMatched -> index (== lastMatched). GetNextUnmatched() can handle it.
             //      * index remains unmatched (ends a matched sequence). Link lastMatched -> index.
             //      * index is out of range. That means [lastMatched, ...end) are all matched. Link lastMatched -> index (out of range).
             //
             if (lastMatched >= 0)
             {
                 SetNext(lastMatched, index);
             }
         }

         PNode GetNextUnmatched()
         {
             // If current ends a matched sequence, make a link [lastMatched -> current).
             if (lastMatched >= 0 && !IsMatched(index))
             {
                 SetNext(lastMatched, index);
                 lastMatched = -1;
             }

             ++index;
             if (index < list.Count())
             {
                 if (IsMatched(index))
                 {
                     if (lastMatched < 0) // Check if current starts a matched sequence
                     {
                         lastMatched = index;
                     }

                     // Jumps all matched span, until sees an unmatched entry or the end.
                     int next;
                     while (index < list.Count() && IsNext(list.Item(index), &next))
                     {
                         index = max(next, index + 1); // Ensure moves forward (next could be 0, from individual MarkMatched() call).
                     }
                 }

                 if (index < list.Count())
                 {
                     return list.Item(index);
                 }
             }

             return nullptr;
         }

         int CurIndex() const { return index; }

         void MarkMatched(int i)
         {
             if (i == index)
             {
                 MarkCurrentMatched();
             }
             else
             {
                 SetMatched(i);
             }
         }

         void MarkCurrentMatched()
         {
             Assert(!IsMatched(index));
             SetMatched(index);

             if (lastMatched < 0) // If current starts a matched sequence
             {
                 lastMatched = index;
             }
         }

     private:
         static void VerifySize(const NodeList& list)
         {
             if (list.Count() > INT_MAX / 2) // Limit max size as we used bit0
             {
                 Math::DefaultOverflowPolicy();
             }
         }

         template <class P>
         static void SetMatched(P& node)
         {
             SetNext(node, 0);
         }

         static bool IsMatched(PNode node)
         {
             return !!(reinterpret_cast<UINT_PTR>(node) & 1);
         }

         template <class P>
         static void SetNext(P& node, int next)
         {
             UINT_PTR value = (static_cast<UINT_PTR>(next) << 1) | 1;
             node = reinterpret_cast<PNode>(value);
         }

         static bool IsNext(PNode node, _Out_ int* next)
         {
             UINT_PTR value = reinterpret_cast<UINT_PTR>(node);
             if (value & 1)
             {
                 *next = static_cast<int>(value >> 1);
                 return true;
             }

             return false;
         }

         void SetMatched(int i) { SetMatched(list.Item(i)); }
         bool IsMatched(int i) const { return IsMatched(list.Item(i)); }
         void SetNext(int i, int next) { SetNext(list.Item(i), next); }
         bool IsNext(int i, _Out_ int* next) const { return IsNext(list.Item(i), next); }
     };
 };

 template <class TreeComparer, class Allocator> const double TreeMatch<TreeComparer, Allocator>::ExactMatchDistance = TreeComparer::ExactMatchDistance;
 template <class TreeComparer, class Allocator> const double TreeMatch<TreeComparer, Allocator>::EpsilonDistance = TreeComparer::EpsilonDistance;
 template <class TreeComparer, class Allocator> const double TreeMatch<TreeComparer, Allocator>::MatchingDistance1 = 0.5;
 template <class TreeComparer, class Allocator> const double TreeMatch<TreeComparer, Allocator>::MatchingDistance2 = 1.0;
 template <class TreeComparer, class Allocator> const double TreeMatch<TreeComparer, Allocator>::MatchingDistance3 = 1.5;
 template <class TreeComparer, class Allocator> const double TreeMatch<TreeComparer, Allocator>::MaxDistance = 2.0;

 //-----------------------------------------------------------------------------
 // Represents an edit operation on a tree or a sequence of nodes.
 //-----------------------------------------------------------------------------
 template <class PNode>
 class Edit
 {
 private:
     EditKind kind;
     PNode node1;
     PNode node2;

 public:
     Edit() {}

     //
     //  Insert      nullptr    NewNode
     //  Delete      OldNode nullptr
     //  Move/Update OldNode NewNode
     //
     Edit(EditKind kind, PNode node1, PNode node2) :
         kind(kind), node1(node1), node2(node2)
     {
         Assert((node1 == nullptr) == (kind == EditKind::Insert));
         Assert((node2 == nullptr) == (kind == EditKind::Delete));
     }

     EditKind Kind() const { return kind; }
     PNode OldNode() const { return node1; }
     PNode NewNode() const { return node2; }
 };

 //-----------------------------------------------------------------------------
 // Represents a sequence of tree edits.
 //-----------------------------------------------------------------------------
 template <class TreeComparer, class Allocator>
 class EditScript
 {
 public:
     typedef TreeMatch<TreeComparer, Allocator> TreeMatch;
     typedef typename TreeMatch::PNode PNode;
     typedef typename TreeMatch::NodeList NodeList;
     typedef typename TreeMatch::NodeMap NodeMap;
     typedef JsUtil::List<Edit<PNode>, Allocator, /*isLeaf*/true> EditList;

 private:
     const TreeMatch& match;
     TreeComparer comparer;
     EditList edits;

 public:
     EditScript(Allocator* alloc, const TreeMatch& match) :
         match(match), comparer(match.Comparer()), edits(alloc)
     {
         AddUpdatesInsertsMoves();
         AddDeletes();
     }

     const EditList& Edits() const { return edits; }

 private:
     PNode Root1() const { return match.OldRoot(); }
     PNode Root2() const { return match.NewRoot(); }

     void AddUpdatesInsertsMoves()
     {
         // Breadth-first traversal.
         ProcessNode(Root2());

         JsUtil::Queue<PNode, Allocator> queue(edits.GetAllocator());
         queue.Enqueue(Root2());

         while (!queue.Empty())
         {
             PNode head = queue.Dequeue();
             comparer.MapChildren(head, [&](PNode child)
             {
                 ProcessNode(child);
                 queue.Enqueue(child);
             });
         }
     }

     void ProcessNode(PNode x)
     {
         Assert(comparer.TreesEqual(x, Root2()));

         // NOTE:
         // Our implementation differs from the algorithm described in the paper in following:
         // - We don't update M' and T1 since we don't need the final matching and the transformed tree.
         // - Insert and Move edits don't need to store the offset of the nodes relative to their parents,
         //   so we don't calculate those. Thus we don't need to implement FindPos.
         // - We don't mark nodes "in order" since the marks are only needed by FindPos.

         // a)
         // Let x be the current node in the breadth-first search of T2.
         // Let y = parent(x).
         // Let z be the partner of parent(x) in M'.  (note: we don't need z for insert)
         //
         // NOTE:
         // If we needed z then we would need to be updating M' as we encounter insertions.

         PNode w;
         bool hasPartner = match.TryGetPartnerInTree1(x, &w);

         PNode y;
         bool hasParent = comparer.TryGetParent(x, &y);

         if (!hasPartner)
         {
             // b) If x has no partner in M'.
             //   i. k := FindPos(x)
             //  ii. Append INS((w, a, value(x)), z, k) to E for a new identifier w.
             // iii. Add (w, x) to M' and apply INS((w, a, value(x)), z, k) to T1.
             edits.Add(Edit<PNode>(EditKind::Insert, /*node1*/nullptr, /*node2*/x));

             // NOTE:
             // We don't update M' here.
         }
         else if (hasParent)
         {
             // c) else if x is not a root
             // i. Let w be the partner of x in M', and let v = parent(w) in T1.
             PNode v = comparer.GetParent(w);

             // ii. if value(w) != value(x)
             // A. Append UPD(w, value(x)) to E
             // B. Apply UPD(w, value(x) to T1

             // Let the Comparer decide whether an update should be added to the edit list.
             // The Comparer defines what changes in node values it cares about.
             if (!comparer.ValuesEqual(w, x))
             {
                 edits.Add(Edit<PNode>(EditKind::Update, /*node1*/w, /*node2*/x));
             }

             // If parents of w and x don't match, it's a move.
             // iii. if not (v, y) in M'
             // NOTE: The paper says (y, v) but that seems wrong since M': T1 -> T2 and w,v in T1 and x,y in T2.
             if (!match.Contains(v, y))
             {
                 // A. Let z be the partner of y in M'. (NOTE: z not needed)
                 // B. k := FindPos(x)
                 // C. Append MOV(w, z, k)
                 // D. Apply MOV(w, z, k) to T1
                 edits.Add(Edit<PNode>(EditKind::Move, /*node1*/w, /*node2*/x));
             }
         }

         // d) AlignChildren(w, x)

         // NOTE: If we just applied an INS((w, a, value(x)), z, k) operation on tree T1
         // the newly created node w would have no children. So there is nothing to align.
         if (hasPartner)
         {
             AlignChildren(w, x);
         }
     }

     void AddDeletes()
     {
         // 3. Do a post-order traversal of T1.
         //    a) Let w be the current node in the post-order traversal of T1.
         //    b) If w has no partner in M' then append DEL(w) to E and apply DEL(w) to T1.
         //
         // NOTE: The fact that we haven't updated M' during the Insert phase
         // doesn't affect Delete phase. The original algorithm inserted new node n1 into T1
         // when an insertion INS(n1, n2) was detected. It also added (n1, n2) to M'.
         // Then in Delete phase n1 is visited but nothing is done since it has a partner n2 in M'.
         // Since we don't add n1 into T1, not adding (n1, n2) to M' doesn't affect the Delete phase.

         comparer.MapDescendants(Root1(), [&](PNode w)
         {
             if (!match.HasPartnerInTree2(w))
             {
                 edits.Add(Edit<PNode>(EditKind::Delete, /*node1*/w, /*node2*/nullptr));
             }
         });
     }

     void AlignChildren(PNode w, PNode x)
     {
         Assert(comparer.TreesEqual(w, Root1()));
         Assert(comparer.TreesEqual(x, Root2()));
         Allocator* alloc = edits.GetAllocator();

         // Step 1
         //  Make all children of w and and all children x "out of order"
         //  NOTE: We don't need to mark nodes "in order".

         // Step 2
         //  Let S1 be the sequence of children of w whose partner are children
         //  of x and let S2 be the sequence of children of x whose partner are
         //  children of w.
         NodeList s1(alloc), s2(alloc);
         if (!TryGetMatchedChildren(s1, w, x, [&](PNode e, PNode* partner) { return match.TryGetPartnerInTree2(e, partner); }) ||
             !TryGetMatchedChildren(s2, x, w, [&](PNode e, PNode* partner) { return match.TryGetPartnerInTree1(e, partner); }))
         {
             return;
         }

         // Step 3, 4
         //  Define the function Equal(a,b) to be true if and only if  (a,b) in M'
         //  Let S <- LCS(S1, S2, Equal)
         NodeMap s(alloc);
         {
             LongestCommonSubsequence<Allocator> lcs(alloc, s1.Count(), s2.Count(), [&](int indexA, int indexB)
             {
                 return match.Contains(s1.Item(indexA), s2.Item(indexB));
             });

             lcs.MapMatches([&](int indexA, int indexB)
             {
                 s.AddNew(s1.Item(indexA), s2.Item(indexB));
             });
         }

         // Step 5
         //  For each (a,b) in S, mark nodes a and b "in order"
         //  NOTE: We don't need to mark nodes "in order".

         // Step 6
         //  For each a in S1, b in S2 such that (a,b) in M but (a,b) not in S
         //   (a) k <- FindPos(b)
         //   (b) Append MOV(a,w,k) to E and apply MOV(a,w,k) to T1
         //   (c) Mark a and b "in order"
         //       NOTE: We don't mark nodes "in order".
         s1.Map([&](int index, PNode a)
         {
             PNode b;
             if (match.TryGetPartnerInTree2(a, &b)   // (a,b) in M
                 && comparer.GetParent(b) == x       // => b in S2 since S2 == { b | parent(b) == x && parent(partner(b)) == w }
                 && !ContainsPair(s, a, b))          // (a,b) not in S
             {
                 Assert(comparer.TreesEqual(a, Root1()));
                 Assert(comparer.TreesEqual(b, Root2()));

                 edits.Add(Edit<PNode>(EditKind::Reorder, /*node1*/a, /*node2*/b));
             }
         });
     }

     // Helper: Get the sequence of children of x whose partner are children of y.
     template <class TryGetPartnerFunc>
     bool TryGetMatchedChildren(NodeList& nodes, PNode x, PNode y, const TryGetPartnerFunc& tryGetPartner)
     {
         Assert(nodes.Empty());
         comparer.MapChildren(x, [&](PNode e)
         {
             PNode partner;
             if (tryGetPartner(e, &partner) && comparer.GetParent(partner) == y)
             {
                 nodes.Add(e);
             }
         });
         return !nodes.Empty();
     }

     static bool ContainsPair(const NodeMap& dict, PNode a, PNode b)
     {
         PNode value;
         return dict.TryGetValue(a, &value) && value == b;
     }
 };