| /* CollationElementIterator.java -- Walks through collation elements |
| Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004, 2012 Free Software Foundation |
| |
| This file is part of GNU Classpath. |
| |
| GNU Classpath is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GNU Classpath is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GNU Classpath; see the file COPYING. If not, write to the |
| Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301 USA. |
| |
| Linking this library statically or dynamically with other modules is |
| making a combined work based on this library. Thus, the terms and |
| conditions of the GNU General Public License cover the whole |
| combination. |
| |
| As a special exception, the copyright holders of this library give you |
| permission to link this library with independent modules to produce an |
| executable, regardless of the license terms of these independent |
| modules, and to copy and distribute the resulting executable under |
| terms of your choice, provided that you also meet, for each linked |
| independent module, the terms and conditions of the license of that |
| module. An independent module is a module which is not derived from |
| or based on this library. If you modify this library, you may extend |
| this exception to your version of the library, but you are not |
| obligated to do so. If you do not wish to do so, delete this |
| exception statement from your version. */ |
| |
| |
| package java.text; |
| |
| import gnu.java.lang.CPStringBuilder; |
| |
| import java.util.ArrayList; |
| |
| /* Written using "Java Class Libraries", 2nd edition, plus online |
| * API docs for JDK 1.2 from http://www.javasoft.com. |
| * Status: Believed complete and correct to JDK 1.1. |
| */ |
| |
| /** |
| * This class walks through the character collation elements of a |
| * <code>String</code> as defined by the collation rules in an instance of |
| * <code>RuleBasedCollator</code>. There is no public constructor for |
| * this class. An instance is created by calling the |
| * <code>getCollationElementIterator</code> method on |
| * <code>RuleBasedCollator</code>. |
| * |
| * @author Aaron M. Renn (arenn@urbanophile.com) |
| * @author Tom Tromey (tromey@cygnus.com) |
| * @author Guilhem Lavaux (guilhem.lavaux@free.fr) |
| */ |
| public final class CollationElementIterator |
| { |
| /** |
| * This is a constant value that is returned to indicate that the end of |
| * the string was encountered. |
| */ |
| public static final int NULLORDER = -1; |
| |
| /** |
| * This is the RuleBasedCollator this object was created from. |
| */ |
| RuleBasedCollator collator; |
| |
| /** |
| * This is the String that is being iterated over. |
| */ |
| CharacterIterator text; |
| |
| /** |
| * This is the index into the collation decomposition where we are currently scanning. |
| */ |
| int index; |
| |
| /** |
| * This is the index into the String where we are currently scanning. |
| */ |
| int textIndex; |
| |
| /** |
| * Array containing the collation decomposition of the |
| * text given to the constructor. |
| */ |
| private RuleBasedCollator.CollationElement[] textDecomposition; |
| |
| /** |
| * Array containing the index of the specified block. |
| */ |
| private int[] textIndexes; |
| |
/**
 * Creates an iterator over the collation elements of the given
 * <code>String</code>, computed with the rules of the given
 * <code>RuleBasedCollator</code>.  The text is decomposed immediately.
 *
 * @param collator The <code>RuleBasedCollator</code> used for calculating
 *                 collation values.
 * @param text The <code>String</code> to iterate over.
 */
CollationElementIterator(RuleBasedCollator collator, String text)
{
  // The collator must be in place first: setText() reads it.
  this.collator = collator;
  setText(text);
}
| |
/**
 * Creates an iterator over the collation elements of the characters
 * delivered by the given <code>CharacterIterator</code>, computed with the
 * rules of the given <code>RuleBasedCollator</code>.  The text is read and
 * decomposed immediately.
 *
 * @param collator The <code>RuleBasedCollator</code> used for calculating
 *                 collation values.
 * @param text The character iterator to iterate over.
 */
CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
{
  // The collator must be in place first: setText() reads it.
  this.collator = collator;
  setText(text);
}
| |
| RuleBasedCollator.CollationElement nextBlock() |
| { |
| if (index >= textDecomposition.length) |
| return null; |
| |
| RuleBasedCollator.CollationElement e = textDecomposition[index]; |
| |
| textIndex = textIndexes[index+1]; |
| |
| index++; |
| |
| return e; |
| } |
| |
| RuleBasedCollator.CollationElement previousBlock() |
| { |
| if (index == 0) |
| return null; |
| |
| index--; |
| RuleBasedCollator.CollationElement e = textDecomposition[index]; |
| |
| textIndex = textIndexes[index+1]; |
| |
| return e; |
| } |
| |
| /** |
| * This method returns the collation ordering value of the next character sequence |
| * in the string (it may be an extended character following collation rules). |
| * This method will return <code>NULLORDER</code> if the |
| * end of the string was reached. |
| * |
| * @return The collation ordering value. |
| */ |
| public int next() |
| { |
| RuleBasedCollator.CollationElement e = nextBlock(); |
| |
| if (e == null) |
| return NULLORDER; |
| |
| return e.getValue(); |
| } |
| |
| /** |
| * This method returns the collation ordering value of the previous character |
| * in the string. This method will return <code>NULLORDER</code> if the |
| * beginning of the string was reached. |
| * |
| * @return The collation ordering value. |
| */ |
| public int previous() |
| { |
| RuleBasedCollator.CollationElement e = previousBlock(); |
| |
| if (e == null) |
| return NULLORDER; |
| |
| return e.getValue(); |
| } |
| |
| /** |
| * This method returns the primary order value for the given collation |
| * value. |
| * |
| * @param order The collation value returned from <code>next()</code> or |
| * <code>previous()</code>. |
| * |
| * @return The primary order value of the specified collation value. This is |
| * the high 16 bits. |
| */ |
| public static int primaryOrder(int order) |
| { |
| // From the JDK 1.2 spec. |
| return order >>> 16; |
| } |
| |
| /** |
| * This method resets the internal position pointer to read from the |
| * beginning of the <code>String</code> again. |
| */ |
| public void reset() |
| { |
| index = 0; |
| textIndex = 0; |
| } |
| |
| /** |
| * This method returns the secondary order value for the given collation |
| * value. |
| * |
| * @param order The collation value returned from <code>next()</code> or |
| * <code>previous()</code>. |
| * |
| * @return The secondary order value of the specified collation value. This |
| * is the bits 8-15. |
| */ |
| public static short secondaryOrder(int order) |
| { |
| // From the JDK 1.2 spec. |
| return (short) ((order >>> 8) & 255); |
| } |
| |
| /** |
| * This method returns the tertiary order value for the given collation |
| * value. |
| * |
| * @param order The collation value returned from <code>next()</code> or |
| * <code>previous()</code>. |
| * |
| * @return The tertiary order value of the specified collation value. This |
| * is the low eight bits. |
| */ |
| public static short tertiaryOrder(int order) |
| { |
| // From the JDK 1.2 spec. |
| return (short) (order & 255); |
| } |
| |
| /** |
| * This method sets the <code>String</code> that it is iterating over |
| * to the specified <code>String</code>. |
| * |
| * @param text The new <code>String</code> to iterate over. |
| * |
| * @since 1.2 |
| */ |
| public void setText(String text) |
| { |
| int idx = 0; |
| int idx_idx = 0; |
| int alreadyExpanded = 0; |
| int idxToMove = 0; |
| |
| this.text = new StringCharacterIterator(text); |
| this.index = 0; |
| |
| String work_text = text.intern(); |
| |
| ArrayList<RuleBasedCollator.CollationElement> aElement = new ArrayList<RuleBasedCollator.CollationElement>(); |
| ArrayList<Integer> aIdx = new ArrayList<Integer>(); |
| |
| // Build element collection ordered as they come in "text". |
| while (idx < work_text.length()) |
| { |
| String key, keyOld; |
| |
| Object object = null; |
| int p = 1; |
| |
| // IMPROVE: use a TreeMap with a prefix-ordering rule. |
| keyOld = key = null; |
| do |
| { |
| if (object != null) |
| keyOld = key; |
| key = work_text.substring (idx, idx+p); |
| object = collator.prefix_tree.get (key); |
| if (object != null && idx < alreadyExpanded) |
| { |
| RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object; |
| if (prefix.expansion != null && |
| prefix.expansion.startsWith(work_text.substring(0, idx))) |
| { |
| object = null; |
| key = keyOld; |
| } |
| } |
| p++; |
| } |
| while (idx+p <= work_text.length()); |
| |
| if (object == null) |
| key = keyOld; |
| |
| RuleBasedCollator.CollationElement prefix = |
| (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key); |
| |
| /* |
| * First case: There is no such sequence in the database. |
| * We will have to build one from the context. |
| */ |
| if (prefix == null) |
| { |
| /* |
| * We are dealing with sequences in an expansion. They |
| * are treated as accented characters (tertiary order). |
| */ |
| if (alreadyExpanded > 0) |
| { |
| RuleBasedCollator.CollationElement e = |
| collator.getDefaultAccentedElement (work_text.charAt (idx)); |
| |
| aElement.add (e); |
| aIdx.add (Integer.valueOf(idx_idx)); |
| idx++; |
| alreadyExpanded--; |
| if (alreadyExpanded == 0) |
| { |
| /* There is not any characters left in the expansion set. |
| * We can increase the pointer in the source string. |
| */ |
| idx_idx += idxToMove; |
| idxToMove = 0; |
| } |
| else |
| idx_idx++; |
| } |
| else |
| { |
| /* This is a normal character. */ |
| RuleBasedCollator.CollationElement e = |
| collator.getDefaultElement (work_text.charAt (idx)); |
| Integer iRef = Integer.valueOf(idx_idx); |
| |
| /* Don't forget to mark it as a special sequence so the |
| * string can be ordered. |
| */ |
| aElement.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ); |
| aIdx.add (iRef); |
| aElement.add (e); |
| aIdx.add (iRef); |
| idx_idx++; |
| idx++; |
| } |
| continue; |
| } |
| |
| /* |
| * Second case: Here we have found a matching sequence. |
| * Here we have an expansion string prepend it to the "work text" and |
| * add the corresponding sorting element. We must also mark |
| */ |
| if (prefix.expansion != null) |
| { |
| work_text = prefix.expansion |
| + work_text.substring (idx+prefix.key.length()); |
| idx = 0; |
| aElement.add (prefix); |
| aIdx.add (Integer.valueOf(idx_idx)); |
| if (alreadyExpanded == 0) |
| idxToMove = prefix.key.length(); |
| alreadyExpanded += prefix.expansion.length()-prefix.key.length(); |
| } |
| else |
| { |
| /* Third case: the simplest. We have got the prefix and it |
| * has not to be expanded. |
| */ |
| aElement.add (prefix); |
| aIdx.add (Integer.valueOf(idx_idx)); |
| idx += prefix.key.length(); |
| /* If the sequence is in an expansion, we must decrease the |
| * counter. |
| */ |
| if (alreadyExpanded > 0) |
| { |
| alreadyExpanded -= prefix.key.length(); |
| if (alreadyExpanded == 0) |
| { |
| idx_idx += idxToMove; |
| idxToMove = 0; |
| } |
| } |
| else |
| idx_idx += prefix.key.length(); |
| } |
| } |
| |
| textDecomposition = aElement.toArray(new RuleBasedCollator.CollationElement[aElement.size()]); |
| textIndexes = new int[aIdx.size()+1]; |
| for (int i = 0; i < aIdx.size(); i++) |
| { |
| textIndexes[i] = aIdx.get(i).intValue(); |
| } |
| textIndexes[aIdx.size()] = text.length(); |
| } |
| |
| /** |
| * This method sets the <code>String</code> that it is iterating over |
| * to the <code>String</code> represented by the specified |
| * <code>CharacterIterator</code>. |
| * |
| * @param source The <code>CharacterIterator</code> containing the new |
| * <code>String</code> to iterate over. |
| */ |
| public void setText(CharacterIterator source) |
| { |
| CPStringBuilder expand = new CPStringBuilder(); |
| |
| // For now assume we read from the beginning of the string. |
| for (char c = source.first(); |
| c != CharacterIterator.DONE; |
| c = source.next()) |
| expand.append(c); |
| |
| setText(expand.toString()); |
| } |
| |
| /** |
| * This method returns the current offset into the <code>String</code> |
| * that is being iterated over. |
| * |
| * @return The iteration index position. |
| * |
| * @since 1.2 |
| */ |
| public int getOffset() |
| { |
| return textIndex; |
| } |
| |
| /** |
| * This method sets the iteration index position into the current |
| * <code>String</code> to the specified value. This value must not |
| * be negative and must not be greater than the last index position |
| * in the <code>String</code>. |
| * |
| * @param offset The new iteration index position. |
| * |
| * @exception IllegalArgumentException If the new offset is not valid. |
| */ |
| public void setOffset(int offset) |
| { |
| if (offset < 0) |
| throw new IllegalArgumentException("Negative offset: " + offset); |
| |
| if (offset > (text.getEndIndex() - 1)) |
| throw new IllegalArgumentException("Offset too large: " + offset); |
| |
| for (index = 0; index < textDecomposition.length; index++) |
| { |
| if (offset <= textIndexes[index]) |
| break; |
| } |
| /* |
| * As textIndexes[0] == 0, we should not have to take care whether index is |
| * greater than 0. It is always. |
| */ |
| if (textIndexes[index] == offset) |
| textIndex = offset; |
| else |
| textIndex = textIndexes[index-1]; |
| } |
| |
| /** |
| * This method returns the maximum length of any expansion sequence that |
| * ends with the specified collation order value. (Whatever that means). |
| * |
| * @param value The collation order value |
| * |
| * @return The maximum length of an expansion sequence. |
| */ |
| public int getMaxExpansion(int value) |
| { |
| return 1; |
| } |
| } |