| // Copyright 2013 the V8 project authors. All rights reserved. | 
 | // Redistribution and use in source and binary forms, with or without | 
 | // modification, are permitted provided that the following conditions are | 
 | // met: | 
 | // | 
 | //     * Redistributions of source code must retain the above copyright | 
 | //       notice, this list of conditions and the following disclaimer. | 
 | //     * Redistributions in binary form must reproduce the above | 
 | //       copyright notice, this list of conditions and the following | 
 | //       disclaimer in the documentation and/or other materials provided | 
 | //       with the distribution. | 
 | //     * Neither the name of Google Inc. nor the names of its | 
 | //       contributors may be used to endorse or promote products derived | 
 | //       from this software without specific prior written permission. | 
 | // | 
 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 |  | 
 | #ifndef V8_URI_H_ | 
 | #define V8_URI_H_ | 
 |  | 
 | #include "v8.h" | 
 |  | 
 | #include "string-search.h" | 
 | #include "v8utils.h" | 
 | #include "v8conversions.h" | 
 |  | 
 | namespace v8 { | 
 | namespace internal { | 
 |  | 
 |  | 
 | template <typename Char> | 
 | static INLINE(Vector<const Char> GetCharVector(Handle<String> string)); | 
 |  | 
 |  | 
 | template <> | 
 | Vector<const uint8_t> GetCharVector(Handle<String> string) { | 
 |   String::FlatContent flat = string->GetFlatContent(); | 
 |   ASSERT(flat.IsAscii()); | 
 |   return flat.ToOneByteVector(); | 
 | } | 
 |  | 
 |  | 
 | template <> | 
 | Vector<const uc16> GetCharVector(Handle<String> string) { | 
 |   String::FlatContent flat = string->GetFlatContent(); | 
 |   ASSERT(flat.IsTwoByte()); | 
 |   return flat.ToUC16Vector(); | 
 | } | 
 |  | 
 |  | 
 | class URIUnescape : public AllStatic { | 
 |  public: | 
 |   template<typename Char> | 
 |   static Handle<String> Unescape(Isolate* isolate, Handle<String> source); | 
 |  | 
 |  private: | 
 |   static const signed char kHexValue['g']; | 
 |  | 
 |   template<typename Char> | 
 |   static Handle<String> UnescapeSlow( | 
 |       Isolate* isolate, Handle<String> string, int start_index); | 
 |  | 
 |   static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2)); | 
 |  | 
 |   template <typename Char> | 
 |   static INLINE(int UnescapeChar(Vector<const Char> vector, | 
 |                                  int i, | 
 |                                  int length, | 
 |                                  int* step)); | 
 | }; | 
 |  | 
 |  | 
 | const signed char URIUnescape::kHexValue[] = { | 
 |     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
 |     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
 |     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
 |     -0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, | 
 |     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
 |     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
 |     -1, 10, 11, 12, 13, 14, 15 }; | 
 |  | 
 |  | 
 | template<typename Char> | 
 | Handle<String> URIUnescape::Unescape(Isolate* isolate, Handle<String> source) { | 
 |   int index; | 
 |   { DisallowHeapAllocation no_allocation; | 
 |     StringSearch<uint8_t, Char> search(isolate, STATIC_ASCII_VECTOR("%")); | 
 |     index = search.Search(GetCharVector<Char>(source), 0); | 
 |     if (index < 0) return source; | 
 |   } | 
 |   return UnescapeSlow<Char>(isolate, source, index); | 
 | } | 
 |  | 
 |  | 
 | template <typename Char> | 
 | Handle<String> URIUnescape::UnescapeSlow( | 
 |     Isolate* isolate, Handle<String> string, int start_index) { | 
 |   bool one_byte = true; | 
 |   int length = string->length(); | 
 |  | 
 |   int unescaped_length = 0; | 
 |   { DisallowHeapAllocation no_allocation; | 
 |     Vector<const Char> vector = GetCharVector<Char>(string); | 
 |     for (int i = start_index; i < length; unescaped_length++) { | 
 |       int step; | 
 |       if (UnescapeChar(vector, i, length, &step) > | 
 |               String::kMaxOneByteCharCode) { | 
 |         one_byte = false; | 
 |       } | 
 |       i += step; | 
 |     } | 
 |   } | 
 |  | 
 |   ASSERT(start_index < length); | 
 |   Handle<String> first_part = | 
 |       isolate->factory()->NewProperSubString(string, 0, start_index); | 
 |  | 
 |   int dest_position = 0; | 
 |   Handle<String> second_part; | 
 |   if (one_byte) { | 
 |     Handle<SeqOneByteString> dest = | 
 |         isolate->factory()->NewRawOneByteString(unescaped_length); | 
 |     DisallowHeapAllocation no_allocation; | 
 |     Vector<const Char> vector = GetCharVector<Char>(string); | 
 |     for (int i = start_index; i < length; dest_position++) { | 
 |       int step; | 
 |       dest->SeqOneByteStringSet(dest_position, | 
 |                                 UnescapeChar(vector, i, length, &step)); | 
 |       i += step; | 
 |     } | 
 |     second_part = dest; | 
 |   } else { | 
 |     Handle<SeqTwoByteString> dest = | 
 |         isolate->factory()->NewRawTwoByteString(unescaped_length); | 
 |     DisallowHeapAllocation no_allocation; | 
 |     Vector<const Char> vector = GetCharVector<Char>(string); | 
 |     for (int i = start_index; i < length; dest_position++) { | 
 |       int step; | 
 |       dest->SeqTwoByteStringSet(dest_position, | 
 |                                 UnescapeChar(vector, i, length, &step)); | 
 |       i += step; | 
 |     } | 
 |     second_part = dest; | 
 |   } | 
 |   return isolate->factory()->NewConsString(first_part, second_part); | 
 | } | 
 |  | 
 |  | 
 | int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) { | 
 |   if (character1 > 'f') return -1; | 
 |   int hi = kHexValue[character1]; | 
 |   if (hi == -1) return -1; | 
 |   if (character2 > 'f') return -1; | 
 |   int lo = kHexValue[character2]; | 
 |   if (lo == -1) return -1; | 
 |   return (hi << 4) + lo; | 
 | } | 
 |  | 
 |  | 
 | template <typename Char> | 
 | int URIUnescape::UnescapeChar(Vector<const Char> vector, | 
 |                               int i, | 
 |                               int length, | 
 |                               int* step) { | 
 |   uint16_t character = vector[i]; | 
 |   int32_t hi = 0; | 
 |   int32_t lo = 0; | 
 |   if (character == '%' && | 
 |       i <= length - 6 && | 
 |       vector[i + 1] == 'u' && | 
 |       (hi = TwoDigitHex(vector[i + 2], | 
 |                         vector[i + 3])) != -1 && | 
 |       (lo = TwoDigitHex(vector[i + 4], | 
 |                         vector[i + 5])) != -1) { | 
 |     *step = 6; | 
 |     return (hi << 8) + lo; | 
 |   } else if (character == '%' && | 
 |       i <= length - 3 && | 
 |       (lo = TwoDigitHex(vector[i + 1], | 
 |                         vector[i + 2])) != -1) { | 
 |     *step = 3; | 
 |     return lo; | 
 |   } else { | 
 |     *step = 1; | 
 |     return character; | 
 |   } | 
 | } | 
 |  | 
 |  | 
 | class URIEscape : public AllStatic { | 
 |  public: | 
 |   template<typename Char> | 
 |   static Handle<String> Escape(Isolate* isolate, Handle<String> string); | 
 |  | 
 |  private: | 
 |   static const char kHexChars[17]; | 
 |   static const char kNotEscaped[256]; | 
 |  | 
 |   static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; } | 
 | }; | 
 |  | 
 |  | 
 | const char URIEscape::kHexChars[] = "0123456789ABCDEF"; | 
 |  | 
 |  | 
 | // kNotEscaped is generated by the following: | 
 | // | 
 | // #!/bin/perl | 
 | // for (my $i = 0; $i < 256; $i++) { | 
 | //   print "\n" if $i % 16 == 0; | 
 | //   my $c = chr($i); | 
 | //   my $escaped = 1; | 
 | //   $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#; | 
 | //   print $escaped ? "0, " : "1, "; | 
 | // } | 
 |  | 
 | const char URIEscape::kNotEscaped[] = { | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, | 
 |     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | 
 |     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 
 |     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, | 
 |     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 
 |     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 
 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; | 
 |  | 
 |  | 
 | template<typename Char> | 
 | Handle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) { | 
 |   ASSERT(string->IsFlat()); | 
 |   int escaped_length = 0; | 
 |   int length = string->length(); | 
 |  | 
 |   { DisallowHeapAllocation no_allocation; | 
 |     Vector<const Char> vector = GetCharVector<Char>(string); | 
 |     for (int i = 0; i < length; i++) { | 
 |       uint16_t c = vector[i]; | 
 |       if (c >= 256) { | 
 |         escaped_length += 6; | 
 |       } else if (IsNotEscaped(c)) { | 
 |         escaped_length++; | 
 |       } else { | 
 |         escaped_length += 3; | 
 |       } | 
 |  | 
 |       // We don't allow strings that are longer than a maximal length. | 
 |       ASSERT(String::kMaxLength < 0x7fffffff - 6);  // Cannot overflow. | 
 |       if (escaped_length > String::kMaxLength) { | 
 |         isolate->context()->mark_out_of_memory(); | 
 |         return Handle<String>::null(); | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   // No length change implies no change.  Return original string if no change. | 
 |   if (escaped_length == length) return string; | 
 |  | 
 |   Handle<SeqOneByteString> dest = | 
 |       isolate->factory()->NewRawOneByteString(escaped_length); | 
 |   int dest_position = 0; | 
 |  | 
 |   { DisallowHeapAllocation no_allocation; | 
 |     Vector<const Char> vector = GetCharVector<Char>(string); | 
 |     for (int i = 0; i < length; i++) { | 
 |       uint16_t c = vector[i]; | 
 |       if (c >= 256) { | 
 |         dest->SeqOneByteStringSet(dest_position, '%'); | 
 |         dest->SeqOneByteStringSet(dest_position+1, 'u'); | 
 |         dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]); | 
 |         dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]); | 
 |         dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]); | 
 |         dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]); | 
 |         dest_position += 6; | 
 |       } else if (IsNotEscaped(c)) { | 
 |         dest->SeqOneByteStringSet(dest_position, c); | 
 |         dest_position++; | 
 |       } else { | 
 |         dest->SeqOneByteStringSet(dest_position, '%'); | 
 |         dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]); | 
 |         dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]); | 
 |         dest_position += 3; | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   return dest; | 
 | } | 
 |  | 
 | } }  // namespace v8::internal | 
 |  | 
 | #endif  // V8_URI_H_ |