Source/WebCore/html/parser/AtomicHTMLToken.h - external/Webkit - Git at Google

 /*
  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #ifndef AtomicHTMLToken_h
 #define AtomicHTMLToken_h

 #include "Attribute.h"
 #include "CompactHTMLToken.h"
 #include "HTMLToken.h"
 #include <wtf/RefCounted.h>
 #include <wtf/RefPtr.h>

 namespace WebCore {

 class AtomicHTMLToken {
     WTF_MAKE_NONCOPYABLE(AtomicHTMLToken);
 public:

     bool forceQuirks() const
     {
         ASSERT(m_type == HTMLToken::DOCTYPE);
         return m_doctypeData->m_forceQuirks;
     }

     HTMLToken::Type type() const { return m_type; }

     const AtomicString& name() const
     {
         ASSERT(usesName());
         return m_name;
     }

     void setName(const AtomicString& name)
     {
         ASSERT(usesName());
         m_name = name;
     }

     bool selfClosing() const
     {
         ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag);
         return m_selfClosing;
     }

     Attribute* getAttributeItem(const QualifiedName& attributeName)
     {
         ASSERT(usesAttributes());
         return findAttributeInVector(m_attributes, attributeName);
     }

     Vector<Attribute>& attributes()
     {
         ASSERT(usesAttributes());
         return m_attributes;
     }

     const Vector<Attribute>& attributes() const
     {
         ASSERT(usesAttributes());
         return m_attributes;
     }

     const UChar* characters() const
     {
         ASSERT(m_type == HTMLToken::Character);
         return m_externalCharacters;
     }

     size_t charactersLength() const
     {
         ASSERT(m_type == HTMLToken::Character);
         return m_externalCharactersLength;
     }

     bool isAll8BitData() const
     {
         return m_isAll8BitData;
     }

     const String& comment() const
     {
         ASSERT(m_type == HTMLToken::Comment);
         return m_data;
     }

     // FIXME: Distinguish between a missing public identifer and an empty one.
     Vector<UChar>& publicIdentifier() const
     {
         ASSERT(m_type == HTMLToken::DOCTYPE);
         return m_doctypeData->m_publicIdentifier;
     }

     // FIXME: Distinguish between a missing system identifer and an empty one.
     Vector<UChar>& systemIdentifier() const
     {
         ASSERT(m_type == HTMLToken::DOCTYPE);
         return m_doctypeData->m_systemIdentifier;
     }

     explicit AtomicHTMLToken(HTMLToken& token)
         : m_type(token.type())
     {
         switch (m_type) {
         case HTMLToken::Uninitialized:
             ASSERT_NOT_REACHED();
             break;
         case HTMLToken::DOCTYPE:
             m_name = AtomicString(token.name());
             m_doctypeData = token.releaseDoctypeData();
             break;
         case HTMLToken::EndOfFile:
             break;
         case HTMLToken::StartTag:
         case HTMLToken::EndTag: {
             m_selfClosing = token.selfClosing();
             m_name = AtomicString(token.name());
             initializeAttributes(token.attributes());
             break;
         }
         case HTMLToken::Comment:
             if (token.isAll8BitData())
                 m_data = String::make8BitFrom16BitSource(token.comment());
             else
                 m_data = String(token.comment());
             break;
         case HTMLToken::Character:
             m_externalCharacters = token.characters().data();
             m_externalCharactersLength = token.characters().size();
             m_isAll8BitData = token.isAll8BitData();
             break;
         }
     }

 #if ENABLE(THREADED_HTML_PARSER)

     explicit AtomicHTMLToken(const CompactHTMLToken& token)
         : m_type(token.type())
     {
         switch (m_type) {
         case HTMLToken::Uninitialized:
             ASSERT_NOT_REACHED();
             break;
         case HTMLToken::DOCTYPE:
             m_name = token.data().asString();
             m_doctypeData = adoptPtr(new DoctypeData());
             m_doctypeData->m_hasPublicIdentifier = true;
             append(m_doctypeData->m_publicIdentifier, token.publicIdentifier().asString());
             m_doctypeData->m_hasSystemIdentifier = true;
             append(m_doctypeData->m_systemIdentifier, token.systemIdentifier());
             m_doctypeData->m_forceQuirks = token.doctypeForcesQuirks();
             break;
         case HTMLToken::EndOfFile:
             break;
         case HTMLToken::StartTag:
             m_attributes.reserveInitialCapacity(token.attributes().size());
             for (Vector<CompactHTMLToken::Attribute>::const_iterator it = token.attributes().begin(); it != token.attributes().end(); ++it) {
                 QualifiedName name(nullAtom, it->name.asString(), nullAtom);
                 // FIXME: This is N^2 for the number of attributes.
                 if (!findAttributeInVector(m_attributes, name))
                     m_attributes.append(Attribute(name, it->value));
             }
             // Fall through!
         case HTMLToken::EndTag:
             m_selfClosing = token.selfClosing();
             m_name = token.data().asString();
             break;
         case HTMLToken::Comment:
             m_data = token.data().asString();
             break;
         case HTMLToken::Character: {
             const String& string = token.data().asString();
             m_externalCharacters = string.characters();
             m_externalCharactersLength = string.length();
             m_isAll8BitData = token.isAll8BitData();
             // FIXME: We would like a stronger ASSERT here:
             // ASSERT(string.is8Bit() == token.isAll8BitData());
             // but currently that fires, likely due to bugs in HTMLTokenizer
             // not setting isAll8BitData in all the times it could.
             ASSERT(!token.isAll8BitData() || string.is8Bit());
             break;
         }
         }
     }

 #endif

     explicit AtomicHTMLToken(HTMLToken::Type type)
         : m_type(type)
         , m_externalCharacters(0)
         , m_externalCharactersLength(0)
         , m_isAll8BitData(false)
         , m_selfClosing(false)
     {
     }

     AtomicHTMLToken(HTMLToken::Type type, const AtomicString& name, const Vector<Attribute>& attributes = Vector<Attribute>())
         : m_type(type)
         , m_name(name)
         , m_externalCharacters(0)
         , m_externalCharactersLength(0)
         , m_isAll8BitData(false)
         , m_selfClosing(false)
         , m_attributes(attributes)
     {
         ASSERT(usesName());
     }

 private:
     HTMLToken::Type m_type;

     void initializeAttributes(const HTMLToken::AttributeList& attributes);
     QualifiedName nameForAttribute(const HTMLToken::Attribute&) const;

     bool usesName() const;

     bool usesAttributes() const;

     // "name" for DOCTYPE, StartTag, and EndTag
     AtomicString m_name;

     // "data" for Comment
     String m_data;

     // "characters" for Character
     //
     // We don't want to copy the the characters out of the Token, so we
     // keep a pointer to its buffer instead. This buffer is owned by the
     // Token and causes a lifetime dependence between these objects.
     //
     // FIXME: Add a mechanism for "internalizing" the characters when the
     //        HTMLToken is destructed.
     const UChar* m_externalCharacters;
     size_t m_externalCharactersLength;
     bool m_isAll8BitData;

     // For DOCTYPE
     OwnPtr<DoctypeData> m_doctypeData;

     // For StartTag and EndTag
     bool m_selfClosing;

     Vector<Attribute> m_attributes;
 };

 inline void AtomicHTMLToken::initializeAttributes(const HTMLToken::AttributeList& attributes)
 {
     size_t size = attributes.size();
     if (!size)
         return;

     m_attributes.clear();
     m_attributes.reserveInitialCapacity(size);
     for (size_t i = 0; i < size; ++i) {
         const HTMLToken::Attribute& attribute = attributes[i];
         if (attribute.name.isEmpty())
             continue;

         // FIXME: We should be able to add the following ASSERT once we fix
         // https://bugs.webkit.org/show_bug.cgi?id=62971
         //   ASSERT(attribute.nameRange.start);
         ASSERT(attribute.nameRange.end);
         ASSERT(attribute.valueRange.start);
         ASSERT(attribute.valueRange.end);

         AtomicString value(attribute.value);
         const QualifiedName& name = nameForAttribute(attribute);
         // FIXME: This is N^2 for the number of attributes.
         if (!findAttributeInVector(m_attributes, name))
             m_attributes.append(Attribute(name, value));
     }
 }

 }

 #endif
	/*
	* Copyright (C) 2013 Google, Inc. All Rights Reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#ifndef AtomicHTMLToken_h
	#define AtomicHTMLToken_h

	#include "Attribute.h"
	#include "CompactHTMLToken.h"
	#include "HTMLToken.h"
	#include <wtf/RefCounted.h>
	#include <wtf/RefPtr.h>

	namespace WebCore {

	class AtomicHTMLToken {
	WTF_MAKE_NONCOPYABLE(AtomicHTMLToken);
	public:

	bool forceQuirks() const
	{
	ASSERT(m_type == HTMLToken::DOCTYPE);
	return m_doctypeData->m_forceQuirks;
	}

	HTMLToken::Type type() const { return m_type; }

	const AtomicString& name() const
	{
	ASSERT(usesName());
	return m_name;
	}

	void setName(const AtomicString& name)
	{
	ASSERT(usesName());
	m_name = name;
	}

	bool selfClosing() const
	{
	ASSERT(m_type == HTMLToken::StartTag \|\| m_type == HTMLToken::EndTag);
	return m_selfClosing;
	}

	Attribute* getAttributeItem(const QualifiedName& attributeName)
	{
	ASSERT(usesAttributes());
	return findAttributeInVector(m_attributes, attributeName);
	}

	Vector<Attribute>& attributes()
	{
	ASSERT(usesAttributes());
	return m_attributes;
	}

	const Vector<Attribute>& attributes() const
	{
	ASSERT(usesAttributes());
	return m_attributes;
	}

	const UChar* characters() const
	{
	ASSERT(m_type == HTMLToken::Character);
	return m_externalCharacters;
	}

	size_t charactersLength() const
	{
	ASSERT(m_type == HTMLToken::Character);
	return m_externalCharactersLength;
	}

	bool isAll8BitData() const
	{
	return m_isAll8BitData;
	}

	const String& comment() const
	{
	ASSERT(m_type == HTMLToken::Comment);
	return m_data;
	}

	// FIXME: Distinguish between a missing public identifer and an empty one.
	Vector<UChar>& publicIdentifier() const
	{
	ASSERT(m_type == HTMLToken::DOCTYPE);
	return m_doctypeData->m_publicIdentifier;
	}

	// FIXME: Distinguish between a missing system identifer and an empty one.
	Vector<UChar>& systemIdentifier() const
	{
	ASSERT(m_type == HTMLToken::DOCTYPE);
	return m_doctypeData->m_systemIdentifier;
	}

	explicit AtomicHTMLToken(HTMLToken& token)
	: m_type(token.type())
	{
	switch (m_type) {
	case HTMLToken::Uninitialized:
	ASSERT_NOT_REACHED();
	break;
	case HTMLToken::DOCTYPE:
	m_name = AtomicString(token.name());
	m_doctypeData = token.releaseDoctypeData();
	break;
	case HTMLToken::EndOfFile:
	break;
	case HTMLToken::StartTag:
	case HTMLToken::EndTag: {
	m_selfClosing = token.selfClosing();
	m_name = AtomicString(token.name());
	initializeAttributes(token.attributes());
	break;
	}
	case HTMLToken::Comment:
	if (token.isAll8BitData())
	m_data = String::make8BitFrom16BitSource(token.comment());
	else
	m_data = String(token.comment());
	break;
	case HTMLToken::Character:
	m_externalCharacters = token.characters().data();
	m_externalCharactersLength = token.characters().size();
	m_isAll8BitData = token.isAll8BitData();
	break;
	}
	}

	#if ENABLE(THREADED_HTML_PARSER)

	explicit AtomicHTMLToken(const CompactHTMLToken& token)
	: m_type(token.type())
	{
	switch (m_type) {
	case HTMLToken::Uninitialized:
	ASSERT_NOT_REACHED();
	break;
	case HTMLToken::DOCTYPE:
	m_name = token.data().asString();
	m_doctypeData = adoptPtr(new DoctypeData());
	m_doctypeData->m_hasPublicIdentifier = true;
	append(m_doctypeData->m_publicIdentifier, token.publicIdentifier().asString());
	m_doctypeData->m_hasSystemIdentifier = true;
	append(m_doctypeData->m_systemIdentifier, token.systemIdentifier());
	m_doctypeData->m_forceQuirks = token.doctypeForcesQuirks();
	break;
	case HTMLToken::EndOfFile:
	break;
	case HTMLToken::StartTag:
	m_attributes.reserveInitialCapacity(token.attributes().size());
	for (Vector<CompactHTMLToken::Attribute>::const_iterator it = token.attributes().begin(); it != token.attributes().end(); ++it) {
	QualifiedName name(nullAtom, it->name.asString(), nullAtom);
	// FIXME: This is N^2 for the number of attributes.
	if (!findAttributeInVector(m_attributes, name))
	m_attributes.append(Attribute(name, it->value));
	}
	// Fall through!
	case HTMLToken::EndTag:
	m_selfClosing = token.selfClosing();
	m_name = token.data().asString();
	break;
	case HTMLToken::Comment:
	m_data = token.data().asString();
	break;
	case HTMLToken::Character: {
	const String& string = token.data().asString();
	m_externalCharacters = string.characters();
	m_externalCharactersLength = string.length();
	m_isAll8BitData = token.isAll8BitData();
	// FIXME: We would like a stronger ASSERT here:
	// ASSERT(string.is8Bit() == token.isAll8BitData());
	// but currently that fires, likely due to bugs in HTMLTokenizer
	// not setting isAll8BitData in all the times it could.
	ASSERT(!token.isAll8BitData() \|\| string.is8Bit());
	break;
	}
	}
	}

	#endif

	explicit AtomicHTMLToken(HTMLToken::Type type)
	: m_type(type)
	, m_externalCharacters(0)
	, m_externalCharactersLength(0)
	, m_isAll8BitData(false)
	, m_selfClosing(false)
	{
	}

	AtomicHTMLToken(HTMLToken::Type type, const AtomicString& name, const Vector<Attribute>& attributes = Vector<Attribute>())
	: m_type(type)
	, m_name(name)
	, m_externalCharacters(0)
	, m_externalCharactersLength(0)
	, m_isAll8BitData(false)
	, m_selfClosing(false)
	, m_attributes(attributes)
	{
	ASSERT(usesName());
	}

	private:
	HTMLToken::Type m_type;

	void initializeAttributes(const HTMLToken::AttributeList& attributes);
	QualifiedName nameForAttribute(const HTMLToken::Attribute&) const;

	bool usesName() const;

	bool usesAttributes() const;

	// "name" for DOCTYPE, StartTag, and EndTag
	AtomicString m_name;

	// "data" for Comment
	String m_data;

	// "characters" for Character
	//
	// We don't want to copy the the characters out of the Token, so we
	// keep a pointer to its buffer instead. This buffer is owned by the
	// Token and causes a lifetime dependence between these objects.
	//
	// FIXME: Add a mechanism for "internalizing" the characters when the
	// HTMLToken is destructed.
	const UChar* m_externalCharacters;
	size_t m_externalCharactersLength;
	bool m_isAll8BitData;

	// For DOCTYPE
	OwnPtr<DoctypeData> m_doctypeData;

	// For StartTag and EndTag
	bool m_selfClosing;

	Vector<Attribute> m_attributes;
	};

	inline void AtomicHTMLToken::initializeAttributes(const HTMLToken::AttributeList& attributes)
	{
	size_t size = attributes.size();
	if (!size)
	return;

	m_attributes.clear();
	m_attributes.reserveInitialCapacity(size);
	for (size_t i = 0; i < size; ++i) {
	const HTMLToken::Attribute& attribute = attributes[i];
	if (attribute.name.isEmpty())
	continue;

	// FIXME: We should be able to add the following ASSERT once we fix
	// https://bugs.webkit.org/show_bug.cgi?id=62971
	// ASSERT(attribute.nameRange.start);
	ASSERT(attribute.nameRange.end);
	ASSERT(attribute.valueRange.start);
	ASSERT(attribute.valueRange.end);

	AtomicString value(attribute.value);
	const QualifiedName& name = nameForAttribute(attribute);
	// FIXME: This is N^2 for the number of attributes.
	if (!findAttributeInVector(m_attributes, name))
	m_attributes.append(Attribute(name, value));
	}
	}

	}

	#endif