| /* |
| * Copyright (C) 2005-2018 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| * THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #import <WebKitLegacy/WebNSDataExtras.h> |
| |
| #import <wtf/Assertions.h> |
| #import <wtf/RetainPtr.h> |
| #import <wtf/cocoa/SpanCocoa.h> |
| #import <wtf/text/ParsingUtilities.h> |
| #import <wtf/text/StringCommon.h> |
| |
| @implementation NSData (WebNSDataExtras) |
| |
// Sniffs the beginning of the receiver for an RSS or Atom root element.
//
// Returns @"application/rss+xml" when the first element tag starts with "<rss", or
// starts with "<rdf" and a later tag in the scanned window starts with "<channel".
// Returns @"application/atom+xml" when the first element tag starts with "<feed".
// Returns nil otherwise, including when the data is too short to scan.
// All tag comparisons ignore ASCII case.
- (NSString *)_webkit_guessedMIMETypeForXML
{
    constexpr size_t channelTagLength = 7;

    auto bytes = span(self);

    // At most WEB_GUESS_MIME_TYPE_PEEK_LENGTH leading bytes are ever examined.
    size_t peekLength = std::min<size_t>(bytes.size(), WEB_GUESS_MIME_TYPE_PEEK_LENGTH);

    // The scan window drops the last (channelTagLength - 1) bytes of the peeked data.
    // This is unsigned arithmetic: for data shorter than that, the subtraction would
    // wrap around to a huge count and span::first() would be asked for more elements
    // than exist. Bail out before subtracting; nothing would be scanned anyway.
    if (peekLength < channelTagLength)
        return nil;
    bytes = bytes.first(peekLength - (channelTagLength - 1));

    // NOTE(review): `hit` below is a subspan of this truncated window, so a tag that
    // begins inside the window but extends past its end cannot match. Confirm this is
    // the intended behavior.

    bool foundRDF = false;

    while (!bytes.empty()) {
        // Look for a "<".
        auto hitIndex = WTF::find(bytes, '<');
        if (hitIndex == notFound)
            break;

        // We are trying to identify RSS or Atom. RSS has a top-level
        // element of either <rss> or <rdf>. However, there are
        // non-RSS RDF files, so in the case of <rdf> we further look
        // for a <channel> element. In the case of an Atom file, a
        // top-level <feed> element is all we need to see. Only tags
        // starting with <? or <! can precede the root element. We
        // bail if we don't find an <rss>, <feed> or <rdf> element
        // right after those.

        auto hit = bytes.subspan(hitIndex);
        if (foundRDF) {
            // Once <rdf> has been seen, every other tag is skipped until <channel shows up.
            if (spanHasPrefixIgnoringASCIICase(hit, "<channel"_span))
                return @"application/rss+xml";
        } else if (spanHasPrefixIgnoringASCIICase(hit, "<rdf"_span))
            foundRDF = true;
        else if (spanHasPrefixIgnoringASCIICase(hit, "<rss"_span))
            return @"application/rss+xml";
        else if (spanHasPrefixIgnoringASCIICase(hit, "<feed"_span))
            return @"application/atom+xml";
        else if (!spanHasPrefixIgnoringASCIICase(hit, "<?"_span) && !spanHasPrefixIgnoringASCIICase(hit, "<!"_span))
            return nil;

        // Skip the "<" and continue.
        skip(bytes, hitIndex + 1);
    }

    return nil;
}
| |
// Guesses a MIME type from the receiver's content.
//
// Checks are tried in this order and the first match wins:
//   1. -_webkit_guessedMIMETypeForXML (RSS / Atom).
//   2. An HTML-looking tag ("<html>", "<a ", "<script", "<title>") -> @"text/html".
//   3. The literal text "text/html" inside the content -> @"text/html".
//   4. A "BEGIN:VCARD" / "BEGIN:VCALENDAR" prefix -> @"text/vcard" / @"text/calendar".
//   5. Only printable ASCII plus tab, CR and LF in the whole data -> @"text/plain".
//   6. The JPEG magic number FF D8 FF E0 as prefix -> @"image/jpeg".
// Returns nil when nothing matches. Checks 2 and 3 ignore ASCII case and only look at
// the first WEB_GUESS_MIME_TYPE_PEEK_LENGTH bytes.
- (NSString *)_webkit_guessedMIMEType
{
    constexpr size_t scriptTagLength = 7;
    constexpr size_t textHTMLLength = 9;

    NSString *MIMEType = [self _webkit_guessedMIMETypeForXML];
    if ([MIMEType length])
        return MIMEType;

    auto bytes = span(self);
    size_t peekLength = std::min<size_t>(bytes.size(), WEB_GUESS_MIME_TYPE_PEEK_LENGTH);

    // Returns the peeked bytes minus the last (reservedLength - 1) of them. The size
    // arithmetic is unsigned, so data shorter than that must be handled explicitly:
    // subtracting first would wrap around to a huge count and span::first() would be
    // asked for more elements than exist. Such data yields an empty window, which
    // simply skips the corresponding scan.
    auto scanWindow = [&](size_t reservedLength) {
        if (peekLength < reservedLength)
            return bytes.first(0);
        return bytes.first(peekLength - (reservedLength - 1));
    };

    auto cursor = scanWindow(scriptTagLength);
    while (!cursor.empty()) {
        // Look for a "<".
        size_t hitIndex = WTF::find(cursor, '<');
        if (hitIndex == notFound)
            break;

        auto hit = cursor.subspan(hitIndex);
        // If we found a "<", look for "<html>" or "<a " or "<script" or "<title>".
        if (spanHasPrefixIgnoringASCIICase(hit, "<html>"_span)
            || spanHasPrefixIgnoringASCIICase(hit, "<a "_span)
            || spanHasPrefixIgnoringASCIICase(hit, "<script"_span)
            || spanHasPrefixIgnoringASCIICase(hit, "<title>"_span)) {
            return @"text/html";
        }

        // Skip the "<" and continue.
        skip(cursor, hitIndex + 1);
    }

    // Test for a broken server which has sent the content type as part of the content.
    // This code could be improved to look for other mime types.
    cursor = scanWindow(textHTMLLength);
    while (!cursor.empty()) {
        // Look for a "t" or "T".
        size_t lowerHitIndex = WTF::find(cursor, 't');
        size_t upperHitIndex = WTF::find(cursor, 'T');
        if (lowerHitIndex == notFound && upperHitIndex == notFound)
            break;

        // notFound is the largest size_t, so std::min picks whichever hit exists
        // (or the earlier one when both do).
        static_assert(notFound == std::numeric_limits<size_t>::max());
        size_t hitIndex = std::min(lowerHitIndex, upperHitIndex);
        auto hit = cursor.subspan(hitIndex);

        // If we found a "t/T", look for "text/html".
        if (spanHasPrefixIgnoringASCIICase(hit, "text/html"_span))
            return @"text/html";

        // Skip the "t/T" and continue.
        skip(cursor, hitIndex + 1);
    }

    // These prefixes are matched case-sensitively against the start of the data.
    if (spanHasPrefix(bytes, "BEGIN:VCARD"_span))
        return @"text/vcard";
    if (spanHasPrefix(bytes, "BEGIN:VCALENDAR"_span))
        return @"text/calendar";

    // Test for plain text. Unlike the scans above, this walks the entire data.
    bool foundBadCharacter = false;
    for (auto c : bytes) {
        if ((c < 0x20 || c > 0x7E) && (c != '\t' && c != '\r' && c != '\n')) {
            foundBadCharacter = true;
            break;
        }
    }
    if (!foundBadCharacter) {
        // Didn't encounter any bad characters, looks like plain text.
        return @"text/plain";
    }

    // Looks like this is a binary file.

    // Sniff for the JPEG magic number. Only this exact four-byte prefix is matched;
    // data whose fourth byte differs is not reported as JPEG.
    constexpr std::array<uint8_t, 4> jpegMagicNumber { 0xFF, 0xD8, 0xFF, 0xE0 };
    if (spanHasPrefix(bytes, std::span { jpegMagicNumber }))
        return @"image/jpeg";

    return nil;
}
| |
// Compares the receiver's bytes against the NUL-terminated C string `string`,
// ignoring ASCII case, and returns the result of equalLettersIgnoringASCIICase().
// `string` must not be NULL (asserted).
// NOTE(review): the helper's name suggests `string` is expected to consist of
// lowercase ASCII letters; confirm against its declaration before passing other input.
- (BOOL)_web_isCaseInsensitiveEqualToCString:(const char *)string
{
    ASSERT(string);

    auto receiverBytes = span(self);
    auto expectedCharacters = unsafeSpan(string);
    return equalLettersIgnoringASCIICase(receiverBytes, expectedCharacters);
}
| |
| @end |