blob: 0ef41ae8d84fb3644a12d49d130dc487ecc480d8 [file] [log] [blame]
//
// GTMNSString+XML.m
//
// Copyright 2007-2008 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
//
#import "GTMDefines.h"
#import "GTMNSString+XML.h"
// Export a nonsense symbol to suppress a libtool warning when this is linked alone in a static lib.
__attribute__((visibility("default")))
char NSString_GTMNSStringXMLAdditionsExportToSuppressLibToolWarning = 0;
enum {
kGTMXMLCharModeEncodeQUOT = 0,
kGTMXMLCharModeEncodeAMP = 1,
kGTMXMLCharModeEncodeAPOS = 2,
kGTMXMLCharModeEncodeLT = 3,
kGTMXMLCharModeEncodeGT = 4,
kGTMXMLCharModeValid = 99,
kGTMXMLCharModeInvalid = 100,
};
typedef NSUInteger GTMXMLCharMode;
static NSString *gXMLEntityList[] = {
// this must match the above order
@""",
@"&",
@"'",
@"<",
@">",
};
GTM_INLINE GTMXMLCharMode XMLModeForUnichar(UniChar c) {
// Per XML spec Section 2.2 Characters
// ( http://www.w3.org/TR/REC-xml/#charsets )
//
// Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
// [#x10000-#x10FFFF]
if (c <= 0xd7ff) {
if (c >= 0x20) {
switch (c) {
case 34:
return kGTMXMLCharModeEncodeQUOT;
case 38:
return kGTMXMLCharModeEncodeAMP;
case 39:
return kGTMXMLCharModeEncodeAPOS;
case 60:
return kGTMXMLCharModeEncodeLT;
case 62:
return kGTMXMLCharModeEncodeGT;
default:
return kGTMXMLCharModeValid;
}
} else {
if (c == '\n')
return kGTMXMLCharModeValid;
if (c == '\r')
return kGTMXMLCharModeValid;
if (c == '\t')
return kGTMXMLCharModeValid;
return kGTMXMLCharModeInvalid;
}
}
if (c < 0xE000)
return kGTMXMLCharModeInvalid;
if (c <= 0xFFFD)
return kGTMXMLCharModeValid;
// UniChar can't have the following values
// if (c < 0x10000)
// return kGTMXMLCharModeInvalid;
// if (c <= 0x10FFFF)
// return kGTMXMLCharModeValid;
return kGTMXMLCharModeInvalid;
} // XMLModeForUnichar
static NSString *AutoreleasedCloneForXML(NSString *src, BOOL escaping) {
//
// NOTE:
// We don't use CFXMLCreateStringByEscapingEntities because it's busted in
// 10.3 (http://lists.apple.com/archives/Cocoa-dev/2004/Nov/msg00059.html) and
// it doesn't do anything about the chars that are actually invalid per the
// xml spec.
//
// we can't use the CF call here because it leaves the invalid chars
// in the string.
NSUInteger length = [src length];
if (!length) {
return src;
}
NSMutableString *finalString = [NSMutableString string];
// this block is common between GTMNSString+HTML and GTMNSString+XML but
// it's so short that it isn't really worth trying to share.
const UniChar *buffer = CFStringGetCharactersPtr((CFStringRef)src);
if (!buffer) {
// We want this buffer to be autoreleased.
NSMutableData *data = [NSMutableData dataWithLength:length * sizeof(UniChar)];
if (!data) {
// COV_NF_START - Memory fail case
_GTMDevLog(@"couldn't alloc buffer");
return nil;
// COV_NF_END
}
[src getCharacters:[data mutableBytes]];
buffer = [data bytes];
}
const UniChar *goodRun = buffer;
NSUInteger goodRunLength = 0;
for (NSUInteger i = 0; i < length; ++i) {
GTMXMLCharMode cMode = XMLModeForUnichar(buffer[i]);
// valid chars go as is, and if we aren't doing entities, then
// everything goes as is.
if ((cMode == kGTMXMLCharModeValid) ||
(!escaping && (cMode != kGTMXMLCharModeInvalid))) {
// goes as is
goodRunLength += 1;
} else {
// it's something we have to encode or something invalid
// start by adding what we already collected (if anything)
if (goodRunLength) {
CFStringAppendCharacters((CFMutableStringRef)finalString,
goodRun,
goodRunLength);
goodRunLength = 0;
}
// if it wasn't invalid, add the encoded version
if (cMode != kGTMXMLCharModeInvalid) {
// add this encoded
[finalString appendString:gXMLEntityList[cMode]];
}
// update goodRun to point to the next UniChar
goodRun = buffer + i + 1;
}
}
// anything left to add?
if (goodRunLength) {
CFStringAppendCharacters((CFMutableStringRef)finalString,
goodRun,
goodRunLength);
}
return finalString;
} // AutoreleasedCloneForXML
@implementation NSString (GTMNSStringXMLAdditions)
- (NSString *)gtm_stringBySanitizingAndEscapingForXML {
return AutoreleasedCloneForXML(self, YES);
} // gtm_stringBySanitizingAndEscapingForXML
- (NSString *)gtm_stringBySanitizingToXMLSpec {
return AutoreleasedCloneForXML(self, NO);
} // gtm_stringBySanitizingToXMLSpec
@end