#ifndef DATE_TIME_FORMAT_DATE_PARSER_HPP__ | |
#define DATE_TIME_FORMAT_DATE_PARSER_HPP__ | |
/* Copyright (c) 2004-2005 CrystalClear Software, Inc.
 * Use, modification and distribution is subject to the
 * Boost Software License, Version 1.0. (See accompanying
 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 * Author: Jeff Garland, Bart Garst
 * $Date: 2009-06-04 04:24:49 -0400 (Thu, 04 Jun 2009) $
 */
#include "boost/lexical_cast.hpp" | |
#include "boost/date_time/string_parse_tree.hpp" | |
#include "boost/date_time/strings_from_facet.hpp" | |
#include "boost/date_time/special_values_parser.hpp" | |
#include <string> | |
#include <vector> | |
#include <sstream> | |
#include <iterator> | |
#ifndef BOOST_NO_STDC_NAMESPACE | |
# include <cctype> | |
#else | |
# include <ctype.h> | |
#endif | |
#ifdef BOOST_NO_STDC_NAMESPACE | |
namespace std { | |
using ::isspace; | |
using ::isdigit; | |
} | |
#endif | |
namespace boost { namespace date_time { | |
//! Helper function for parsing fixed length strings into integers | |
/*! Will consume 'length' number of characters from stream. Consumed | |
* character are transfered to parse_match_result struct. | |
* Returns '-1' if no number can be parsed or incorrect number of | |
* digits in stream. */ | |
template<typename int_type, typename charT> | |
inline | |
int_type | |
fixed_string_to_int(std::istreambuf_iterator<charT>& itr, | |
std::istreambuf_iterator<charT>& stream_end, | |
parse_match_result<charT>& mr, | |
unsigned int length, | |
const charT& fill_char) | |
{ | |
//typedef std::basic_string<charT> string_type; | |
unsigned int j = 0; | |
//string_type s; | |
while (j < length && itr != stream_end && | |
(std::isdigit(*itr) || *itr == fill_char)) { | |
if(*itr == fill_char) { | |
/* Since a fill_char can be anything, we convert it to a zero. | |
* lexical_cast will behave predictably when zero is used as fill. */ | |
mr.cache += ('0'); | |
} | |
else { | |
mr.cache += (*itr); | |
} | |
itr++; | |
j++; | |
} | |
int_type i = -1; | |
// mr.cache will hold leading zeros. size() tells us when input is too short. | |
if(mr.cache.size() < length) { | |
return i; | |
} | |
try { | |
i = boost::lexical_cast<int_type>(mr.cache); | |
}catch(bad_lexical_cast&){ | |
// we want to return -1 if the cast fails so nothing to do here | |
} | |
return i; | |
} | |
//! Helper function for parsing fixed length strings into integers | |
/*! Will consume 'length' number of characters from stream. Consumed | |
* character are transfered to parse_match_result struct. | |
* Returns '-1' if no number can be parsed or incorrect number of | |
* digits in stream. */ | |
template<typename int_type, typename charT> | |
inline | |
int_type | |
fixed_string_to_int(std::istreambuf_iterator<charT>& itr, | |
std::istreambuf_iterator<charT>& stream_end, | |
parse_match_result<charT>& mr, | |
unsigned int length) | |
{ | |
return fixed_string_to_int<int_type, charT>(itr, stream_end, mr, length, '0'); | |
} | |
//! Helper function for parsing varied length strings into integers | |
/*! Will consume 'max_length' characters from stream only if those | |
* characters are digits. Returns '-1' if no number can be parsed. | |
* Will not parse a number preceeded by a '+' or '-'. */ | |
template<typename int_type, typename charT> | |
inline | |
int_type | |
var_string_to_int(std::istreambuf_iterator<charT>& itr, | |
const std::istreambuf_iterator<charT>& stream_end, | |
unsigned int max_length) | |
{ | |
typedef std::basic_string<charT> string_type; | |
unsigned int j = 0; | |
string_type s; | |
while (itr != stream_end && (j < max_length) && std::isdigit(*itr)) { | |
s += (*itr); | |
++itr; | |
++j; | |
} | |
int_type i = -1; | |
if(!s.empty()) { | |
i = boost::lexical_cast<int_type>(s); | |
} | |
return i; | |
} | |
//! Class with generic date parsing using a format string | |
/*! The following is the set of recognized format specifiers | |
- %a - Short weekday name | |
- %A - Long weekday name | |
- %b - Abbreviated month name | |
- %B - Full month name | |
- %d - Day of the month as decimal 01 to 31 | |
- %j - Day of year as decimal from 001 to 366 | |
- %m - Month name as a decimal 01 to 12 | |
- %U - Week number 00 to 53 with first Sunday as the first day of week 1? | |
- %w - Weekday as decimal number 0 to 6 where Sunday == 0 | |
- %W - Week number 00 to 53 where Monday is first day of week 1 | |
- %x - facet default date representation | |
- %y - Year without the century - eg: 04 for 2004 | |
- %Y - Year with century | |
The weekday specifiers (%a and %A) do not add to the date construction, | |
but they provide a way to skip over the weekday names for formats that | |
provide them. | |
todo -- Another interesting feature that this approach could provide is | |
an option to fill in any missing fields with the current values | |
from the clock. So if you have %m-%d the parser would detect | |
the missing year value and fill it in using the clock. | |
todo -- What to do with the %x. %x in the classic facet is just bad... | |
*/ | |
template<class date_type, typename charT> | |
class format_date_parser | |
{ | |
public: | |
typedef std::basic_string<charT> string_type; | |
typedef std::basic_istringstream<charT> stringstream_type; | |
typedef std::istreambuf_iterator<charT> stream_itr_type; | |
typedef typename string_type::const_iterator const_itr; | |
typedef typename date_type::year_type year_type; | |
typedef typename date_type::month_type month_type; | |
typedef typename date_type::day_type day_type; | |
typedef typename date_type::duration_type duration_type; | |
typedef typename date_type::day_of_week_type day_of_week_type; | |
typedef typename date_type::day_of_year_type day_of_year_type; | |
typedef string_parse_tree<charT> parse_tree_type; | |
typedef typename parse_tree_type::parse_match_result_type match_results; | |
typedef std::vector<std::basic_string<charT> > input_collection_type; | |
// TODO sv_parser uses its default constructor - write the others | |
format_date_parser(const string_type& format_str, | |
const input_collection_type& month_short_names, | |
const input_collection_type& month_long_names, | |
const input_collection_type& weekday_short_names, | |
const input_collection_type& weekday_long_names) : | |
m_format(format_str), | |
m_month_short_names(month_short_names, 1), | |
m_month_long_names(month_long_names, 1), | |
m_weekday_short_names(weekday_short_names), | |
m_weekday_long_names(weekday_long_names) | |
{} | |
format_date_parser(const string_type& format_str, | |
const std::locale& locale) : | |
m_format(format_str), | |
m_month_short_names(gather_month_strings<charT>(locale), 1), | |
m_month_long_names(gather_month_strings<charT>(locale, false), 1), | |
m_weekday_short_names(gather_weekday_strings<charT>(locale)), | |
m_weekday_long_names(gather_weekday_strings<charT>(locale, false)) | |
{} | |
format_date_parser(const format_date_parser<date_type,charT>& fdp) | |
{ | |
this->m_format = fdp.m_format; | |
this->m_month_short_names = fdp.m_month_short_names; | |
this->m_month_long_names = fdp.m_month_long_names; | |
this->m_weekday_short_names = fdp.m_weekday_short_names; | |
this->m_weekday_long_names = fdp.m_weekday_long_names; | |
} | |
string_type format() const | |
{ | |
return m_format; | |
} | |
void format(string_type format_str) | |
{ | |
m_format = format_str; | |
} | |
void short_month_names(const input_collection_type& month_names) | |
{ | |
m_month_short_names = parse_tree_type(month_names, 1); | |
} | |
void long_month_names(const input_collection_type& month_names) | |
{ | |
m_month_long_names = parse_tree_type(month_names, 1); | |
} | |
void short_weekday_names(const input_collection_type& weekday_names) | |
{ | |
m_weekday_short_names = parse_tree_type(weekday_names); | |
} | |
void long_weekday_names(const input_collection_type& weekday_names) | |
{ | |
m_weekday_long_names = parse_tree_type(weekday_names); | |
} | |
date_type | |
parse_date(const string_type& value, | |
const string_type& format_str, | |
const special_values_parser<date_type,charT>& sv_parser) const | |
{ | |
stringstream_type ss(value); | |
stream_itr_type sitr(ss); | |
stream_itr_type stream_end; | |
return parse_date(sitr, stream_end, format_str, sv_parser); | |
} | |
date_type | |
parse_date(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end, | |
const special_values_parser<date_type,charT>& sv_parser) const | |
{ | |
return parse_date(sitr, stream_end, m_format, sv_parser); | |
} | |
/*! Of all the objects that the format_date_parser can parse, only a | |
* date can be a special value. Therefore, only parse_date checks | |
* for special_values. */ | |
date_type | |
parse_date(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end, | |
string_type format_str, | |
const special_values_parser<date_type,charT>& sv_parser) const | |
{ | |
bool use_current_char = false; | |
// skip leading whitespace | |
while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } | |
charT current_char = *sitr; | |
short year(0), month(0), day(0), day_of_year(0);// wkday(0); | |
/* Initialized the following to their minimum values. These intermediate | |
* objects are used so we get specific exceptions when part of the input | |
* is unparsable. | |
* Ex: "205-Jan-15" will throw a bad_year, "2005-Jsn-15"- bad_month, etc.*/ | |
year_type t_year(1400); | |
month_type t_month(1); | |
day_type t_day(1); | |
day_of_week_type wkday(0); | |
const_itr itr(format_str.begin()); | |
while (itr != format_str.end() && (sitr != stream_end)) { | |
if (*itr == '%') { | |
itr++; | |
if (*itr != '%') { | |
switch(*itr) { | |
case 'a': | |
{ | |
//this value is just throw away. It could be used for | |
//error checking potentially, but it isn't helpful in | |
//actually constructing the date - we just need to get it | |
//out of the stream | |
match_results mr = m_weekday_short_names.match(sitr, stream_end); | |
if(mr.current_match == match_results::PARSE_ERROR) { | |
// check special_values | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
wkday = mr.current_match; | |
if (mr.has_remaining()) { | |
current_char = mr.last_char(); | |
use_current_char = true; | |
} | |
break; | |
} | |
case 'A': | |
{ | |
//this value is just throw away. It could be used for | |
//error checking potentially, but it isn't helpful in | |
//actually constructing the date - we just need to get it | |
//out of the stream | |
match_results mr = m_weekday_long_names.match(sitr, stream_end); | |
if(mr.current_match == match_results::PARSE_ERROR) { | |
// check special_values | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
wkday = mr.current_match; | |
if (mr.has_remaining()) { | |
current_char = mr.last_char(); | |
use_current_char = true; | |
} | |
break; | |
} | |
case 'b': | |
{ | |
match_results mr = m_month_short_names.match(sitr, stream_end); | |
if(mr.current_match == match_results::PARSE_ERROR) { | |
// check special_values | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
t_month = month_type(mr.current_match); | |
if (mr.has_remaining()) { | |
current_char = mr.last_char(); | |
use_current_char = true; | |
} | |
break; | |
} | |
case 'B': | |
{ | |
match_results mr = m_month_long_names.match(sitr, stream_end); | |
if(mr.current_match == match_results::PARSE_ERROR) { | |
// check special_values | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
t_month = month_type(mr.current_match); | |
if (mr.has_remaining()) { | |
current_char = mr.last_char(); | |
use_current_char = true; | |
} | |
break; | |
} | |
case 'd': | |
{ | |
match_results mr; | |
day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2); | |
if(day == -1) { | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
t_day = day_type(day); | |
break; | |
} | |
case 'e': | |
{ | |
match_results mr; | |
day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2, ' '); | |
if(day == -1) { | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
t_day = day_type(day); | |
break; | |
} | |
case 'j': | |
{ | |
match_results mr; | |
day_of_year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 3); | |
if(day_of_year == -1) { | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
// these next two lines are so we get an exception with bad input | |
day_of_year_type t_day_of_year(1); | |
t_day_of_year = day_of_year_type(day_of_year); | |
break; | |
} | |
case 'm': | |
{ | |
match_results mr; | |
month = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2); | |
if(month == -1) { | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
t_month = month_type(month); | |
break; | |
} | |
case 'Y': | |
{ | |
match_results mr; | |
year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4); | |
if(year == -1) { | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
t_year = year_type(year); | |
break; | |
} | |
case 'y': | |
{ | |
match_results mr; | |
year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2); | |
if(year == -1) { | |
if(sv_parser.match(sitr, stream_end, mr)) { | |
return date_type(static_cast<special_values>(mr.current_match)); | |
} | |
} | |
year += 2000; //make 2 digit years in this century | |
t_year = year_type(year); | |
break; | |
} | |
default: | |
{} //ignore those we don't understand | |
}//switch | |
} | |
else { // itr == '%', second consecutive | |
sitr++; | |
} | |
itr++; //advance past format specifier | |
} | |
else { //skip past chars in format and in buffer | |
itr++; | |
if (use_current_char) { | |
use_current_char = false; | |
current_char = *sitr; | |
} | |
else { | |
sitr++; | |
} | |
} | |
} | |
if (day_of_year > 0) { | |
date_type d(static_cast<unsigned short>(year-1),12,31); //end of prior year | |
return d + duration_type(day_of_year); | |
} | |
return date_type(t_year, t_month, t_day); // exceptions were thrown earlier | |
// if input was no good | |
} | |
//! Throws bad_month if unable to parse | |
month_type | |
parse_month(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end, | |
string_type format_str) const | |
{ | |
match_results mr; | |
return parse_month(sitr, stream_end, format_str, mr); | |
} | |
//! Throws bad_month if unable to parse | |
month_type | |
parse_month(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end, | |
string_type format_str, | |
match_results& mr) const | |
{ | |
bool use_current_char = false; | |
// skip leading whitespace | |
while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } | |
charT current_char = *sitr; | |
short month(0); | |
const_itr itr(format_str.begin()); | |
while (itr != format_str.end() && (sitr != stream_end)) { | |
if (*itr == '%') { | |
itr++; | |
if (*itr != '%') { | |
switch(*itr) { | |
case 'b': | |
{ | |
mr = m_month_short_names.match(sitr, stream_end); | |
month = mr.current_match; | |
if (mr.has_remaining()) { | |
current_char = mr.last_char(); | |
use_current_char = true; | |
} | |
break; | |
} | |
case 'B': | |
{ | |
mr = m_month_long_names.match(sitr, stream_end); | |
month = mr.current_match; | |
if (mr.has_remaining()) { | |
current_char = mr.last_char(); | |
use_current_char = true; | |
} | |
break; | |
} | |
case 'm': | |
{ | |
month = var_string_to_int<short, charT>(sitr, stream_end, 2); | |
// var_string_to_int returns -1 if parse failed. That will | |
// cause a bad_month exception to be thrown so we do nothing here | |
break; | |
} | |
default: | |
{} //ignore those we don't understand | |
}//switch | |
} | |
else { // itr == '%', second consecutive | |
sitr++; | |
} | |
itr++; //advance past format specifier | |
} | |
else { //skip past chars in format and in buffer | |
itr++; | |
if (use_current_char) { | |
use_current_char = false; | |
current_char = *sitr; | |
} | |
else { | |
sitr++; | |
} | |
} | |
} | |
return month_type(month); // throws bad_month exception when values are zero | |
} | |
//! Expects 1 or 2 digits 1-31. Throws bad_day_of_month if unable to parse | |
day_type | |
parse_var_day_of_month(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end) const | |
{ | |
// skip leading whitespace | |
while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } | |
return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2)); | |
} | |
//! Expects 2 digits 01-31. Throws bad_day_of_month if unable to parse | |
day_type | |
parse_day_of_month(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end) const | |
{ | |
// skip leading whitespace | |
while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } | |
//return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2)); | |
match_results mr; | |
return day_type(fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2)); | |
} | |
day_of_week_type | |
parse_weekday(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end, | |
string_type format_str) const | |
{ | |
match_results mr; | |
return parse_weekday(sitr, stream_end, format_str, mr); | |
} | |
day_of_week_type | |
parse_weekday(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end, | |
string_type format_str, | |
match_results& mr) const | |
{ | |
bool use_current_char = false; | |
// skip leading whitespace | |
while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } | |
charT current_char = *sitr; | |
short wkday(0); | |
const_itr itr(format_str.begin()); | |
while (itr != format_str.end() && (sitr != stream_end)) { | |
if (*itr == '%') { | |
itr++; | |
if (*itr != '%') { | |
switch(*itr) { | |
case 'a': | |
{ | |
//this value is just throw away. It could be used for | |
//error checking potentially, but it isn't helpful in | |
//actually constructing the date - we just need to get it | |
//out of the stream | |
mr = m_weekday_short_names.match(sitr, stream_end); | |
wkday = mr.current_match; | |
if (mr.has_remaining()) { | |
current_char = mr.last_char(); | |
use_current_char = true; | |
} | |
break; | |
} | |
case 'A': | |
{ | |
//this value is just throw away. It could be used for | |
//error checking potentially, but it isn't helpful in | |
//actually constructing the date - we just need to get it | |
//out of the stream | |
mr = m_weekday_long_names.match(sitr, stream_end); | |
wkday = mr.current_match; | |
if (mr.has_remaining()) { | |
current_char = mr.last_char(); | |
use_current_char = true; | |
} | |
break; | |
} | |
case 'w': | |
{ | |
// weekday as number 0-6, Sunday == 0 | |
wkday = var_string_to_int<short, charT>(sitr, stream_end, 2); | |
break; | |
} | |
default: | |
{} //ignore those we don't understand | |
}//switch | |
} | |
else { // itr == '%', second consecutive | |
sitr++; | |
} | |
itr++; //advance past format specifier | |
} | |
else { //skip past chars in format and in buffer | |
itr++; | |
if (use_current_char) { | |
use_current_char = false; | |
current_char = *sitr; | |
} | |
else { | |
sitr++; | |
} | |
} | |
} | |
return day_of_week_type(wkday); // throws bad_day_of_month exception | |
// when values are zero | |
} | |
//! throws bad_year if unable to parse | |
year_type | |
parse_year(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end, | |
string_type format_str) const | |
{ | |
match_results mr; | |
return parse_year(sitr, stream_end, format_str, mr); | |
} | |
//! throws bad_year if unable to parse | |
year_type | |
parse_year(std::istreambuf_iterator<charT>& sitr, | |
std::istreambuf_iterator<charT>& stream_end, | |
string_type format_str, | |
match_results& mr) const | |
{ | |
bool use_current_char = false; | |
// skip leading whitespace | |
while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } | |
charT current_char = *sitr; | |
unsigned short year(0); | |
const_itr itr(format_str.begin()); | |
while (itr != format_str.end() && (sitr != stream_end)) { | |
if (*itr == '%') { | |
itr++; | |
if (*itr != '%') { | |
//match_results mr; | |
switch(*itr) { | |
case 'Y': | |
{ | |
// year from 4 digit string | |
year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4); | |
break; | |
} | |
case 'y': | |
{ | |
// year from 2 digit string (no century) | |
year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2); | |
year += 2000; //make 2 digit years in this century | |
break; | |
} | |
default: | |
{} //ignore those we don't understand | |
}//switch | |
} | |
else { // itr == '%', second consecutive | |
sitr++; | |
} | |
itr++; //advance past format specifier | |
} | |
else { //skip past chars in format and in buffer | |
itr++; | |
if (use_current_char) { | |
use_current_char = false; | |
current_char = *sitr; | |
} | |
else { | |
sitr++; | |
} | |
} | |
} | |
return year_type(year); // throws bad_year exception when values are zero | |
} | |
private: | |
string_type m_format; | |
parse_tree_type m_month_short_names; | |
parse_tree_type m_month_long_names; | |
parse_tree_type m_weekday_short_names; | |
parse_tree_type m_weekday_long_names; | |
}; | |
} } //namespace | |
#endif | |