#ifndef DATE_TIME_TZ_DB_BASE_HPP__ | |
#define DATE_TIME_TZ_DB_BASE_HPP__ | |
/* Copyright (c) 2003-2005 CrystalClear Software, Inc.
 * Subject to the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 * Author: Jeff Garland, Bart Garst
 * $Date: 2008-11-12 14:37:53 -0500 (Wed, 12 Nov 2008) $
 */
#include <cstdlib>
#include <fstream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/throw_exception.hpp>
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/time_zone_names.hpp>
#include <boost/date_time/time_zone_base.hpp>
#include <boost/date_time/time_parsing.hpp>
namespace boost { | |
namespace date_time { | |
//! Exception thrown when tz database cannot locate requested data file
/*! Derives from std::logic_error, so existing handlers that catch
 *  std::logic_error or std::exception keep working. what() returns a fixed
 *  message, optionally followed by the file specification that failed.
 */
class data_not_accessible : public std::logic_error
{
public:
  //! Builds the exception with the generic "cannot access datafile" message
  data_not_accessible() :
    std::logic_error("Unable to locate or access the required datafile.")
  {}
  //! Builds the exception with the offending file specification appended
  /*! \param filespec path that could not be opened; appended verbatim
   *         to the message returned by what().
   */
  data_not_accessible(const std::string& filespec) :
    std::logic_error("Unable to locate or access the required datafile. Filespec: " + filespec)
  {}
};
//! Exception thrown when tz database locates incorrect field structure in data file
/*! Derives from std::out_of_range. The message passed to the constructor
 *  is returned unchanged by what().
 */
class bad_field_count : public std::out_of_range
{
public:
  //! \param s description of the malformed record, returned by what()
  bad_field_count(const std::string& s) :
    std::out_of_range(s)
  {}
};
//! Creates a database of time_zones from csv datafile | |
/*! The csv file containing the zone_specs used by the | |
* tz_db_base is intended to be customized by the | |
* library user. When customizing this file (or creating your own) the | |
* file must follow a specific format. | |
* | |
* The first line is expected to contain column headings and is therefore
* not processed by the tz_db_base.
* | |
* Each record (line) must have eleven fields. Some of those fields can | |
* be empty. Every field (even empty ones) must be enclosed in | |
* double-quotes. | |
* Ex: | |
* @code | |
* "America/Phoenix" <- string enclosed in quotes | |
* "" <- empty field | |
* @endcode | |
* | |
* Some fields represent a length of time. The format of these fields | |
* must be: | |
* @code | |
* "{+|-}hh:mm[:ss]" <- length-of-time format | |
* @endcode | |
* Where the plus or minus is mandatory and the seconds are optional. | |
* | |
* Since some time zones do not use daylight savings it is not always | |
* necessary for every field in a zone_spec to contain a value. All | |
* zone_specs must have at least ID and GMT offset. Zones that use | |
* daylight savings must have all fields filled except: | |
* STD ABBR, STD NAME, DST NAME. You should take note | |
* that DST ABBR is mandatory for zones that use daylight savings | |
* (see field descriptions for further details). | |
* | |
* ******* Fields and their description/details ********* | |
* | |
* ID: | |
* Contains the identifying string for the zone_spec. Any string will
* do as long as it's unique. No two IDs can be the same.
* | |
* STD ABBR: | |
* STD NAME: | |
* DST ABBR: | |
* DST NAME: | |
* These four are all the names and abbreviations used by the time | |
* zone being described. While any string will do in these fields, | |
* care should be taken. These fields hold the strings that will be | |
* used in the output of many of the local_time classes. | |
* Ex: | |
* @code | |
* time_zone nyc = tz_db.time_zone_from_region("America/New_York"); | |
* local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); | |
* cout << ny_time.to_long_string() << endl; | |
* // 2004-Aug-30 00:00:00 Eastern Daylight Time | |
* cout << ny_time.to_short_string() << endl; | |
* // 2004-Aug-30 00:00:00 EDT | |
* @endcode | |
* | |
* NOTE: The exact format/function names may vary - see local_time | |
* documentation for further details. | |
* | |
* GMT offset: | |
* This is the number of hours added to utc to get the local time | |
* before any daylight savings adjustments are made. Some examples | |
* are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. | |
* The format must follow the length-of-time format described above. | |
* | |
* DST adjustment: | |
* The amount of time added to gmt_offset when daylight savings is in | |
* effect. The format must follow the length-of-time format described | |
* above. | |
* | |
* DST Start Date rule: | |
* This is a specially formatted string that describes the day of year
* on which the transition takes place. It holds three fields of its own,
* separated by semicolons.
* The first field indicates the "nth" weekday of the month. The possible | |
* values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), | |
* and -1 (last). | |
* The second field indicates the day-of-week from 0-6 (Sun=0). | |
* The third field indicates the month from 1-12 (Jan=1). | |
* | |
* Examples are: "-1;5;9"="Last Friday of September", | |
* "2;1;3"="Second Monday of March" | |
* | |
* Start time: | |
* Start time is the number of hours past midnight, on the day of the
* start transition, at which the transition takes place. More simply put,
* it is the time of day the transition is made (in 24 hour format). The
* format must follow the length-of-time format described above with the
* exception that it must always be positive.
* | |
* DST End date rule: | |
* See DST Start date rule. The difference here is this is the day | |
* daylight savings ends (transition to STD). | |
* | |
* End time:
* Same as Start time, but for the day daylight savings ends.
*/
template<class time_zone_type, class rule_type> | |
class tz_db_base { | |
public: | |
/* Having CharT as a template parameter created problems | |
* with posix_time::duration_from_string. Templatizing | |
* duration_from_string was not possible at this time, however, | |
* it should be possible in the future (when poor compilers get | |
* fixed or stop being used). | |
* Since this class was designed to use CharT as a parameter it | |
* is simply typedef'd here to ease converting in back to a | |
* parameter the future */ | |
typedef char char_type; | |
typedef typename time_zone_type::base_type time_zone_base_type; | |
typedef typename time_zone_type::time_duration_type time_duration_type; | |
typedef time_zone_names_base<char_type> time_zone_names; | |
typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; | |
typedef std::basic_string<char_type> string_type; | |
//! Constructs an empty database | |
tz_db_base() {} | |
//! Process csv data file, may throw exceptions | |
/*! May throw data_not_accessible, or bad_field_count exceptions */ | |
void load_from_file(const std::string& pathspec) | |
{ | |
string_type in_str; | |
std::string buff; | |
std::ifstream ifs(pathspec.c_str()); | |
if(!ifs){ | |
boost::throw_exception(data_not_accessible(pathspec)); | |
} | |
std::getline(ifs, buff); // first line is column headings | |
while( std::getline(ifs, buff)) { | |
parse_string(buff); | |
} | |
} | |
//! returns true if record successfully added to map | |
/*! Takes a region name in the form of "America/Phoenix", and a | |
* time_zone object for that region. The id string must be a unique | |
* name that does not already exist in the database. */ | |
bool add_record(const string_type& region, | |
boost::shared_ptr<time_zone_base_type> tz) | |
{ | |
typename map_type::value_type p(region, tz); | |
return (m_zone_map.insert(p)).second; | |
} | |
//! Returns a time_zone object built from the specs for the given region | |
/*! Returns a time_zone object built from the specs for the given | |
* region. If region does not exist a local_time::record_not_found | |
* exception will be thrown */ | |
boost::shared_ptr<time_zone_base_type> | |
time_zone_from_region(const string_type& region) const | |
{ | |
// get the record | |
typename map_type::const_iterator record = m_zone_map.find(region); | |
if(record == m_zone_map.end()){ | |
return boost::shared_ptr<time_zone_base_type>(); //null pointer | |
} | |
return record->second; | |
} | |
//! Returns a vector of strings holding the time zone regions in the database | |
std::vector<std::string> region_list() const | |
{ | |
typedef std::vector<std::string> vector_type; | |
vector_type regions; | |
typename map_type::const_iterator itr = m_zone_map.begin(); | |
while(itr != m_zone_map.end()) { | |
regions.push_back(itr->first); | |
++itr; | |
} | |
return regions; | |
} | |
private: | |
typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; | |
map_type m_zone_map; | |
// start and end rule are of the same type | |
typedef typename rule_type::start_rule::week_num week_num; | |
/* TODO: mechanisms need to be put in place to handle different | |
* types of rule specs. parse_rules() only handles nth_kday | |
* rule types. */ | |
//! parses rule specs for transition day rules | |
rule_type* parse_rules(const string_type& sr, const string_type& er) const | |
{ | |
using namespace gregorian; | |
// start and end rule are of the same type, | |
// both are included here for readability | |
typedef typename rule_type::start_rule start_rule; | |
typedef typename rule_type::end_rule end_rule; | |
// these are: [start|end] nth, day, month | |
int s_nth = 0, s_d = 0, s_m = 0; | |
int e_nth = 0, e_d = 0, e_m = 0; | |
split_rule_spec(s_nth, s_d, s_m, sr); | |
split_rule_spec(e_nth, e_d, e_m, er); | |
typename start_rule::week_num s_wn, e_wn; | |
s_wn = get_week_num(s_nth); | |
e_wn = get_week_num(e_nth); | |
return new rule_type(start_rule(s_wn, s_d, s_m), | |
end_rule(e_wn, e_d, e_m)); | |
} | |
//! helper function for parse_rules() | |
week_num get_week_num(int nth) const | |
{ | |
typedef typename rule_type::start_rule start_rule; | |
switch(nth){ | |
case 1: | |
return start_rule::first; | |
case 2: | |
return start_rule::second; | |
case 3: | |
return start_rule::third; | |
case 4: | |
return start_rule::fourth; | |
case 5: | |
case -1: | |
return start_rule::fifth; | |
default: | |
// shouldn't get here - add error handling later | |
break; | |
} | |
return start_rule::fifth; // silence warnings | |
} | |
//! splits the [start|end]_date_rule string into 3 ints | |
void split_rule_spec(int& nth, int& d, int& m, string_type rule) const | |
{ | |
typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; | |
typedef boost::tokenizer<char_separator_type, | |
std::basic_string<char_type>::const_iterator, | |
std::basic_string<char_type> > tokenizer; | |
typedef boost::tokenizer<char_separator_type, | |
std::basic_string<char_type>::const_iterator, | |
std::basic_string<char_type> >::iterator tokenizer_iterator; | |
const char_type sep_char[] = { ';', '\0'}; | |
char_separator_type sep(sep_char); | |
tokenizer tokens(rule, sep); // 3 fields | |
tokenizer_iterator tok_iter = tokens.begin(); | |
nth = std::atoi(tok_iter->c_str()); ++tok_iter; | |
d = std::atoi(tok_iter->c_str()); ++tok_iter; | |
m = std::atoi(tok_iter->c_str()); | |
} | |
//! Take a line from the csv, turn it into a time_zone_type. | |
/*! Take a line from the csv, turn it into a time_zone_type, | |
* and add it to the map. Zone_specs in csv file are expected to | |
* have eleven fields that describe the time zone. Returns true if | |
* zone_spec successfully added to database */ | |
bool parse_string(string_type& s) | |
{ | |
std::vector<string_type> result; | |
typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; | |
token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>()); | |
token_iter_type end; | |
while (i != end) { | |
result.push_back(*i); | |
i++; | |
} | |
enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, | |
DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, | |
END_TIME, FIELD_COUNT }; | |
//take a shot at fixing gcc 4.x error | |
const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); | |
if (result.size() != expected_fields) { | |
std::ostringstream msg; | |
msg << "Expecting " << FIELD_COUNT << " fields, got " | |
<< result.size() << " fields in line: " << s; | |
boost::throw_exception(bad_field_count(msg.str())); | |
BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach | |
} | |
// initializations | |
bool has_dst = true; | |
if(result[DSTABBR] == std::string()){ | |
has_dst = false; | |
} | |
// start building components of a time_zone | |
time_zone_names names(result[STDNAME], result[STDABBR], | |
result[DSTNAME], result[DSTABBR]); | |
time_duration_type utc_offset = | |
str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); | |
dst_adjustment_offsets adjust(time_duration_type(0,0,0), | |
time_duration_type(0,0,0), | |
time_duration_type(0,0,0)); | |
boost::shared_ptr<rule_type> rules; | |
if(has_dst){ | |
adjust = dst_adjustment_offsets( | |
str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), | |
str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), | |
str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) | |
); | |
rules = | |
boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], | |
result[END_DATE_RULE])); | |
} | |
string_type id(result[ID]); | |
boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); | |
return (add_record(id, zone)); | |
} | |
}; | |
} } // namespace | |
#endif // DATE_TIME_TZ_DB_BASE_HPP__ |