/** D date/time parse functions.
 *
 * Authors: Vitaly Livshic, shiche@yandex.ru
 * Bugs: Uses C setLocale (through the dlocale library), so the results depend on the system localization.
 * License: LGPLv3
 */
module dateparse;
@safe

import std.datetime;
import dlocale;

import std.array;
import std.string;
import std.conv : parse;
import std.algorithm.searching;

/// Parsing exception. Carries the position at which the problem was detected.
export class DateParseException : Exception
{
    /// Offset (in the source or in the format string, depending on the error) reported by the parser.
    uint position;

    /**
     * Params:
     *   msg      = description of the problem; " at position N." is appended to it.
     *   position = offset at which the problem was detected.
     */
    this(string msg, uint position = 0)
    {
        super("%s at position %d.".format( msg, position ));
        this.position = position;
    }
}

/// Struct that holds all parts of a date/time while parsing.
/// Every field starts with an out-of-range sentinel meaning "not parsed yet".
private struct tm
{
    ubyte day = 255;
    ubyte month = 255;
    uint year = 10000;
    ubyte hour = 255;
    ubyte minute = 255;
    ubyte second = 255;
    // The sub-second fields are normalized at the end of parseSysTime,
    // but no format specifier fills them and they are not used to build the result.
    uint millisecond = 10000;
    uint microsecond = 10000;
    uint nanosecond = 10000;

    TimeZone timezone = null;
}

/**
 * Parse a string to SysTime according to the given format string.
 *
 * Supported specifiers: %a %A %b %B %d %e %m %Y %y %H %I %M %S %z %Z and the
 * composite ones %c %x %X %D %R %T %F, which are expanded before parsing.
 *
 * Params:
 *   source = string to parse
 *   formatString = string in C strftime function format.
 *   defaultZone = timezone used when the source does not contain one
 *   locale = used locale
 * Returns:
 *   parsed SysTime or throws DateParseException exception if there are any errors.
 * Examples:
 * --------------------
 * auto locale = initDateformatLocale("C");
 * ...
 * auto date = SysTime(DateTime(2020, 3, 8, 14, 33, 52), UTC());
 * assert(parseSysTime("Sun Mar 8 14:33:52 2020 UTC", "%c") == date);
 * --------------------
 */
export SysTime parseSysTime(string source,
                            string formatString,
                            immutable TimeZone defaultZone = null,
                            Locale locale = getDefaultLocale)
{
    // Reads a one or two character number from the start of `source` into `value`,
    // advances `spos` by the consumed length and returns the rest of the string.
    string parse2(string source, out ubyte value, ref uint spos)
    {
        // Two characters are taken when the second one is numeric, otherwise one.
        auto len = source.length > 1 && source[ 1 .. 2 ].isNumeric ? 2 : 1;
        spos += len;
        // A leading space (space-padded value) is dropped before conversion.
        auto part = source[ 0 .. len ].stripLeft;
        value = parse!ubyte(part);
        return source[ len .. $ ];
    }

    // Reads exactly `dataLen` digits from the start of `source` into `year`,
    // advances `spos` and returns the rest of the string.
    string extractYear(string source, ref uint spos, int dataLen, out uint year)
    {
        if (source.length < dataLen)
            throw new DateParseException("Not enough data to parse year!", spos);

        auto part = source[ 0 .. dataLen ];
        if (!isNumeric(part))
            throw new DateParseException(format("Year must contain %d digits!", dataLen), spos);
        year = parse!uint(part);
        spos += dataLen;

        return source[ dataLen .. $ ];
    }

    // True for ASCII letters and for any non-ASCII code unit.
    bool isLetter(immutable char ch)
    {
        // A `char` is a single UTF-8 code unit and can never exceed 255, so the
        // non-ASCII test has to be `> 127`: every byte of a multi-byte UTF-8
        // sequence is treated as a letter so localized names are not stripped.
        return ch > 127 || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    }

    // Takes the first space-delimited word of `source`, strips trailing
    // non-letters from it and looks it up in `names`.
    // On success `pos` is the 1-based index of the match, `part` is the matched
    // word, `spos` is advanced and the rest of the string is returned.
    // Throws DateParseException with `message` when nothing matches.
    string extractPart(string source, ref uint spos, string[] names, string message, out ubyte pos, out string part)
    {
        auto p = source.indexOf(" ");
        if (p < 0)
            p = source.length;
        part = source[ 0 .. p ];

        // Remove non letters (e.g. a trailing comma); they stay in the returned remainder.
        while (part.length > 0 && !isLetter(part[ part.length - 1 ]))
        {
            p--;
            part = part[ 0 .. $ - 1];
        }

        // Compare with array
        for (pos = 0; pos < names.length; pos++)
            if (names[pos] == part)
            {
                spos += p;
                pos++; // convert the 0-based index to a 1-based number
                return source[ p .. $ ];
            }
        // Throw exception if there is no value equals part in the array
        throw new DateParseException(message, spos);
    }

    if (source == null || formatString == null)
        throw new DateParseException("Source or formatString is null");

    // Expand composite specifiers into primitive ones.
    // NOTE: "%r" expands to a format containing "%p", which has no case in the
    // switch below, so formats using "%r" end in "Unknown format specifier".
    formatString = formatString
        .replace("%c", locale.dateTime)
        .replace("%x", locale.date)
        .replace("%X", locale.time)
        .replace("%D","%m/%d/%y")
        .replace("%r", "%I:%M:%S %p")
        .replace("%R", "%H:%M")
        .replace("%T", "%H:%M:%S")
        .replace("%F", "%Y-%m-%d");

    // spos/fpos track how far the source / format string have been consumed (for error messages).
    uint spos = 0, fpos = 0;
    tm tm;
    // NOTE: the loop stops as soon as either string is exhausted; leftover
    // input or leftover format is not reported here.
    while (source.length > 0 && formatString.length > 0)
    {
        // Literal (non-specifier) character: it must match the source exactly.
        if (formatString[0] != '%')
        {
            if (source[0] == formatString[0])
            {
                source = source[ 1 .. $ ];
                formatString = formatString[ 1 .. $ ];
                spos++; fpos++;
                continue;
            }
            else
                throw new DateParseException("Invalid character " ~ source[0], spos);
        }

        // Format symbol detected
        if (formatString.length < 2)
            throw new DateParseException("Invalid format string. % without specifier.", fpos);
        auto fmtChar = formatString[1];
        formatString = formatString[ 2 .. $ ];
        switch (fmtChar)
        {
            case 'a':
            case 'A':
                {
                    // The weekday value itself is ignored, but its presence and validity are checked.
                    string part;
                    ubyte pos;
                    source = extractPart(source,
                                         spos,
                                         fmtChar == 'a' ? locale.abbrWeekDays : locale.weekDays,
                                         "Invalid week day name",
                                         pos,
                                         part);
                }
                break;
            case 'b':
            case 'B':
                {
                    // Month name
                    string part;
                    ubyte pos;
                    source = extractPart(source,
                                         spos,
                                         fmtChar == 'b' ? locale.abbrMonthes : locale.monthes,
                                         "Invalid month name",
                                         pos,
                                         part);
                    tm.month = pos;
                }
                break;
            case 'e':
            case 'd':
                source = parse2(source, tm.day, spos);
                break;
            case 'm':
                source = parse2(source, tm.month, spos);
                break;
            case 'Y':
                source = extractYear(source, spos, 4, tm.year);
                break;
            case 'y':
                {
                    source = extractYear(source, spos, 2, tm.year);
                    // Two-digit year pivot: 69..99 -> 1969..1999, 00..68 -> 2000..2068.
                    tm.year += tm.year >= 69 ? 1900 : 2000;
                }
                break;
            case 'I':
            case 'H':
                source = parse2(source, tm.hour, spos);
                break;
            case 'M':
                source = parse2(source, tm.minute, spos);
                break;
            case 'S':
                source = parse2(source, tm.second, spos);
                break;
            case 'z':
                {
                    // The offset is read as exactly three characters: sign plus two hour digits, e.g. "+03".
                    if (source.length < 3)
                        throw new DateParseException("Cannot read offset for time zone, string too short!", spos);
                    auto part = source[ 0 .. 3 ];
                    spos += 3;
                    // Continue with what follows the offset. (Keeping source[ 0 .. 3 ]
                    // here would re-feed the offset text to the next format element.)
                    source = source[ 3 .. $ ];
                    auto utcOffset = parse!int(part);
                    tm.timezone = new SimpleTimeZone(utcOffset.hours);
                }
                break;
            case 'Z':
                {
                    // Time zone name: everything up to the next space (or the end of the string).
                    auto p = source.indexOf(' ');
                    if (p < 0)
                        p = source.length;
                    auto part = source[ 0 .. p ];
                    source = source[ p .. $ ];
                    spos += p;
                    version(Posix)
                        tm.timezone = cast(TimeZone)PosixTimeZone.getTimeZone(part);
                    version(Windows)
                        tm.timezone = cast(TimeZone)WindowsTimeZone.getTimeZone(part);
                }
                break;

            default: throw new DateParseException("Unknown format specifier: %" ~ fmtChar, fpos);
        }
        fpos += 2;
    }

    // Check tm struct: day, month, year, hour and minute are mandatory, seconds default to zero.
    if (tm.day == 255 || tm.month == 255 || tm.year == 10000)
        throw new DateParseException("Not enough data to parse Date!", 0);
    if (tm.second == 255)
        tm.second = 0;
    if (tm.hour == 255 || tm.minute == 255)
        throw new DateParseException("Not enough data to parse time part!", 0);
    if (tm.timezone is null)
        tm.timezone = cast(TimeZone)defaultZone;

    // If cannot determine parts of second - it will be zero
    tm.millisecond = tm.millisecond > 999 ? 0 : tm.millisecond;
    tm.microsecond = tm.microsecond > 999 ? 0 : tm.microsecond;
    tm.nanosecond = tm.nanosecond > 999 ? 0 : tm.nanosecond;

    return SysTime(DateTime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second), cast(immutable TimeZone)tm.timezone);
}

/**
 * Parse string to SysTime using the locale's standard date/time representation.
 * Equals parseSysTime(source, "%c", defaultZone, locale).
 * Params:
 *   source = string to parse
 *   defaultZone = default time zone if it cannot be determined.
 *   locale = used locale
 * Returns:
 *   parsed SysTime or throws DateParseException exception if there are any errors.
 */
export SysTime parseSysTime(string source, immutable TimeZone defaultZone = null, Locale locale = getDefaultLocale)
{
    return parseSysTime(source, "%c", defaultZone, locale);
}

unittest
{
    import std.stdio;

    writeln("\nDate parsing test");

    Locale locale;
    version(Posix)
        locale = initDateformatLocale("C");
    else
    version(Windows)
        locale = initDateformatLocale("en");
    else
        locale = initDateformatLocale("en-US");
    setDefaultLocale(locale.name);
    writeln("Test's locale is " ~ locale.name);

    SysTime date;
    string source;

    // Locale specific
    version(Posix)
    {
        date = SysTime(DateTime(2020, 3, 8, 14, 33, 52), UTC());
        source = "Sun Mar 8 14:33:52 2020 UTC";
        assert(parseSysTime(source, "%c", UTC()) == date);
        assert(parseSysTime(source, UTC()) == date);

        assert(parseSysTime("12/07/20 17:45:10", "%x %X", UTC()) == SysTime(DateTime(2020, 12, 7, 17, 45, 10), UTC()));

        // Manual formats
        assert(parseSysTime(source, "%a %b %d %H:%M:%S %Y %Z") == date);
        assert(parseSysTime("Sunday March 8 14:33:52 2020 UTC", "%A %B %d %R:%S %Y %Z") == date);

        assert(parseSysTime("03/08/20 14:33:52 UTC", "%D %R:%S %Z") == date);
        assert(parseSysTime("03/08/20 14:33:52 UTC", "%D %T %Z") == date);

        assert(parseSysTime("03/08/20 14:33:52 UTC", "%m/%d/%y %T %Z") == date);
    }
    version(Windows)
    {
        source = "Monday, December 07, 2020 05:45";
        date = SysTime(DateTime(2020, 12, 7, 5, 45, 0), UTC());

        assert(parseSysTime(source, "%c", UTC()) == date);
        assert(parseSysTime(source, UTC()) == date);

        // Manual formats
        assert(parseSysTime("Mon, Dec, 07 5:45:0 2020 UTC", "%a, %b, %d %H:%M:%S %Y %Z") == date);
        assert(parseSysTime("Monday December 07 5:45:00 2020 UTC", "%A %B %d %R:%S %Y %Z") == date);

        assert(parseSysTime("12/07/20 5:45:0 UTC", "%D %R:%S %Z") == date);
        assert(parseSysTime("12/07/20 5:45:0 UTC", "%D %T %Z") == date);

        assert(parseSysTime("12/07/20 5:45:0 UTC", "%m/%d/%y %T %Z") == date);
    }

    // Numeric UTC offset followed by more fields: parsing must continue after the offset.
    assert(parseSysTime("03/08 14:33:52 +03 2020", "%m/%d %T %z %Y")
           == SysTime(DateTime(2020, 3, 8, 14, 33, 52), new immutable SimpleTimeZone(3.hours)));

    writeln("Date parsing test finished\n");
}