1 /** D date/time parse functions.
2 *
3 *    Authors: Vitaly Livshic, shiche@yandex.ru
4 *    Bugs: Uses C setLocale which uses system localization via dlocale library.
5 *    License: LGPLv3 
6 */
7 module dateparse;
8 @safe
9 
10 import std.datetime;
11 import dlocale;
12 
13 import std.array;
14 import std.string;
15 import std.conv : parse;
16 import std.algorithm.searching;
17 
/// Exception raised when a date/time string cannot be parsed.
export class DateParseException : Exception
{
    /// Position that was passed to the constructor; also embedded in the message.
    uint position;

    /**
    * Builds the exception message as "<msg> at position <position>.".
    * Params:
    *   msg = description of the problem
    *   position = offset at which the problem was detected (0 by default)
    */
    this(string msg, uint position = 0)
    {
        super(format("%s at position %d.", msg, position));
        this.position = position;
    }
}
24 
/// Accumulator for the individual date/time components collected while parsing.
/// Every numeric field starts with an out-of-range sentinel meaning "not parsed yet".
private struct tm
{
    /// Sentinel for the one-byte fields.
    private enum ubyte unsetSmall = 255;
    /// Sentinel for the wider fields.
    private enum uint unsetWide = 10_000;

    ubyte day = unsetSmall;
    ubyte month = unsetSmall;
    uint year = unsetWide;
    ubyte hour = unsetSmall;
    ubyte minute = unsetSmall;
    ubyte second = unsetSmall;
    uint millisecond = unsetWide;
    uint microsecond = unsetWide;
    uint nanosecond = unsetWide;

    /// Time zone read from the input; stays null when none was parsed.
    TimeZone timezone;
}
40 
/**
* Parse string to SysTime using a strftime-like format string.
*
* Composite specifiers are expanded first: %c, %x and %X are taken from the locale,
* %D becomes "%m/%d/%y", %r becomes "%I:%M:%S %p", %R becomes "%H:%M",
* %T becomes "%H:%M:%S" and %F becomes "%Y-%m-%d".
* Handled elementary specifiers: %a %A (weekday name, checked but otherwise ignored),
* %b %B (month name), %d %e (day), %m (month), %Y (4 digit year),
* %y (2 digit year, 69..99 -> 19xx, 00..68 -> 20xx), %H %I (hour), %M (minute),
* %S (second), %z (signed two digit hour offset, e.g. "+03"), %Z (time zone name).
* Any other character of the format must match the source literally.
*
* Note: %I is read exactly like %H and there is no handler for %p, so a format that
* reaches %p (including the expansion of %r) while input remains is rejected
* as an unknown specifier.
*
* Params:
*   source = string to parse
*   formatString = string in C strftime function format.
*   defaultZone = timezone for date, used when the source carries no time zone
*   locale = used locale
* Returns:
*   parsed SysTime or throws DateParseException exception if there are any errors.
*   Day, month, year, hour and minute are mandatory; seconds default to zero.
* Examples:
* --------------------
* auto locale = initDateformatLocale("C");
* ...
* auto date = SysTime(DateTime(2020, 3, 8, 14, 33, 52), UTC());
* assert(parseSysTime("Sun Mar  8 14:33:52 2020 UTC", "%c") == date);
* --------------------
*/
export SysTime parseSysTime(string source, 
    string formatString, 
    immutable TimeZone defaultZone = null, 
    Locale locale = getDefaultLocale)
{
    // Reads a one or two character number (a leading space is tolerated, e.g. " 8")
    // into value, advances spos and returns the unread rest of the source.
    string parse2(string source, out ubyte value, ref uint spos)
    {
        auto len = source.length > 1 && source[ 1 .. 2 ].isNumeric ? 2 : 1;
        spos += len;
        auto part = source[ 0 .. len ].stripLeft;
        value = parse!ubyte(part);
        return source[ len .. $ ];
    }

    // Reads exactly dataLen characters as the year, advances spos and returns the rest.
    string extractYear(string source, ref uint spos, int dataLen, out uint year)
    {
        if (source.length < dataLen)
            throw new DateParseException("Not enough data to parse year!", spos);
        
        auto part = source[ 0 .. dataLen ];
        if (!isNumeric(part))
            throw new DateParseException(format("Year must contain %d digits!", dataLen), spos);
        year = parse!uint(part);
        spos += dataLen;

        return source[ dataLen .. $ ];
    }

    // True for ASCII letters and for every byte outside the ASCII range.
    // A char is a single UTF-8 code unit, so it can never exceed 255; bytes above 127
    // belong to multi-byte sequences and must be kept, otherwise localized
    // (non-ASCII) weekday and month names would be stripped away completely.
    bool isLetter(immutable char ch)
    {
        return ch > 127 || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    }

    // Takes the text up to the first space, drops trailing non-letters (e.g. a comma)
    // and looks it up in names. On success pos receives the 1-based index of the match
    // and the unread rest of the source is returned; otherwise message is thrown.
    string extractPart(string source, ref uint spos, string[] names, string message, out ubyte pos, out string part)
    {
        auto p = source.indexOf(" ");
        if (p < 0)
            p = source.length;
        part = source[ 0 .. p ];

        // Remove non letters
        while (part.length > 0 && !isLetter(part[ part.length - 1 ]))
        {
            p--;
            part = part[ 0 .. $ - 1];
        }

        // Compare with array
        for (pos = 0; pos < names.length; pos++)
            if (names[pos] == part)
            {
                spos += p;
                pos++;
                return source[ p .. $ ];
            }
        // Throw exception if there is no value equals part in the array
        throw new DateParseException(message, spos);
    }

    if (source == null || formatString == null)
        throw new DateParseException("Source or formatString is null");

    // Expand composite specifiers into elementary ones.
    formatString = formatString
        .replace("%c", locale.dateTime)
        .replace("%x", locale.date)
        .replace("%X", locale.time)
        .replace("%D","%m/%d/%y")
        .replace("%r", "%I:%M:%S %p")
        .replace("%R", "%H:%M")
        .replace("%T", "%H:%M:%S")
        .replace("%F", "%Y-%m-%d");

    // spos / fpos track the current offsets in source / expanded format for error reports.
    uint spos = 0, fpos = 0;
    tm tm;
    while (source.length > 0 && formatString.length > 0)
    {
        // Ignore non data-character
        if (formatString[0] != '%')
        {
            if (source[0] == formatString[0])
            {
                source = source[ 1 .. $ ];
                formatString = formatString[ 1 .. $ ];
                spos++; fpos++;
                continue;
            }
            else
                throw new DateParseException("Invalid character " ~ source[0], spos);
        }

        // Format symbol detected
        if (formatString.length < 2)
            throw new DateParseException("Invalid format string. % without specifier.", fpos);
        auto fmtChar = formatString[1];
        formatString = formatString[ 2 .. $ ];
        switch (fmtChar)
        {
            case 'a':
            case 'A':
                {
                    // Weekday silently ignored itself, but check it presence.
                    string part;
                    ubyte pos;
                    source = extractPart(source,
                        spos,
                        fmtChar == 'a' ? locale.abbrWeekDays : locale.weekDays,
                        "Invalid week day name",
                        pos,
                        part);
                }
                break;
            case 'b':
            case 'B':
                {
                    // Month name
                    string part;
                    ubyte pos;
                    source = extractPart(source, 
                        spos,  
                        fmtChar == 'b' ? locale.abbrMonthes : locale.monthes,
                        "Invalid month name",
                        pos,
                        part);
                    tm.month = pos;
                }
                break;
            case 'e':
            case 'd':
                source = parse2(source, tm.day, spos);
                break;
            case 'm':
                source = parse2(source, tm.month, spos);
                break;
            case 'Y':
                source = extractYear(source, spos, 4, tm.year);
                break;
            case 'y':
                {
                    // Two digit year: 69..99 -> 1969..1999, 00..68 -> 2000..2068
                    source = extractYear(source, spos, 2, tm.year);
                    tm.year += tm.year >= 69 ? 1900 : 2000;
                }
                break;
            case 'I':
            case 'H':
                source = parse2(source, tm.hour, spos);
                break;
            case 'M':
                source = parse2(source, tm.minute, spos);
                break;
            case 'S':
                source = parse2(source, tm.second, spos);
                break;
            case 'z':
                {
                    // Offset is a sign followed by two hour digits, e.g. "+03"
                    if (source.length < 3)
                        throw new DateParseException("Cannot read offset for time zone, string too short!", spos);
                    auto part = source[ 0 .. 3 ];
                    spos += 3;
                    // Consume the offset and keep the remainder of the input
                    source = source[ 3 .. $ ];
                    auto utcOffset =  parse!int(part);
                    tm.timezone = new SimpleTimeZone(utcOffset.hours);
                }
                break;
            case 'Z':
                {
                    // Time zone name runs up to the next space or the end of the input
                    auto p = source.indexOf(' ');
                    if (p < 0)
                        p = source.length;
                    auto part = source[ 0 .. p ];
                    source = source[ p .. $ ];
                    spos += p;
                    version(Posix)
                        tm.timezone = cast(TimeZone)PosixTimeZone.getTimeZone(part);
                    version(Windows)
                        tm.timezone = cast(TimeZone)WindowsTimeZone.getTimeZone(part);
                }
                break; 

            default: throw new DateParseException("Unknown format specifier: %" ~ fmtChar, fpos);
        }
        fpos += 2;
    }

    // Check tm struct
    if (tm.day == 255 || tm.month == 255 || tm.year == 10000)
        throw new DateParseException("Not enough data to parse Date!", 0);
    if (tm.second == 255)
        tm.second = 0;
    if (tm.hour == 255 || tm.minute == 255)
        throw new DateParseException("Not enough data to parse time part!", 0);
    if (tm.timezone is null)
        tm.timezone = cast(TimeZone)defaultZone;

    // If cannot determine parts of second  - it will be zero 
    tm.millisecond = tm.millisecond > 999 ? 0 : tm.millisecond;
    tm.microsecond = tm.microsecond > 999 ? 0 : tm.microsecond;
    tm.nanosecond = tm.nanosecond > 999 ? 0 : tm.nanosecond;

    return SysTime(DateTime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second), cast(immutable TimeZone)tm.timezone);
}
258 
/**
* Parse string to SysTime using the locale's standard date and time representation.
* Shortcut for parseSysTime(source, "%c", defaultZone, locale).
* Params:
*   source = string to parse
*   defaultZone = default time zone if it cannot be determined.
*   locale = used locale
* Returns:
*   parsed SysTime or throws DateParseException exception if there are any errors.
*/
export SysTime parseSysTime(string source, immutable TimeZone defaultZone = null, Locale locale = getDefaultLocale)
{
    // "%c" is the strftime specifier for the standard date and time representation.
    enum string standardFormat = "%c";
    return parseSysTime(source, standardFormat, defaultZone, locale);
}
272 
// Exercises both parseSysTime overloads with locale-provided and hand-written formats.
unittest
{
    import std.stdio;

    writeln("\nDate parsing test");

    // Choose a locale name depending on the platform the test is compiled for.
    Locale locale;
    version(Posix)
    {
        locale = initDateformatLocale("C");
    }
    else version(Windows)
    {
        locale = initDateformatLocale("en");
    }
    else
    {
        locale = initDateformatLocale("en-US");
    }
    setDefaultLocale(locale.name);
    writeln("Test's locale is " ~ locale.name);

    SysTime expected;
    string text;

    // Locale specific
    version(Posix)
    {
        text = "Sun Mar  8 14:33:52 2020 UTC";
        expected = SysTime(DateTime(2020, 3, 8, 14, 33, 52), UTC());
        assert(parseSysTime(text, "%c", UTC()) == expected);
        assert(parseSysTime(text, UTC()) == expected);

        assert(parseSysTime("12/07/20 17:45:10", "%x %X", UTC()) == SysTime(DateTime(2020, 12, 7, 17, 45, 10), UTC()));

        // Manual formats
        assert(parseSysTime(text, "%a %b  %d %H:%M:%S %Y %Z") == expected);
        assert(parseSysTime("Sunday March  8 14:33:52 2020 UTC", "%A %B  %d %R:%S %Y %Z") == expected);

        assert(parseSysTime("03/08/20 14:33:52 UTC", "%D %R:%S %Z") == expected);
        assert(parseSysTime("03/08/20 14:33:52 UTC", "%D %T %Z") == expected);

        assert(parseSysTime("03/08/20 14:33:52 UTC", "%m/%d/%y %T %Z") == expected);
    }
    version(Windows)
    {
        text = "Monday, December 07, 2020 05:45";
        expected = SysTime(DateTime(2020, 12, 7, 5, 45, 0), UTC());

        assert(parseSysTime(text, "%c", UTC()) == expected);
        assert(parseSysTime(text, UTC()) == expected);

        // Manual formats
        assert(parseSysTime("Mon, Dec, 07 5:45:0 2020 UTC", "%a, %b, %d %H:%M:%S %Y %Z") == expected);
        assert(parseSysTime("Monday December  07 5:45:00 2020 UTC", "%A %B  %d %R:%S %Y %Z") == expected);

        assert(parseSysTime("12/07/20 5:45:0 UTC", "%D %R:%S %Z") == expected);
        assert(parseSysTime("12/07/20 5:45:0 UTC", "%D %T %Z") == expected);

        assert(parseSysTime("12/07/20 5:45:0 UTC", "%m/%d/%y %T %Z") == expected);
    }

    writeln("Date parsing test finished\n");
}