-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathfuzzy_test.go
More file actions
301 lines (267 loc) · 9.7 KB
/
fuzzy_test.go
File metadata and controls
301 lines (267 loc) · 9.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
package fuzzytime
import (
"testing"
)
// TestDateTimes runs a table of date/time strings, mostly collected from
// real-world news sites, through Extract and compares the ISOFormat rendering
// of each result against the expected string. Entries listed under "Ones that
// should fail" expect an empty rendering.
func TestDateTimes(t *testing.T) {
	// TODO: add some more tests with numeric timezones
	// TODO: use DateTime.String()-style strings for expected results
	testData := []struct {
		in       string
		expected string
	}{
		{"2010-04-02T12:35:44+00:00", "2010-04-02T12:35:44Z"}, // iso 8601
		{"2008-03-10 13:21:36 GMT", "2008-03-10T13:21:36Z"},
		{"May 25 2010 3:34PM", "2010-05-25T15:34"},                  // (thetimes.co.uk)
		{"Thursday August 21 2008 10:42 am", "2008-08-21T10:42"},    // (guardian blogs in their new cms)
		{"Tuesday 16 December 2008 16.23 GMT", "2008-12-16T16:23Z"}, // (Guardian blogs in their new cms)
		{"3:19pm on Tue 29 Jan 08", "2008-01-29T15:19"},             // (herald blogs)
		{"2007/03/18 10:59:02", "2007-03-18T10:59:02"},
		{"Mar 3, 2007 12:00 AM", "2007-03-03T00:00"},
		{"Jul 21, 08 10:00 AM", "2008-07-21T10:00"},                   // (mirror blogs)
		{"09-Apr-2007 00:00", "2007-04-09T00:00"},                     // (times, sundaytimes)
		{"09-Apr-07 00:00", "2007-04-09T00:00"},                       // (scotsman)
		{"Friday August 11, 2006", "2006-08-11"},                      // (express, guardian/observer)
		{"20:12pm 23rd November 2007", "2007-11-23T20:12"},            // (dailymail)
		{"2:42 PM on 22nd May 2008", "2008-05-22T14:42"},              // (dailymail)
		{"February 10 2008 22:05", "2008-02-10T22:05"},                // (ft)
		{"Feb 2, 2009 at 17:01:09", "2009-02-02T17:01:09"},            // (telegraph blogs)
		{"18 Oct 07, 04:50 PM", "2007-10-18T16:50"},                   // (BBC blogs)
		{"02 August 2007 1:21 PM", "2007-08-02T13:21"},                // (Daily Mail blogs)
		{"October 22, 2007 5:31 PM", "2007-10-22T17:31"},              // (old Guardian blogs, ft blogs)
		{"October 15, 2007", "2007-10-15"},                            // (Times blogs)
		{"February 12 2008", "2008-02-12"},                            // (Herald)
		{"Monday, 22 October 2007", "2007-10-22"},                     // (Independent blogs, Sun (page date))
		{"22 October 2007", "2007-10-22"},                             // (Sky News blogs)
		{"11 Dec 2007", "2007-12-11"},                                 // (Sun (article date))
		{"12 February 2008", "2008-02-12"},                            // (scotsman)
		{"Tuesday, 21 January, 2003, 15:29 GMT", "2003-01-21T15:29Z"}, // (historical bbcnews)
		{"2003/01/21 15:29:49", "2003-01-21T15:29:49"},                // (historical bbcnews (meta tag))
		{"2010-07-01", "2010-07-01"},
		{"2010/07/01", "2010-07-01"},
		{"Feb 20th, 2000", "2000-02-20"},
		{"Monday, May. 17, 2010", "2010-05-17"}, // (time.com)
		{"APRIL 10, 2014", "2014-04-10"},        // nytimes.com
		{"Tuesday October 14 2008 00.01 GMT", "2008-10-14T00:01Z"},
		{"10 ABR 2014 - 20:36 CET", "2014-04-10T20:36+01:00"},      // elpais.com
		{"9:11 p.m. EDT April 10, 2014", "2014-04-10T21:11-04:00"}, // usatoday.com
		// some leading/trailing/embedded text in the wild
		{"September, 26th 2011 by Christo Hall", "2011-09-26"}, // (www.thenewwolf.co.uk)
		// ('\u00a0' is a non-breaking space, i.e. HTML &nbsp;)
		{"\n By\u00a0Edgar R. BattePosted\u00a0\n Sunday, December 20\u00a0\n2015\u00a0at\u00a0\n02:00", "2015-12-20T02:00"}, // http://www.monitor.co.ug
		// some more obscure cases...
		{"May 2008", "2008-05"},
		// fractional seconds
		{"21:59:59,994", "T21:59:59.994"},
		{"21:59:59.994GMT", "T21:59:59.994Z"},
		// tricky ones where hour can get picked up as year if not careful!
		{"Thu Aug 25 10:46:55 GMT 2011", "2011-08-25T10:46:55Z"},       // (www.yorkshireeveningpost.co.uk)
		{"Wed Apr 16 17:17:43 NZST 2014", "2014-04-16T17:17:43+12:00"}, // unix date command
		// BST is ambiguous by default
		//{"Tuesday October 14 2008 00.01 BST", "2008-10-14T00:01+01:00"}, //(Guardian blogs in their new cms)
		//{"26 May 2007, 02:10:36 BST", "2007-05-26T02:10:36+01:00"}, //(newsoftheworld)
		//{"2:43pm BST 16/04/2007", "2007-04-16T14:43+01:00"}, //(telegraph, after munging)
		//{"Monday 30 July 2012 08.38 BST", "2012-07-30T08:38+01:00"}, // (guardian.co.uk)

		// Other possible formats to support:
		// http://en.wikipedia.org/wiki/Date_and_time_notation_in_the_United_States#Date-time_group
		//{"091630Z JUL 11", "2011-07T09:16:30Z"

		// Russian
		{"Май 2008", "2008-05"},
		{"10 апреля 2014", "2014-04-10"},

		// *****
		// Ones that should fail
		// *****

		// ambiguous (at least with the default date resolver)
		{"03/09/2007", ""}, // (Sky News blogs, mirror)
		{"03/09/12", ""},
		{"01.12.2011", ""},
		{"01.12.11", ""},
		// time or date?
		{"10.12", ""},
		// invalid values:
		{"25:10:01GMT", ""},
		{"2000-15-02", ""},
		{"2000-11-92", ""},
		{"52nd feb 2000", "2000-02"}, // hmm. should reject outright?
		{"100:30GMT", ""},
		{"21.59.59.9942", ""},

		// *****
		// Ones we _should_ be able to cope with, but can't yet:
		// *****
		//
		// 12.05 ambiguous, but not in this context
		// {"9 Sep 2009 12.05", "2009-09-09T12:05"}, //(heraldscotland blogs)
		//
		// ambiguous format, but with values that provide enough info
		// {"25/11/2004","2004-11-25"}
		//
		// iso-8601 (when non-ambiguous)
		// "20100201T131443Z",
	}

	for _, dat := range testData {
		// The error is deliberately discarded: the table mixes inputs that
		// should parse with inputs that should not, and only the formatted
		// result is compared against the expectation.
		dt, _, _ := Extract(dat.in)
		if got := dt.ISOFormat(); got != dat.expected {
			// %q keeps empty expectations and embedded newlines or
			// non-breaking spaces visible in the failure output.
			t.Errorf("Extract(%q): expected %q, but got %q", dat.in, dat.expected, got)
		}
	}
}
// TestPartial checks inputs that carry only part of a date/time. Results are
// compared through String(), whose expected values here show the missing
// fields rendered as '?' placeholders.
func TestPartial(t *testing.T) {
	testData := []struct {
		in       string
		expected string
	}{
		{"Thu April 24th", "????-04-24 ??:??:??"},
		{"April 24th", "????-04-24 ??:??:??"},
		{"May 2", "????-05-02 ??:??:??"},
		{"8:50am Thu April 24th", "????-04-24 08:50:??"},
	}

	for _, dat := range testData {
		dt, _, err := Extract(dat.in)
		if err != nil {
			t.Errorf("Extract(%q) failed: %s", dat.in, err)
			// The result is not meaningful after an error; comparing it
			// would only add a second, misleading failure for this case.
			continue
		}
		if got := dt.String(); got != dat.expected {
			t.Errorf("Extract(%q): expected %q, but got %q", dat.in, dat.expected, got)
		}
	}
}
// TestAmbiguous checks how numeric dates with an ambiguous day/month order are
// resolved by USContext (month first, per the expectations below) and by
// WesternContext (day first). An empty expectation means no usable date/time
// should come back; an expectation starting with "T" means only the time part
// should be picked up.
func TestAmbiguous(t *testing.T) {
	type testCase struct {
		in       string
		expected string
	}

	usaData := []testCase{
		{"2003-02-01", "2003-02-01"}, // for sanity check
		{"2/3/10", "2010-02-03"},
		{"1/2/2003", "2003-01-02"},
		{"1.2.2003", "2003-01-02"},
		// these are from https://github.com/bcampbell/fuzzytime/issues/1
		{"12/01/2016 - 12:19", "2016-12-01T12:19"},
		{"12/01 - 12:19:01", "T12:19:01"}, // date part too ambiguous (MM/YY? DD/MM? MM/DD?)
		{"12-01-2016 - 12:19", "2016-12-01T12:19"},
		{"20-12-2016", ""}, // invalid as US date
		{"20/12/2016", ""},
		{"20.12.2016", ""},
		{"03:43 21-12-2016", "T03:43"},
		{"12/16/2016 - 08:00am", "2016-12-16T08:00"},
		{"12/21/16 at 1:12 am", "2016-12-21T01:12"},
		// TODO: add some US timezone tests
	}

	// Several inputs appear in both tables with different expectations, so
	// every message names the context that produced it.
	for _, dat := range usaData {
		dt, _, err := USContext.Extract(dat.in)
		if err != nil {
			t.Errorf("USContext.Extract(%q) failed: %s", dat.in, err)
			// Skip the comparison; it would only repeat the failure.
			continue
		}
		if got := dt.ISOFormat(); got != dat.expected {
			t.Errorf("USContext.Extract(%q): expected %q, but got %q", dat.in, dat.expected, got)
		}
	}

	ukData := []testCase{
		{"2003-02-01", "2003-02-01"}, // for sanity check
		{"1/2/03", "2003-02-01"},
		{"4:48PM GMT 22/02/2008", "2008-02-22T16:48Z"},
		{"4:48PM BST 22/02/2008", "2008-02-22T16:48+01:00"},
		// these are from https://github.com/bcampbell/fuzzytime/issues/1
		{"12/01/2016 - 12:19", "2016-01-12T12:19"},
		{"12/01 - 12:19:01", "T12:19:01"}, // date part too ambiguous (MM/YY? DD/MM? MM/DD?)
		{"12-01-2016 - 12:19", "2016-01-12T12:19"},
		{"20-12-2016", "2016-12-20"},
		{"20/12/2016", "2016-12-20"},
		{"20.12.2016", "2016-12-20"},
		{"20.12.16", "2016-12-20"},
		{"03:43 21-12-2016", "2016-12-21T03:43"},
		{"12/16/2016 - 08:00am", "T08:00"}, // invalid month
		{"12/21/16 at 1:12 am", "T01:12"},  // invalid month
	}

	for _, dat := range ukData {
		dt, _, err := WesternContext.Extract(dat.in)
		if err != nil {
			t.Errorf("WesternContext.Extract(%q) failed: %s", dat.in, err)
			// Skip the comparison; it would only repeat the failure.
			continue
		}
		if got := dt.ISOFormat(); got != dat.expected {
			t.Errorf("WesternContext.Extract(%q): expected %q, but got %q", dat.in, dat.expected, got)
		}
	}
}
// TestParseTimeZone is a placeholder for timezone-parsing tests that have not
// been written yet. It reports itself as skipped so that the missing coverage
// is visible instead of showing up as a passing test.
func TestParseTimeZone(t *testing.T) {
	// Intended cases (input, expected offset in seconds), kept for whoever
	// implements the test:
	//
	//	{"Z", 0},
	//	{"+0100", 1 * 60 * 60},
	//	{"-0430", -(4*60*60 + 30*60)},
	//	{"NZDT", 13 * 60 * 60},
	t.Skip("TODO: timezone parsing test not implemented yet")
}
// TestTZToOffset verifies that TZToOffset converts "Z" and numeric UTC offset
// strings (with or without a colon) into the expected integer offset; the
// expectations are written as hours*60*60 + minutes*60.
func TestTZToOffset(t *testing.T) {
	type offsetCase struct {
		in       string
		expected int
	}

	cases := []offsetCase{
		{"Z", 0},
		{"+00:00", 0},
		{"-00:00", 0},
		{"+0000", 0},
		{"+1000", 10 * 60 * 60},
		{"-01:35", -(1*60*60 + 35*60)},
	}

	for _, tc := range cases {
		offset, err := TZToOffset(tc.in)
		switch {
		case err != nil:
			t.Errorf("TZToOffset(%s) error: %s", tc.in, err)
		case offset != tc.expected:
			t.Errorf("TZToOffset(%s): expected '%d' but got '%d'", tc.in, tc.expected, offset)
		}
	}
}
// TestOffsetToTZ verifies that OffsetToTZ renders an integer offset as "Z"
// for zero and as a signed "hh:mm" string otherwise.
func TestOffsetToTZ(t *testing.T) {
	type tzCase struct {
		in       int
		expected string
	}

	cases := []tzCase{
		{0, "Z"},
		{30 * 60, "+00:30"},
		{-45 * 60, "-00:45"},
		{(10 * 60 * 60) + (0 * 60), "+10:00"},
		{-((10 * 60 * 60) + (15 * 60)), "-10:15"},
	}

	for i := range cases {
		tc := cases[i]
		if tz := OffsetToTZ(tc.in); tz != tc.expected {
			t.Errorf("OffsetToTZ(%d): expected '%s' but got '%s'", tc.in, tc.expected, tz)
		}
	}
}
// TestTZ verifies that a resolver built by DefaultTZResolver maps a timezone
// abbreviation to the expected offset, given a comma-separated list of
// preferred locales used to pick between competing meanings.
func TestTZ(t *testing.T) {
	type resolveCase struct {
		in        string
		preferred string // preferred locale(s), comma-separated
		expected  int
	}

	cases := []resolveCase{
		{"BST", "GB", (1 * 60 * 60)},
		{"PST", "GB,MX", -(8 * 60 * 60)},
		{"IST", "IL,IE", (2 * 60 * 60)}, // prefer israel standard time over irish summer time
	}

	for _, tc := range cases {
		// A fresh resolver is built for every case because the preference
		// list differs from one case to the next.
		offset, err := DefaultTZResolver(tc.preferred)(tc.in)
		switch {
		case err != nil:
			t.Errorf("tz resolver (%s, %s) error: %s", tc.in, tc.preferred, err)
		case offset != tc.expected:
			t.Errorf("tz resolver(%s, %s): expected '%d' but got '%d'", tc.in, tc.preferred, tc.expected, offset)
		}
	}
}