Skip to content

Commit 3cb6222

Browse files
authored
add fixed-length reader.Read() implementation (#118)
1 parent bf59862 commit 3cb6222

File tree

2 files changed

+180
-14
lines changed

2 files changed

+180
-14
lines changed

extensions/omniv21/fileformat/fixedlength/reader.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,41 @@ func (r *reader) readByHeaderFooterEnvelope() (*idr.Node, error) {
135135
}
136136
}
137137

138+
func (r *reader) Read() (node *idr.Node, err error) {
139+
if r.target != nil {
140+
// This is just in case Release() isn't called by ingester.
141+
idr.RemoveAndReleaseTree(r.target)
142+
r.target = nil
143+
}
144+
readEnvelope:
145+
if r.decl.envelopeType() == envelopeTypeByRows {
146+
node, err = r.readByRowsEnvelope()
147+
if err != nil {
148+
return nil, err
149+
}
150+
idr.AddChild(r.root, node)
151+
} else {
152+
node, err = r.readByHeaderFooterEnvelope()
153+
if err != nil {
154+
return nil, err
155+
}
156+
idr.AddChild(r.root, node)
157+
if r.decl.Envelopes[r.envelopeIndex].NotTarget {
158+
// If this by_header_footer envelope isn't target envelope then we consider it
159+
// a global envelope and keep it in the idr tree.
160+
goto readEnvelope
161+
}
162+
}
163+
// now the envelope is the target envelope, let's do a target xpath filtering.
164+
// if it filters out, then we need to remove it from the idr tree.
165+
if r.xpath != nil && !idr.MatchAny(node, r.xpath) {
166+
idr.RemoveAndReleaseTree(node)
167+
goto readEnvelope
168+
}
169+
r.target = node
170+
return node, err
171+
}
172+
138173
// fmtErrStr renders format/args into a message and prefixes it with the
// reader's input name and current line number.
func (r *reader) fmtErrStr(format string, args ...interface{}) string {
	detail := fmt.Sprintf(format, args...)
	return fmt.Sprintf("input '%s' line %d: %s", r.inputName, r.line, detail)
}

extensions/omniv21/fileformat/fixedlength/reader_test.go

Lines changed: 145 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"strings"
88
"testing"
99

10+
"github.com/antchfx/xpath"
11+
"github.com/jf-tech/go-corelib/caches"
1012
"github.com/jf-tech/go-corelib/strs"
1113
"github.com/jf-tech/go-corelib/testlib"
1214
"github.com/stretchr/testify/assert"
@@ -20,17 +22,30 @@ func TestIsErrInvalidEnvelope(t *testing.T) {
2022
assert.False(t, IsErrInvalidEnvelope(errors.New("test")))
2123
}
2224

23-
func testReader(r io.Reader, decl *fileDecl) *reader {
25+
func testReader(tb testing.TB, r io.Reader, decl *fileDecl) *reader {
26+
return testReader2(tb, r, decl, "")
27+
}
28+
29+
func testReader2(tb testing.TB, r io.Reader, decl *fileDecl, xpathStr string) *reader {
2430
return &reader{
2531
inputName: "test",
2632
r: bufio.NewReader(r),
2733
decl: decl,
28-
line: 1,
34+
xpath: func() *xpath.Expr {
35+
if xpathStr == "" {
36+
return nil
37+
}
38+
xpathExpr, err := caches.GetXPathExpr(xpathStr)
39+
assert.NoError(tb, err)
40+
return xpathExpr
41+
}(),
42+
root: idr.CreateNode(idr.DocumentNode, "#root"),
43+
line: 1,
2944
}
3045
}
3146

3247
func TestReadLine(t *testing.T) {
33-
r := testReader(strings.NewReader("abc\n\nefg\n \nxyz\n"), nil)
48+
r := testReader(t, strings.NewReader("abc\n\nefg\n \nxyz\n"), nil)
3449
assert.Equal(t, 1, r.line)
3550

3651
line, err := r.readLine()
@@ -58,15 +73,17 @@ func TestReadLine(t *testing.T) {
5873
// io.EOF shouldn't bump up current line number.
5974
line, err = r.readLine()
6075
assert.Equal(t, io.EOF, err)
76+
assert.Nil(t, line)
6177
assert.Equal(t, 6, r.line)
6278

6379
// reading again should still return io.EOF and line number stays.
6480
line, err = r.readLine()
6581
assert.Equal(t, io.EOF, err)
82+
assert.Nil(t, line)
6683
assert.Equal(t, 6, r.line)
6784

6885
// Another scenario that io.Reader fails
69-
r = testReader(testlib.NewMockReadCloser("read error", nil), nil)
86+
r = testReader(t, testlib.NewMockReadCloser("read error", nil), nil)
7087
assert.Equal(t, 1, r.line)
7188
line, err = r.readLine()
7289
assert.Error(t, err)
@@ -77,7 +94,7 @@ func TestReadLine(t *testing.T) {
7794

7895
func TestReadByRowsEnvelope_ByRowsDefault(t *testing.T) {
7996
// default by_rows = 1
80-
r := testReader(strings.NewReader("abc\n\nefghijklmn\n \nxyz\n"),
97+
r := testReader(t, strings.NewReader("abc\n\nefghijklmn\n \nxyz\n"),
8198
&fileDecl{Envelopes: []*envelopeDecl{{
8299
Name: strs.StrPtr("env1"),
83100
Columns: []*columnDecl{
@@ -115,7 +132,7 @@ func TestReadByRowsEnvelope_ByRowsDefault(t *testing.T) {
115132
}
116133

117134
func TestReadByRowsEnvelope_ByRowsNonDefault(t *testing.T) {
118-
r := testReader(strings.NewReader("abcdefg\n\nhijklmn\n \nabc012345\n"),
135+
r := testReader(t, strings.NewReader("abcdefg\n\nhijklmn\n \nabc012345\n"),
119136
&fileDecl{Envelopes: []*envelopeDecl{{
120137
Name: strs.StrPtr("env1"),
121138
ByRows: testlib.IntPtr(3),
@@ -158,7 +175,7 @@ var (
158175
// BenchmarkReadByRowsEnvelope-8 624 1891740 ns/op 133140 B/op 9005 allocs/op
159176
func BenchmarkReadByRowsEnvelope(b *testing.B) {
160177
for i := 0; i < b.N; i++ {
161-
r := testReader(strings.NewReader(benchReadByRowsEnvelopeInput), benchReadByRowsEnvelopeDecl)
178+
r := testReader(b, strings.NewReader(benchReadByRowsEnvelopeInput), benchReadByRowsEnvelopeDecl)
162179
for {
163180
n, err := r.readByRowsEnvelope()
164181
if err != nil {
@@ -173,14 +190,14 @@ func BenchmarkReadByRowsEnvelope(b *testing.B) {
173190
}
174191

175192
func TestReadByHeaderFooterEnvelope_EOFBeforeStart(t *testing.T) {
176-
r := testReader(strings.NewReader(""), &fileDecl{Envelopes: []*envelopeDecl{{Name: strs.StrPtr("env1")}}})
193+
r := testReader(t, strings.NewReader(""), &fileDecl{Envelopes: []*envelopeDecl{{Name: strs.StrPtr("env1")}}})
177194
n, err := r.readByHeaderFooterEnvelope()
178195
assert.Equal(t, io.EOF, err)
179196
assert.Nil(t, n)
180197
}
181198

182199
func TestReadByHeaderFooterEnvelope_ReadErrorBeforeStart(t *testing.T) {
183-
r := testReader(
200+
r := testReader(t,
184201
testlib.NewMockReadCloser("read error", nil),
185202
&fileDecl{Envelopes: []*envelopeDecl{{Name: strs.StrPtr("env1")}}})
186203
n, err := r.readByHeaderFooterEnvelope()
@@ -191,7 +208,7 @@ func TestReadByHeaderFooterEnvelope_ReadErrorBeforeStart(t *testing.T) {
191208
}
192209

193210
func TestReadByHeaderFooterEnvelope_NoEnvelopeMatch(t *testing.T) {
194-
r := testReader(
211+
r := testReader(t,
195212
strings.NewReader("efg"),
196213
&fileDecl{Envelopes: []*envelopeDecl{{
197214
Name: strs.StrPtr("env1"),
@@ -203,7 +220,7 @@ func TestReadByHeaderFooterEnvelope_NoEnvelopeMatch(t *testing.T) {
203220
}
204221

205222
func TestReadByHeaderFooterEnvelope_IncompleteEnvelope(t *testing.T) {
206-
r := testReader(
223+
r := testReader(t,
207224
strings.NewReader("abc"),
208225
&fileDecl{Envelopes: []*envelopeDecl{{
209226
Name: strs.StrPtr("env1"),
@@ -218,8 +235,8 @@ func TestReadByHeaderFooterEnvelope_IncompleteEnvelope(t *testing.T) {
218235

219236
// lf returns s with a single trailing line feed appended.
func lf(s string) string {
	const lineFeed = "\n"
	return s + lineFeed
}
220237

221-
func TestReadByHeaderFooterEnvelope_Success1(t *testing.T) {
222-
r := testReader(
238+
func TestReadByHeaderFooterEnvelope_Success(t *testing.T) {
239+
r := testReader(t,
223240
strings.NewReader(
224241
lf("begin")+
225242
lf("header-01")+
@@ -311,7 +328,7 @@ var (
311328
// BenchmarkReadByHeaderFooterEnvelope-8 310 3819649 ns/op 213840 B/op 14009 allocs/op
312329
func BenchmarkReadByHeaderFooterEnvelope(b *testing.B) {
313330
for i := 0; i < b.N; i++ {
314-
r := testReader(strings.NewReader(benchReadByHeaderFooterEnvelopeInput), benchReadByHeaderFooterEnvelopeDecl)
331+
r := testReader(b, strings.NewReader(benchReadByHeaderFooterEnvelopeInput), benchReadByHeaderFooterEnvelopeDecl)
315332
for {
316333
n, err := r.readByHeaderFooterEnvelope()
317334
if err != nil {
@@ -324,3 +341,117 @@ func BenchmarkReadByHeaderFooterEnvelope(b *testing.B) {
324341
}
325342
}
326343
}
344+
345+
func TestRead_ByRows(t *testing.T) {
346+
r := testReader2(t,
347+
strings.NewReader(
348+
// data block 1
349+
lf("a001-abc")+
350+
lf("a002-def")+
351+
lf("a003-ghi")+
352+
// data block 2
353+
lf("a001-!@#")+
354+
lf("a002-$%^")+
355+
lf("a003-&*(")+
356+
// data block 3
357+
lf("a001-012")+
358+
lf("a002-345")+
359+
lf("a003-678")),
360+
&fileDecl{Envelopes: []*envelopeDecl{
361+
{
362+
Name: strs.StrPtr("data"),
363+
ByRows: testlib.IntPtr(3),
364+
Columns: []*columnDecl{
365+
{Name: "a001_first2chars", StartPos: 6, Length: 2, LinePattern: strs.StrPtr("^a001")},
366+
{Name: "a003_last2chars", StartPos: 7, Length: 2, LinePattern: strs.StrPtr("^a003")},
367+
{Name: "a001_last1char", StartPos: 8, Length: 1, LinePattern: strs.StrPtr("^a001")},
368+
},
369+
},
370+
}},
371+
".[not(contains(a001_first2chars, '!'))]")
372+
n, err := r.Read()
373+
assert.NoError(t, err)
374+
assert.Equal(t,
375+
`{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi"}`, idr.JSONify2(n))
376+
assert.Equal(t,
377+
`{"data":{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi"}}`, idr.JSONify2(r.root))
378+
379+
n, err = r.Read()
380+
assert.NoError(t, err)
381+
assert.Equal(t,
382+
`{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78"}`, idr.JSONify2(n))
383+
assert.Equal(t,
384+
`{"data":{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78"}}`, idr.JSONify2(r.root))
385+
386+
n, err = r.Read()
387+
assert.Equal(t, io.EOF, err)
388+
assert.Nil(t, n)
389+
}
390+
391+
func TestRead_ByHeaderFooter(t *testing.T) {
392+
r := testReader2(t,
393+
strings.NewReader(
394+
// global header
395+
lf("begin")+
396+
// data block 1
397+
lf("header-01")+
398+
lf("a001-abc")+
399+
lf("a002-def")+
400+
lf("a003-ghi")+
401+
lf("footer")+
402+
// data block 2
403+
lf("header-02")+
404+
lf("a001-!@#")+
405+
lf("a002-$%^")+
406+
lf("a003-&*(")+
407+
lf("footer")+
408+
// data block 3
409+
lf("header-03")+
410+
lf("a001-012")+
411+
lf("a002-345")+
412+
lf("a003-678")+
413+
lf("footer")+
414+
// global footer
415+
lf("end")),
416+
&fileDecl{Envelopes: []*envelopeDecl{
417+
{
418+
Name: strs.StrPtr("begin"),
419+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^begin", Footer: "^begin"},
420+
NotTarget: true,
421+
},
422+
{
423+
Name: strs.StrPtr("data"),
424+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^header", Footer: "^footer"},
425+
Columns: []*columnDecl{
426+
{Name: "a001_first2chars", StartPos: 6, Length: 2, LinePattern: strs.StrPtr("^a001")},
427+
{Name: "a003_last2chars", StartPos: 7, Length: 2, LinePattern: strs.StrPtr("^a003")},
428+
{Name: "a001_last1char", StartPos: 8, Length: 1, LinePattern: strs.StrPtr("^a001")},
429+
},
430+
},
431+
{
432+
Name: strs.StrPtr("end"),
433+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^end", Footer: "^end"},
434+
NotTarget: true,
435+
},
436+
}},
437+
".[not(contains(a001_first2chars, '!'))]")
438+
n, err := r.Read()
439+
assert.NoError(t, err)
440+
assert.Equal(t,
441+
`{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi"}`, idr.JSONify2(n))
442+
assert.Equal(t,
443+
`{"begin":{},"data":{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi"}}`,
444+
idr.JSONify2(r.root))
445+
446+
n, err = r.Read()
447+
assert.NoError(t, err)
448+
assert.Equal(t,
449+
`{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78"}`, idr.JSONify2(n))
450+
assert.Equal(t,
451+
`{"begin":{},"data":{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78"}}`,
452+
idr.JSONify2(r.root))
453+
454+
n, err = r.Read()
455+
assert.Equal(t, io.EOF, err)
456+
assert.Nil(t, n)
457+
}

0 commit comments

Comments
 (0)