Skip to content

Commit bf59862

Browse files
authored
add reader.readByHeaderFooterEnvelope (#116)
1 parent 1fd2460 commit bf59862

File tree

2 files changed

+206
-5
lines changed

2 files changed

+206
-5
lines changed

extensions/omniv21/fileformat/fixedlength/reader.go

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@ import (
66
"io"
77

88
"github.com/antchfx/xpath"
9+
"github.com/jf-tech/go-corelib/caches"
910
"github.com/jf-tech/go-corelib/ios"
1011

1112
"github.com/jf-tech/omniparser/idr"
1213
)
1314

15+
// ErrInvalidEnvelope is the error type reported when a fixed-length input envelope is
// invalid. It is a fatal, non-continuable error.
type ErrInvalidEnvelope string

// Error implements the error interface; the message is the string value itself.
func (e ErrInvalidEnvelope) Error() string {
	return string(e)
}
1719

20+
// IsErrInvalidEnvelope checks if an error is of ErrInvalidEnvelope type.
1821
func IsErrInvalidEnvelope(err error) bool {
1922
switch err.(type) {
2023
case ErrInvalidEnvelope:
@@ -42,10 +45,7 @@ func (r *reader) readLine() ([]byte, error) {
4245
switch err {
4346
case nil:
4447
r.line++
45-
case io.EOF:
46-
return nil, err
4748
default:
48-
r.line++
4949
return nil, err
5050
}
5151
// skip only truly empty lines.
@@ -87,6 +87,54 @@ func (r *reader) readByRowsEnvelope() (*idr.Node, error) {
8787
return node, nil
8888
}
8989

90+
func (r *reader) readByHeaderFooterEnvelope() (*idr.Node, error) {
91+
line, err := r.readLine()
92+
if err != nil {
93+
if err == io.EOF {
94+
return nil, err
95+
}
96+
return nil, ErrInvalidEnvelope(r.fmtErrStr("incomplete envelope: %s", err.Error()))
97+
}
98+
for ; r.envelopeIndex < len(r.decl.Envelopes); r.envelopeIndex++ {
99+
// regex's are already validated
100+
headerRegex, _ := caches.GetRegex(r.decl.Envelopes[r.envelopeIndex].ByHeaderFooter.Header)
101+
if headerRegex.Match(line) {
102+
break
103+
}
104+
}
105+
if r.envelopeIndex >= len(r.decl.Envelopes) {
106+
return nil, io.EOF
107+
}
108+
envelopeDecl := r.decl.Envelopes[r.envelopeIndex]
109+
footerRegex, _ := caches.GetRegex(envelopeDecl.ByHeaderFooter.Footer)
110+
node := idr.CreateNode(idr.ElementNode, *envelopeDecl.Name)
111+
columnsDone := make([]bool, len(envelopeDecl.Columns))
112+
for {
113+
for col := range envelopeDecl.Columns {
114+
if columnsDone[col] {
115+
continue
116+
}
117+
colDecl := envelopeDecl.Columns[col]
118+
if !colDecl.lineMatch(line) {
119+
continue
120+
}
121+
colNode := idr.CreateNode(idr.ElementNode, colDecl.Name)
122+
idr.AddChild(node, colNode)
123+
colVal := idr.CreateNode(idr.TextNode, colDecl.lineToColumnValue(line))
124+
idr.AddChild(colNode, colVal)
125+
columnsDone[col] = true
126+
}
127+
if footerRegex.Match(line) {
128+
return node, nil
129+
}
130+
line, err = r.readLine()
131+
// Since the envelope has started, any reading error, including EOF, indicates incomplete envelope error.
132+
if err != nil {
133+
return nil, ErrInvalidEnvelope(r.fmtErrStr("incomplete envelope: %s", err.Error()))
134+
}
135+
}
136+
}
137+
90138
// fmtErrStr formats a message with format/args and prefixes it with the reader's input name
// and current line number.
func (r *reader) fmtErrStr(format string, args ...interface{}) string {
	msg := fmt.Sprintf(format, args...)
	return fmt.Sprintf("input '%s' line %d: %s", r.inputName, r.line, msg)
}

extensions/omniv21/fileformat/fixedlength/reader_test.go

Lines changed: 155 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ func TestReadLine(t *testing.T) {
7272
assert.Error(t, err)
7373
assert.Equal(t, "read error", err.Error())
7474
assert.Nil(t, line)
75-
// reading error (unless it's EOF) bumps current line number
76-
assert.Equal(t, 2, r.line)
75+
assert.Equal(t, 1, r.line)
7776
}
7877

7978
func TestReadByRowsEnvelope_ByRowsDefault(t *testing.T) {
@@ -133,6 +132,7 @@ func TestReadByRowsEnvelope_ByRowsNonDefault(t *testing.T) {
133132

134133
n, err = r.readByRowsEnvelope()
135134
assert.Error(t, err)
135+
assert.True(t, IsErrInvalidEnvelope(err))
136136
assert.Equal(t, "input 'test' line 6: incomplete envelope, missing 2 row(s)", err.Error())
137137
assert.Nil(t, n)
138138
}
@@ -171,3 +171,156 @@ func BenchmarkReadByRowsEnvelope(b *testing.B) {
171171
}
172172
}
173173
}
174+
175+
func TestReadByHeaderFooterEnvelope_EOFBeforeStart(t *testing.T) {
176+
r := testReader(strings.NewReader(""), &fileDecl{Envelopes: []*envelopeDecl{{Name: strs.StrPtr("env1")}}})
177+
n, err := r.readByHeaderFooterEnvelope()
178+
assert.Equal(t, io.EOF, err)
179+
assert.Nil(t, n)
180+
}
181+
182+
func TestReadByHeaderFooterEnvelope_ReadErrorBeforeStart(t *testing.T) {
183+
r := testReader(
184+
testlib.NewMockReadCloser("read error", nil),
185+
&fileDecl{Envelopes: []*envelopeDecl{{Name: strs.StrPtr("env1")}}})
186+
n, err := r.readByHeaderFooterEnvelope()
187+
assert.Error(t, err)
188+
assert.True(t, IsErrInvalidEnvelope(err))
189+
assert.Equal(t, `input 'test' line 1: incomplete envelope: read error`, err.Error())
190+
assert.Nil(t, n)
191+
}
192+
193+
func TestReadByHeaderFooterEnvelope_NoEnvelopeMatch(t *testing.T) {
194+
r := testReader(
195+
strings.NewReader("efg"),
196+
&fileDecl{Envelopes: []*envelopeDecl{{
197+
Name: strs.StrPtr("env1"),
198+
ByHeaderFooter: &byHeaderFooterDecl{Header: "abc", Footer: "abc"},
199+
}}})
200+
n, err := r.readByHeaderFooterEnvelope()
201+
assert.Equal(t, io.EOF, err)
202+
assert.Nil(t, n)
203+
}
204+
205+
func TestReadByHeaderFooterEnvelope_IncompleteEnvelope(t *testing.T) {
206+
r := testReader(
207+
strings.NewReader("abc"),
208+
&fileDecl{Envelopes: []*envelopeDecl{{
209+
Name: strs.StrPtr("env1"),
210+
ByHeaderFooter: &byHeaderFooterDecl{Header: "abc", Footer: "efg"},
211+
}}})
212+
n, err := r.readByHeaderFooterEnvelope()
213+
assert.Error(t, err)
214+
assert.True(t, IsErrInvalidEnvelope(err))
215+
assert.Equal(t, `input 'test' line 2: incomplete envelope: EOF`, err.Error())
216+
assert.Nil(t, n)
217+
}
218+
219+
// lf returns s with a single line feed appended; it is used to build multi-line test input.
func lf(s string) string {
	const newline = "\n"
	return s + newline
}
220+
221+
func TestReadByHeaderFooterEnvelope_Success1(t *testing.T) {
222+
r := testReader(
223+
strings.NewReader(
224+
lf("begin")+
225+
lf("header-01")+
226+
lf("a001-abc")+
227+
lf("a002-def")+
228+
lf("a003-ghi")+
229+
lf("footer")+
230+
lf("header-02")+
231+
lf("a001-012")+
232+
lf("a002-345")+
233+
lf("a003-678")+
234+
lf("footer")+
235+
lf("end")),
236+
&fileDecl{Envelopes: []*envelopeDecl{
237+
{
238+
Name: strs.StrPtr("begin"),
239+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^begin", Footer: "^begin"},
240+
},
241+
{
242+
Name: strs.StrPtr("data"),
243+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^header", Footer: "^footer"},
244+
Columns: []*columnDecl{
245+
{Name: "data_id", StartPos: 8, Length: 2, LinePattern: strs.StrPtr("^header")},
246+
{Name: "a001_first2chars", StartPos: 6, Length: 2, LinePattern: strs.StrPtr("^a001")},
247+
{Name: "a003_last2chars", StartPos: 7, Length: 2, LinePattern: strs.StrPtr("^a003")},
248+
{Name: "a001_last1char", StartPos: 8, Length: 1, LinePattern: strs.StrPtr("^a001")},
249+
},
250+
},
251+
{
252+
Name: strs.StrPtr("end"),
253+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^end", Footer: "^end"},
254+
},
255+
}})
256+
n, err := r.readByHeaderFooterEnvelope()
257+
assert.NoError(t, err)
258+
assert.Equal(t, "begin", n.Data)
259+
260+
n, err = r.readByHeaderFooterEnvelope()
261+
assert.NoError(t, err)
262+
assert.Equal(t,
263+
`{"a001_first2chars":"ab","a001_last1char":"c","a003_last2chars":"hi","data_id":"01"}`, idr.JSONify2(n))
264+
265+
n, err = r.readByHeaderFooterEnvelope()
266+
assert.NoError(t, err)
267+
assert.Equal(t,
268+
`{"a001_first2chars":"01","a001_last1char":"2","a003_last2chars":"78","data_id":"02"}`, idr.JSONify2(n))
269+
270+
n, err = r.readByHeaderFooterEnvelope()
271+
assert.NoError(t, err)
272+
assert.Equal(t, "end", n.Data)
273+
274+
n, err = r.readByHeaderFooterEnvelope()
275+
assert.Equal(t, io.EOF, err)
276+
assert.Nil(t, n)
277+
}
278+
279+
var (
280+
benchReadByHeaderFooterEnvelopeInput = lf("begin") +
281+
strings.Repeat(
282+
lf("header")+
283+
lf("a001-abcdefghijklmnopqrstuvwxyz0123456789")+
284+
lf("a002-abcdefghijklmnopqrstuvwxyz0123456789")+
285+
lf("a003-abcdefghijklmnopqrstuvwxyz0123456789")+
286+
lf("footer"), 1000) +
287+
lf("end")
288+
benchReadByHeaderFooterEnvelopeDecl = &fileDecl{
289+
Envelopes: []*envelopeDecl{
290+
{
291+
Name: strs.StrPtr("begin"),
292+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^begin", Footer: "^begin"},
293+
},
294+
{
295+
Name: strs.StrPtr("data"),
296+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^header", Footer: "^footer"},
297+
Columns: []*columnDecl{
298+
{Name: "a001_1", StartPos: 6, Length: 12, LinePattern: strs.StrPtr("^a001")},
299+
{Name: "a003_1", StartPos: 7, Length: 9, LinePattern: strs.StrPtr("^a003")},
300+
{Name: "a001_2", StartPos: 30, Length: 20, LinePattern: strs.StrPtr("^a001")},
301+
},
302+
},
303+
{
304+
Name: strs.StrPtr("end"),
305+
ByHeaderFooter: &byHeaderFooterDecl{Header: "^end", Footer: "^end"},
306+
},
307+
},
308+
}
309+
)
310+
311+
// BenchmarkReadByHeaderFooterEnvelope-8 310 3819649 ns/op 213840 B/op 14009 allocs/op
312+
func BenchmarkReadByHeaderFooterEnvelope(b *testing.B) {
313+
for i := 0; i < b.N; i++ {
314+
r := testReader(strings.NewReader(benchReadByHeaderFooterEnvelopeInput), benchReadByHeaderFooterEnvelopeDecl)
315+
for {
316+
n, err := r.readByHeaderFooterEnvelope()
317+
if err != nil {
318+
if err == io.EOF {
319+
break
320+
}
321+
b.FailNow()
322+
}
323+
idr.RemoveAndReleaseTree(n)
324+
}
325+
}
326+
}

0 commit comments

Comments
 (0)