|
| 1 | +package csv |
| 2 | + |
| 3 | +import ( |
| 4 | + "fmt" |
| 5 | + "regexp" |
| 6 | + |
| 7 | + "github.com/jf-tech/go-corelib/maths" |
| 8 | + |
| 9 | + "github.com/jf-tech/omniparser/extensions/omniv21/fileformat/flatfile" |
| 10 | +) |
| 11 | + |
// ColumnDecl is the schema declaration of a single column of a csv record.
type ColumnDecl struct {
	// Name is the column name; it maps to the JSON property "name" and is
	// omitted from marshaled output when empty.
	Name string `json:"name,omitempty"`
}
| 16 | + |
// Design note: given that ColumnDecl currently contains only the Name field, we could've simply
// changed RecordDecl.Columns into a []string. But then, if we ever needed to add anything to a
// column decl in the future, we'd have to introduce a breaking schema change.
| 20 | + |
// String values recognized for RecordDecl.Type.
const (
	// typeRecord is presumably the Type value of a plain (non-group) record;
	// it is not referenced in this part of the file.
	typeRecord = "record"
	// typeGroup is the Type value that makes RecordDecl.Group report true.
	typeGroup = "record_group"
)
| 25 | + |
| 26 | +// RecordDecl describes an record of a csv/delimited input. |
| 27 | +// If Rows/Header/Footer are all nil, then it defaults to Rows = 1. |
| 28 | +// If Rows specified, then Header/Footer must be nil. (JSON schema validation will ensure this.) |
| 29 | +// If Header is specified, Rows must be nil. (JSON schema validation will ensure this.) |
| 30 | +// Footer is optional; If not specified, Header will be used for a single-line record matching. |
| 31 | +type RecordDecl struct { |
| 32 | + Name string `json:"name,omitempty"` |
| 33 | + Rows *int `json:"rows,omitempty"` |
| 34 | + Header *string `json:"header,omitempty"` |
| 35 | + Footer *string `json:"footer,omitempty"` |
| 36 | + Type *string `json:"type,omitempty"` |
| 37 | + IsTarget bool `json:"is_target,omitempty"` |
| 38 | + Min *int `json:"min,omitempty"` |
| 39 | + Max *int `json:"max,omitempty"` |
| 40 | + Columns []*ColumnDecl `json:"columns,omitempty"` |
| 41 | + Children []*RecordDecl `json:"child_records,omitempty"` |
| 42 | + |
| 43 | + fqdn string // fully hierarchical name to the record. |
| 44 | + childRecDecls []flatfile.RecDecl |
| 45 | + headerRegexp *regexp.Regexp |
| 46 | + footerRegexp *regexp.Regexp |
| 47 | +} |
| 48 | + |
| 49 | +func (r *RecordDecl) DeclName() string { |
| 50 | + return r.Name |
| 51 | +} |
| 52 | + |
| 53 | +func (r *RecordDecl) Target() bool { |
| 54 | + return r.IsTarget |
| 55 | +} |
| 56 | + |
| 57 | +func (r *RecordDecl) Group() bool { |
| 58 | + return r.Type != nil && *r.Type == typeGroup |
| 59 | +} |
| 60 | + |
| 61 | +// MinOccurs defaults to 0. CSV/delimited input most common scenario is min=0/max=unbounded. |
| 62 | +func (r *RecordDecl) MinOccurs() int { |
| 63 | + switch r.Min { |
| 64 | + case nil: |
| 65 | + return 0 |
| 66 | + default: |
| 67 | + return *r.Min |
| 68 | + } |
| 69 | +} |
| 70 | + |
| 71 | +// MaxOccurs defaults to unbounded. CSV/delimited input most common scenario is min=0/max=unbounded. |
| 72 | +func (r *RecordDecl) MaxOccurs() int { |
| 73 | + switch { |
| 74 | + case r.Max == nil: |
| 75 | + fallthrough |
| 76 | + case *r.Max < 0: |
| 77 | + return maths.MaxIntValue |
| 78 | + default: |
| 79 | + return *r.Max |
| 80 | + } |
| 81 | +} |
| 82 | + |
| 83 | +func (r *RecordDecl) ChildDecls() []flatfile.RecDecl { |
| 84 | + return r.childRecDecls |
| 85 | +} |
| 86 | + |
| 87 | +func (r *RecordDecl) rowsBased() bool { |
| 88 | + if r.Group() { |
| 89 | + panic("record_group is neither rows based nor header/footer based") |
| 90 | + } |
| 91 | + // for header/footer based record, header must be specified; otherwise, it's rows based. |
| 92 | + return r.Header == nil |
| 93 | +} |
| 94 | + |
| 95 | +// rows() defaults to 1. csv/delimited most common scenario is rows-based single line record. |
| 96 | +func (r *RecordDecl) rows() int { |
| 97 | + if !r.rowsBased() { |
| 98 | + panic(fmt.Sprintf("record '%s' is not rows based", r.fqdn)) |
| 99 | + } |
| 100 | + if r.Rows == nil { |
| 101 | + return 1 |
| 102 | + } |
| 103 | + return *r.Rows |
| 104 | +} |
| 105 | + |
| 106 | +func (r *RecordDecl) matchHeader(line []byte) bool { |
| 107 | + if r.headerRegexp == nil { |
| 108 | + panic(fmt.Sprintf("record '%s' is not header/footer based", r.fqdn)) |
| 109 | + } |
| 110 | + return r.headerRegexp.Match(line) |
| 111 | +} |
| 112 | + |
| 113 | +// Footer is optional. If not specified, it always matches. Thus for a header/footer record, |
| 114 | +// if the footer isn't specified, it effectively becomes a single-row record matched by header, |
| 115 | +// given that after the header matches a line, matchFooter is called on the same line. |
| 116 | +func (r *RecordDecl) matchFooter(line []byte) bool { |
| 117 | + if r.footerRegexp == nil { |
| 118 | + return true |
| 119 | + } |
| 120 | + return r.footerRegexp.Match(line) |
| 121 | +} |
| 122 | + |
| 123 | +func toFlatFileRecDecls(rs []*RecordDecl) []flatfile.RecDecl { |
| 124 | + if len(rs) == 0 { |
| 125 | + return nil |
| 126 | + } |
| 127 | + ret := make([]flatfile.RecDecl, len(rs)) |
| 128 | + for i, r := range rs { |
| 129 | + ret[i] = r |
| 130 | + } |
| 131 | + return ret |
| 132 | +} |
| 133 | + |
| 134 | +// FileDecl describes csv/delimited schema `file_declaration` setting. |
| 135 | +type FileDecl struct { |
| 136 | + Delimiter string `json:"delimiter,omitempty"` |
| 137 | + ReplaceDoubleQuotes bool `json:"replace_double_quotes,omitempty"` |
| 138 | + Records []*RecordDecl `json:"records,omitempty"` |
| 139 | +} |
0 commit comments