diff --git a/fields.go b/fields.go index f16dafc..fae8fb5 100644 --- a/fields.go +++ b/fields.go @@ -92,7 +92,7 @@ func validateFieldsV0(publiccode PublicCode, parser *Parser, network bool, baseU // to use uppercase on an invalid country. if publiccodev0.IntendedAudience.Countries != nil { for i, c := range *publiccodev0.IntendedAudience.Countries { - if sharedValidate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { + if parser.validate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { vr = append(vr, ValidationWarning{ fmt.Sprintf("intendedAudience.countries[%d]", i), fmt.Sprintf("Lowercase country codes are DEPRECATED. Use uppercase instead ('%s')", strings.ToUpper(c)), @@ -104,7 +104,7 @@ func validateFieldsV0(publiccode PublicCode, parser *Parser, network bool, baseU if publiccodev0.IntendedAudience.UnsupportedCountries != nil { for i, c := range *publiccodev0.IntendedAudience.UnsupportedCountries { - if sharedValidate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { + if parser.validate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { vr = append(vr, ValidationWarning{ fmt.Sprintf("intendedAudience.unsupportedCountries[%d]", i), fmt.Sprintf("Lowercase country codes are DEPRECATED. Use uppercase instead ('%s')", strings.ToUpper(c)), @@ -242,7 +242,7 @@ func validateFieldsV0(publiccode PublicCode, parser *Parser, network bool, baseU } if it.Riuso.CodiceIPA != "" { - if sharedValidate.Var(it.Riuso.CodiceIPA, "is_italian_ipa_code") == nil { + if parser.validate.Var(it.Riuso.CodiceIPA, "is_italian_ipa_code") == nil { vr = append(vr, ValidationWarning{ "IT.riuso.codiceIPA", fmt.Sprintf( @@ -324,7 +324,7 @@ func validateFieldsV1(publiccode PublicCode, parser *Parser, network bool, baseU // to use uppercase on an invalid country. 
if publiccodev1.IntendedAudience.Countries != nil {
 			for i, c := range *publiccodev1.IntendedAudience.Countries {
-				if sharedValidate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) {
+				if parser.validate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) {
 					vr = append(vr, ValidationWarning{
 						fmt.Sprintf("intendedAudience.countries[%d]", i),
 						fmt.Sprintf("Lowercase country codes are DEPRECATED. Use uppercase instead ('%s')", strings.ToUpper(c)),
@@ -336,7 +336,7 @@ func validateFieldsV1(publiccode PublicCode, parser *Parser, network bool, baseU
 
 		if publiccodev1.IntendedAudience.UnsupportedCountries != nil {
 			for i, c := range *publiccodev1.IntendedAudience.UnsupportedCountries {
-				if sharedValidate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) {
+				if parser.validate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) {
 					vr = append(vr, ValidationWarning{
 						fmt.Sprintf("intendedAudience.unsupportedCountries[%d]", i),
 						fmt.Sprintf("Lowercase country codes are DEPRECATED. Use uppercase instead ('%s')", strings.ToUpper(c)),
diff --git a/parser.go b/parser.go
index 1a48773..abe48f3 100644
--- a/parser.go
+++ b/parser.go
@@ -1,6 +1,7 @@
 package publiccode
 
 import (
+	"bufio"
 	"bytes"
 	"context"
 	"errors"
@@ -30,21 +31,53 @@ import (
 	publiccodeValidator "github.com/italia/publiccode-parser-go/v5/validators"
 )
 
-// Build Validator and Translator once at package init.
-var (
-	sharedValidate *validator.Validate
-	sharedTrans    ut.Translator
-)
+// fetchIPACodes downloads the IPA codes list from rawURL and returns it as a
+// set keyed by lowercased code. The format is expected to match the Agid
+// export: one code per line. Surrounding whitespace is trimmed and blank lines
+// are skipped, so a stray empty line can never register "" as a valid code,
+// and a response that yields no codes at all is rejected rather than silently
+// invalidating every IPA code.
+//
+// The request is built with context.Background(), so it is bounded only by
+// whatever timeout client carries.
+func fetchIPACodes(client *http.Client, rawURL string) (map[string]struct{}, error) {
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, rawURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("building IPA codes request for %q: %w", rawURL, err)
+	}
 
-func init() {
-	sharedValidate = publiccodeValidator.New()
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetching IPA codes from %q: %w", rawURL, err)
+	}
 
-	enLocale := en.New()
-	uni := ut.New(enLocale, enLocale)
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("fetching IPA codes from %q: unexpected HTTP status %d", rawURL, resp.StatusCode) //nolint:err113,lll // dynamic status code
+	}
+
+	codes := make(map[string]struct{}, 24000)
+	scanner := bufio.NewScanner(resp.Body)
+
+	for scanner.Scan() {
+		code := strings.TrimSpace(scanner.Text())
+		if code == "" {
+			continue
+		}
+
+		codes[strings.ToLower(code)] = struct{}{}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("reading IPA codes from %q: %w", rawURL, err)
+	}
+
+	if len(codes) == 0 {
+		return nil, fmt.Errorf("fetching IPA codes from %q: empty list", rawURL) //nolint:err113 // dynamic URL
+	}
 
-	sharedTrans, _ = uni.GetTranslator("en")
-	_ = en_translations.RegisterDefaultTranslations(sharedValidate, sharedTrans)
-	_ = publiccodeValidator.RegisterLocalErrorMessages(sharedValidate, sharedTrans)
+	return codes, nil
 }
 
 var reMapKey = regexp.MustCompile(`\[([[:alpha:]]+)\]`)
@@ -75,6 +108,15 @@ type ParserConfig struct {
 	// Timeout is the maximum duration for each HTTP request during external checks.
 	// Defaults to 30s if zero.
 	Timeout time.Duration
+
+	// IPACodesURL, if set, causes the parser to fetch a fresh list of Italian
+	// Public Administration codes from this URL at creation time, instead of
+	// using the embedded snapshot. The expected format matches the Agid export:
+	// one code per line (https://www.indicepa.gov.it).
+	//
+	// Leave empty (default) to use the embedded snapshot, which is updated
+	// periodically via the repo's automated workflow.
+ IPACodesURL string } const defaultHTTPTimeout = 30 * time.Second @@ -88,6 +114,8 @@ type Parser struct { baseURL *url.URL client *http.Client httpclient *httpclient.Client + validate *validator.Validate + trans ut.Translator } // Domain is a single code hosting service. @@ -112,6 +140,25 @@ func NewParser(config ParserConfig) (*Parser, error) { httpClient := &http.Client{Timeout: timeout} vcsurl.Client = httpClient + + ipaCodes := publiccodeValidator.DefaultIPACodes() + + if config.IPACodesURL != "" { + var err error + if ipaCodes, err = fetchIPACodes(httpClient, config.IPACodesURL); err != nil { + return nil, err + } + } + + validate := publiccodeValidator.New(ipaCodes) + + enLocale := en.New() + uni := ut.New(enLocale, enLocale) + + trans, _ := uni.GetTranslator("en") + _ = en_translations.RegisterDefaultTranslations(validate, trans) + _ = publiccodeValidator.RegisterLocalErrorMessages(validate, trans) + p := Parser{ disableNetwork: config.DisableNetwork, disableExternalChecks: config.DisableExternalChecks, @@ -119,6 +166,8 @@ func NewParser(config ParserConfig) (*Parser, error) { branch: config.Branch, client: httpClient, httpclient: httpclient.NewClient(httpClient), + validate: validate, + trans: trans, } if config.BaseURL != "" { @@ -289,7 +338,7 @@ func (p *Parser) parseStream(in io.Reader, fileURL *url.URL) (PublicCode, error) ve = append(ve, decodeResults...) 
} - err = sharedValidate.Struct(publiccode) + err = p.validate.Struct(publiccode) if err != nil { var validationErrs validator.ValidationErrors if errors.As(err, &validationErrs) { @@ -301,7 +350,7 @@ func (p *Parser) parseStream(in io.Reader, fileURL *url.URL) (PublicCode, error) ve = append(ve, ValidationError{ Key: key, - Description: err.Translate(sharedTrans), + Description: err.Translate(p.trans), Line: line, Column: column, }) diff --git a/parser_extra_test.go b/parser_extra_test.go index 6121961..ed9bf9c 100644 --- a/parser_extra_test.go +++ b/parser_extra_test.go @@ -133,6 +133,50 @@ func TestParseStreamSyntaxError(t *testing.T) { } } +// TestIPACodesURLFetch verifies that setting ParserConfig.IPACodesURL fetches and uses the +// provided list: a code from the served file is valid and one absent from it is not. +func TestIPACodesURLFetch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte("TESTCODE\n")) + })) + defer srv.Close() + + p, err := NewParser(ParserConfig{IPACodesURL: srv.URL}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if err := p.validate.Var("TESTCODE", "is_italian_ipa_code"); err != nil { + t.Errorf("expected TESTCODE to be valid with custom list: %v", err) + } + + if err := p.validate.Var("pcm", "is_italian_ipa_code"); err == nil { + t.Error("expected 'pcm' to be invalid when not in custom list") + } +} + +// TestIPACodesURLFetchError verifies that an unreachable IPACodesURL returns an +// error from NewParser. +func TestIPACodesURLFetchError(t *testing.T) { + _, err := NewParser(ParserConfig{IPACodesURL: "http://127.0.0.1:1/ipa_codes.txt"}) + if err == nil { + t.Fatal("expected error for unreachable IPACodesURL") + } +} + +// TestIPACodesDefaultEmbedded verifies that the embedded list is used when +// IPACodesURL is empty. 
+func TestIPACodesDefaultEmbedded(t *testing.T) { + p, err := NewParser(ParserConfig{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if err := p.validate.Var("pcm", "is_italian_ipa_code"); err != nil { + t.Errorf("expected 'pcm' to be valid with embedded list: %v", err) + } +} + func TestParseStreamReaderError(t *testing.T) { p, _ := NewParser(ParserConfig{DisableNetwork: true}) _, err := p.ParseStream(errReader{}) diff --git a/validators/common.go b/validators/common.go index 3af7284..9836f2d 100644 --- a/validators/common.go +++ b/validators/common.go @@ -80,37 +80,42 @@ func isURL(fl validator.FieldLevel) bool { panic(fmt.Sprintf("Bad field type for %T. Must implement fmt.Stringer", fl.Field().Interface())) } -func isOrganisationURI(fl validator.FieldLevel) bool { - field := fl.Field().String() +// MakeIsOrganisationURI returns a validator.Func that validates an organisation URI, +// including Italian PA URNs (urn:x-italian-pa:CODE, where CODE is looked up), using the provided +// IPA codes set. The inner validator is built once and captured in the closure. 
+func MakeIsOrganisationURI(codes map[string]struct{}) validator.Func { + inner := validator.New(validator.WithRequiredStructEnabled()) + _ = inner.RegisterValidation("is_italian_ipa_code", MakeIsItalianIpaCode(codes)) - u, err := url.ParseRequestURI(field) - if err != nil { - return false - } + return func(fl validator.FieldLevel) bool { + field := fl.Field().String() - // Validate URNs as well - if strings.EqualFold(u.Scheme, "urn") { - err := sharedValidator.Var(field, "urn_rfc2141") + u, err := url.ParseRequestURI(field) if err != nil { return false } - if strings.HasPrefix(strings.ToLower(u.Opaque), "x-italian-pa:") { - ipa := u.Opaque[len("x-italian-pa:"):] + // Validate URNs as well + if strings.EqualFold(u.Scheme, "urn") { + if err := inner.Var(field, "urn_rfc2141"); err != nil { + return false + } + + if strings.HasPrefix(strings.ToLower(u.Opaque), "x-italian-pa:") { + ipa := u.Opaque[len("x-italian-pa:"):] - _, ok := ipaCodes[strings.ToLower(ipa)] + return inner.Var(ipa, "is_italian_ipa_code") == nil + } - return ok + return true } - return true - } + if u.Scheme == "" || u.Host == "" { + return false + } - if u.Scheme == "" || u.Host == "" { - return false + return true } - - return true } // Custom validator to work around https://github.com/go-playground/validator/issues/1260 diff --git a/validators/common_test.go b/validators/common_test.go index d5cee9a..774e83a 100644 --- a/validators/common_test.go +++ b/validators/common_test.go @@ -9,7 +9,7 @@ import ( ) func TestBCP47KeysValidMap(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { M map[string]string `validate:"bcp47_keys"` @@ -22,7 +22,7 @@ func TestBCP47KeysValidMap(t *testing.T) { } func TestBCP47KeysInvalidMap(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { M map[string]string `validate:"bcp47_keys"` @@ -35,7 +35,7 @@ func TestBCP47KeysInvalidMap(t *testing.T) { } func TestIsHTTPURLValid(t *testing.T) { - v := New() + v := 
New(DefaultIPACodes()) type S struct { U *testURL `validate:"omitnil,url_http_url"` @@ -49,7 +49,7 @@ func TestIsHTTPURLValid(t *testing.T) { } func TestIsHTTPURLInvalid(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { U *testURL `validate:"omitnil,url_http_url"` @@ -63,7 +63,7 @@ func TestIsHTTPURLInvalid(t *testing.T) { } func TestIsURLValid(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { U *testURL `validate:"omitnil,url_url"` @@ -77,7 +77,7 @@ func TestIsURLValid(t *testing.T) { } func TestIsURLInvalid(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { U *testURL `validate:"omitnil,url_url"` @@ -139,7 +139,7 @@ func TestIsHTTPURLPanicNonStringer(t *testing.T) { } }() - v := New() + v := New(DefaultIPACodes()) type S struct { U int `validate:"url_http_url"` @@ -155,7 +155,7 @@ func TestIsURLPanicNonStringer(t *testing.T) { } }() - v := New() + v := New(DefaultIPACodes()) type S struct { U int `validate:"url_url"` @@ -171,7 +171,7 @@ func TestBCP47KeysPanicNonMap(t *testing.T) { } }() - v := New() + v := New(DefaultIPACodes()) type S struct { M int `validate:"bcp47_keys"` diff --git a/validators/it.go b/validators/it.go index 4af6d35..1f742e1 100644 --- a/validators/it.go +++ b/validators/it.go @@ -8,22 +8,26 @@ import ( "github.com/italia/publiccode-parser-go/v5/data" ) -// ipaCodes is a set of valid IPA codes, lowercased for case-insensitive lookup. -var ipaCodes map[string]struct{} - -func init() { +// DefaultIPACodes parses the embedded IPA codes list on every call and returns it as a new set of lowercased codes. 
+func DefaultIPACodes() map[string]struct{} { scanner := bufio.NewScanner(strings.NewReader(data.ItIpaCodes)) - ipaCodes = make(map[string]struct{}, 24000) + codes := make(map[string]struct{}, 24000) for scanner.Scan() { - ipaCodes[strings.ToLower(scanner.Text())] = struct{}{} + codes[strings.ToLower(scanner.Text())] = struct{}{} } + + return codes } -// isItalianIpaCode returns true if the field is a valid Italian Public Administration Code -// (iPA) from https://github.com/publiccodeyml/italian-organizations-ipa-vocabulary. -func isItalianIpaCode(fl validator.FieldLevel) bool { - _, ok := ipaCodes[strings.ToLower(fl.Field().String())] +// MakeIsItalianIpaCode returns a validator.Func that checks whether a field value +// is a valid Italian Public Administration Code (iPA) from +// https://github.com/publiccodeyml/italian-organizations-ipa-vocabulary, +// using the provided codes set (keys must be lowercased: the field value is lowercased before lookup). +func MakeIsItalianIpaCode(codes map[string]struct{}) validator.Func { + return func(fl validator.FieldLevel) bool { + _, ok := codes[strings.ToLower(fl.Field().String())] - return ok + return ok + } } diff --git a/validators/validations_test.go b/validators/validations_test.go index da63d95..651c0e9 100644 --- a/validators/validations_test.go +++ b/validators/validations_test.go @@ -5,7 +5,7 @@ import ( ) func TestIsOrganisationURI(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) tests := []struct { value string diff --git a/validators/validator.go b/validators/validator.go index 01a2577..617c168 100644 --- a/validators/validator.go +++ b/validators/validator.go @@ -9,7 +9,10 @@ import ( "github.com/go-playground/validator/v10" ) -func New() *validator.Validate { +// New returns a configured validator.Validate instance using the provided IPA +// codes set for Italian-specific validations. Use DefaultIPACodes() to get the +// embedded snapshot, or supply a freshly fetched set when using IPACodesURL. 
+func New(ipaCodes map[string]struct{}) *validator.Validate { validate := validator.New(validator.WithRequiredStructEnabled()) _ = validate.RegisterValidation("is_mime_type", isMIMEType) _ = validate.RegisterValidation("iso3166_1_alpha2_lower_or_upper", isIso3166Alpha2LowerOrUpper) @@ -17,13 +20,13 @@ func New() *validator.Validate { _ = validate.RegisterValidation("umin", uMin) _ = validate.RegisterValidation("url_http_url", isHTTPURL) _ = validate.RegisterValidation("url_url", isURL) - _ = validate.RegisterValidation("organisation_uri", isOrganisationURI) + _ = validate.RegisterValidation("organisation_uri", MakeIsOrganisationURI(ipaCodes)) _ = validate.RegisterValidation("is_spdx_expression", isSPDXExpression) _ = validate.RegisterValidation("is_category_v0", isCategoryV0) _ = validate.RegisterValidation("is_scope_v0", isScopeV0) - _ = validate.RegisterValidation("is_italian_ipa_code", isItalianIpaCode) + _ = validate.RegisterValidation("is_italian_ipa_code", MakeIsItalianIpaCode(ipaCodes)) _ = validate.RegisterValidation("bcp47_keys", bcp47_keys) diff --git a/validators/validator_test.go b/validators/validator_test.go index 799eaf2..2369c3b 100644 --- a/validators/validator_test.go +++ b/validators/validator_test.go @@ -9,14 +9,14 @@ import ( ) func TestNewReturnsNonNil(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) if v == nil { t.Fatal("New() returned nil") } } func TestRegisterLocalErrorMessages(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) enLocale := en.New() uni := ut.New(enLocale, enLocale) trans, _ := uni.GetTranslator("en") @@ -29,7 +29,7 @@ func TestRegisterLocalErrorMessages(t *testing.T) { } func TestNewTagNameFuncWithDashTag(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) // A struct with yaml:"-" should have the field excluded from validation. 
type S struct { @@ -47,7 +47,7 @@ func TestNewTagNameFuncWithDashTag(t *testing.T) { func TestRegisterLocalErrorMessagesOrgURIAlreadyRegistered(t *testing.T) { // Pre-register "organisation_uri" so the customRegisFunc for that tag // fails on ut.Add, covering the error-return paths in RegisterLocalErrorMessages. - v := New() + v := New(DefaultIPACodes()) enLocale := en.New() uni := ut.New(enLocale, enLocale) trans, _ := uni.GetTranslator("en") @@ -65,7 +65,7 @@ func TestRegisterLocalErrorMessagesOrgURIInvalidIPAAlreadyRegistered(t *testing. // Pre-register only organisation_uri_invalid_italian_pa so the first ut.Add // in customRegisFunc succeeds but the second fails, covering the second // error-return path. - v := New() + v := New(DefaultIPACodes()) enLocale := en.New() uni := ut.New(enLocale, enLocale) trans, _ := uni.GetTranslator("en") @@ -85,7 +85,7 @@ func TestTranslateFuncFallback(t *testing.T) { // and triggering a validation that uses it. // This is an indirect test: just ensure RegisterLocalErrorMessages succeeds // and the validator can validate with translations without panicking. - v := New() + v := New(DefaultIPACodes()) enLocale := en.New() uni := ut.New(enLocale, enLocale) trans, _ := uni.GetTranslator("en")