From 3b7f0a9e6c44ef0dad04817d984978f8157a8738 Mon Sep 17 00:00:00 2001 From: creed-bratton <74052163+creed-bratton@users.noreply.github.com> Date: Sat, 28 Mar 2026 10:50:12 +0000 Subject: [PATCH] feat: add IPACodesURL option to fetch fresh IPA codes at runtime ParserConfig.IPACodesURL, when set, causes NewParser to download a fresh Italian IPA codes list from the given URL instead of using the embedded snapshot. The format matches the Agid export (one code per line), so the Agid URL can be passed directly. The validator is now per-Parser rather than a package-level global, which also makes concurrent parsers with different IPA code sets safe. As a side effect, MakeIsOrganisationURI now builds its inner validator once per Parser (via closure) instead of on every validation call. The embedded list remains the default; WASM callers omit the option and continue to use it as before. --- fields.go | 10 ++--- parser.go | 77 +++++++++++++++++++++++++++------- parser_extra_test.go | 44 +++++++++++++++++++ validators/common.go | 43 ++++++++++--------- validators/common_test.go | 18 ++++---- validators/it.go | 26 +++++++----- validators/validations_test.go | 2 +- validators/validator.go | 9 ++-- validators/validator_test.go | 12 +++--- 9 files changed, 173 insertions(+), 68 deletions(-) diff --git a/fields.go b/fields.go index f16dafc..fae8fb5 100644 --- a/fields.go +++ b/fields.go @@ -92,7 +92,7 @@ func validateFieldsV0(publiccode PublicCode, parser *Parser, network bool, baseU // to use uppercase on an invalid country. if publiccodev0.IntendedAudience.Countries != nil { for i, c := range *publiccodev0.IntendedAudience.Countries { - if sharedValidate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { + if parser.validate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { vr = append(vr, ValidationWarning{ fmt.Sprintf("intendedAudience.countries[%d]", i), fmt.Sprintf("Lowercase country codes are DEPRECATED. 
Use uppercase instead ('%s')", strings.ToUpper(c)), @@ -104,7 +104,7 @@ func validateFieldsV0(publiccode PublicCode, parser *Parser, network bool, baseU if publiccodev0.IntendedAudience.UnsupportedCountries != nil { for i, c := range *publiccodev0.IntendedAudience.UnsupportedCountries { - if sharedValidate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { + if parser.validate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { vr = append(vr, ValidationWarning{ fmt.Sprintf("intendedAudience.unsupportedCountries[%d]", i), fmt.Sprintf("Lowercase country codes are DEPRECATED. Use uppercase instead ('%s')", strings.ToUpper(c)), @@ -242,7 +242,7 @@ func validateFieldsV0(publiccode PublicCode, parser *Parser, network bool, baseU } if it.Riuso.CodiceIPA != "" { - if sharedValidate.Var(it.Riuso.CodiceIPA, "is_italian_ipa_code") == nil { + if parser.validate.Var(it.Riuso.CodiceIPA, "is_italian_ipa_code") == nil { vr = append(vr, ValidationWarning{ "IT.riuso.codiceIPA", fmt.Sprintf( @@ -324,7 +324,7 @@ func validateFieldsV1(publiccode PublicCode, parser *Parser, network bool, baseU // to use uppercase on an invalid country. if publiccodev1.IntendedAudience.Countries != nil { for i, c := range *publiccodev1.IntendedAudience.Countries { - if sharedValidate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { + if parser.validate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { vr = append(vr, ValidationWarning{ fmt.Sprintf("intendedAudience.countries[%d]", i), fmt.Sprintf("Lowercase country codes are DEPRECATED. 
Use uppercase instead ('%s')", strings.ToUpper(c)), @@ -336,7 +336,7 @@ func validateFieldsV1(publiccode PublicCode, parser *Parser, network bool, baseU if publiccodev1.IntendedAudience.UnsupportedCountries != nil { for i, c := range *publiccodev1.IntendedAudience.UnsupportedCountries { - if sharedValidate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { + if parser.validate.Var(c, "iso3166_1_alpha2_lower_or_upper") == nil && c == strings.ToLower(c) { vr = append(vr, ValidationWarning{ fmt.Sprintf("intendedAudience.unsupportedCountries[%d]", i), fmt.Sprintf("Lowercase country codes are DEPRECATED. Use uppercase instead ('%s')", strings.ToUpper(c)), diff --git a/parser.go b/parser.go index 1a48773..abe48f3 100644 --- a/parser.go +++ b/parser.go @@ -1,6 +1,7 @@ package publiccode import ( + "bufio" "bytes" "context" "errors" @@ -30,21 +31,37 @@ import ( publiccodeValidator "github.com/italia/publiccode-parser-go/v5/validators" ) -// Build Validator and Translator once at package init. -var ( - sharedValidate *validator.Validate - sharedTrans ut.Translator -) +// fetchIPACodes downloads the IPA codes list from the given URL and returns it +// as a set. The format is expected to match the Agid export: one code per line. 
+func fetchIPACodes(client *http.Client, rawURL string) (map[string]struct{}, error) { + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, rawURL, nil) + if err != nil { + return nil, fmt.Errorf("building IPA codes request for %q: %w", rawURL, err) + } -func init() { - sharedValidate = publiccodeValidator.New() + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("fetching IPA codes from %q: %w", rawURL, err) + } - enLocale := en.New() - uni := ut.New(enLocale, enLocale) + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("fetching IPA codes from %q: unexpected HTTP status %d", rawURL, resp.StatusCode) //nolint:err113,lll // dynamic status code + } + + codes := make(map[string]struct{}, 24000) + scanner := bufio.NewScanner(resp.Body) + + for scanner.Scan() { + codes[strings.ToLower(scanner.Text())] = struct{}{} + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("reading IPA codes from %q: %w", rawURL, err) + } - sharedTrans, _ = uni.GetTranslator("en") - _ = en_translations.RegisterDefaultTranslations(sharedValidate, sharedTrans) - _ = publiccodeValidator.RegisterLocalErrorMessages(sharedValidate, sharedTrans) + return codes, nil } var reMapKey = regexp.MustCompile(`\[([[:alpha:]]+)\]`) @@ -75,6 +92,15 @@ type ParserConfig struct { // Timeout is the maximum duration for each HTTP request during external checks. // Defaults to 30s if zero. Timeout time.Duration + + // IPACodesURL, if set, causes the parser to fetch a fresh list of Italian + // Public Administration codes from this URL at creation time, instead of + // using the embedded snapshot. The expected format matches the Agid export: + // one code per line (https://www.indicepa.gov.it). + // + // Leave empty (default) to use the embedded snapshot, which is updated + // periodically via the repo's automated workflow. 
+ IPACodesURL string } const defaultHTTPTimeout = 30 * time.Second @@ -88,6 +114,8 @@ type Parser struct { baseURL *url.URL client *http.Client httpclient *httpclient.Client + validate *validator.Validate + trans ut.Translator } // Domain is a single code hosting service. @@ -112,6 +140,25 @@ func NewParser(config ParserConfig) (*Parser, error) { httpClient := &http.Client{Timeout: timeout} vcsurl.Client = httpClient + + ipaCodes := publiccodeValidator.DefaultIPACodes() + + if config.IPACodesURL != "" { + var err error + if ipaCodes, err = fetchIPACodes(httpClient, config.IPACodesURL); err != nil { + return nil, err + } + } + + validate := publiccodeValidator.New(ipaCodes) + + enLocale := en.New() + uni := ut.New(enLocale, enLocale) + + trans, _ := uni.GetTranslator("en") + _ = en_translations.RegisterDefaultTranslations(validate, trans) + _ = publiccodeValidator.RegisterLocalErrorMessages(validate, trans) + p := Parser{ disableNetwork: config.DisableNetwork, disableExternalChecks: config.DisableExternalChecks, @@ -119,6 +166,8 @@ func NewParser(config ParserConfig) (*Parser, error) { branch: config.Branch, client: httpClient, httpclient: httpclient.NewClient(httpClient), + validate: validate, + trans: trans, } if config.BaseURL != "" { @@ -289,7 +338,7 @@ func (p *Parser) parseStream(in io.Reader, fileURL *url.URL) (PublicCode, error) ve = append(ve, decodeResults...) 
} - err = sharedValidate.Struct(publiccode) + err = p.validate.Struct(publiccode) if err != nil { var validationErrs validator.ValidationErrors if errors.As(err, &validationErrs) { @@ -301,7 +350,7 @@ func (p *Parser) parseStream(in io.Reader, fileURL *url.URL) (PublicCode, error) ve = append(ve, ValidationError{ Key: key, - Description: err.Translate(sharedTrans), + Description: err.Translate(p.trans), Line: line, Column: column, }) diff --git a/parser_extra_test.go b/parser_extra_test.go index 6121961..ed9bf9c 100644 --- a/parser_extra_test.go +++ b/parser_extra_test.go @@ -133,6 +133,50 @@ func TestParseStreamSyntaxError(t *testing.T) { } } +// TestIPACodesURLFetch verifies that NewParser fetches and uses the list at +// ParserConfig.IPACodesURL, making a code from the served file valid. +func TestIPACodesURLFetch(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte("TESTCODE\n")) + })) + defer srv.Close() + + p, err := NewParser(ParserConfig{IPACodesURL: srv.URL}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if err := p.validate.Var("TESTCODE", "is_italian_ipa_code"); err != nil { + t.Errorf("expected TESTCODE to be valid with custom list: %v", err) + } + + if err := p.validate.Var("pcm", "is_italian_ipa_code"); err == nil { + t.Error("expected 'pcm' to be invalid when not in custom list") + } +} + +// TestIPACodesURLFetchError verifies that an unreachable IPACodesURL returns an +// error from NewParser. +func TestIPACodesURLFetchError(t *testing.T) { + _, err := NewParser(ParserConfig{IPACodesURL: "http://127.0.0.1:1/ipa_codes.txt"}) + if err == nil { + t.Fatal("expected error for unreachable IPACodesURL") + } +} + +// TestIPACodesDefaultEmbedded verifies that the embedded list is used when +// IPACodesURL is empty. 
+func TestIPACodesDefaultEmbedded(t *testing.T) { + p, err := NewParser(ParserConfig{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if err := p.validate.Var("pcm", "is_italian_ipa_code"); err != nil { + t.Errorf("expected 'pcm' to be valid with embedded list: %v", err) + } +} + func TestParseStreamReaderError(t *testing.T) { p, _ := NewParser(ParserConfig{DisableNetwork: true}) _, err := p.ParseStream(errReader{}) diff --git a/validators/common.go b/validators/common.go index 3af7284..9836f2d 100644 --- a/validators/common.go +++ b/validators/common.go @@ -80,37 +80,42 @@ func isURL(fl validator.FieldLevel) bool { panic(fmt.Sprintf("Bad field type for %T. Must implement fmt.Stringer", fl.Field().Interface())) } -func isOrganisationURI(fl validator.FieldLevel) bool { - field := fl.Field().String() +// MakeIsOrganisationURI returns a validator.Func that validates an organisation URI, +// including Italian PA URNs (urn:x-italian-pa:), using the provided +// IPA codes set. The inner validator is built once and captured in the closure. 
+func MakeIsOrganisationURI(codes map[string]struct{}) validator.Func { + inner := validator.New(validator.WithRequiredStructEnabled()) + _ = inner.RegisterValidation("is_italian_ipa_code", MakeIsItalianIpaCode(codes)) - u, err := url.ParseRequestURI(field) - if err != nil { - return false - } + return func(fl validator.FieldLevel) bool { + field := fl.Field().String() - // Validate URNs as well - if strings.EqualFold(u.Scheme, "urn") { - err := sharedValidator.Var(field, "urn_rfc2141") + u, err := url.ParseRequestURI(field) if err != nil { return false } - if strings.HasPrefix(strings.ToLower(u.Opaque), "x-italian-pa:") { - ipa := u.Opaque[len("x-italian-pa:"):] + // Validate URNs as well + if strings.EqualFold(u.Scheme, "urn") { + if err := inner.Var(field, "urn_rfc2141"); err != nil { + return false + } + + if strings.HasPrefix(strings.ToLower(u.Opaque), "x-italian-pa:") { + ipa := u.Opaque[len("x-italian-pa:"):] - _, ok := ipaCodes[strings.ToLower(ipa)] + return inner.Var(ipa, "is_italian_ipa_code") == nil + } - return ok + return true } - return true - } + if u.Scheme == "" || u.Host == "" { + return false + } - if u.Scheme == "" || u.Host == "" { - return false + return true } - - return true } // Custom validator to work around https://github.com/go-playground/validator/issues/1260 diff --git a/validators/common_test.go b/validators/common_test.go index d5cee9a..774e83a 100644 --- a/validators/common_test.go +++ b/validators/common_test.go @@ -9,7 +9,7 @@ import ( ) func TestBCP47KeysValidMap(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { M map[string]string `validate:"bcp47_keys"` @@ -22,7 +22,7 @@ func TestBCP47KeysValidMap(t *testing.T) { } func TestBCP47KeysInvalidMap(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { M map[string]string `validate:"bcp47_keys"` @@ -35,7 +35,7 @@ func TestBCP47KeysInvalidMap(t *testing.T) { } func TestIsHTTPURLValid(t *testing.T) { - v := New() + v := 
New(DefaultIPACodes()) type S struct { U *testURL `validate:"omitnil,url_http_url"` @@ -49,7 +49,7 @@ func TestIsHTTPURLValid(t *testing.T) { } func TestIsHTTPURLInvalid(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { U *testURL `validate:"omitnil,url_http_url"` @@ -63,7 +63,7 @@ func TestIsHTTPURLInvalid(t *testing.T) { } func TestIsURLValid(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { U *testURL `validate:"omitnil,url_url"` @@ -77,7 +77,7 @@ func TestIsURLValid(t *testing.T) { } func TestIsURLInvalid(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) type S struct { U *testURL `validate:"omitnil,url_url"` @@ -139,7 +139,7 @@ func TestIsHTTPURLPanicNonStringer(t *testing.T) { } }() - v := New() + v := New(DefaultIPACodes()) type S struct { U int `validate:"url_http_url"` @@ -155,7 +155,7 @@ func TestIsURLPanicNonStringer(t *testing.T) { } }() - v := New() + v := New(DefaultIPACodes()) type S struct { U int `validate:"url_url"` @@ -171,7 +171,7 @@ func TestBCP47KeysPanicNonMap(t *testing.T) { } }() - v := New() + v := New(DefaultIPACodes()) type S struct { M int `validate:"bcp47_keys"` diff --git a/validators/it.go b/validators/it.go index 4af6d35..1f742e1 100644 --- a/validators/it.go +++ b/validators/it.go @@ -8,22 +8,26 @@ import ( "github.com/italia/publiccode-parser-go/v5/data" ) -// ipaCodes is a set of valid IPA codes, lowercased for case-insensitive lookup. -var ipaCodes map[string]struct{} - -func init() { +// DefaultIPACodes parses the embedded IPA codes list and returns it as a set. 
+func DefaultIPACodes() map[string]struct{} { scanner := bufio.NewScanner(strings.NewReader(data.ItIpaCodes)) - ipaCodes = make(map[string]struct{}, 24000) + codes := make(map[string]struct{}, 24000) for scanner.Scan() { - ipaCodes[strings.ToLower(scanner.Text())] = struct{}{} + codes[strings.ToLower(scanner.Text())] = struct{}{} } + + return codes } -// isItalianIpaCode returns true if the field is a valid Italian Public Administration Code -// (iPA) from https://github.com/publiccodeyml/italian-organizations-ipa-vocabulary. -func isItalianIpaCode(fl validator.FieldLevel) bool { - _, ok := ipaCodes[strings.ToLower(fl.Field().String())] +// MakeIsItalianIpaCode returns a validator.Func that checks whether a field value +// is a valid Italian Public Administration Code (iPA) from +// https://github.com/publiccodeyml/italian-organizations-ipa-vocabulary, +// using the provided codes set. +func MakeIsItalianIpaCode(codes map[string]struct{}) validator.Func { + return func(fl validator.FieldLevel) bool { + _, ok := codes[strings.ToLower(fl.Field().String())] - return ok + return ok + } } diff --git a/validators/validations_test.go b/validators/validations_test.go index da63d95..651c0e9 100644 --- a/validators/validations_test.go +++ b/validators/validations_test.go @@ -5,7 +5,7 @@ import ( ) func TestIsOrganisationURI(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) tests := []struct { value string diff --git a/validators/validator.go b/validators/validator.go index 01a2577..617c168 100644 --- a/validators/validator.go +++ b/validators/validator.go @@ -9,7 +9,10 @@ import ( "github.com/go-playground/validator/v10" ) -func New() *validator.Validate { +// New returns a configured validator.Validate instance using the provided IPA +// codes set for Italian-specific validations. Use DefaultIPACodes() to get the +// embedded snapshot, or supply a freshly fetched set when using IPACodesURL. 
+func New(ipaCodes map[string]struct{}) *validator.Validate { validate := validator.New(validator.WithRequiredStructEnabled()) _ = validate.RegisterValidation("is_mime_type", isMIMEType) _ = validate.RegisterValidation("iso3166_1_alpha2_lower_or_upper", isIso3166Alpha2LowerOrUpper) @@ -17,13 +20,13 @@ func New() *validator.Validate { _ = validate.RegisterValidation("umin", uMin) _ = validate.RegisterValidation("url_http_url", isHTTPURL) _ = validate.RegisterValidation("url_url", isURL) - _ = validate.RegisterValidation("organisation_uri", isOrganisationURI) + _ = validate.RegisterValidation("organisation_uri", MakeIsOrganisationURI(ipaCodes)) _ = validate.RegisterValidation("is_spdx_expression", isSPDXExpression) _ = validate.RegisterValidation("is_category_v0", isCategoryV0) _ = validate.RegisterValidation("is_scope_v0", isScopeV0) - _ = validate.RegisterValidation("is_italian_ipa_code", isItalianIpaCode) + _ = validate.RegisterValidation("is_italian_ipa_code", MakeIsItalianIpaCode(ipaCodes)) _ = validate.RegisterValidation("bcp47_keys", bcp47_keys) diff --git a/validators/validator_test.go b/validators/validator_test.go index 799eaf2..2369c3b 100644 --- a/validators/validator_test.go +++ b/validators/validator_test.go @@ -9,14 +9,14 @@ import ( ) func TestNewReturnsNonNil(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) if v == nil { t.Fatal("New() returned nil") } } func TestRegisterLocalErrorMessages(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) enLocale := en.New() uni := ut.New(enLocale, enLocale) trans, _ := uni.GetTranslator("en") @@ -29,7 +29,7 @@ func TestRegisterLocalErrorMessages(t *testing.T) { } func TestNewTagNameFuncWithDashTag(t *testing.T) { - v := New() + v := New(DefaultIPACodes()) // A struct with yaml:"-" should have the field excluded from validation. 
type S struct { @@ -47,7 +47,7 @@ func TestNewTagNameFuncWithDashTag(t *testing.T) { func TestRegisterLocalErrorMessagesOrgURIAlreadyRegistered(t *testing.T) { // Pre-register "organisation_uri" so the customRegisFunc for that tag // fails on ut.Add, covering the error-return paths in RegisterLocalErrorMessages. - v := New() + v := New(DefaultIPACodes()) enLocale := en.New() uni := ut.New(enLocale, enLocale) trans, _ := uni.GetTranslator("en") @@ -65,7 +65,7 @@ func TestRegisterLocalErrorMessagesOrgURIInvalidIPAAlreadyRegistered(t *testing. // Pre-register only organisation_uri_invalid_italian_pa so the first ut.Add // in customRegisFunc succeeds but the second fails, covering the second // error-return path. - v := New() + v := New(DefaultIPACodes()) enLocale := en.New() uni := ut.New(enLocale, enLocale) trans, _ := uni.GetTranslator("en") @@ -85,7 +85,7 @@ func TestTranslateFuncFallback(t *testing.T) { // and triggering a validation that uses it. // This is an indirect test: just ensure RegisterLocalErrorMessages succeeds // and the validator can validate with translations without panicking. - v := New() + v := New(DefaultIPACodes()) enLocale := en.New() uni := ut.New(enLocale, enLocale) trans, _ := uni.GetTranslator("en")