Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions src/control/server/engine/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,18 @@ type FabricConfig struct {
NumSecondaryEndpoints []int `yaml:"secondary_provider_endpoints,omitempty" cmdLongFlag:"--nr_sec_ctx,nonzero" cmdShortFlag:"-S,nonzero"`
DisableSRX bool `yaml:"disable_srx,omitempty" cmdEnv:"FI_OFI_RXM_USE_SRX,invertBool,intBool"`
AuthKey string `yaml:"fabric_auth_key,omitempty" cmdEnv:"D_PROVIDER_AUTH_KEY"`
// AddrFormat selects the preferred IP address family for fabric init. It is
// forwarded verbatim to CaRT/Mercury via the D_ADDR_FORMAT environment
// variable. An empty value leaves the historical (IPv4-preferring) default
// untouched. For multi-provider configurations it accepts a comma-separated
// list, one entry per provider, matching the provider ordering.
AddrFormat string `yaml:"addr_format,omitempty" cmdEnv:"D_ADDR_FORMAT"`
}

// FabricAddrFormats enumerates the address-family hints accepted by addr_format,
// mirroring the values parsed by CaRT's crt_str_to_addr_format().
var FabricAddrFormats = []string{"unspec", "ipv4", "ipv6", "native"}

// GetPrimaryProvider parses the primary provider from the Provider string.
func (fc *FabricConfig) GetPrimaryProvider() (string, error) {
providers, err := fc.GetProviders()
Expand Down Expand Up @@ -120,6 +130,17 @@ func (fc *FabricConfig) GetInterfaces() ([]string, error) {
return interfaces, nil
}

// GetAddrFormats parses the AddrFormat string into zero or more per-provider
// address-family hints, matched one-to-one with the providers. Returns an empty
// slice when addr_format is unset, which preserves the default behavior.
func (fc *FabricConfig) GetAddrFormats() []string {
if fc == nil {
return nil
}

return splitMultiProviderStr(fc.AddrFormat)
}

// GetInterfacePorts parses the InterfacePort string to one or more ports.
func (fc *FabricConfig) GetInterfacePorts() ([]int, error) {
if fc == nil {
Expand Down Expand Up @@ -157,6 +178,9 @@ func (fc *FabricConfig) Update(other FabricConfig) {
if fc.AuthKey == "" {
fc.AuthKey = other.AuthKey
}
if fc.AddrFormat == "" {
fc.AddrFormat = other.AddrFormat
}
if len(fc.NumSecondaryEndpoints) == 0 {
fc.setNumSecondaryEndpoints(other.NumSecondaryEndpoints)
}
Expand Down Expand Up @@ -200,6 +224,10 @@ func (fc *FabricConfig) Validate() error {
return errors.Errorf("provider, fabric_iface and fabric_iface_port must include the same number of items delimited by %q", MultiProviderSeparator)
}

if err := fc.validateAddrFormat(numProv); err != nil {
return err
}

numSecProv := numProv - numPrimaryProviders
if numSecProv > 0 {
if len(fc.NumSecondaryEndpoints) != 0 && len(fc.NumSecondaryEndpoints) != numSecProv {
Expand All @@ -216,6 +244,31 @@ func (fc *FabricConfig) Validate() error {
return nil
}

// validateAddrFormat ensures every comma-separated addr_format entry is a
// recognized address-family hint and that the count matches the number of
// providers (one hint per provider, as with fabric_iface). An empty
// addr_format is valid and preserves the default behavior.
func (fc *FabricConfig) validateAddrFormat(numProv int) error {
if fc.AddrFormat == "" {
return nil
}

formats := splitMultiProviderStr(fc.AddrFormat)
for _, f := range formats {
if !common.Includes(FabricAddrFormats, f) {
return errors.Errorf("invalid addr_format %q, must be one of %v",
f, FabricAddrFormats)
}
}

if len(formats) != numProv {
return errors.Errorf("addr_format must include one value per provider, delimited by %q",
MultiProviderSeparator)
}

return nil
}

// cleanEnvVars scrubs the supplied slice of environment
// variables by removing all variables not included in the
// allow list.
Expand Down Expand Up @@ -711,6 +764,14 @@ func (c *Config) WithFabricAuthKey(key string) *Config {
return c
}

// WithFabricAddrFormat sets the preferred IP address family for fabric init
// (forwarded to CaRT/Mercury as D_ADDR_FORMAT). Accepted values are listed in
// FabricAddrFormats; an empty value preserves the default behavior.
func (c *Config) WithFabricAddrFormat(format string) *Config {
c.Fabric.AddrFormat = format
return c
}

// WithSrxDisabled disables or enables SRX.
func (c *Config) WithSrxDisabled(disable bool) *Config {
c.Fabric.DisableSRX = disable
Expand Down
43 changes: 43 additions & 0 deletions src/control/server/engine/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ func TestConfig_ToCmdVals(t *testing.T) {
WithFabricProvider(provider).
WithFabricInterface(interfaceName).
WithFabricInterfacePort(interfacePort).
WithFabricAddrFormat("ipv6").
WithPinnedNumaNode(pinnedNumaNode).
WithBypassHealthChk(&bypass).
WithModules(modules).
Expand Down Expand Up @@ -727,6 +728,7 @@ func TestConfig_ToCmdVals(t *testing.T) {
"D_INTERFACE=" + interfaceName,
"D_PORT=" + strconv.Itoa(interfacePort),
"D_PROVIDER=" + provider,
"D_ADDR_FORMAT=ipv6",
"D_LOG_FILE=" + logFile,
"D_LOG_MASK=" + logMask,
"CRT_TIMEOUT=" + strconv.FormatUint(uint64(crtTimeout), 10),
Expand Down Expand Up @@ -899,6 +901,47 @@ func TestFabricConfig_GetInterfaces(t *testing.T) {
}
}

func TestFabricConfig_GetAddrFormats(t *testing.T) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any validation testing against improper inputs, could we have some of that please?

for name, tc := range map[string]struct {
cfg *FabricConfig
expAddrFormats []string
}{
"nil": {
expAddrFormats: nil,
},
"unset": {
cfg: &FabricConfig{},
expAddrFormats: []string{},
},
"single": {
cfg: &FabricConfig{
AddrFormat: "ipv6",
},
expAddrFormats: []string{"ipv6"},
},
"multi": {
cfg: &FabricConfig{
AddrFormat: multiProviderString("ipv6", "ipv4"),
},
expAddrFormats: []string{"ipv6", "ipv4"},
},
"excessive whitespace": {
cfg: &FabricConfig{
AddrFormat: multiProviderString(" ipv6 ", "", "ipv4 "),
},
expAddrFormats: []string{"ipv6", "ipv4"},
},
} {
t.Run(name, func(t *testing.T) {
addrFormats := tc.cfg.GetAddrFormats()

if diff := cmp.Diff(tc.expAddrFormats, addrFormats); diff != "" {
t.Fatalf("(-want, +got):\n%s", diff)
}
})
}
}

func TestFabricConfig_GetPrimaryInterface(t *testing.T) {
for name, tc := range map[string]struct {
cfg *FabricConfig
Expand Down
18 changes: 17 additions & 1 deletion src/control/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,15 +448,31 @@ func (srv *server) setupGrpc() error {
return err
}

// Address family is a property of this system's fabric, so it must travel
// to attaching clients alongside the provider. Surface the operator's
// addr_format choice as a D_ADDR_FORMAT entry in the per-provider client
// hint env, which the client applies before crt_init (see
// dc_mgmt_net_cfg_init). A client/agent attached to several systems gets
// each system's family from that system's own hint. Empty addr_format
// leaves the hint untouched, preserving the historical default.
addrFormats := srv.cfg.Fabric.GetAddrFormats()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this change require any other dependent changes to offer functionality? what is the order of landing for all dependencies and related changes?


clientNetHints := make([]*mgmtpb.ClientNetHint, 0, len(providers))
for i, p := range providers {
envVars := srv.cfg.ClientEnvVars

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks like an odd place to update ClientEnvVars, could you have a look at existing precedent for updating this within the config processing and validation workflow. there may be a more appropriate place to update this value.

if i < len(addrFormats) && addrFormats[i] != "" {
// Copy rather than append in place to avoid aliasing the shared
// ClientEnvVars slice across providers.
envVars = append(append([]string{}, envVars...), "D_ADDR_FORMAT="+addrFormats[i])
}

clientNetHints = append(clientNetHints, &mgmtpb.ClientNetHint{
Provider: p,
CrtTimeout: srv.cfg.Fabric.CrtTimeout,
NetDevClass: uint32(srv.netDevClass[i]),
SrvSrxSet: srxSetting,
ProviderIdx: uint32(i),
EnvVars: srv.cfg.ClientEnvVars,
EnvVars: envVars,
})
}
srv.mgmtSvc.clientNetworkHint = clientNetHints
Expand Down
18 changes: 18 additions & 0 deletions utils/config/daos_server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,24 @@
#fabric_auth_key: foo:bar
#
#
## CART: Fabric address family
## Selects the preferred IP address family for fabric initialization,
## forwarded to CaRT/Mercury as D_ADDR_FORMAT. Leave unset to keep the
## historical default (the provider's own preference; IPv4 for verbs/RoCE).
## Set to "ipv6" for IPv6-only fabric NIC deployments, where the default
## IPv4 preference would otherwise hide the only usable interfaces.
## Accepted values: unspec (default), ipv4, ipv6, native. For multi-provider
## configurations, supply a comma-separated list, one entry per provider.
##
## This value is also advertised to attaching clients (as D_ADDR_FORMAT in the
## GetAttachInfo network hint), so clients select the same address family as
## this system's engines without separate client configuration. A client or
## agent attached to several systems picks up each system's family from that
## system's hint.
#
#addr_format: ipv6
#
#
## Core Dump Filter
## Optional filter to control which mappings are written to the core
## dump in the event of a crash. See the following URL for more detail:
Expand Down
Loading