Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pkg/backend/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ type Backend interface {

// backend is the implementation of Backend.
type backend struct {
store storage.Storage
store storage.Storage
storageDir string
}

// New creates a new backend.
Expand All @@ -81,6 +82,7 @@ func New(storageDir string) (Backend, error) {
}

return &backend{
store: store,
store: store,
storageDir: storageDir,
}, nil
}
75 changes: 75 additions & 0 deletions pkg/backend/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"io"
"os"
"path/filepath"
"strings"

retry "github.com/avast/retry-go/v4"
modelspec "github.com/modelpack/model-spec/specs-go/v1"
Expand All @@ -34,6 +35,7 @@ import (
"github.com/modelpack/modctl/pkg/backend/build/hooks"
"github.com/modelpack/modctl/pkg/backend/processor"
"github.com/modelpack/modctl/pkg/config"
"github.com/modelpack/modctl/pkg/diskspace"
"github.com/modelpack/modctl/pkg/modelfile"
"github.com/modelpack/modctl/pkg/source"
)
Expand Down Expand Up @@ -67,6 +69,14 @@ func (b *backend) Build(ctx context.Context, modelfilePath, workDir, target stri
return fmt.Errorf("failed to get source info: %w", err)
}

// Check disk space before building (only for local output).
if !cfg.OutputRemote {
totalSize := estimateBuildSize(workDir, modelfile)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if there are duplicates on the local disk? E.g., can I pull the same model multiple times?

if err := diskspace.Check(b.storageDir, totalSize); err != nil {
logrus.Warnf("build: %v", err)
}
}

// using the local output by default.
outputType := build.OutputTypeLocal
if cfg.OutputRemote {
Expand Down Expand Up @@ -263,3 +273,68 @@ func getSourceInfo(workspace string, buildConfig *config.Build) (*source.Info, e

return info, nil
}

// estimateBuildSize returns the combined size, in bytes, of the files named by
// the modelfile's config, model, code and doc entries (collected in that order).
//
// Entries that contain none of the characters "*?[]" are treated as literal
// paths: absolute entries are used verbatim, relative ones are joined onto the
// absolute form of workDir. Every other entry is joined onto the absolute
// workDir and expanded with filepath.Glob. Matched directories are walked
// recursively and only their non-directory entries are counted. According to
// the original author this mirrors the expansion rules of processor/base.go.
//
// The result is a best-effort estimate: entries that cannot be expanded,
// stat'ed or walked are logged as warnings and skipped, and 0 is returned when
// workDir cannot be resolved to an absolute path.
func estimateBuildSize(workDir string, mf modelfile.Modelfile) int64 {
	var entries []string
	for _, group := range [][]string{
		mf.GetConfigs(),
		mf.GetModels(),
		mf.GetCodes(),
		mf.GetDocs(),
	} {
		entries = append(entries, group...)
	}

	root, err := filepath.Abs(workDir)
	if err != nil {
		logrus.Warnf("build: failed to resolve workDir %s for size estimation: %v", workDir, err)
		return 0
	}

	// First pass: turn every modelfile entry into concrete filesystem paths.
	var candidates []string
	for _, entry := range entries {
		if strings.ContainsAny(entry, "*?[]") {
			expanded, globErr := filepath.Glob(filepath.Join(root, entry))
			if globErr != nil {
				logrus.Warnf("build: failed to expand pattern %s for size estimation: %v", entry, globErr)
				continue
			}
			candidates = append(candidates, expanded...)
			continue
		}

		literal := entry
		if !filepath.IsAbs(entry) {
			literal = filepath.Join(root, entry)
		}
		candidates = append(candidates, literal)
	}

	// Second pass: add up the sizes of the resolved paths.
	var sum int64
	for _, candidate := range candidates {
		st, statErr := os.Stat(candidate)
		if statErr != nil {
			logrus.Warnf("build: failed to stat %s for size estimation: %v", candidate, statErr)
			continue
		}
		if !st.IsDir() {
			sum += st.Size()
			continue
		}

		// The callback always returns nil so a single unreadable entry never
		// aborts the walk; the walk's own result is therefore discarded.
		_ = filepath.Walk(candidate, func(p string, entryInfo os.FileInfo, walkErr error) error {
			switch {
			case walkErr != nil:
				logrus.Warnf("build: failed to access %s for size estimation: %v", p, walkErr)
			case !entryInfo.IsDir():
				sum += entryInfo.Size()
			}
			return nil
		})
	}

	return sum
}
72 changes: 70 additions & 2 deletions pkg/backend/build_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
package backend

import (
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"

"github.com/modelpack/modctl/pkg/config"
"github.com/modelpack/modctl/test/mocks/modelfile"

"github.com/stretchr/testify/assert"
)

func TestGetProcessors(t *testing.T) {
Expand All @@ -41,3 +43,69 @@ func TestGetProcessors(t *testing.T) {
assert.Equal(t, "code", processors[2].Name())
assert.Equal(t, "doc", processors[3].Name())
}

// TestEstimateBuildSize checks estimateBuildSize against small on-disk
// fixtures: plain files, a directory entry, a missing file and an empty
// modelfile.
func TestEstimateBuildSize(t *testing.T) {
	// fixture describes a zero-filled file to create below the temporary workDir.
	type fixture struct {
		relPath string
		size    int
	}

	cases := []struct {
		name    string
		dirs    []string  // directories to create (relative to workDir) before the files
		files   []fixture // files to create (relative to workDir)
		configs []string  // value returned by the mocked GetConfigs
		models  []string  // value returned by the mocked GetModels
		want    int64
	}{
		{
			name: "single files",
			files: []fixture{
				{relPath: "model.bin", size: 1024},
				{relPath: "config.json", size: 256},
			},
			configs: []string{"config.json"},
			models:  []string{"model.bin"},
			want:    1280,
		},
		{
			name: "directory entry",
			dirs: []string{"models"},
			files: []fixture{
				{relPath: filepath.Join("models", "a.bin"), size: 512},
				{relPath: filepath.Join("models", "b.bin"), size: 512},
			},
			configs: []string{},
			models:  []string{"models"},
			want:    1024,
		},
		{
			name: "nonexistent file is skipped",
			files: []fixture{
				{relPath: "real.bin", size: 100},
			},
			configs: []string{},
			models:  []string{"real.bin", "missing.bin"},
			want:    100,
		},
		{
			name:    "empty modelfile",
			configs: []string{},
			models:  []string{},
			want:    0,
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			workDir := t.TempDir()

			for _, dir := range tc.dirs {
				assert.NoError(t, os.MkdirAll(filepath.Join(workDir, dir), 0755))
			}
			for _, f := range tc.files {
				assert.NoError(t, os.WriteFile(filepath.Join(workDir, f.relPath), make([]byte, f.size), 0644))
			}

			mf := &modelfile.Modelfile{}
			mf.On("GetConfigs").Return(tc.configs)
			mf.On("GetModels").Return(tc.models)
			mf.On("GetCodes").Return([]string{})
			mf.On("GetDocs").Return([]string{})

			assert.Equal(t, tc.want, estimateBuildSize(workDir, mf))
		})
	}
}
16 changes: 16 additions & 0 deletions pkg/backend/pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/modelpack/modctl/pkg/backend/remote"
"github.com/modelpack/modctl/pkg/codec"
"github.com/modelpack/modctl/pkg/config"
"github.com/modelpack/modctl/pkg/diskspace"
"github.com/modelpack/modctl/pkg/storage"
)

Expand Down Expand Up @@ -72,6 +73,21 @@ func (b *backend) Pull(ctx context.Context, target string, cfg *config.Pull) err

logrus.Debugf("pull: loaded manifest for target %s [manifest: %+v]", target, manifest)

// Check disk space before pulling layers.
var totalSize int64
for _, layer := range manifest.Layers {
totalSize += layer.Size
}
totalSize += manifest.Config.Size

targetDir := b.storageDir
if cfg.ExtractFromRemote && cfg.ExtractDir != "" {
targetDir = cfg.ExtractDir
}
if err := diskspace.Check(targetDir, totalSize); err != nil {
logrus.Warnf("pull: %v", err)
}

// TODO: need refactor as currently use a global flag to control the progress bar render.
if cfg.DisableProgress {
internalpb.SetDisableProgress(true)
Expand Down
112 changes: 112 additions & 0 deletions pkg/diskspace/check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright 2025 The CNAI Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//go:build linux || darwin

package diskspace

import (
"fmt"
"math"
"os"
"path/filepath"

"golang.org/x/sys/unix"
Comment thread
aftersnow marked this conversation as resolved.
Comment thread
aftersnow marked this conversation as resolved.
)

const (
// safetyMargin is the factor the requested byte count is multiplied by in
// Check before it is compared with the available space. A value of 1.1
// demands 10% of extra room to account for metadata overhead (manifests,
// temporary files, etc.).
safetyMargin = 1.1
)

// Check reports whether the filesystem holding dir has room for requiredBytes
// plus the safetyMargin overhead.
//
// It returns nil when requiredBytes is not positive or when enough space is
// available. Otherwise it returns a descriptive error: either the space is
// insufficient, or the available space could not be determined (no existing
// ancestor of dir was found, or statfs failed). The caller should use the
// returned error for warning purposes only and not treat it as fatal.
func Check(dir string, requiredBytes int64) error {
	if requiredBytes <= 0 {
		return nil
	}

	// statfs needs a path that exists; dir may not have been created yet, so
	// walk up until an existing ancestor is found.
	checkDir := dir
	for {
		if _, err := os.Stat(checkDir); err == nil {
			break
		}
		parent := filepath.Dir(checkDir)
		if parent == checkDir {
			// Reached the top of the path without finding an existing directory.
			return fmt.Errorf("cannot determine disk space: no existing directory found for path %s", dir)
		}
		checkDir = parent
	}

	var stat unix.Statfs_t
	if err := unix.Statfs(checkDir, &stat); err != nil {
		return fmt.Errorf("failed to check disk space for %s: %w", dir, err)
	}

	// Available space for non-root users.
	// Guard against overflow: the product of the two unsigned values may exceed
	// math.MaxInt64 and would wrap negative when cast to int64. Cap at MaxInt64.
	bavail := stat.Bavail
	bsize := uint64(stat.Bsize)
	var availableBytes int64
	if bavail > 0 && bsize > uint64(math.MaxInt64)/bavail {
		availableBytes = math.MaxInt64
	} else {
		availableBytes = int64(bavail * bsize)
	}

	// Apply the safety margin, saturating at MaxInt64. Converting a float64
	// that is out of int64 range yields an implementation-dependent value
	// (possibly negative), which would make the comparison below pass silently
	// for absurdly large requests.
	requiredWithMargin := int64(math.MaxInt64)
	if scaled := float64(requiredBytes) * safetyMargin; scaled < float64(math.MaxInt64) {
		requiredWithMargin = int64(scaled)
	}

	if availableBytes < requiredWithMargin {
		return fmt.Errorf(
			"insufficient disk space in %s: available %s, required %s (with 10%% safety margin)",
			dir, formatBytes(availableBytes), formatBytes(requiredWithMargin),
		)
	}

	return nil
}

// formatBytes renders a byte count as a short human-readable string.
//
// Counts of at least 1024 are shown with two decimals in the largest fitting
// 1024-based unit (KB, MB, GB or TB); smaller counts are printed as whole
// bytes. Negative counts are reported as "0 B".
func formatBytes(bytes int64) string {
	if bytes < 0 {
		return "0 B"
	}

	// Units ordered from largest to smallest so the first match wins.
	units := []struct {
		threshold int64
		suffix    string
	}{
		{threshold: 1 << 40, suffix: "TB"},
		{threshold: 1 << 30, suffix: "GB"},
		{threshold: 1 << 20, suffix: "MB"},
		{threshold: 1 << 10, suffix: "KB"},
	}

	for _, u := range units {
		if bytes < u.threshold {
			continue
		}
		return fmt.Sprintf("%.2f %s", float64(bytes)/float64(u.threshold), u.suffix)
	}

	return fmt.Sprintf("%d B", bytes)
}
30 changes: 30 additions & 0 deletions pkg/diskspace/check_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright 2025 The CNAI Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//go:build !linux && !darwin

package diskspace

import "github.com/sirupsen/logrus"

// Check is a no-op stub for platforms that do not yet have a disk-space
// implementation (this file is built only when the target is neither linux
// nor darwin). It records the skipped pre-check at debug level and always
// returns nil so callers can compile and run on Windows and other non-Unix
// systems, at the cost of losing the pre-check warning. Native Windows
// support is tracked upstream.
//
// dir and requiredBytes are only used in the debug log message.
func Check(dir string, requiredBytes int64) error {
logrus.Debugf("diskspace: pre-check skipped on this platform (dir=%s, required=%d)", dir, requiredBytes)
return nil
}
Loading
Loading