Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d61d880
Re-mount volumes on a restart or update #72
kanya-approve Nov 25, 2025
2a46ac7
fix: address comments and apply fixes (#77)
sambuc Dec 1, 2025
9023632
sambuc/feat merge restart pr 2 (#78)
sambuc Dec 9, 2025
29814fb
fix tests after introduction of DriverConfig (#80)
sambuc Dec 9, 2025
9292bc4
fix: propagate context more thoroughly (#79)
olevski Dec 9, 2025
2675531
fix: the error handling was creating issues ignored previously (#81)
sambuc Dec 12, 2025
6372a5d
fix: Use node tmp folder for the mounts recovery state (#82)
sambuc Dec 15, 2025
f2a8e20
fix: Wait for the daemon to be ready (#83)
sambuc Dec 16, 2025
b3df4a4
fix: empty json body & memory unit in yaml (#85)
sambuc Dec 17, 2025
1587f6d
fix: handle pod annotations for metrics scraping (#87)
sambuc Jan 19, 2026
7ef9324
fix: pod annotations should be a map of string to strings
sambuc Jan 22, 2026
bb93654
feat: Split stage & publish operations
sambuc Jan 14, 2026
411f3aa
fix: Cleanup some warnings
sambuc Jan 14, 2026
29cdaed
fix: Add explicit support MULTI_READER_ONLY
sambuc Jan 14, 2026
5c8ba53
fix: Add standardized logs to gRPC methods
sambuc Jan 21, 2026
d797259
fix: use a tmpfs as a fixed point, review unmount process
sambuc Jan 21, 2026
5cc4bc5
fix: Add csi.NodeServiceCapability_RPC_UNKNOWN in the list, just in c…
sambuc Jan 21, 2026
d4023ab
chore: upgrade to Go 1.25
sambuc Jan 23, 2026
941dea5
fix: Cleanup some warnings
sambuc Jan 23, 2026
e6f05ba
chore: update most libraries, except csi ones
sambuc Jan 23, 2026
38afe2b
test: Try if using the tmpfs during staging works better
sambuc Jan 26, 2026
0283ba0
fix: remove dependency on deprecated utils/mount
sambuc Jan 26, 2026
d168ee2
fix: switch to the default go context library
sambuc Jan 26, 2026
2842e94
fix: ignore retry error codes
sambuc Jan 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
{
"name": "CSI rclone devcontainer",
"image": "mcr.microsoft.com/devcontainers/base:bookworm",
"remoteUser": "root",
"containerUser": "root",
"features": {
"ghcr.io/devcontainers/features/git:1": {},
"ghcr.io/devcontainers/features/go:1": {},
"ghcr.io/devcontainers/features/go:1": {
"version": "latest"
},
"ghcr.io/devcontainers-extra/features/apt-packages:1": {
"packages": "fuse3"
},
Expand Down
3 changes: 3 additions & 0 deletions .devcontainer/rclone/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@ rm -rf /tmp/rclone
# Fix the $GOPATH folder
chown -R "${USERNAME}:golang" /go
chmod -R g+r+w /go

# Make sure the default folders exist
mkdir -p /run/csi-rclone
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ ARG RCLONE_IMAGE_REPOSITORY="ghcr.io/swissdatasciencecenter/rclone"
ARG RCLONE_IMAGE_TAG="sha-308067c"
FROM ${RCLONE_IMAGE_REPOSITORY}:${RCLONE_IMAGE_TAG} AS rclone

FROM golang:1.23.8-bookworm AS build
FROM golang:1.25.6-bookworm AS build
COPY go.mod go.sum ./
RUN --mount=type=cache,target=/go/pkg/mod \
go mod download
Expand All @@ -23,4 +23,4 @@ EOT
COPY --from=build /csi-rclone /csi-rclone
COPY --from=rclone --chmod=755 /rclone /usr/bin/

ENTRYPOINT ["/csi-rclone"]
ENTRYPOINT ["/csi-rclone"]
122 changes: 25 additions & 97 deletions cmd/csi-rclone-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,35 @@ package main

import (
"context"
"errors"
"flag"
"fmt"
"os"
"os/signal"
"syscall"
"time"

"github.com/SwissDataScienceCenter/csi-rclone/pkg/common"
"github.com/SwissDataScienceCenter/csi-rclone/pkg/metrics"
"github.com/SwissDataScienceCenter/csi-rclone/pkg/rclone"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"k8s.io/klog"
mountUtils "k8s.io/mount-utils"
)

var (
endpoint string
nodeID string
cacheDir string
cacheSize string
meters []metrics.Observable
)
// exitOnError terminates the process with exit status 1 after logging err
// through klog. It returns without doing anything when err is nil or when err
// matches pflag.ErrHelp, which flag parsing returns as status information
// rather than as a real failure.
func exitOnError(err error) {
	if err == nil || errors.Is(err, pflag.ErrHelp) {
		return
	}

	klog.Error(err.Error())
	os.Exit(1)
}

// init sets the "logtostderr" flag to "true" before main runs.
//
// NOTE(review): the two statements below look like the before/after pair of a
// diff hunk (the same flag.Set call, once bare and once wrapped in
// exitOnError) — confirm which one is meant to remain. As written, the first
// call discards the error from flag.Set, while the second hands it to
// exitOnError.
func init() {
flag.Set("logtostderr", "true")
exitOnError(flag.Set("logtostderr", "true"))
}

func main() {
var meters []metrics.Observable
metricsServerConfig := metrics.ServerConfig{
Host: "localhost",
Port: 9090,
Expand All @@ -37,123 +39,49 @@ func main() {
ShutdownTimeout: 5 * time.Second,
Enabled: false,
}
nodeServerConfig := rclone.NodeServerConfig{}
controllerServerConfig := rclone.ControllerServerConfig{}

root := &cobra.Command{
Use: "rclone",
Short: "CSI based rclone driver",
}
// Allow flags to be defined in subcommands, they will be reported at the Execute() step, with the help printed
// before exiting.
root.FParseErrWhitelist.UnknownFlags = true

metricsServerConfig.CommandLineParameters(root)

runCmd := &cobra.Command{
Use: "run",
Short: "Start the CSI driver.",
}
root.AddCommand(runCmd)
exitOnError(nodeServerConfig.CommandLineParameters(runCmd, &meters))
exitOnError(controllerServerConfig.CommandLineParameters(runCmd, &meters))

runNode := &cobra.Command{
Use: "node",
Short: "Start the CSI driver node service - expected to run in a daemonset on every node.",
Run: func(cmd *cobra.Command, args []string) {
handleNode()
},
}
runNode.PersistentFlags().StringVar(&nodeID, "nodeid", "", "node id")
runNode.MarkPersistentFlagRequired("nodeid")
runNode.PersistentFlags().StringVar(&endpoint, "endpoint", "", "CSI endpoint")
runNode.MarkPersistentFlagRequired("endpoint")
runNode.PersistentFlags().StringVar(&cacheDir, "cachedir", "", "cache dir")
runNode.PersistentFlags().StringVar(&cacheSize, "cachesize", "", "cache size")
runCmd.AddCommand(runNode)
runController := &cobra.Command{
Use: "controller",
Short: "Start the CSI driver controller.",
Run: func(cmd *cobra.Command, args []string) {
handleController()
},
}
runController.PersistentFlags().StringVar(&nodeID, "nodeid", "", "node id")
runController.MarkPersistentFlagRequired("nodeid")
runController.PersistentFlags().StringVar(&endpoint, "endpoint", "", "CSI endpoint")
runController.MarkPersistentFlagRequired("endpoint")
runCmd.AddCommand(runController)
root.AddCommand(runCmd)

versionCmd := &cobra.Command{
Use: "version",
Short: "Prints information about this version of csi rclone plugin",
Run: func(cmd *cobra.Command, args []string) {
fmt.Printf("csi-rclone plugin Version: %s", rclone.DriverVersion)
fmt.Printf("csi-rclone plugin Version: %s\n", rclone.DriverVersion)
},
}
root.AddCommand(versionCmd)

root.ParseFlags(os.Args[1:])
exitOnError(root.ParseFlags(os.Args[1:]))

if metricsServerConfig.Enabled {
// Gracefully exit the metrics background servers
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
ctx, stop := signal.NotifyContext(context.Background(), common.InterruptSignals...)
defer stop()

metricsServer := metricsServerConfig.NewServer(ctx, &meters)
go metricsServer.ListenAndServe()
}

if err := root.Execute(); err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
os.Exit(1)
}
exitOnError(root.Execute())

os.Exit(0)
}

// handleNode starts the CSI driver with a node server attached.
//
// It first tries to unmount volumes left over from a previous run; a failure
// there is only logged as a warning. It then builds the driver from the
// package-level nodeID and endpoint, creates the node server from cacheDir and
// cacheSize, appends the node server's metrics to the package-level meters
// slice, and runs the driver. It panics if the node server cannot be created
// or if the driver's Run returns an error.
func handleNode() {
	if unmountErr := unmountOldVols(); unmountErr != nil {
		klog.Warningf("There was an error when trying to unmount old volumes: %v", unmountErr)
	}

	driver := rclone.NewDriver(nodeID, endpoint)
	nodeServer, err := rclone.NewNodeServer(driver.CSIDriver, cacheDir, cacheSize)
	if err != nil {
		panic(err)
	}

	meters = append(meters, nodeServer.Metrics()...)
	driver.WithNodeServer(nodeServer)

	if runErr := driver.Run(); runErr != nil {
		panic(runErr)
	}
}

// handleController starts the CSI driver with a controller server attached.
//
// It builds the driver from the package-level nodeID and endpoint, creates the
// controller server, appends the controller server's metrics to the
// package-level meters slice, and runs the driver. It panics if the driver's
// Run returns an error.
func handleController() {
	driver := rclone.NewDriver(nodeID, endpoint)
	controller := rclone.NewControllerServer(driver.CSIDriver)

	meters = append(meters, controller.Metrics()...)
	driver.WithControllerServer(controller)

	if runErr := driver.Run(); runErr != nil {
		panic(runErr)
	}
}

// unmountOldVols force-unmounts leftover rclone FUSE mounts, and is used to
// clean up volumes after a restart on a node.
//
// It lists the current mounts and, for every entry whose type is
// "fuse.rclone", attempts a forced unmount with a 5 second timeout. A failed
// unmount is logged as a warning and does not stop the loop. The only error
// returned is a failure to list the mounts.
func unmountOldVols() error {
	const (
		mountType      = "fuse.rclone"
		unmountTimeout = time.Second * 5
	)

	klog.Info("Checking for existing mounts")

	mounter := mountUtils.Mounter{}
	entries, err := mounter.List()
	if err != nil {
		return err
	}

	for _, entry := range entries {
		// Only rclone FUSE mounts are of interest; leave everything else alone.
		if entry.Type == mountType {
			if unmountErr := mounter.UnmountWithForce(entry.Path, unmountTimeout); unmountErr != nil {
				klog.Warningf("Failed to unmount %s because of %v.", entry.Path, unmountErr)
			} else {
				klog.Infof("Sucessfully unmounted %s", entry.Path)
			}
		}
	}

	return nil
}
10 changes: 6 additions & 4 deletions deploy/csi-rclone/templates/csi-controller-rclone.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ spec:
metadata:
labels:
app: csi-controller-rclone
annotations:
{{- toYaml .Values.csiControllerRclone.podAnnotations | nindent 8 }}
spec:
serviceAccountName: {{ include "chart.fullname" . }}-controller
containers:
Expand Down Expand Up @@ -54,8 +56,8 @@ spec:
image: {{ .Values.csiControllerRclone.csiProvisioner.image.repository }}:{{ .Values.csiControllerRclone.csiProvisioner.image.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.csiControllerRclone.csiProvisioner.imagePullPolicy }}
volumeMounts:
- name: socket-dir
mountPath: /csi
- mountPath: /csi
name: socket-dir
- name: rclone
args:
- run
Expand Down Expand Up @@ -85,7 +87,7 @@ spec:
fieldRef:
fieldPath: spec.nodeName
- name: CSI_ENDPOINT
value: "unix://plugin/csi.sock"
value: "unix://csi/csi.sock"
- name: KUBERNETES_CLUSTER_DOMAIN
value: {{ quote .Values.kubernetesClusterDomain }}
{{- if .Values.csiControllerRclone.rclone.goMemLimit }}
Expand Down Expand Up @@ -114,7 +116,7 @@ spec:
timeoutSeconds: 3
periodSeconds: 2
volumeMounts:
- mountPath: /plugin
- mountPath: /csi
name: socket-dir
- name: liveness-probe
imagePullPolicy: Always
Expand Down
23 changes: 16 additions & 7 deletions deploy/csi-rclone/templates/csi-nodeplugin-rclone.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@ spec:
labels:
app: csi-nodeplugin-rclone
{{- include "chart.selectorLabels" . | nindent 8 }}
annotations:
{{- toYaml .Values.csiNodepluginRclone.podAnnotations | nindent 8 }}
spec:
serviceAccountName: {{ include "chart.fullname" . }}-nodeplugin
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: node-driver-registrar
args:
- --v=5
- --csi-address=/plugin/csi.sock
- --csi-address=/csi/csi.sock
- --kubelet-registration-path=/var/lib/kubelet/plugins/{{ .Values.storageClassName }}/csi.sock
env:
- name: KUBE_NODE_NAME
Expand All @@ -45,17 +47,17 @@ spec:
resources:
{{- toYaml .Values.csiNodepluginRclone.rclone.resources | nindent 12 }}
volumeMounts:
- mountPath: /plugin
- mountPath: /csi
name: plugin-dir
- mountPath: /registration
name: registration-dir
- name: liveness-probe
imagePullPolicy: Always
image: registry.k8s.io/sig-storage/livenessprobe:v2.15.0
args:
- --csi-address=/plugin/csi.sock
- --csi-address=/csi/csi.sock
volumeMounts:
- mountPath: /plugin
- mountPath: /csi
name: plugin-dir
- name: rclone
args:
Expand Down Expand Up @@ -86,7 +88,7 @@ spec:
fieldRef:
fieldPath: spec.nodeName
- name: CSI_ENDPOINT
value: "unix://plugin/csi.sock"
value: "unix://csi/csi.sock"
- name: KUBERNETES_CLUSTER_DOMAIN
value: {{ quote .Values.kubernetesClusterDomain }}
- name: DRIVER_NAME
Expand Down Expand Up @@ -134,8 +136,10 @@ spec:
timeoutSeconds: 10
periodSeconds: 30
volumeMounts:
- mountPath: /plugin
- mountPath: /csi
name: plugin-dir
- mountPath: /run/csi-rclone
name: node-temp-dir
- mountPath: /var/lib/kubelet/pods
mountPropagation: Bidirectional
name: pods-mount-dir
Expand All @@ -154,6 +158,11 @@ spec:
{{ toYaml . | nindent 8 }}
{{- end }}
volumes:
- hostPath:
# NOTE: We mount on /tmp because we want the saved configuration to not survive a whole node restart.
path: /tmp/{{.Release.Namespace}}-{{.Release.Name}}-{{.Release.Revision}}
type: DirectoryOrCreate
name: node-temp-dir
- hostPath:
path: {{ .Values.kubeletDir }}/plugins/{{ .Values.storageClassName }}
type: DirectoryOrCreate
Expand All @@ -167,4 +176,4 @@ spec:
type: DirectoryOrCreate
name: registration-dir
- name: cache-dir
emptyDir:
emptyDir: {}
12 changes: 9 additions & 3 deletions deploy/csi-rclone/values.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
storageClassName: csi-rclone
csiControllerRclone:
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9090"
csiAttacher:
image:
repository: registry.k8s.io/sig-storage/csi-attacher
Expand All @@ -22,10 +25,10 @@ csiControllerRclone:
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128M
# memory: 128Mi
# If set, used to set GOMEMLIMIT, it should be strictly lower than
# limits.memory to prevent OOMkills
goMemLimit: # 115Mi
goMemLimit: # 115MiB
# Prometheus metrics
metrics:
enabled: true
Expand All @@ -38,6 +41,9 @@ csiControllerRclone:
annotations: {}

csiNodepluginRclone:
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9090"
nodeDriverRegistrar:
image:
repository: registry.k8s.io/sig-storage/csi-node-driver-registrar
Expand Down Expand Up @@ -68,7 +74,7 @@ csiNodepluginRclone:
# memory: 128Mi
# If set, used to set GOMEMLIMIT, it should be strictly lower than
# limits.memory to prevent OOMkills
goMemLimit: # 115Mi
goMemLimit: # 115MiB
# Prometheus metrics
metrics:
enabled: true
Expand Down
Loading