Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d61d880
Re-mount volumes on a restart or update #72
kanya-approve Nov 25, 2025
2a46ac7
fix: address comments and apply fixes (#77)
sambuc Dec 1, 2025
9023632
sambuc/feat merge restart pr 2 (#78)
sambuc Dec 9, 2025
29814fb
fix tests after introduction of DriverConfig (#80)
sambuc Dec 9, 2025
9292bc4
fix: propagate context more thoroughly (#79)
olevski Dec 9, 2025
2675531
fix: the error handling was creating issues ignored previously (#81)
sambuc Dec 12, 2025
6372a5d
fix: Use node tmp folder for the mounts recovery state (#82)
sambuc Dec 15, 2025
f2a8e20
fix: Wait for the deamon to be ready (#83)
sambuc Dec 16, 2025
b3df4a4
fix: empty json body & memory unit in yaml (#85)
sambuc Dec 17, 2025
1587f6d
fix: handle pod annotations for metrics scraping (#87)
sambuc Jan 19, 2026
7ef9324
fix: pod annotations should be a map of string to strings
sambuc Jan 22, 2026
bb93654
feat: Split stage & publish operations
sambuc Jan 14, 2026
411f3aa
fix: Cleanup some warnings
sambuc Jan 14, 2026
29cdaed
fix: Add explicit support MULTI_READER_ONLY
sambuc Jan 14, 2026
5c8ba53
fix: Add standardized logs to gRPC methods
sambuc Jan 21, 2026
d797259
fix: use a tmpfs as a fixed point, review unmount process
sambuc Jan 21, 2026
5cc4bc5
fix: Add csi.NodeServiceCapability_RPC_UNKNOWN in the list, just in c…
sambuc Jan 21, 2026
d4023ab
chore: upgrade to Go 1.25
sambuc Jan 23, 2026
941dea5
fix: Cleanup some warnings
sambuc Jan 23, 2026
e6f05ba
chore: update most libraries, except csi ones
sambuc Jan 23, 2026
38afe2b
test: Try if using the tmpfs during staging works better
sambuc Jan 26, 2026
0283ba0
fix: remove dependency on deprectaed utils/mount
sambuc Jan 26, 2026
d168ee2
fix: switch to the default go context library
sambuc Jan 26, 2026
2842e94
fix: ignore retry error codes
sambuc Jan 26, 2026
7faeb81
Revert "test: Try if using the tmpfs during staging works better"
sambuc Jan 30, 2026
5aded7f
build: add action to build the container image (dev)
leafty Jun 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
{
"name": "CSI rclone devcontainer",
"image": "mcr.microsoft.com/devcontainers/base:bookworm",
"remoteUser": "root",
"containerUser": "root",
"features": {
"ghcr.io/devcontainers/features/git:1": {},
"ghcr.io/devcontainers/features/go:1": {},
"ghcr.io/devcontainers/features/go:1": {
"version": "latest"
},
"ghcr.io/devcontainers-extra/features/apt-packages:1": {
"packages": "fuse3"
},
Expand Down
3 changes: 3 additions & 0 deletions .devcontainer/rclone/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@ rm -rf /tmp/rclone
# Fix the $GOPATH folder
chown -R "${USERNAME}:golang" /go
chmod -R g+r+w /go

# Make sure the default folders exist
mkdir -p /run/csi-rclone
71 changes: 71 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
name: Build dev version

on:
push:
workflow_dispatch:

env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
CHART_NAME: ${{ github.repository }}/helm-chart

defaults:
run:
shell: bash

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

permissions:
contents: read

jobs:
build-image:
runs-on: ubuntu-24.04
outputs:
image: ${{ steps.docker_image.outputs.image }}
image_repository: ${{ steps.docker_image.outputs.image_repository }}
image_tag: ${{ steps.docker_image.outputs.image_tag }}
permissions:
contents: read
packages: write
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Docker image metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: type=sha
- name: Extract Docker image name
id: docker_image
env:
IMAGE_TAGS: ${{ steps.meta.outputs.tags }}
run: |
IMAGE=$(echo "$IMAGE_TAGS" | cut -d" " -f1)
IMAGE_REPOSITORY=$(echo "$IMAGE" | cut -d":" -f1)
IMAGE_TAG=$(echo "$IMAGE" | cut -d":" -f2)
echo "image=$IMAGE" >> "$GITHUB_OUTPUT"
echo "image_repository=$IMAGE_REPOSITORY" >> "$GITHUB_OUTPUT"
echo "image_tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"
- name: Set up Docker buildx
uses: docker/setup-buildx-action@v3
- name: Set up Docker
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=${{ steps.docker_image.outputs.image_repository }}:buildcache
cache-to: type=registry,ref=${{ steps.docker_image.outputs.image_repository }}:buildcache,mode=max

# TODO: add job to build and push the helm chart if needed (manual trigger)
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ ARG RCLONE_IMAGE_REPOSITORY="ghcr.io/swissdatasciencecenter/rclone"
ARG RCLONE_IMAGE_TAG="sha-308067c"
FROM ${RCLONE_IMAGE_REPOSITORY}:${RCLONE_IMAGE_TAG} AS rclone

FROM golang:1.23.8-bookworm AS build
FROM golang:1.25.6-bookworm AS build
COPY go.mod go.sum ./
RUN --mount=type=cache,target=/go/pkg/mod \
go mod download
Expand All @@ -23,4 +23,4 @@ EOT
COPY --from=build /csi-rclone /csi-rclone
COPY --from=rclone --chmod=755 /rclone /usr/bin/

ENTRYPOINT ["/csi-rclone"]
ENTRYPOINT ["/csi-rclone"]
122 changes: 25 additions & 97 deletions cmd/csi-rclone-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,35 @@ package main

import (
"context"
"errors"
"flag"
"fmt"
"os"
"os/signal"
"syscall"
"time"

"github.com/SwissDataScienceCenter/csi-rclone/pkg/common"
"github.com/SwissDataScienceCenter/csi-rclone/pkg/metrics"
"github.com/SwissDataScienceCenter/csi-rclone/pkg/rclone"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"k8s.io/klog"
mountUtils "k8s.io/mount-utils"
)

// Package-level settings shared by the "run node" and "run controller" subcommands.
var (
// endpoint is bound to the --endpoint flag ("CSI endpoint") and passed to rclone.NewDriver.
endpoint string
// nodeID is bound to the --nodeid flag ("node id") and passed to rclone.NewDriver.
nodeID string
// cacheDir is bound to the node-only --cachedir flag and passed to rclone.NewNodeServer.
cacheDir string
// cacheSize is bound to the node-only --cachesize flag and passed to rclone.NewNodeServer.
cacheSize string
// meters collects the observables returned by the node/controller server Metrics() methods;
// a pointer to it is handed to the metrics server when metrics are enabled.
meters []metrics.Observable
)
// exitOnError terminates the process with exit status 1 after logging err.
//
// A nil error is a no-op. pflag.ErrHelp is also tolerated: flag parsing
// returns it to signal that help was requested, which is status information
// rather than a failure.
func exitOnError(err error) {
	if err == nil {
		return
	}
	if errors.Is(err, pflag.ErrHelp) {
		return
	}

	klog.Error(err.Error())
	os.Exit(1)
}

// init runs before main and sets the "logtostderr" flag of the default flag set to "true"
// (presumably the flag registered by klog — verify against the klog version in use).
// NOTE(review): the flag is set twice below, once discarding the returned error and once
// through exitOnError; this looks like the before/after lines of a diff shown together —
// confirm which single call is intended.
func init() {
flag.Set("logtostderr", "true")
exitOnError(flag.Set("logtostderr", "true"))
}

func main() {
var meters []metrics.Observable
metricsServerConfig := metrics.ServerConfig{
Host: "localhost",
Port: 9090,
Expand All @@ -37,123 +39,49 @@ func main() {
ShutdownTimeout: 5 * time.Second,
Enabled: false,
}
nodeServerConfig := rclone.NodeServerConfig{}
controllerServerConfig := rclone.ControllerServerConfig{}

root := &cobra.Command{
Use: "rclone",
Short: "CSI based rclone driver",
}
// Allow flags to be defined in subcommands, they will be reported at the Execute() step, with the help printed
// before exiting.
root.FParseErrWhitelist.UnknownFlags = true

metricsServerConfig.CommandLineParameters(root)

runCmd := &cobra.Command{
Use: "run",
Short: "Start the CSI driver.",
}
root.AddCommand(runCmd)
exitOnError(nodeServerConfig.CommandLineParameters(runCmd, &meters))
exitOnError(controllerServerConfig.CommandLineParameters(runCmd, &meters))

runNode := &cobra.Command{
Use: "node",
Short: "Start the CSI driver node service - expected to run in a daemonset on every node.",
Run: func(cmd *cobra.Command, args []string) {
handleNode()
},
}
runNode.PersistentFlags().StringVar(&nodeID, "nodeid", "", "node id")
runNode.MarkPersistentFlagRequired("nodeid")
runNode.PersistentFlags().StringVar(&endpoint, "endpoint", "", "CSI endpoint")
runNode.MarkPersistentFlagRequired("endpoint")
runNode.PersistentFlags().StringVar(&cacheDir, "cachedir", "", "cache dir")
runNode.PersistentFlags().StringVar(&cacheSize, "cachesize", "", "cache size")
runCmd.AddCommand(runNode)
runController := &cobra.Command{
Use: "controller",
Short: "Start the CSI driver controller.",
Run: func(cmd *cobra.Command, args []string) {
handleController()
},
}
runController.PersistentFlags().StringVar(&nodeID, "nodeid", "", "node id")
runController.MarkPersistentFlagRequired("nodeid")
runController.PersistentFlags().StringVar(&endpoint, "endpoint", "", "CSI endpoint")
runController.MarkPersistentFlagRequired("endpoint")
runCmd.AddCommand(runController)
root.AddCommand(runCmd)

versionCmd := &cobra.Command{
Use: "version",
Short: "Prints information about this version of csi rclone plugin",
Run: func(cmd *cobra.Command, args []string) {
fmt.Printf("csi-rclone plugin Version: %s", rclone.DriverVersion)
fmt.Printf("csi-rclone plugin Version: %s\n", rclone.DriverVersion)
},
}
root.AddCommand(versionCmd)

root.ParseFlags(os.Args[1:])
exitOnError(root.ParseFlags(os.Args[1:]))

if metricsServerConfig.Enabled {
// Gracefully exit the metrics background servers
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
ctx, stop := signal.NotifyContext(context.Background(), common.InterruptSignals...)
defer stop()

metricsServer := metricsServerConfig.NewServer(ctx, &meters)
go metricsServer.ListenAndServe()
}

if err := root.Execute(); err != nil {
fmt.Fprintf(os.Stderr, "%s", err.Error())
os.Exit(1)
}
exitOnError(root.Execute())

os.Exit(0)
}

// handleNode starts the CSI driver with a node server attached.
//
// It first tries to unmount volumes left over from a previous run; a failure
// there is only logged as a warning. It then builds the driver from the
// package-level nodeID/endpoint, creates the node server with the configured
// cache directory and size, registers the node server's metrics in meters and
// runs the driver. It panics if the node server cannot be created or if
// running the driver returns an error.
func handleNode() {
	if unmountErr := unmountOldVols(); unmountErr != nil {
		klog.Warningf("There was an error when trying to unmount old volumes: %v", unmountErr)
	}

	driver := rclone.NewDriver(nodeID, endpoint)

	nodeServer, newErr := rclone.NewNodeServer(driver.CSIDriver, cacheDir, cacheSize)
	if newErr != nil {
		panic(newErr)
	}
	meters = append(meters, nodeServer.Metrics()...)
	driver.WithNodeServer(nodeServer)

	if runErr := driver.Run(); runErr != nil {
		panic(runErr)
	}
}

// handleController starts the CSI driver with a controller server attached.
//
// It builds the driver from the package-level nodeID/endpoint, creates the
// controller server, registers its metrics in meters and runs the driver.
// It panics if running the driver returns an error.
func handleController() {
	driver := rclone.NewDriver(nodeID, endpoint)

	controller := rclone.NewControllerServer(driver.CSIDriver)
	meters = append(meters, controller.Metrics()...)
	driver.WithControllerServer(controller)

	if runErr := driver.Run(); runErr != nil {
		panic(runErr)
	}
}

// unmountOldVols force-unmounts every mount of type "fuse.rclone" found on
// the node; it is called when the node service starts, i.e. after a restart.
//
// Unmounting is best effort: a mount that cannot be unmounted within the
// timeout is logged as a warning and skipped. The only error returned is a
// failure to list the current mounts.
func unmountOldVols() error {
	const (
		rcloneFsType = "fuse.rclone"
		forceTimeout = 5 * time.Second
	)

	klog.Info("Checking for existing mounts")

	m := mountUtils.Mounter{}
	mountPoints, err := m.List()
	if err != nil {
		return err
	}

	for _, mp := range mountPoints {
		if mp.Type == rcloneFsType {
			if unmountErr := m.UnmountWithForce(mp.Path, forceTimeout); unmountErr != nil {
				klog.Warningf("Failed to unmount %s because of %v.", mp.Path, unmountErr)
			} else {
				klog.Infof("Sucessfully unmounted %s", mp.Path)
			}
		}
	}
	return nil
}
10 changes: 6 additions & 4 deletions deploy/csi-rclone/templates/csi-controller-rclone.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ spec:
metadata:
labels:
app: csi-controller-rclone
annotations:
{{- toYaml .Values.csiControllerRclone.podAnnotations | nindent 8 }}
spec:
serviceAccountName: {{ include "chart.fullname" . }}-controller
containers:
Expand Down Expand Up @@ -54,8 +56,8 @@ spec:
image: {{ .Values.csiControllerRclone.csiProvisioner.image.repository }}:{{ .Values.csiControllerRclone.csiProvisioner.image.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.csiControllerRclone.csiProvisioner.imagePullPolicy }}
volumeMounts:
- name: socket-dir
mountPath: /csi
- mountPath: /csi
name: socket-dir
- name: rclone
args:
- run
Expand Down Expand Up @@ -85,7 +87,7 @@ spec:
fieldRef:
fieldPath: spec.nodeName
- name: CSI_ENDPOINT
value: "unix://plugin/csi.sock"
value: "unix://csi/csi.sock"
- name: KUBERNETES_CLUSTER_DOMAIN
value: {{ quote .Values.kubernetesClusterDomain }}
{{- if .Values.csiControllerRclone.rclone.goMemLimit }}
Expand Down Expand Up @@ -114,7 +116,7 @@ spec:
timeoutSeconds: 3
periodSeconds: 2
volumeMounts:
- mountPath: /plugin
- mountPath: /csi
name: socket-dir
- name: liveness-probe
imagePullPolicy: Always
Expand Down
Loading
Loading