diff --git a/ollamaniemotron3nano30ba3b/Chart.yaml b/ollamaniemotron3nano30ba3b/Chart.yaml new file mode 100644 index 000000000..375f3c55a --- /dev/null +++ b/ollamaniemotron3nano30ba3b/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +appVersion: 'nemotron3nano30b:a3b' +description: description +name: ollamaniemotron3nano30ba3b +type: application +version: '1.0.0' diff --git a/ollamaniemotron3nano30ba3b/OlaresManifest.yaml b/ollamaniemotron3nano30ba3b/OlaresManifest.yaml new file mode 100644 index 000000000..7f5e8c1f8 --- /dev/null +++ b/ollamaniemotron3nano30ba3b/OlaresManifest.yaml @@ -0,0 +1,117 @@ +--- +olaresManifest.version: '0.11.0' +olaresManifest.type: app +apiVersion: 'v2' +metadata: + name: ollamaniemotron3nano30ba3b + icon: https://app.cdn.olares.com/appstore/llm/ollama/llm/Cogito-14b.png + description: NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference. + appid: ollamaniemotron3nano30ba3b + title: Nemotron-3-Nano 30B A3B (Ollama) + version: '1.0.0' + categories: + - AI +sharedEntrances: + - name: ollamaniemotron3nano30ba3b + host: sharedentrances-api + port: 0 + title: Nemotron-3-Nano 30B A3B + invisible: true + authLevel: internal + icon: https://app.cdn.olares.com/appstore/llm/ollama/llm/Cogito-14b.png +entrances: + - name: ollamaclient + port: 8080 + host: ollamaclient + title: Nemotron-3-Nano 30B A3B + authLevel: internal + icon: https://app.cdn.olares.com/appstore/llm/ollama/llm/Cogito-14b.png + openMethod: window + +spec: + versionName: 'nemotron3nano30b:a3b' + fullDescription: | + ## IMPORTANT NOTE ## + This is a shared app. Once installed by the Olares Admin, all users in the cluster can use it through the reference app. + + ## MODEL OVERVIEW ## + NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference. 
+ + Nemotron-3-Nano 30B A3B is a Mixture-of-Experts (MoE) model that activates only a fraction of its total parameters per token, delivering strong reasoning and generation quality at a lower inference cost than comparable dense models. + + # Features + - Mixture-of-Experts architecture for efficient GPU inference + - 128k (131072) token context window for long-document and agentic workloads + - Served locally via Ollama with an OpenAI-compatible API surface + - Distributed as quantized GGUF weights by Unsloth + developer: NVIDIA / Unsloth + website: https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF + sourceCode: https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF + submitter: Zachary Nanfelt + locale: + - en-US + - zh-CN + doc: https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF + license: + - text: NVIDIA Open Model License + url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/ + + {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} + limitedCpu: 6200m + requiredCpu: 150m + requiredDisk: 50Mi + limitedDisk: 80Gi + limitedMemory: 41Gi + requiredMemory: 16Gi + requiredGpu: 16Gi + limitedGpu: 24Gi + {{- else }} + requiredMemory: 64Mi + limitedMemory: 800Mi + requiredDisk: 50Mi + limitedDisk: 200Mi + requiredCpu: 10m + limitedCpu: 800m + {{- end }} + + supportArch: + - amd64 + subCharts: + - name: ollamaniemotron3nano30ba3bserver + shared: true + - name: ollamaniemotron3nano30ba3b +permission: + appData: true + appCache: true + userData: + - Home +options: + apiTimeout: 0 + appScope: + {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} + clusterScoped: true + appRef: + - ollamaniemotron3nano30ba3b + {{- else }} + clusterScoped: false + {{- end }} + dependencies: + - name: olares + version: '>=1.12.3-0' + type: system + {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} + {{- else }} + - name: 
ollamaniemotron3nano30ba3b + type: application + version: '>=1.0.0' + mandatory: true + {{- end }} +{{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} +envs: + - envName: KEEP_ALIVE + required: false + type: bool + editable: true + applyOnChange: true + default: "false" +{{- end }} diff --git a/ollamaniemotron3nano30ba3b/README.md b/ollamaniemotron3nano30ba3b/README.md new file mode 100644 index 000000000..4d694f886 --- /dev/null +++ b/ollamaniemotron3nano30ba3b/README.md @@ -0,0 +1,13 @@ +# Nemotron-3-Nano 30B A3B (Ollama) + +NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE +architecture with a 128k (131072) token context window, designed for efficient +GPU inference. + +This is a shared app. Once installed by the Olares Admin, all users in the +cluster can use it through the reference app. + +- Model: `nemotron3nano30b:a3b` +- Developer: NVIDIA / Unsloth +- Source: https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF +- License: NVIDIA Open Model License diff --git a/ollamaniemotron3nano30ba3b/i18n/en-US/OlaresManifest.yaml b/ollamaniemotron3nano30ba3b/i18n/en-US/OlaresManifest.yaml new file mode 100644 index 000000000..3dec02464 --- /dev/null +++ b/ollamaniemotron3nano30ba3b/i18n/en-US/OlaresManifest.yaml @@ -0,0 +1,19 @@ +metadata: + title: Nemotron-3-Nano 30B A3B (Ollama) + description: NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference. + +spec: + fullDescription: | + ## IMPORTANT NOTE ## + This is a shared app. Once installed by the Olares Admin, all users in the cluster can use it through the reference app. + + ## MODEL OVERVIEW ## + NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference. 
+ + Nemotron-3-Nano 30B A3B is a Mixture-of-Experts (MoE) model that activates only a fraction of its total parameters per token, delivering strong reasoning and generation quality at a lower inference cost than comparable dense models. + + # Features + - Mixture-of-Experts architecture for efficient GPU inference + - 128k (131072) token context window for long-document and agentic workloads + - Served locally via Ollama with an OpenAI-compatible API surface + - Distributed as quantized GGUF weights by Unsloth diff --git a/ollamaniemotron3nano30ba3b/i18n/zh-CN/OlaresManifest.yaml b/ollamaniemotron3nano30ba3b/i18n/zh-CN/OlaresManifest.yaml new file mode 100644 index 000000000..3dec02464 --- /dev/null +++ b/ollamaniemotron3nano30ba3b/i18n/zh-CN/OlaresManifest.yaml @@ -0,0 +1,19 @@ +metadata: + title: Nemotron-3-Nano 30B A3B (Ollama) + description: NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference. + +spec: + fullDescription: | + ## IMPORTANT NOTE ## + This is a shared app. Once installed by the Olares Admin, all users in the cluster can use it through the reference app. + + ## MODEL OVERVIEW ## + NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference. + + Nemotron-3-Nano 30B A3B is a Mixture-of-Experts (MoE) model that activates only a fraction of its total parameters per token, delivering strong reasoning and generation quality at a lower inference cost than comparable dense models. 
+ + # Features + - Mixture-of-Experts architecture for efficient GPU inference + - 128k (131072) token context window for long-document and agentic workloads + - Served locally via Ollama with an OpenAI-compatible API surface + - Distributed as quantized GGUF weights by Unsloth diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/Chart.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/Chart.yaml new file mode 100644 index 000000000..e77b4666d --- /dev/null +++ b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +appVersion: '1.25.3-2' +description: description +name: ollamaniemotron3nano30ba3b +type: application +version: '1.0.0' diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/templates/clientproxy.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/templates/clientproxy.yaml new file mode 100644 index 000000000..61cc4971c --- /dev/null +++ b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/templates/clientproxy.yaml @@ -0,0 +1,125 @@ +--- +apiVersion: v1 +data: + nginx.conf: | + server { + + listen 8080; + access_log /opt/bitnami/openresty/nginx/logs/access.log; + error_log /opt/bitnami/openresty/nginx/logs/error.log; + + proxy_connect_timeout 600s; + proxy_send_timeout 600s; + proxy_read_timeout 1800s; + proxy_set_header host $host; + proxy_set_header x-forwarded-host $http_host; + + proxy_http_version 1.1; + + proxy_set_header upgrade $http_upgrade; + proxy_set_header connection "upgrade"; + + location / { + add_header X-Frame-Options ""; + proxy_pass http://api.ollamaniemotron3nano30ba3bserver-shared:8081; + } + } + +kind: ConfigMap +metadata: + name: nginx-config + namespace: '{{ .Release.Namespace }}' + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + creationTimestamp: null + labels: + io.kompose.service: ollamaclient + name: '{{ .Release.Name }}' + namespace: '{{ .Release.Namespace }}' +spec: + replicas: 1 + selector: + matchLabels: + 
io.kompose.service: ollamaclient + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + io.kompose.network/chrome-default: "true" + io.kompose.service: ollamaclient + spec: + volumes: + - name: nginx-config + configMap: + name: nginx-config + defaultMode: 438 + items: + - key: nginx.conf + path: nginx.conf + containers: + - name: nginx + image: "docker.io/beclab/aboveos-bitnami-openresty:1.25.3-2" + ports: + - containerPort: 8080 + protocol: TCP + env: + - name: OPENRESTY_CONF_FILE + value: /etc/nginx/nginx.conf + startupProbe: + tcpSocket: + port: 8080 + failureThreshold: 30 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 30 + timeoutSeconds: 60 + periodSeconds: 60 + successThreshold: 1 + failureThreshold: 10 + # readinessProbe: + # httpGet: + # path: / + # host: ollama.ollama-{{ .Values.admin }} + # port: 7860 + # scheme: HTTP + # initialDelaySeconds: 10 + # periodSeconds: 5 + # successThreshold: 1 + # failureThreshold: 10 + resources: + limits: + cpu: 500m + memory: 500Mi + requests: + cpu: 10m + memory: 64Mi + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: nginx-config + mountPath: /opt/bitnami/openresty/nginx/conf/server_blocks/nginx.conf + subPath: nginx.conf + +--- +apiVersion: v1 +kind: Service +metadata: + name: ollamaclient + namespace: '{{ .Release.Namespace }}' +spec: + type: ClusterIP + selector: + io.kompose.service: ollamaclient + ports: + - name: ollamaclient + protocol: TCP + port: 8080 + targetPort: 8080 diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/values.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/values.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/Chart.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/Chart.yaml new file mode 100644 index 000000000..3e09c4cf4 --- /dev/null +++ 
b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +appVersion: '0.18.3' +description: description +name: ollamaniemotron3nano30ba3bserver +type: application +version: '1.0.0' diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/api.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/api.yaml new file mode 100644 index 000000000..0eac2504b --- /dev/null +++ b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/api.yaml @@ -0,0 +1,100 @@ +{{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} +{{- $ollamaclientDomainENV := split "," .Values.domain.ollamaclient -}} +{{- $ollamaclientDomain := index $ollamaclientDomainENV "_0" -}} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + creationTimestamp: null + labels: + io.kompose.service: api + name: api + namespace: "{{ .Release.Namespace }}" +spec: + replicas: 1 + selector: + matchLabels: + io.kompose.service: api + strategy: + type: Recreate + template: + metadata: + creationTimestamp: null + labels: + io.kompose.network/chrome-default: "true" + io.kompose.service: api + spec: + containers: + - name: api + image: "docker.io/beclab/harveyff-olares-ollama:v0.1.18" + env: + - name: PGID + value: "1000" + - name: PUID + value: "1000" + - name: TZ + value: Etc/UTC + - name: DISPLAY + value: ":0" + - name: OLLAMA_MODEL + valueFrom: + configMapKeyRef: + name: ollama-env + key: MODEL_NAME + - name: OLLAMA_URL + value: "http://ollama.ollamaniemotron3nano30ba3bserver-shared:11434" + - name: APP_URL + value: "https://{{ $ollamaclientDomain }}" + - name: OLLAMA_NOPRUNE + value: "1" + resources: + requests: + cpu: 300m + memory: 200Mi + limits: + cpu: 1000m + memory: 2Gi + ports: + - name: http + containerPort: 8080 + protocol: TCP + restartPolicy: Always +status: {} +--- +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + labels: + 
io.kompose.service: api + name: api + namespace: "{{ .Release.Namespace }}" +spec: + ports: + - name: "api" + port: 8081 + targetPort: 8080 + selector: + io.kompose.service: api +status: + loadBalancer: {} + +--- +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + labels: + io.kompose.service: api + name: sharedentrances-api + namespace: "{{ .Release.Namespace }}" +spec: + ports: + - name: "api" + port: 80 + targetPort: 8080 + selector: + io.kompose.service: api +status: + loadBalancer: {} +{{- end }} diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/configmap.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/configmap.yaml new file mode 100644 index 000000000..5c91d34fb --- /dev/null +++ b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/configmap.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} +{{- $ollamaclientDomainENV := split "," .Values.domain.ollamaclient -}} +{{- $ollamaclientDomain := index $ollamaclientDomainENV "_0" -}} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ollama-env + namespace: "{{ .Release.Namespace }}" +data: + PGID: "1000" + PUID: "1000" + TZ: "Etc/UTC" + GGML_CUDA_DISABLE_GRAPHS: "1" + OLLAMA_ORIGINS: "*" + OLLAMA_DEBUG: "1" + OLLAMA_CONTEXT_LENGTH: "131072" + MODEL_NAME: "nemotron3nano30b:a3b" + +{{- end }} diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/deployment.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/deployment.yaml new file mode 100644 index 000000000..adc94098f --- /dev/null +++ b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/templates/deployment.yaml @@ -0,0 +1,120 @@ +{{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} +{{- $ollamaclientDomainENV := split "," .Values.domain.ollamaclient -}} +{{- $ollamaclientDomain := index 
$ollamaclientDomainENV "_0" -}} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + creationTimestamp: null + labels: + io.kompose.service: ollama + name: ollama + namespace: "{{ .Release.Namespace }}" + annotations: + applications.app.bytetrade.io/gpu-inject: "true" +spec: + replicas: 1 + selector: + matchLabels: + io.kompose.service: ollama + strategy: + type: Recreate + template: + metadata: + creationTimestamp: null + labels: + io.kompose.network/chrome-default: "true" + io.kompose.service: ollama + spec: + containers: + - name: ollama + image: "docker.io/beclab/ollama-ollama:0.18.3" +{{- with .Values.olaresEnv }} +{{- if and .KEEP_ALIVE (eq (lower (toString .KEEP_ALIVE)) "true") }} + command: + - sh + - '-c' + - | + echo "Starting Ollama server..." + ollama serve & + SERVER_PID=$! + until ollama list > /dev/null 2>&1; do + sleep 1 + done + echo "Ollama server is ready. Preloading model..." + START_TIME=$(date +%s) + ollama run ${MODEL_NAME} "Hello" + END_TIME=$(date +%s) + ELAPSED=$((END_TIME - START_TIME)) + echo "Model preloading completed in ${ELAPSED} seconds" + wait $SERVER_PID +{{- end }} +{{- end }} + envFrom: + - configMapRef: + name: ollama-env + env: + - name: OLLAMA_NOPRUNE + value: "1" +{{- with .Values.olaresEnv }} +{{- if and .KEEP_ALIVE (eq (lower (toString .KEEP_ALIVE)) "true") }} + - name: OLLAMA_KEEP_ALIVE + value: "-1" +{{- end }} +{{- end }} + ports: + - containerPort: 11434 + livenessProbe: + httpGet: + path: / + port: 11434 + scheme: HTTP + initialDelaySeconds: 30 + timeoutSeconds: 60 + periodSeconds: 60 + successThreshold: 1 + failureThreshold: 10 + startupProbe: + tcpSocket: + port: 11434 + timeoutSeconds: 5 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 30 + resources: + limits: + cpu: "4" + memory: 38Gi + requests: + cpu: 50m + memory: 4Gi + volumeMounts: + - mountPath: "/root/.ollama" + name: data + volumes: + - name: data + hostPath: + path: "{{ .Values.userspace.userData }}/Ollama/{{ .Release.Name }}" + type: 
DirectoryOrCreate + restartPolicy: Always +status: {} +--- +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + labels: + io.kompose.service: ollama + name: ollama + namespace: "{{ .Release.Namespace }}" +spec: + ports: + - name: "ollama" + port: 11434 + targetPort: 11434 + selector: + io.kompose.service: ollama +status: + loadBalancer: {} + +{{- end }} diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/values.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/values.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/ollamaniemotron3nano30ba3b/owners b/ollamaniemotron3nano30ba3b/owners new file mode 100644 index 000000000..5205a5d3e --- /dev/null +++ b/ollamaniemotron3nano30ba3b/owners @@ -0,0 +1,2 @@ +owners: +- 'znanfelt' diff --git a/ollamaniemotron3nano30ba3b/templates/keep b/ollamaniemotron3nano30ba3b/templates/keep new file mode 100644 index 000000000..e69de29bb diff --git a/ollamaniemotron3nano30ba3b/values.yaml b/ollamaniemotron3nano30ba3b/values.yaml new file mode 100644 index 000000000..e69de29bb