beclab · znanfelt · Jun 8, 2026 · Jun 8, 2026
diff --git a/ollamaniemotron3nano30ba3b/Chart.yaml b/ollamaniemotron3nano30ba3b/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2  # Helm chart metadata for the top-level app chart
+appVersion: 'nemotron3nano30b:a3b'  # same value as spec.versionName in OlaresManifest.yaml
+description: NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama (Olares shared app)
+name: ollamaniemotron3nano30ba3b
+type: application
+version: '1.0.0'  # same value as metadata.version in OlaresManifest.yaml
diff --git a/ollamaniemotron3nano30ba3b/OlaresManifest.yaml b/ollamaniemotron3nano30ba3b/OlaresManifest.yaml
@@ -0,0 +1,117 @@
+---
+olaresManifest.version: "0.11.0"
+olaresManifest.type: app
+apiVersion: "v2"
+metadata:
+  name: ollamaniemotron3nano30ba3b
+  appid: ollamaniemotron3nano30ba3b
+  title: "Nemotron-3-Nano 30B A3B (Ollama)"
+  version: "1.0.0"
+  description: "NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference."
+  # NOTE(review): the icon file is named Cogito-14b.png - confirm it is the intended artwork for this model.
+  icon: https://app.cdn.olares.com/appstore/llm/ollama/llm/Cogito-14b.png
+  categories: [AI]
+sharedEntrances:  # single shared entrance; it reuses the icon URL from metadata.icon
+  - name: ollamaniemotron3nano30ba3b
+    title: "Nemotron-3-Nano 30B A3B"
+    icon: https://app.cdn.olares.com/appstore/llm/ollama/llm/Cogito-14b.png
+    host: sharedentrances-api
+    port: 0
+    authLevel: internal
+    invisible: true
+entrances:  # host/port match the ollamaclient Service (port 8080) in the client chart
+  - name: ollamaclient
+    title: "Nemotron-3-Nano 30B A3B"
+    icon: https://app.cdn.olares.com/appstore/llm/ollama/llm/Cogito-14b.png
+    host: ollamaclient
+    port: 8080
+    authLevel: internal
+    openMethod: window
+
+spec:  # listing text, resource budgets and sub-chart list for this app
+  versionName: 'nemotron3nano30b:a3b'  # same value as appVersion in the root Chart.yaml
+  fullDescription: |
+    ## IMPORTANT NOTE ##
+    This is a shared app. Once installed by the Olares Admin, all users in the cluster can use it through the reference app.
+
+    ## MODEL OVERVIEW ##
+    NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference.
+
+    Nemotron-3-Nano 30B A3B is a Mixture-of-Experts (MoE) model that activates only a fraction of its total parameters per token, delivering strong reasoning and generation quality at a lower inference cost than comparable dense models.
+
+    # Features
+    - Mixture-of-Experts architecture for efficient GPU inference
+    - 128k (131072) token context window for long-document and agentic workloads
+    - Served locally via Ollama with an OpenAI-compatible API surface
+    - Distributed as quantized GGUF weights by Unsloth
+  developer: NVIDIA / Unsloth
+  website: https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF
+  sourceCode: https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF
+  submitter: Zachary Nanfelt
+  locale:  # one i18n/<locale>/OlaresManifest.yaml override file exists per entry
+    - en-US
+    - zh-CN
+  doc: https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF
+  license:
+    - text: NVIDIA Open Model License
+      url: https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/
+
+  {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }}
+  requiredCpu: 150m  # branch rendered when .Values.admin equals .Values.bfl.username
+  limitedCpu: 6200m
+  requiredMemory: 16Gi
+  limitedMemory: 32Gi
+  requiredDisk: 50Mi
+  limitedDisk: 80Gi
+  requiredGpu: 16Gi
+  limitedGpu: 24Gi
+  {{- else }}
+  requiredCpu: 10m  # every other case: smaller budgets and no GPU keys
+  limitedCpu: 800m
+  requiredMemory: 64Mi
+  limitedMemory: 800Mi
+  requiredDisk: 50Mi
+  limitedDisk: 200Mi
+  {{- end }}
+
+  supportArch:
+    - amd64
+  subCharts:  # the two chart directories shipped next to this manifest
+    - name: ollamaniemotron3nano30ba3bserver
+      shared: true
+    - name: ollamaniemotron3nano30ba3b
+permission:  # storage areas requested by the app
+  appCache: true
+  appData: true
+  userData:
+    - Home
+options:  # scope and dependency declarations; both vary with the admin check below
+  apiTimeout: 0
+  appScope:
+  {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }}
+    clusterScoped: true  # rendered when .Values.admin equals .Values.bfl.username
+    appRef:
+      - ollamaniemotron3nano30ba3b
+  {{- else }}
+    clusterScoped: false
+  {{- end }}
+  dependencies:
+    - name: olares
+      version: '>=1.12.3-0'
+      type: system
+  {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }}
+  {{- else }}
+    - name: ollamaniemotron3nano30ba3b  # added only when the admin check above is false
+      type: application
+      version: '>=1.0.0'  # this app ships as 1.0.0, so a '>=1.0.1' floor could never be met
+      mandatory: true
+  {{- end }}
+{{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }}
+envs:
+  - envName: KEEP_ALIVE  # declared only when .Values.admin equals .Values.bfl.username
+    type: bool
+    default: "false"
+    required: false
+    editable: true
+    applyOnChange: true
+{{- end }}
diff --git a/ollamaniemotron3nano30ba3b/README.md b/ollamaniemotron3nano30ba3b/README.md
@@ -0,0 +1,13 @@
+# Nemotron-3-Nano 30B A3B (Ollama)
+
+NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE
+architecture with a 128k (131072) token context window, designed for efficient
+GPU inference.
+
+This is a shared app. Once installed by the Olares Admin, all users in the
+cluster can use it through the reference app.
+
+- Model: `nemotron3nano30b:a3b`
+- Developer: NVIDIA / Unsloth
+- Source: https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF
+- License: NVIDIA Open Model License
diff --git a/ollamaniemotron3nano30ba3b/i18n/en-US/OlaresManifest.yaml b/ollamaniemotron3nano30ba3b/i18n/en-US/OlaresManifest.yaml
@@ -0,0 +1,19 @@
+metadata:  # en-US listing strings: title, description and fullDescription
+  title: Nemotron-3-Nano 30B A3B (Ollama)
+  description: NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference.
+
+spec:
+  fullDescription: |
+    ## IMPORTANT NOTE ##
+    This is a shared app. Once installed by the Olares Admin, all users in the cluster can use it through the reference app.
+
+    ## MODEL OVERVIEW ##
+    NVIDIA Nemotron-3-Nano 30B A3B model served via Ollama. Optimized MoE architecture with 128k context window, designed for efficient GPU inference.
+
+    Nemotron-3-Nano 30B A3B is a Mixture-of-Experts (MoE) model that activates only a fraction of its total parameters per token, delivering strong reasoning and generation quality at a lower inference cost than comparable dense models.
+
+    # Features
+    - Mixture-of-Experts architecture for efficient GPU inference
+    - 128k (131072) token context window for long-document and agentic workloads
+    - Served locally via Ollama with an OpenAI-compatible API surface
+    - Distributed as quantized GGUF weights by Unsloth
diff --git a/ollamaniemotron3nano30ba3b/i18n/zh-CN/OlaresManifest.yaml b/ollamaniemotron3nano30ba3b/i18n/zh-CN/OlaresManifest.yaml
@@ -0,0 +1,19 @@
+metadata:  # zh-CN listing strings; the product name in title is intentionally left untranslated
+  title: Nemotron-3-Nano 30B A3B (Ollama)
+  description: 通过 Ollama 提供服务的 NVIDIA Nemotron-3-Nano 30B A3B 模型。采用优化的 MoE 架构，支持 128k 上下文窗口，专为高效 GPU 推理而设计。
+
+spec:
+  fullDescription: |
+    ## 重要提示 ##
+    这是一个共享应用。由 Olares 管理员安装后，集群中的所有用户都可以通过引用应用使用它。
+
+    ## 模型概览 ##
+    通过 Ollama 提供服务的 NVIDIA Nemotron-3-Nano 30B A3B 模型。采用优化的 MoE 架构，支持 128k 上下文窗口，专为高效 GPU 推理而设计。
+
+    Nemotron-3-Nano 30B A3B 是一个混合专家（MoE）模型，每个 token 仅激活其总参数的一部分，以低于同类稠密模型的推理成本提供出色的推理与生成质量。
+
+    # 功能特性
+    - 混合专家架构，实现高效 GPU 推理
+    - 128k（131072）token 上下文窗口，适用于长文档和智能体工作负载
+    - 通过 Ollama 在本地提供服务，并提供兼容 OpenAI 的 API 接口
+    - 以 Unsloth 提供的量化 GGUF 权重分发
diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/Chart.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2  # Helm chart metadata for the client (proxy) sub-chart
+appVersion: '1.25.3-2'  # same tag as the OpenResty image used in templates/clientproxy.yaml
+description: OpenResty client proxy that forwards requests to the shared Nemotron-3-Nano 30B A3B Ollama server
+name: ollamaniemotron3nano30ba3b
+type: application
+version: '1.0.0'
diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/templates/clientproxy.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/templates/clientproxy.yaml
@@ -0,0 +1,125 @@
+---
+# Server block for the client proxy: listens on 8080 and passes every request to api.ollamaniemotron3nano30ba3bserver-shared:8081.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nginx-config
+  namespace: '{{ .Release.Namespace }}'
+data:
+  nginx.conf: |
+    server {
+
+      listen 8080;
+      access_log /opt/bitnami/openresty/nginx/logs/access.log;
+      error_log /opt/bitnami/openresty/nginx/logs/error.log;
+
+      proxy_connect_timeout                          600s;
+      proxy_send_timeout                             600s;
+      proxy_read_timeout                             1800s;
+      proxy_set_header      host                      $host;
+      proxy_set_header      x-forwarded-host          $http_host;
+
+      proxy_http_version 1.1;
+
+      proxy_set_header upgrade $http_upgrade;
+      proxy_set_header connection "upgrade";
+
+      location / {
+        add_header X-Frame-Options "";
+        proxy_pass http://api.ollamaniemotron3nano30ba3bserver-shared:8081;
+      }
+    }
+
+---
+# Client proxy Deployment: one OpenResty (nginx) replica listening on 8080,
+# configured from the nginx-config ConfigMap. Pods carry the label
+# io.kompose.service=ollamaclient, which the ollamaclient Service selects.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  creationTimestamp: null
+  labels:
+    io.kompose.service: ollamaclient
+  name: '{{ .Release.Name }}'
+  namespace: '{{ .Release.Namespace }}'
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      io.kompose.service: ollamaclient
+  strategy: {}
+  template:
+    metadata:
+      creationTimestamp: null
+      labels:
+        io.kompose.network/chrome-default: "true"
+        io.kompose.service: ollamaclient
+    spec:
+      # Project only the nginx.conf key of the ConfigMap; 438 decimal is 0666 octal.
+      volumes:
+        - name: nginx-config
+          configMap:
+            name: nginx-config
+            defaultMode: 438
+            items:
+              - key: nginx.conf
+                path: nginx.conf
+      containers:
+        - name: nginx
+          image: "docker.io/beclab/aboveos-bitnami-openresty:1.25.3-2"
+          ports:
+            - containerPort: 8080
+              protocol: TCP
+          env:
+            - name: OPENRESTY_CONF_FILE
+              value: /etc/nginx/nginx.conf
+          # Allow up to 30 x 10s = 300s for the listener on 8080 to accept connections.
+          startupProbe:
+            tcpSocket:
+              port: 8080
+            failureThreshold: 30
+            periodSeconds: 10
+          # NOTE(review): nginx.conf defines only `location /`, so GET /health is
+          # proxied to the upstream; liveness therefore depends on the upstream
+          # answering /health with a success status - confirm this is intended.
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 30
+            timeoutSeconds: 60
+            periodSeconds: 60
+            successThreshold: 1
+            failureThreshold: 10
+          # No readinessProbe is defined for this container.
+          resources:
+            limits:
+              cpu: 500m
+              memory: 500Mi
+            requests:
+              cpu: 10m
+              memory: 64Mi
+          # The same nginx.conf key is mounted at both paths via subPath.
+          volumeMounts:
+            - name: nginx-config
+              mountPath: /etc/nginx/nginx.conf
+              subPath: nginx.conf
+            - name: nginx-config
+              mountPath: /opt/bitnami/openresty/nginx/conf/server_blocks/nginx.conf
+              subPath: nginx.conf
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollamaclient  # same name as the `host` of the ollamaclient entrance in OlaresManifest.yaml
+  namespace: '{{ .Release.Namespace }}'
+spec:
+  type: ClusterIP
+  ports:
+    - name: ollamaclient
+      port: 8080
+      targetPort: 8080
+      protocol: TCP
+  selector:
+    io.kompose.service: ollamaclient  # pod label set by the client Deployment template
diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/values.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3b/values.yaml
diff --git a/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/Chart.yaml b/ollamaniemotron3nano30ba3b/ollamaniemotron3nano30ba3bserver/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2  # Helm chart metadata for the sub-chart listed with shared: true in OlaresManifest.yaml
+appVersion: '0.18.3'  # NOTE(review): presumably the bundled Ollama version - confirm
+description: Shared Ollama server chart for the Nemotron-3-Nano 30B A3B model
+name: ollamaniemotron3nano30ba3bserver
+type: application
+version: '1.0.0'