diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2e33fde --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# GenAI +*.md \ No newline at end of file diff --git a/k8s-argocd/applications/prod/monitoring.yaml b/k8s-argocd/applications/prod/monitoring.yaml new file mode 100644 index 0000000..c3a121b --- /dev/null +++ b/k8s-argocd/applications/prod/monitoring.yaml @@ -0,0 +1,48 @@ +# =================================== +# Prod Monitoring Root +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +# 기본 정보 +metadata: + name: monitoring-root-prod + namespace: argocd + + # 라벨 추가 + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: monitoring + + # Finalizers를 설정하면 Application 삭제 시 관련 리소스도 함께 삭제됨 + finalizers: + - resources-finalizer.argocd.argoproj.io + +# 스펙 +spec: + project: default + + # Directory 소스 설정 + source: + # GitOps URL + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + # Main 브랜치 참조 + targetRevision: main + # Monitoring 하위 애플리케이션 디렉터리 + path: k8s-argocd/applications/prod/monitoring + directory: + recurse: false + + # 배포 대상 클러스터 + destination: + server: https://kubernetes.default.svc + namespace: argocd + + # 동기화 정책 + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/monitoring/monitoring-alloy.yaml b/k8s-argocd/applications/prod/monitoring/monitoring-alloy.yaml new file mode 100644 index 0000000..df0133f --- /dev/null +++ b/k8s-argocd/applications/prod/monitoring/monitoring-alloy.yaml @@ -0,0 +1,51 @@ +# =================================== +# Prod Monitoring Alloy +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +# 기본 정보 +metadata: + name: monitoring-alloy-prod + namespace: argocd + + # 라벨 추가 + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: monitoring + pinhouse.co.kr/monitoring-component: alloy + + # 어노테이션 메타데이터 + 
annotations: + argocd.argoproj.io/sync-wave: "2" + + # Finalizers를 설정하면 Application 삭제 시 관련 리소스도 함께 삭제됨 + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + # Helm 소스 설정 + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-helm/releases/monitoring-alloy + helm: + releaseName: monitoring-alloy + valueFiles: + - values-prod-gitops.yaml + + # 배포 대상 클러스터 + destination: + server: https://kubernetes.default.svc + namespace: monitoring + + # 동기화 정책 + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/monitoring/monitoring-core.yaml b/k8s-argocd/applications/prod/monitoring/monitoring-core.yaml new file mode 100644 index 0000000..b9aa57a --- /dev/null +++ b/k8s-argocd/applications/prod/monitoring/monitoring-core.yaml @@ -0,0 +1,51 @@ +# =================================== +# Prod Monitoring Core +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +# 기본 정보 +metadata: + name: monitoring-core-prod + namespace: argocd + + # 라벨 추가 + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: monitoring + pinhouse.co.kr/monitoring-component: core + + # 어노테이션 메타데이터 + annotations: + argocd.argoproj.io/sync-wave: "0" + + # Finalizers를 설정하면 Application 삭제 시 관련 리소스도 함께 삭제됨 + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + # Helm 소스 설정 + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-helm/releases/monitoring-core + helm: + releaseName: monitoring-core + valueFiles: + - values-prod-gitops.yaml + + # 배포 대상 클러스터 + destination: + server: https://kubernetes.default.svc + namespace: monitoring + + # 동기화 정책 + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/monitoring/monitoring-loki.yaml 
b/k8s-argocd/applications/prod/monitoring/monitoring-loki.yaml new file mode 100644 index 0000000..00a9fd0 --- /dev/null +++ b/k8s-argocd/applications/prod/monitoring/monitoring-loki.yaml @@ -0,0 +1,51 @@ +# =================================== +# Prod Monitoring Loki +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +# 기본 정보 +metadata: + name: monitoring-loki-prod + namespace: argocd + + # 라벨 추가 + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: monitoring + pinhouse.co.kr/monitoring-component: loki + + # 어노테이션 메타데이터 + annotations: + argocd.argoproj.io/sync-wave: "1" + + # Finalizers를 설정하면 Application 삭제 시 관련 리소스도 함께 삭제됨 + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + # Helm 소스 설정 + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-helm/releases/monitoring-loki + helm: + releaseName: monitoring-loki + valueFiles: + - values-prod-gitops.yaml + + # 배포 대상 클러스터 + destination: + server: https://kubernetes.default.svc + namespace: monitoring + + # 동기화 정책 + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/monitoring/monitoring-tempo.yaml b/k8s-argocd/applications/prod/monitoring/monitoring-tempo.yaml new file mode 100644 index 0000000..5714a55 --- /dev/null +++ b/k8s-argocd/applications/prod/monitoring/monitoring-tempo.yaml @@ -0,0 +1,51 @@ +# =================================== +# Prod Monitoring Tempo +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +# 기본 정보 +metadata: + name: monitoring-tempo-prod + namespace: argocd + + # 라벨 추가 + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: monitoring + pinhouse.co.kr/monitoring-component: tempo + + # 어노테이션 메타데이터 + annotations: + argocd.argoproj.io/sync-wave: "1" + + # Finalizers를 설정하면 Application 삭제 시 관련 리소스도 함께 삭제됨 + 
finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + # Helm 소스 설정 + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-helm/releases/monitoring-tempo + helm: + releaseName: monitoring-tempo + valueFiles: + - values-prod-gitops.yaml + + # 배포 대상 클러스터 + destination: + server: https://kubernetes.default.svc + namespace: monitoring + + # 동기화 정책 + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform.yaml b/k8s-argocd/applications/prod/platform.yaml new file mode 100644 index 0000000..da349aa --- /dev/null +++ b/k8s-argocd/applications/prod/platform.yaml @@ -0,0 +1,36 @@ +# =================================== +# Prod Platform Root +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-root-prod + namespace: argocd + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-argocd/applications/prod/platform + directory: + recurse: false + + destination: + server: https://kubernetes.default.svc + namespace: argocd + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform/argocd-config.yaml b/k8s-argocd/applications/prod/platform/argocd-config.yaml new file mode 100644 index 0000000..0dc72f4 --- /dev/null +++ b/k8s-argocd/applications/prod/platform/argocd-config.yaml @@ -0,0 +1,36 @@ +# =================================== +# Prod Platform Argo CD Config +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-argocd-config-prod + namespace: argocd + labels: + 
pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + annotations: + argocd.argoproj.io/sync-wave: "3" + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-kustomize/platform/argocd/overlays/prod + + destination: + server: https://kubernetes.default.svc + namespace: argocd + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform/cert-manager.yaml b/k8s-argocd/applications/prod/platform/cert-manager.yaml new file mode 100644 index 0000000..fae4b63 --- /dev/null +++ b/k8s-argocd/applications/prod/platform/cert-manager.yaml @@ -0,0 +1,43 @@ +# =================================== +# Prod Platform cert-manager +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-cert-manager-prod + namespace: argocd + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + annotations: + argocd.argoproj.io/sync-wave: "1" + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + sources: + - repoURL: oci://quay.io/jetstack/charts + chart: cert-manager + targetRevision: v1.19.2 + helm: + releaseName: cert-manager + valueFiles: + - $values/k8s-helm/releases/cert-manager/values-prod.yaml + - repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + ref: values + + destination: + server: https://kubernetes.default.svc + namespace: cert-manager + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform/external-secret.yaml b/k8s-argocd/applications/prod/platform/external-secret.yaml new file mode 100644 index 0000000..0a1c05d --- /dev/null +++ b/k8s-argocd/applications/prod/platform/external-secret.yaml @@ -0,0 +1,43 
@@ +# =================================== +# Prod Platform External Secrets Operator +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-external-secret-prod + namespace: argocd + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + annotations: + argocd.argoproj.io/sync-wave: "1" + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + sources: + - repoURL: https://charts.external-secrets.io + chart: external-secrets + targetRevision: 0.20.4 + helm: + releaseName: external-secrets + valueFiles: + - $values/k8s-helm/releases/external-secret/values-prod.yaml + - repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + ref: values + + destination: + server: https://kubernetes.default.svc + namespace: external-secrets + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform/gateway-api.yaml b/k8s-argocd/applications/prod/platform/gateway-api.yaml new file mode 100644 index 0000000..1511afc --- /dev/null +++ b/k8s-argocd/applications/prod/platform/gateway-api.yaml @@ -0,0 +1,36 @@ +# =================================== +# Prod Platform Gateway API CRD +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-gateway-api-prod + namespace: argocd + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + annotations: + argocd.argoproj.io/sync-wave: "0" + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-kustomize/platform/gateway-api/standard + + destination: + server: https://kubernetes.default.svc + namespace: argocd + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - 
CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform/metrics-server.yaml b/k8s-argocd/applications/prod/platform/metrics-server.yaml new file mode 100644 index 0000000..2880431 --- /dev/null +++ b/k8s-argocd/applications/prod/platform/metrics-server.yaml @@ -0,0 +1,43 @@ +# =================================== +# Prod Platform Metrics Server +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-metrics-server-prod + namespace: argocd + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + annotations: + argocd.argoproj.io/sync-wave: "1" + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + sources: + - repoURL: https://kubernetes-sigs.github.io/metrics-server/ + chart: metrics-server + targetRevision: 3.13.0 + helm: + releaseName: metrics-server + valueFiles: + - $values/k8s-helm/releases/metrics-server/values-prod.yaml + - repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + ref: values + + destination: + server: https://kubernetes.default.svc + namespace: kube-system + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform/nginx-gateway-fabric.yaml b/k8s-argocd/applications/prod/platform/nginx-gateway-fabric.yaml new file mode 100644 index 0000000..4a622fc --- /dev/null +++ b/k8s-argocd/applications/prod/platform/nginx-gateway-fabric.yaml @@ -0,0 +1,43 @@ +# =================================== +# Prod Platform NGINX Gateway Fabric +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-nginx-gateway-fabric-prod + namespace: argocd + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + annotations: + argocd.argoproj.io/sync-wave: "1" + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + 
project: default + + sources: + - repoURL: oci://ghcr.io/nginx/charts + chart: nginx-gateway-fabric + targetRevision: 2.4.2 + helm: + releaseName: ngf + valueFiles: + - $values/k8s-helm/releases/nginx-gateway-fabric/values-prod.yaml + - repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + ref: values + + destination: + server: https://kubernetes.default.svc + namespace: nginx-gateway + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform/platform-resources.yaml b/k8s-argocd/applications/prod/platform/platform-resources.yaml new file mode 100644 index 0000000..260d6ed --- /dev/null +++ b/k8s-argocd/applications/prod/platform/platform-resources.yaml @@ -0,0 +1,40 @@ +# =================================== +# Prod Platform Resources +# =================================== + +apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-resources-prod + namespace: argocd + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + annotations: + argocd.argoproj.io/sync-wave: "2" + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-helm/platform-chart + helm: + releaseName: pinhouse-platform + valueFiles: + - values-prod.yaml + + destination: + server: https://kubernetes.default.svc + namespace: platform-system + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-argocd/applications/prod/platform/storageclass.yaml b/k8s-argocd/applications/prod/platform/storageclass.yaml new file mode 100644 index 0000000..993fdcc --- /dev/null +++ b/k8s-argocd/applications/prod/platform/storageclass.yaml @@ -0,0 +1,36 @@ +# =================================== +# Prod Platform StorageClass +# =================================== + 
+apiVersion: argoproj.io/v1alpha1 +kind: Application + +metadata: + name: platform-storageclass-prod + namespace: argocd + labels: + pinhouse.co.kr/environment: prod + pinhouse.co.kr/component: platform + annotations: + argocd.argoproj.io/sync-wave: "0" + finalizers: + - resources-finalizer.argocd.argoproj.io + +spec: + project: default + + source: + repoURL: https://github.com/PinHouse/PinHouse_CLOUD + targetRevision: main + path: k8s-kustomize/platform/storageclass + + destination: + server: https://kubernetes.default.svc + namespace: argocd + + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/k8s-helm/.gitignore b/k8s-helm/.gitignore index ff29a3c..1a97b70 100644 --- a/k8s-helm/.gitignore +++ b/k8s-helm/.gitignore @@ -18,6 +18,13 @@ releases/**/secrets/*.yaml releases/argocd/values-*.yaml !releases/argocd/*.yaml.example +# ======================================== +# Monitoring 로컬 오버라이드 +# ======================================== +releases/monitoring-*/values-*.yaml +releases/monitoring-*/README.md +!releases/monitoring-*/values-prod-gitops.yaml + # ======================================== # 로컬 오버라이드 파일 # ======================================== @@ -32,4 +39,4 @@ releases/argocd/values-*.yaml releases/*/README.md # Helm dependency artifacts -releases/monitoring/charts/ \ No newline at end of file +releases/monitoring-*/charts/ diff --git a/k8s-helm/platform-chart/values-prod.yaml.example b/k8s-helm/platform-chart/values-prod.yaml.example index 8edcac8..c378204 100644 --- a/k8s-helm/platform-chart/values-prod.yaml.example +++ b/k8s-helm/platform-chart/values-prod.yaml.example @@ -168,3 +168,26 @@ externalSecrets: - regexp: source: "^Prod_BE_(.*)$" target: "$1" + + # 모니터링 Grafana 관리자 시크릿 적용 + - name: monitoring-secret-kv + namespace: monitoring + spec: + refreshInterval: 1h + secretStoreRef: + name: gcp-secret-manager + kind: ClusterSecretStore + target: + name: monitoring-secret-kv + creationPolicy: Owner 
+ deletionPolicy: Retain + data: + - find: + name: + regexp: "^Prod_MONITORING_" + conversionStrategy: Default + decodingStrategy: None + rewrite: + - regexp: + source: "^Prod_MONITORING_(.*)$" + target: "$1" \ No newline at end of file diff --git a/k8s-helm/releases/monitoring-alloy/Chart.lock b/k8s-helm/releases/monitoring-alloy/Chart.lock new file mode 100644 index 0000000..f0b4de4 --- /dev/null +++ b/k8s-helm/releases/monitoring-alloy/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: alloy + repository: https://grafana.github.io/helm-charts + version: 1.7.0 +digest: sha256:2a2c2530b9dd4d61b7735106b45a984392179c7abc457180b06f5341204c97a9 +generated: "2026-04-08T15:28:07.056311+09:00" diff --git a/k8s-helm/releases/monitoring-alloy/Chart.yaml b/k8s-helm/releases/monitoring-alloy/Chart.yaml new file mode 100644 index 0000000..8e72dda --- /dev/null +++ b/k8s-helm/releases/monitoring-alloy/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: pinhouse-monitoring-alloy + +# 설명 +description: PinHouse Monitoring Alloy Helm Chart + +# 타입 +type: application + +# 헬름차트 버전 +version: 0.1.0 + +# 애플리케이션 버전 +appVersion: "1.7.0" + +# 의존 차트 +dependencies: + - name: alloy + version: 1.7.0 + repository: https://grafana.github.io/helm-charts diff --git a/k8s-helm/releases/monitoring-alloy/values-prod-gitops.yaml b/k8s-helm/releases/monitoring-alloy/values-prod-gitops.yaml new file mode 100644 index 0000000..5a4093d --- /dev/null +++ b/k8s-helm/releases/monitoring-alloy/values-prod-gitops.yaml @@ -0,0 +1,22 @@ +# ======================================== +# Prod Monitoring Alloy GitOps 값 +# ======================================== + +# 애플리케이션 OTLP 엔드포인트 예시 +# gRPC: monitoring-alloy.monitoring.svc.cluster.local:4317 +# HTTP: http://monitoring-alloy.monitoring.svc.cluster.local:4318/v1/traces + +global: + # Alloy가 Loki로 보내는 로그 라벨과 공통 식별값에 사용합니다. + clusterName: "pinhouse-prod" + environment: "prod" + +alloy: + # 로그/트레이스/메트릭까지 함께 수집하므로 기본값보다 조금 더 여유를 둡니다. 
+ resources: + requests: + cpu: 300m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi diff --git a/k8s-helm/releases/monitoring-alloy/values.yaml b/k8s-helm/releases/monitoring-alloy/values.yaml new file mode 100644 index 0000000..1681c21 --- /dev/null +++ b/k8s-helm/releases/monitoring-alloy/values.yaml @@ -0,0 +1,183 @@ +# ======================================== +# Monitoring Alloy 공통 값 +# ======================================== + +global: + # Alloy 공통 라벨에 사용할 클러스터 식별자입니다. + clusterName: "pinhouse-cluster" + + # 운영 구분용 환경명입니다. + environment: "shared" + +alloy: + fullnameOverride: monitoring-alloy + + alloy: + # DaemonSet 여러 인스턴스 간에 scrape 대상을 나눠 가져가도록 클러스터링을 활성화합니다. + clustering: + enabled: true + + enableReporting: false + extraPorts: + - name: otlp-grpc + port: 4317 + targetPort: 4317 + protocol: TCP + appProtocol: grpc + - name: otlp-http + port: 4318 + targetPort: 4318 + protocol: TCP + appProtocol: http + + configMap: + content: |- + otelcol.receiver.otlp "traces" { + grpc {} + http {} + + output { + traces = [otelcol.processor.batch.traces.input] + } + } + + otelcol.processor.batch "traces" { + output { + traces = [otelcol.exporter.otlp.tempo.input] + } + } + + otelcol.exporter.otlp "tempo" { + client { + endpoint = "monitoring-tempo-distributor.monitoring.svc.cluster.local:4317" + + tls { + insecure = true + } + } + } + + prometheus.remote_write "prometheus" { + external_labels = { + cluster = "{{ .Values.global.clusterName }}" + environment = "{{ .Values.global.environment }}" + } + + endpoint { + url = "http://monitoring-core-kube-prome-prometheus.monitoring.svc.cluster.local:9090/api/v1/write" + } + } + + prometheus.operator.servicemonitors "cluster" { + forward_to = [prometheus.remote_write.prometheus.receiver] + + clustering { + enabled = true + } + + scrape { + default_scrape_interval = "30s" + default_scrape_timeout = "10s" + } + } + + prometheus.operator.podmonitors "cluster" { + forward_to = [prometheus.remote_write.prometheus.receiver] + + 
clustering { + enabled = true + } + + scrape { + default_scrape_interval = "30s" + default_scrape_timeout = "10s" + } + } + + prometheus.operator.probes "cluster" { + forward_to = [prometheus.remote_write.prometheus.receiver] + + clustering { + enabled = true + } + + scrape { + default_scrape_interval = "30s" + default_scrape_timeout = "10s" + } + } + + discovery.kubernetes "pods_on_same_node" { + role = "pod" + + selectors { + role = "pod" + field = "spec.nodeName=" + coalesce(sys.env("HOSTNAME"), constants.hostname) + } + } + + discovery.relabel "pod_logs" { // relabel the discovered targets: __meta_* labels exist only on targets, not on log entries + targets = discovery.kubernetes.pods_on_same_node.targets + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "node" + } + + rule { + source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"] + separator = "/" + target_label = "job" + } + } + + loki.process "pod_logs" { + forward_to = [loki.write.default.receiver] + + stage.cri {} + } + + loki.source.kubernetes "pods" { + targets = discovery.relabel.pod_logs.output // tail the relabeled targets so namespace/pod/container/node/job are attached to every entry + forward_to = [loki.process.pod_logs.receiver] + } + + loki.write "default" { + external_labels = { + cluster = "{{ .Values.global.clusterName }}" + environment = "{{ .Values.global.environment }}" + } + + endpoint { + url = "http://monitoring-loki-gateway.monitoring.svc.cluster.local/loki/api/v1/push" + } + } + + controller: + type: daemonset + + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + + serviceMonitor: + enabled: true diff --git a/k8s-helm/releases/monitoring-core/Chart.lock b/k8s-helm/releases/monitoring-core/Chart.lock new file mode 100644 index 0000000..0affb60 --- /dev/null +++ 
b/k8s-helm/releases/monitoring-core/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: kube-prometheus-stack + repository: https://prometheus-community.github.io/helm-charts + version: 82.13.1 +digest: sha256:6f78a7accfaddfb45673b0a7e108b0aae33e9e9d07b032c2240df300247ca061 +generated: "2026-04-08T15:26:52.010883+09:00" diff --git a/k8s-helm/releases/monitoring-core/Chart.yaml b/k8s-helm/releases/monitoring-core/Chart.yaml new file mode 100644 index 0000000..826fc34 --- /dev/null +++ b/k8s-helm/releases/monitoring-core/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: pinhouse-monitoring-core + +# 설명 +description: PinHouse Monitoring Core Helm Chart (kube-prometheus-stack) + +# 타입 +type: application + +# 헬름차트 버전 +version: 0.1.0 + +# 애플리케이션 버전 +appVersion: "82.13.1" + +# 의존 차트 +dependencies: + - name: kube-prometheus-stack + version: 82.13.1 + repository: https://prometheus-community.github.io/helm-charts diff --git a/k8s-helm/releases/monitoring-core/values-prod-gitops.yaml b/k8s-helm/releases/monitoring-core/values-prod-gitops.yaml new file mode 100644 index 0000000..f5992a8 --- /dev/null +++ b/k8s-helm/releases/monitoring-core/values-prod-gitops.yaml @@ -0,0 +1,116 @@ +# ======================================== +# Prod Monitoring Core GitOps 값 +# ======================================== + +kube-prometheus-stack: + grafana: + # Grafana 관리자 계정은 ExternalSecret이 만든 Kubernetes Secret을 사용합니다. + admin: + existingSecret: monitoring-secret-kv + userKey: GRAFANA_ADMIN_USER + passwordKey: GRAFANA_ADMIN_PASSWORD + + # Grafana Pod 리소스는 대시보드 조회와 플러그인 초기화 여유를 고려한 값입니다. + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + + # 운영 환경에서는 Grafana 대시보드와 설정을 유지하기 위해 PVC를 사용합니다. + persistence: + enabled: true + type: sts + size: 20Gi + storageClassName: "gce-standard-rwo" + + prometheus: + prometheusSpec: + # 운영 기준 메트릭 보관 기간과 최대 디스크 사용량입니다. 
+ retention: 15d + retentionSize: 40GB # Prometheus size units are base-2, so 50GB would equal the entire 50Gi PVC below; keep about 80% so the disk cannot fill before size-based retention triggers + # Alloy performs the actual scraping; Prometheus receives metrics through the remote write receiver. + enableRemoteWriteReceiver: true + + # Prometheus memory usage grows with the number of scrape targets, so + # requests/limits are set explicitly in production. + resources: + requests: + cpu: 500m + memory: 2Gi + limits: + cpu: 2000m + memory: 4Gi + + # Keep Prometheus from scraping ServiceMonitor/PodMonitor/Probe objects directly. + serviceMonitorSelectorNilUsesHelmValues: false + serviceMonitorSelector: + matchLabels: + "pinhouse.co.kr/scrape-via": "prometheus" + serviceMonitorNamespaceSelector: {} + + podMonitorSelectorNilUsesHelmValues: false + podMonitorSelector: + matchLabels: + "pinhouse.co.kr/scrape-via": "prometheus" + podMonitorNamespaceSelector: {} + + probeSelectorNilUsesHelmValues: false + probeSelector: + matchLabels: + "pinhouse.co.kr/scrape-via": "prometheus" + probeNamespaceSelector: {} + + # Use a PVC for long-term metric retention. + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: "gce-standard-rwo" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + + alertmanager: + # Alertmanager only routes alerts, so it starts with relatively small resources. + alertmanagerSpec: + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + + prometheusOperator: + # The operator does not scrape, but it is still required because it creates the Prometheus/Alertmanager resources. + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + + kube-state-metrics: + # Component that collects Kubernetes resource state. + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 200m + memory: 512Mi + + prometheus-node-exporter: + # Node metrics collector: start light, but set limits to prevent excessive usage. 
+ resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 128Mi diff --git a/k8s-helm/releases/monitoring-core/values.yaml b/k8s-helm/releases/monitoring-core/values.yaml new file mode 100644 index 0000000..6a17acc --- /dev/null +++ b/k8s-helm/releases/monitoring-core/values.yaml @@ -0,0 +1,84 @@ +# ======================================== +# Monitoring Core 공통 값 +# ======================================== + +kube-prometheus-stack: + crds: + enabled: true + + # kubeadm 기반 self-managed 클러스터에서는 컨트롤 플레인 정적 Pod 메트릭 엔드포인트가 + # 외부에서 바로 접근되지 않는 경우가 많아서 기본값은 비활성화합니다. + kubeEtcd: + enabled: false + + kubeControllerManager: + enabled: false + + kubeScheduler: + enabled: false + + kubeProxy: + enabled: false + + alertmanager: + enabled: true + + grafana: + enabled: true + + # 운영 전에는 ExternalSecret 기반 existingSecret으로 전환하는 것을 권장합니다. + adminPassword: "change-me-before-apply" + + service: + type: ClusterIP + + persistence: + enabled: false + type: sts + size: 10Gi + + additionalDataSources: + - name: Loki + uid: loki + type: loki + access: proxy + url: http://monitoring-loki-gateway.monitoring.svc.cluster.local + isDefault: false + jsonData: + maxLines: 1000 + - name: Tempo + uid: tempo + type: tempo + access: proxy + url: http://monitoring-tempo-query-frontend.monitoring.svc.cluster.local:3100 + isDefault: false + + prometheus: + prometheusSpec: + retention: 15d + retentionSize: 40GB + walCompression: true + # Prometheus는 Alloy가 전달한 메트릭을 저장/조회하는 역할로 사용합니다. + enableRemoteWriteReceiver: true + + # 직접 scrape는 사용하지 않고 Alloy가 ServiceMonitor/PodMonitor/Probe를 scrape합니다. + # 아래 selector는 의도적으로 아무것도 매칭하지 않도록 둡니다. 
+ serviceMonitorSelectorNilUsesHelmValues: false + serviceMonitorSelector: + matchLabels: + "pinhouse.co.kr/scrape-via": "prometheus" + serviceMonitorNamespaceSelector: {} + + podMonitorSelectorNilUsesHelmValues: false + podMonitorSelector: + matchLabels: + "pinhouse.co.kr/scrape-via": "prometheus" + podMonitorNamespaceSelector: {} + + probeSelectorNilUsesHelmValues: false + probeSelector: + matchLabels: + "pinhouse.co.kr/scrape-via": "prometheus" + probeNamespaceSelector: {} + + storageSpec: {} diff --git a/k8s-helm/releases/monitoring-loki/Chart.lock b/k8s-helm/releases/monitoring-loki/Chart.lock new file mode 100644 index 0000000..ea9cc98 --- /dev/null +++ b/k8s-helm/releases/monitoring-loki/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: loki + repository: https://grafana.github.io/helm-charts + version: 6.55.0 +digest: sha256:2e09dd20884300bed8826aa0742382dceefa4423c14b916a2f59cb694897d7e5 +generated: "2026-04-08T15:28:22.231919+09:00" diff --git a/k8s-helm/releases/monitoring-loki/Chart.yaml b/k8s-helm/releases/monitoring-loki/Chart.yaml new file mode 100644 index 0000000..749d3ba --- /dev/null +++ b/k8s-helm/releases/monitoring-loki/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: pinhouse-monitoring-loki + +# 설명 +description: PinHouse Monitoring Loki Helm Chart + +# 타입 +type: application + +# 헬름차트 버전 +version: 0.1.0 + +# 애플리케이션 버전 +appVersion: "6.55.0" + +# 의존 차트 +dependencies: + - name: loki + version: 6.55.0 + repository: https://grafana.github.io/helm-charts diff --git a/k8s-helm/releases/monitoring-loki/values-prod-gitops.yaml b/k8s-helm/releases/monitoring-loki/values-prod-gitops.yaml new file mode 100644 index 0000000..e730b65 --- /dev/null +++ b/k8s-helm/releases/monitoring-loki/values-prod-gitops.yaml @@ -0,0 +1,43 @@ +# ======================================== +# Prod Monitoring Loki GitOps 값 +# ======================================== + +loki: + # Loki 공용 ServiceAccount 이름을 고정해 두면 추후 인증 방식 전환 시 추적이 쉽습니다. 
+ serviceAccount: + create: true + name: monitoring-loki-sa + annotations: {} + + loki: + # 로그 보관 기간은 14일로 시작합니다. + limits_config: + retention_period: 336h + + # Loki 로그 청크와 인덱스는 GCS 버킷에 저장합니다. + storage: + bucketNames: + chunks: "pinhouse-prod-loki" + ruler: "pinhouse-prod-loki" + admin: "pinhouse-prod-loki" + gcs: + bucket_name: "pinhouse-prod-loki" + + singleBinary: + # 초기 운영 단계는 복잡도를 낮추기 위해 SingleBinary 모드로 유지합니다. + replicas: 1 + + # 로컬 디스크는 캐시와 임시 데이터를 위해 사용합니다. + persistence: + enabled: true + size: 30Gi + storageClass: "gce-standard-rwo" + + # 로그 유입량이 갑자기 늘어도 버틸 수 있도록 여유 있게 잡은 값입니다. + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 2Gi diff --git a/k8s-helm/releases/monitoring-loki/values.yaml b/k8s-helm/releases/monitoring-loki/values.yaml new file mode 100644 index 0000000..6ff5890 --- /dev/null +++ b/k8s-helm/releases/monitoring-loki/values.yaml @@ -0,0 +1,72 @@ +# ======================================== +# Monitoring Loki 공통 값 +# ======================================== + +loki: + fullnameOverride: monitoring-loki + + # 초기 구성은 운영 복잡도를 낮추기 위해 SingleBinary로 둡니다. + deploymentMode: SingleBinary + + loki: + auth_enabled: false + + commonConfig: + replication_factor: 1 + + schemaConfig: + configs: + - from: "2024-04-01" + store: tsdb + object_store: gcs + schema: v13 + index: + prefix: index_ + period: 24h + + limits_config: + retention_period: 168h + + compactor: + retention_enabled: true + delete_request_store: gcs + + storage: + type: gcs + bucketNames: + chunks: "replace-me-loki-bucket" + ruler: "replace-me-loki-bucket" + admin: "replace-me-loki-bucket" + gcs: + bucket_name: "replace-me-loki-bucket" + + singleBinary: + replicas: 1 + + persistence: + enabled: false + size: 20Gi + + # SingleBinary 외 다른 모드는 비활성화합니다. 
+  write:
+    replicas: 0
+  read:
+    replicas: 0
+  backend:
+    replicas: 0
+
+  monitoring:
+    dashboards:
+      enabled: false
+    rules:
+      enabled: false
+    serviceMonitor:
+      enabled: true
+      labels:  # required by the Prometheus serviceMonitorSelector; NOTE(review): change the value if Alloy should scrape
+        "pinhouse.co.kr/scrape-via": "prometheus"
+
+  test:
+    enabled: false
+
+  lokiCanary:
+    enabled: false
diff --git a/k8s-helm/releases/monitoring-tempo/Chart.lock b/k8s-helm/releases/monitoring-tempo/Chart.lock
new file mode 100644
index 0000000..5c40021
--- /dev/null
+++ b/k8s-helm/releases/monitoring-tempo/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: tempo-distributed
+  repository: https://grafana.github.io/helm-charts
+  version: 1.61.2
+digest: sha256:780da9648131a94895a3283275cc77cb19c6d6a14cfdfd7611d365debc9e200a
+generated: "2026-04-08T15:28:17.71737+09:00"
diff --git a/k8s-helm/releases/monitoring-tempo/Chart.yaml b/k8s-helm/releases/monitoring-tempo/Chart.yaml
new file mode 100644
index 0000000..b596ec2
--- /dev/null
+++ b/k8s-helm/releases/monitoring-tempo/Chart.yaml
@@ -0,0 +1,21 @@
+apiVersion: v2
+name: pinhouse-monitoring-tempo
+
+# Description
+description: PinHouse Monitoring Tempo Helm Chart
+
+# Chart type
+type: application
+
+# Helm chart version
+version: 0.1.0
+
+# Application version
+appVersion: "1.61.2"
+
+# Chart dependencies
+dependencies:
+  - name: tempo-distributed
+    alias: tempo
+    version: 1.61.2
+    repository: https://grafana.github.io/helm-charts
diff --git a/k8s-helm/releases/monitoring-tempo/values-prod-gitops.yaml b/k8s-helm/releases/monitoring-tempo/values-prod-gitops.yaml
new file mode 100644
index 0000000..242b785
--- /dev/null
+++ b/k8s-helm/releases/monitoring-tempo/values-prod-gitops.yaml
@@ -0,0 +1,79 @@
+# ========================================
+# Prod Monitoring Tempo GitOps values
+# ========================================
+
+tempo:
+  # Pinning the shared Tempo ServiceAccount name makes the auth setup easier to track.
+  serviceAccount:
+    create: true
+    name: monitoring-tempo-sa
+    annotations: {}
+
+  storage:
+    # Raw trace data is stored in a GCS bucket.
+    trace:
+      gcs:
+        bucket_name: "pinhouse-prod-tempo"
+
+  ingester:
+    # Start with a single replica in the initial phase, but set the PVC and resources explicitly.
+    replicas: 1
+    persistence:
+      enabled: true
+      size: 20Gi
+      storageClass: "gce-standard-rwo"
+    # The ingester is central to the trace write path, so it gets a comparatively large memory allocation.
+    resources:
+      requests:
+        cpu: 500m
+        memory: 1Gi
+      limits:
+        cpu: 1000m
+        memory: 2Gi
+
+  distributor:
+    # Receives the traces sent by applications and distributes them.
+    replicas: 1
+    resources:
+      requests:
+        cpu: 200m
+        memory: 256Mi
+      limits:
+        cpu: 500m
+        memory: 512Mi
+
+  compactor:
+    # Compacts stored blocks and enforces retention, so it gets a little more memory.
+    replicas: 1
+    config:
+      compaction:
+        block_retention: 336h  # 14 days, same as Loki; retention is a compactor setting, not storage.trace.block
+    resources:
+      requests:
+        cpu: 200m
+        memory: 512Mi
+      limits:
+        cpu: 500m
+        memory: 1Gi
+
+  querier:
+    # Performs the actual trace lookups for Grafana queries.
+    replicas: 1
+    resources:
+      requests:
+        cpu: 200m
+        memory: 512Mi
+      limits:
+        cpu: 500m
+        memory: 1Gi
+
+  queryFrontend:
+    # Front layer that organizes and caches query requests.
+    replicas: 1
+    resources:
+      requests:
+        cpu: 200m
+        memory: 256Mi
+      limits:
+        cpu: 500m
+        memory: 512Mi
diff --git a/k8s-helm/releases/monitoring-tempo/values.yaml b/k8s-helm/releases/monitoring-tempo/values.yaml
new file mode 100644
index 0000000..c7d579f
--- /dev/null
+++ b/k8s-helm/releases/monitoring-tempo/values.yaml
@@ -0,0 +1,55 @@
+# ========================================
+# Monitoring Tempo common values
+# ========================================
+
+tempo:
+  fullnameOverride: monitoring-tempo
+
+  reportingEnabled: false
+
+  traces:
+    otlp:
+      http:
+        enabled: true
+      grpc:
+        enabled: true
+
+  storage:
+    trace:
+      backend: gcs
+      gcs:
+        bucket_name: "replace-me-tempo-bucket"
+
+  ingester:
+    replicas: 1
+
+    config:
+      replication_factor: 1
+
+    persistence:
+      enabled: true
+      size: 10Gi
+
+  distributor:
+    replicas: 1
+
+  compactor:
+    replicas: 1
+
+  querier:
+    replicas: 1
+
+  queryFrontend:
+    replicas: 1
+
+  metricsGenerator:
+    enabled: false
+
+  gateway:
+    enabled: false
+
+  metaMonitoring:
+    serviceMonitor:
+      enabled: true
+      labels:  # required by the Prometheus serviceMonitorSelector; NOTE(review): change the value if Alloy should scrape
+        "pinhouse.co.kr/scrape-via": "prometheus"
diff --git
a/k8s-kustomize/platform/storageclass/gce-standard-rwo.yaml b/k8s-kustomize/platform/storageclass/gce-standard-rwo.yaml
new file mode 100644
index 0000000..c7a82bc
--- /dev/null
+++ b/k8s-kustomize/platform/storageclass/gce-standard-rwo.yaml
@@ -0,0 +1,27 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+
+metadata:
+  name: gce-standard-rwo
+  annotations:
+    # Keep this "false" so the class is never picked automatically as the default StorageClass.
+    storageclass.kubernetes.io/is-default-class: "false"
+
+provisioner: kubernetes.io/gce-pd
+
+parameters:
+  # Use a standard persistent disk.
+  type: pd-standard
+  # Format the volume with ext4.
+  fstype: ext4
+  # Provision a zonal PD (no regional replication).
+  replication-type: none
+
+# Delay volume binding until a Pod that uses the PVC is scheduled.
+volumeBindingMode: WaitForFirstConsumer
+
+# Allow PVCs of this class to be expanded.
+allowVolumeExpansion: true
+
+# Use Delete as the reclaim policy.
+reclaimPolicy: Delete
diff --git a/k8s-kustomize/platform/storageclass/kustomization.yaml b/k8s-kustomize/platform/storageclass/kustomization.yaml
new file mode 100644
index 0000000..06841ca
--- /dev/null
+++ b/k8s-kustomize/platform/storageclass/kustomization.yaml
@@ -0,0 +1,5 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - gce-standard-rwo.yaml
diff --git a/terraform/environments/prod/compute.tf b/terraform/environments/prod/compute.tf
index d287a02..534316d 100644
--- a/terraform/environments/prod/compute.tf
+++ b/terraform/environments/prod/compute.tf
@@ -17,7 +17,6 @@ module "k8s_master_nodes" {

   # 마스터 노드는 단일 인스턴스로 고정 운영합니다.
enable_autoscaling = false
-  update_policy_type = "OPPORTUNISTIC"

   # 공통 인스턴스 설정
   machine_type = var.k8s_master_machine_type
@@ -68,7 +67,6 @@ module "k8s_worker_nodes" {
   autoscaling_min_replicas = var.autoscaling_min_replicas
   autoscaling_max_replicas = var.autoscaling_max_replicas
   autoscaling_cpu_target = 0.7
-  update_policy_type = "OPPORTUNISTIC"

   # 공통 인스턴스 설정
   machine_type = var.k8s_worker_machine_type
diff --git a/terraform/environments/prod/storage.tf b/terraform/environments/prod/storage.tf
index 45e4853..787b895 100644
--- a/terraform/environments/prod/storage.tf
+++ b/terraform/environments/prod/storage.tf
@@ -14,6 +14,8 @@ module "storage" {

   # 버킷 정의
   buckets = merge(
+
+    # Default buckets
     var.create_storage_buckets ? tomap({
       static_assets = {
         name = "${var.project}-${var.environment}"
@@ -46,6 +48,29 @@ module "storage" {
           }
         ] : []
       }
-    }) : tomap({})
+    }) : tomap({}),
+
+    # Monitoring buckets. Object versioning stays off: with it on, every object the Loki/Tempo
+    # compactors delete would be kept as a noncurrent version and the 14-day retention would free nothing.
+    var.create_monitoring_buckets ? tomap({
+      loki = {
+        name = "${var.project}-${var.environment}-${var.monitoring_loki}"
+        storage_class = "STANDARD"
+        uniform_bucket_level_access = true
+        versioning_enabled = false
+        force_destroy = false
+        public_access_prevention = "enforced"
+        cors = []
+      }
+      tempo = {
+        name = "${var.project}-${var.environment}-${var.monitoring_tempo}"
+        storage_class = "STANDARD"
+        uniform_bucket_level_access = true
+        versioning_enabled = false
+        force_destroy = false
+        public_access_prevention = "enforced"
+        cors = []
+      }
+    }) : tomap({}),
   )
 }
diff --git a/terraform/environments/prod/terraform.tfvars.example b/terraform/environments/prod/terraform.tfvars.example
index ae64caa..4e278b1 100644
--- a/terraform/environments/prod/terraform.tfvars.example
+++ b/terraform/environments/prod/terraform.tfvars.example
@@ -68,6 +68,7 @@ calico_version = "v3.31.4"
 # 스토리지 관련 값
 # ========================================
 create_storage_buckets = true
+create_monitoring_buckets = true
 storage_location = "ASIA-NORTHEAST3"

 # CORS 허용 Origin
@@ -98,4 +99,4 @@
secret_manager_secret_iam_members = {
     role = "roles/secretmanager.secretAccessor"
     member = "serviceAccount:example-secrets@example-pinhouse.iam.gserviceaccount.com"
   }
-}
+}
\ No newline at end of file
diff --git a/terraform/environments/prod/variables.tf b/terraform/environments/prod/variables.tf
index ef89f3e..9e4374f 100644
--- a/terraform/environments/prod/variables.tf
+++ b/terraform/environments/prod/variables.tf
@@ -261,6 +261,24 @@ variable "create_storage_buckets" {
   default = true
 }

+variable "create_monitoring_buckets" {
+  description = "모니터링 버킷 생성 여부입니다."
+  type = bool
+  default = false
+}
+
+variable "monitoring_loki" {
+  description = "Loki에서 사용할 모니터링 라벨입니다."
+  type = string
+  default = "loki"
+}
+
+variable "monitoring_tempo" {
+  description = "Tempo에서 사용할 모니터링 라벨입니다."
+  type = string
+  default = "tempo"
+}
+
 variable "storage_location" {
   description = "스토리지 버킷을 생성할 위치입니다."
   type = string