1、 获取node-exporter
离线包已经打好,位于“用到的离线包/node-exporter.tar”
在有网机器下载镜像
docker pull prom/node-exporter:v0.16.0
docker save -o node-exporter.tar prom/node-exporter:v0.16.0
将tar包拷贝至无网(离线)的机器
2、 将node-exporter放入离线镜像服务器
docker load -i node-exporter.tar
docker tag prom/node-exporter:v0.16.0 192.168.100.94:80/node-exporter:v0.16.0
docker push 192.168.100.94:80/node-exporter:v0.16.0
3、 使用DaemonSet部署node-exporter
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: kube-ops
  labels:
    name: node-exporter
spec:
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      hostPID: true
      hostIPC: true
      hostNetwork: true
      containers:
      - name: node-exporter
        image: 192.168.100.94:80/node-exporter:v0.16.0
        ports:
        - containerPort: 9100
        resources:
          requests:
            cpu: 0.15
        securityContext:
          privileged: true
        args:
        - --path.procfs
        - /host/proc
        - --path.sysfs
        - /host/sys
        - --collector.filesystem.ignored-mount-points
        - '"^/(sys|proc|dev|host|etc)($|/)"'
        volumeMounts:
        - name: dev
          mountPath: /host/dev
        - name: proc
          mountPath: /host/proc
        - name: sys
          mountPath: /host/sys
        - name: rootfs
          mountPath: /rootfs
      tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
        effect: "NoSchedule"
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: dev
        hostPath:
          path: /dev
      - name: sys
        hostPath:
          path: /sys
      - name: rootfs
        hostPath:
          path: /
kubectl apply -f prome-node-exporter.yaml
[root@master node-exporter]# kubectl get pods -n kube-ops -owide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
node-exporter-kg427 1/1 Running 0 17s 192.168.100.94 master <none> <none>
node-exporter-xk6vm 1/1 Running 0 17s 192.168.100.95 node2 <none> <none>
node-exporter-z6bpr 1/1 Running 0 17s 192.168.100.96 node3 <none> <none>
prometheus-5745dbdc87-rjnxj 1/1 Running 1 5d4h 10.244.2.30 node3 <none> <none>
redis-55d48df69d-xzsvt 2/2 Running 2 5d3h 10.244.1.28 node2 <none> <none>
4、 部署服务发现
修改prometheus-cm.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-ops
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 15s
    scrape_configs:
    - job_name: 'prometheus'
      static_configs:
      - targets: ['localhost:9090']
    - job_name: 'traefik'
      static_configs:
      - targets: ['traefik-service.kube-system.svc.cluster.local:8080']
    - job_name: 'redis'
      static_configs:
      - targets: ['redis:9121']
    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
    # - job_name: 'kubernetes-kubelet'
    #   kubernetes_sd_configs:
    #   - role: node
    #   scheme: https
    #   tls_config:
    #     ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    #     insecure_skip_verify: true
    #   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    #   relabel_configs:
    #   - action: labelmap
    #     regex: __meta_kubernetes_node_label_(.+)
    #   - target_label: __address__
    #     replacement: kubernetes.default.svc:443
    #   - source_labels: [__meta_kubernetes_node_name]
    #     regex: (.+)
    #     target_label: __metrics_path__
    #     replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
kubectl delete -f prometheus-cm.yaml
kubectl apply -f prometheus-cm.yaml
[root@master prometheus]# kubectl get services -n kube-ops
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
prometheus NodePort 10.105.89.6 <none> 9090:30229/TCP 5d4h
redis ClusterIP 10.100.232.96 <none> 6379/TCP,9121/TCP 5d4h
[root@master prometheus]# curl -X POST http://10.105.89.6:9090/-/reload
访问http://192.168.100.94:30229/targets
使用检索条件
sum by (pod_name)(rate(container_cpu_usage_seconds_total{image!="", pod_name!=""}[1m] ))