Created
August 12, 2022 15:13
-
-
Save voxxit/5cfea887652b763765d46d5f9fa8b8e1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# ConfigMap holding the system-log-monitor configurations consumed by the
# node-problem-detector container (passed via --config.system-log-monitor).
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-problem-detector-config
  namespace: kube-system
data:
  # Kernel log monitor: uses the "kmsg" plugin against /dev/kmsg.
  # "conditions" lists the default (healthy) state of each node condition;
  # "temporary" rules carry only a reason and pattern, while "permanent"
  # rules additionally name the condition they apply to.
  kernel-monitor.json: |
    {
      "plugin": "kmsg",
      "logPath": "/dev/kmsg",
      "lookback": "5m",
      "bufferSize": 10,
      "source": "kernel-monitor",
      "conditions": [
        {
          "type": "KernelDeadlock",
          "reason": "KernelHasNoDeadlock",
          "message": "kernel has no deadlock"
        },
        {
          "type": "ReadonlyFilesystem",
          "reason": "FilesystemIsNotReadOnly",
          "message": "Filesystem is not read-only"
        }
      ],
      "rules": [
        {
          "type": "temporary",
          "reason": "OOMKilling",
          "pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB.*"
        },
        {
          "type": "temporary",
          "reason": "TaskHung",
          "pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
        },
        {
          "type": "temporary",
          "reason": "UnregisterNetDevice",
          "pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
        },
        {
          "type": "temporary",
          "reason": "KernelOops",
          "pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
        },
        {
          "type": "temporary",
          "reason": "KernelOops",
          "pattern": "divide error: 0000 \\[#\\d+\\] SMP"
        },
        {
          "type": "temporary",
          "reason": "MemoryReadError",
          "pattern": "CE memory read error .*"
        },
        {
          "type": "permanent",
          "condition": "KernelDeadlock",
          "reason": "DockerHung",
          "pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
        },
        {
          "type": "permanent",
          "condition": "ReadonlyFilesystem",
          "reason": "FilesystemIsReadOnly",
          "pattern": "Remounting filesystem read-only"
        }
      ]
    }
  # Docker log monitor: uses the "journald" plugin with source "dockerd"
  # against /var/log/journal. It declares no conditions and a single
  # temporary rule for corrupt image layers.
  docker-monitor.json: |
    {
      "plugin": "journald",
      "pluginConfig": {
        "source": "dockerd"
      },
      "logPath": "/var/log/journal",
      "lookback": "5m",
      "bufferSize": 10,
      "source": "docker-monitor",
      "conditions": [],
      "rules": [
        {
          "type": "temporary",
          "reason": "CorruptDockerImage",
          "pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*"
        }
      ]
    }
---
# node-problem-detector DaemonSet, health-checker variant: in addition to the
# two system log monitors it enables a custom plugin monitor
# (health-checker-kubelet.json) and mounts the host's machine-id, systemd and
# D-Bus paths into the container.
# NOTE(review): another DaemonSet with the same name and namespace follows in
# this file; if both are applied, the later one replaces this one. Presumably
# they are alternative variants - confirm which one is intended.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-problem-detector
  namespace: kube-system
  labels:
    app: node-problem-detector
spec:
  selector:
    matchLabels:
      app: node-problem-detector
  template:
    metadata:
      labels:
        app: node-problem-detector
    spec:
      # Schedule only onto Linux nodes.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/os
                    operator: In
                    values:
                      - linux
      containers:
        - name: node-problem-detector
          command:
            - /node-problem-detector
            - --logtostderr
            - --config.system-log-monitor=/config/kernel-monitor.json,/config/docker-monitor.json
            - --config.custom-plugin-monitor=/config/health-checker-kubelet.json
          # Pulled from registry.k8s.io; the legacy k8s.gcr.io registry is frozen.
          image: registry.k8s.io/node-problem-detector/node-problem-detector:v0.8.6
          resources:
            limits:
              cpu: 10m
              memory: 80Mi
            requests:
              cpu: 10m
              memory: 80Mi
          imagePullPolicy: Always
          securityContext:
            privileged: true
          env:
            # Expose the name of the node this pod runs on.
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: log
              mountPath: /var/log
              readOnly: true
            - name: kmsg
              mountPath: /dev/kmsg
              readOnly: true
            # Make sure node problem detector is in the same timezone
            # with the host.
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
            # Mount the two ConfigMap files individually via subPath instead of
            # mounting the volume over the whole /config directory. A directory
            # mount hides every other file under /config, and the ConfigMap
            # does not provide the health-checker-kubelet.json that
            # --config.custom-plugin-monitor points at.
            # NOTE(review): assumes the image ships
            # /config/health-checker-kubelet.json - confirm for this image tag.
            - name: config
              mountPath: /config/kernel-monitor.json
              subPath: kernel-monitor.json
              readOnly: true
            - name: config
              mountPath: /config/docker-monitor.json
              subPath: docker-monitor.json
              readOnly: true
            - mountPath: /etc/machine-id
              name: machine-id
              readOnly: true
            - mountPath: /run/systemd/system
              name: systemd
            - mountPath: /var/run/dbus/
              name: dbus
              mountPropagation: Bidirectional
      volumes:
        - name: log
          # Config `log` to your system log directory
          hostPath:
            path: /var/log/
        - name: kmsg
          hostPath:
            path: /dev/kmsg
        - name: localtime
          hostPath:
            path: /etc/localtime
        - name: config
          configMap:
            name: node-problem-detector-config
            items:
              - key: kernel-monitor.json
                path: kernel-monitor.json
              - key: docker-monitor.json
                path: docker-monitor.json
        - name: machine-id
          hostPath:
            path: /etc/machine-id
            type: "File"
        - name: systemd
          hostPath:
            path: /run/systemd/system/
            type: ""
        - name: dbus
          hostPath:
            path: /var/run/dbus/
            type: ""
---
# node-problem-detector DaemonSet, plain variant: runs only the two system log
# monitors (kernel and docker) whose configuration is mounted from the
# node-problem-detector-config ConfigMap.
# NOTE(review): an earlier DaemonSet in this file uses the same name and
# namespace; if both are applied, this one replaces it. Presumably they are
# alternative variants - confirm which one is intended.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-problem-detector
  namespace: kube-system
  labels:
    app: node-problem-detector
spec:
  selector:
    matchLabels:
      app: node-problem-detector
  template:
    metadata:
      labels:
        app: node-problem-detector
    spec:
      # Schedule only onto Linux nodes.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/os
                    operator: In
                    values:
                      - linux
      containers:
        - name: node-problem-detector
          command:
            - /node-problem-detector
            - --logtostderr
            - --config.system-log-monitor=/config/kernel-monitor.json,/config/docker-monitor.json
          # Pulled from registry.k8s.io; the legacy k8s.gcr.io registry is frozen.
          image: registry.k8s.io/node-problem-detector/node-problem-detector:v0.8.7
          resources:
            limits:
              cpu: 10m
              memory: 80Mi
            requests:
              cpu: 10m
              memory: 80Mi
          imagePullPolicy: Always
          securityContext:
            privileged: true
          env:
            # Expose the name of the node this pod runs on.
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: log
              mountPath: /var/log
              readOnly: true
            - name: kmsg
              mountPath: /dev/kmsg
              readOnly: true
            # Make sure node problem detector is in the same timezone
            # with the host.
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
            # Both monitor configs referenced by --config.system-log-monitor
            # come from this ConfigMap-backed volume.
            - name: config
              mountPath: /config
              readOnly: true
      volumes:
        - name: log
          # Config `log` to your system log directory
          hostPath:
            path: /var/log/
        - name: kmsg
          hostPath:
            path: /dev/kmsg
        - name: localtime
          hostPath:
            path: /etc/localtime
        - name: config
          configMap:
            name: node-problem-detector-config
            items:
              - key: kernel-monitor.json
                path: kernel-monitor.json
              - key: docker-monitor.json
                path: docker-monitor.json
      # Tolerate every NoSchedule and NoExecute taint (operator Exists with no key).
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment