-
-
Save AlexKordic/65f031b708177a01a002cc19f0d7298c to your computer and use it in GitHub Desktop.
import signal
import sys
import time

import pynvml
from pynvml import *
# Temperature thresholds of the fan curve (same unit as Device.temp()).
TEMP_MIN_VALUE = 30.0  # fan is around 30%
TEMP_MAX_VALUE = 65.0  # fan is at 100% onwards
TEMP_RANGE = TEMP_MAX_VALUE - TEMP_MIN_VALUE


def fanspeed_from_t(t):
    """Map a temperature to a fan-speed fraction between 0.0 and 1.0.

    The curve is linear between TEMP_MIN_VALUE and TEMP_MAX_VALUE and is
    clamped outside that interval. The caller scales the fraction onto the
    device's own min..max fan-speed range.

    Args:
        t: Current temperature, in the same unit as the TEMP_* constants.

    Returns:
        0.0 at or below TEMP_MIN_VALUE, 1.0 at or above TEMP_MAX_VALUE,
        otherwise the linear position of ``t`` between the two thresholds.
    """
    if t <= TEMP_MIN_VALUE:
        return 0.0
    if t >= TEMP_MAX_VALUE:
        return 1.0
    # Strictly inside the interval here, so TEMP_RANGE is non-zero.
    return (t - TEMP_MIN_VALUE) / TEMP_RANGE
# The private ctypes helpers are looked up on the pynvml package first and,
# if they are not exposed there, on its ``nvml`` submodule.
try:
    _nvmlGetFunctionPointer = pynvml._nvmlGetFunctionPointer
    _nvmlCheckReturn = pynvml._nvmlCheckReturn
except AttributeError:
    _nvmlGetFunctionPointer = pynvml.nvml._nvmlGetFunctionPointer
    _nvmlCheckReturn = pynvml.nvml._nvmlCheckReturn

# NVML has to be initialised before any of the device queries below.
nvmlInit()
def alex_nvmlDeviceGetMinMaxFanSpeed(handle):
    """Return the ``(min, max)`` fan speed NVML reports for a device.

    Calls the native ``nvmlDeviceGetMinMaxFanSpeed`` entry point directly
    through ctypes and returns both values as plain Python ints.

    NOTE: the reported minimum is not always usable in practice; some cards
    reportedly do not spin their fan until a noticeably higher value is
    requested. Raise the returned minimum if the fan stalls or pulses.

    Args:
        handle: NVML device handle (e.g. from nvmlDeviceGetHandleByIndex).

    Returns:
        Tuple ``(min_speed, max_speed)``.

    Raises:
        Whatever ``_nvmlCheckReturn`` raises for a non-success return code.
    """
    min_speed = c_uint()
    max_speed = c_uint()
    fn = _nvmlGetFunctionPointer("nvmlDeviceGetMinMaxFanSpeed")
    ret = fn(handle, byref(min_speed), byref(max_speed))
    _nvmlCheckReturn(ret)
    return min_speed.value, max_speed.value
class Device:
    """One NVML GPU together with its fan count and fan-speed limits."""

    def __init__(self, index):
        """Look up the device at ``index`` and cache its static properties."""
        self.index = index
        self.handle = nvmlDeviceGetHandleByIndex(index)
        name = nvmlDeviceGetName(self.handle)
        # Some pynvml releases return bytes here; normalise to str so the
        # log lines do not show a b'...' literal.
        self.name = name.decode("utf-8", "replace") if isinstance(name, bytes) else name
        self.fan_count = nvmlDeviceGetNumFans(self.handle)
        self.fan_min, self.fan_max = alex_nvmlDeviceGetMinMaxFanSpeed(self.handle)
        self._fan_range = self.fan_max - self.fan_min

    def temp(self):
        """Return the current GPU temperature as reported by NVML."""
        return nvmlDeviceGetTemperature(self.handle, NVML_TEMPERATURE_GPU)

    def fan_percentages(self):
        """Return the current speed of every fan, one entry per fan."""
        return [nvmlDeviceGetFanSpeed_v2(self.handle, i) for i in range(self.fan_count)]

    def set_fan_speed(self, percentage):
        """Set every fan of this device to ``percentage``.

        WARNING: This function changes the fan control policy to manual. It
        means that YOU have to monitor the temperature and adjust the fan
        speed accordingly. If you set the fan speed too low you can burn
        your GPU! Use nvmlDeviceSetDefaultFanSpeed_v2 to restore default
        control policy.
        """
        for i in range(self.fan_count):
            nvmlDeviceSetFanSpeed_v2(self.handle, i, percentage)

    def query(self):
        """Return a one-line status string: index, name, temperature, fans."""
        return f"{self.index}:{self.name} {self.temp()}@{self.fan_percentages()}"

    def control(self):
        """Run one control step: read temperature, adjust fans if needed.

        The target speed is the fan curve fraction scaled onto this device's
        fan_min..fan_max range. The fans are only touched when the target
        differs from the rounded average of the current fan readings.
        """
        if not self.fan_count:
            # Nothing to control, and averaging an empty list of fan
            # readings would divide by zero.
            return
        t = self.temp()
        fans = self.fan_percentages()
        current = round(sum(fans) / len(fans))
        shouldbe = round(fanspeed_from_t(t) * self._fan_range + self.fan_min)
        if shouldbe != current:
            print(f"{self.index}:{self.name} t={t} {current} >> {shouldbe}")
            # change fan speed
            self.set_fan_speed(shouldbe)

    def __str__(self):
        return f"{self.index}:{self.name} fans={self.fan_count} {self.fan_min}-{self.fan_max}"

    __repr__ = __str__
# Report the driver version, then enumerate every GPU NVML can see and print
# a one-line summary (fan count and reported fan-speed range) for each.
print(f"Driver Version: {nvmlSystemGetDriverVersion()}")
device_count = nvmlDeviceGetCount()
devices = [Device(i) for i in range(device_count)]
for device in devices:
    print(device)
def main():
    """Poll every device once per second and drive its fans from the curve.

    Runs until interrupted. On the way out, whatever the reason, every fan
    is handed back to the driver's automatic control and NVML is shut down.
    A failure to reset one fan is reported on stderr and does not stop the
    remaining fans from being reset.
    """

    def _on_sigterm(signum, frame):
        # Turn SIGTERM (e.g. `kill`, service stop) into a normal exit so the
        # `finally` block below still restores automatic fan control.
        sys.exit(0)

    try:
        signal.signal(signal.SIGTERM, _on_sigterm)
    except ValueError:
        # Handlers can only be installed from the main thread; when main()
        # is called from elsewhere, run without the SIGTERM hook.
        pass

    try:
        while True:
            for device in devices:
                # print(device.query())  # uncomment to log every poll
                device.control()
            time.sleep(1)
    finally:
        # reset to auto fan control
        try:
            for device in devices:
                for i in range(device.fan_count):
                    try:
                        nvmlDeviceSetDefaultFanSpeed_v2(device.handle, i)
                    except NVMLError as err:
                        # Keep going: leaving the other fans in manual mode
                        # would be worse than reporting this one failure.
                        print(
                            f"{device.index}:{device.name} could not restore "
                            f"automatic control for fan {i}: {err}",
                            file=sys.stderr,
                        )
        finally:
            nvmlShutdown()


if __name__ == "__main__":
    main()
Hi @AlexKordic, this looks like exactly what I need to quieten the noisy fan on my headless server, but the temperature control doesn't seem to take effect. Are there some setup steps I'm missing?
The output of the script when I run it is:
(.venv) rbalmer@ubuntuserver:~/code/gpu_fan$ sudo /home/rbalmer/code/gpu_fan/.venv/bin/python /home/rbalmer/code/gpu_fan/gpu_fan.py
Driver Version: 535.171.04
0:NVIDIA GeForce GTX 960 fans=1 23-100
0:NVIDIA GeForce GTX 960 t=25 78 >> 23
0:NVIDIA GeForce GTX 960 t=25 39 >> 23
0:NVIDIA GeForce GTX 960 t=25 63 >> 23
0:NVIDIA GeForce GTX 960 t=25 61 >> 23
0:NVIDIA GeForce GTX 960 t=25 53 >> 23
0:NVIDIA GeForce GTX 960 t=25 80 >> 23
0:NVIDIA GeForce GTX 960 t=25 62 >> 23
0:NVIDIA GeForce GTX 960 t=25 57 >> 23
0:NVIDIA GeForce GTX 960 t=25 63 >> 23
0:NVIDIA GeForce GTX 960 t=25 57 >> 23
0:NVIDIA GeForce GTX 960 t=25 91 >> 23
0:NVIDIA GeForce GTX 960 t=25 627 >> 23
0:NVIDIA GeForce GTX 960 t=25 1392 >> 23
0:NVIDIA GeForce GTX 960 t=25 70 >> 23
0:NVIDIA GeForce GTX 960 t=25 72 >> 23
0:NVIDIA GeForce GTX 960 t=25 63 >> 23
0:NVIDIA GeForce GTX 960 t=25 52 >> 23
I think there's a problem with getting the minimum fan speed for my gpu - mine reports 23% but setting to that fails. I've hacked in a
if c_minSpeed.value < 45:
c_minSpeed.value = 45
At the end of alex_nvmlDeviceGetMinMaxFanSpeed
and it's working now.
Interestingly I've seen the fan running as low as 34 and as high as 2044, but the min and max are reported as 23-100.
@richbalmer Tested 4060 & 4090 cards. Both report 30% as the minimum fan speed. Even so, my 4090 card does not start its fans until 40% is requested.
When 40% is set, the fan starts with a single impulse and then stops, making a funny noise as this repeats. That is why I hardcoded c_minSpeed.value = 45
:) The fan_min
is not reliable.
Thankfully Python is easy to hack and fix.
Tested on 2080Ti, works perfectly, thank you so much!
:~$ ./fan.sh
Driver Version: 535.183.01
0:NVIDIA GeForce RTX 2080 Ti fans=1 27-100
0:NVIDIA GeForce RTX 2080 Ti t=36 27 >> 40
0:NVIDIA GeForce RTX 2080 Ti t=36 43 >> 40
0:NVIDIA GeForce RTX 2080 Ti t=36 41 >> 40
0:NVIDIA GeForce RTX 2080 Ti t=36 39 >> 40
0:NVIDIA GeForce RTX 2080 Ti t=35 40 >> 37
0:NVIDIA GeForce RTX 2080 Ti t=35 36 >> 37
0:NVIDIA GeForce RTX 2080 Ti t=34 37 >> 35
when the script isn't running, are the default values back in play? Or is there a command or script that sets the fan back to automatic, so i don't burn out my GPU?
when the script isn't running, are the default values back in play? Or is there a command or script that sets the fan back to automatic, so i don't burn out my GPU?
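That is what the # reset to auto fan control
part does: when the script exits normally or is interrupted with Ctrl+C, it hands every fan back to automatic control.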
Depends on:
pip3 install pynvml -t . --break-system-packages
Modify
fanspeed_from_t()
calculation according to your needs.