Created
October 14, 2021 13:44
-
-
Save jjerphan/72083ebc7d26fd7850adb998128d3961 to your computer and use it in GitHub Desktop.
CuML -- sklearn -- sklearnex NearestNeighbors Benchmarks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark of brute-force k-nearest-neighbors search: cuML (GPU) versus
# scikit-learn (CPU), optionally with scikit-learn patched by sklearnex.
#
# Command-line flags (looked up directly in sys.argv):
#   --sklearnex  call sklearnex.patch_sklearn() before the scikit-learn
#                estimators are imported
#   --float32    cast the CPU-side arrays to float32 (the cuDF inputs given
#                to cuML are always float32)
import sys
import time

import cudf
import joblib
import numpy as np
import sklearn

# The patch has to be applied before `sklearn.neighbors` is imported below,
# hence the imports that follow are deliberately not at the very top.
if "--sklearnex" in sys.argv:
    import sklearnex

    sklearnex.patch_sklearn()

# Print the environment so that reported timings can be reproduced.
sklearn.show_versions()

from cuml.neighbors import NearestNeighbors as CUNN
from sklearn.neighbors import NearestNeighbors as SKNN
from sklearn.datasets import make_blobs

# The same estimator parameters are passed to both implementations.
params = dict(n_neighbors=1024, n_jobs=-1, algorithm='brute')

# Memoize dataset generation on disk so repeated runs skip make_blobs.
m = joblib.Memory(location='/tmp/jjerphan')
make_blobs = m.cache(make_blobs)

# X is the set that gets indexed (fit), Y is the set of query points;
# they only differ by their random seed.
X, _ = make_blobs(
    n_samples=65_000,
    centers=5,
    n_features=256,
    random_state=42,
)
Y, _ = make_blobs(
    n_samples=65_000,
    centers=5,
    n_features=256,
    random_state=43,
)

# Only downcast the CPU inputs when the flag is really present on the
# command line. (A bare `if "--float32":` is always true because it tests a
# non-empty string literal.)
if "--float32" in sys.argv:
    X = X.astype(np.float32)
    Y = Y.astype(np.float32)

# GPU inputs are float32 regardless of the flag.
X_cudf = cudf.DataFrame(X.astype(np.float32))
Y_cudf = cudf.DataFrame(Y.astype(np.float32))

cu_durations = []
sk_durations = []

# Each timed iteration covers fit + kneighbors, for both implementations.
cu_model = CUNN(**params)
for _ in range(5):
    start = time.perf_counter()
    cu_model.fit(X_cudf)
    cu_model.kneighbors(Y_cudf)
    end = time.perf_counter()
    cu_duration = end - start
    cu_durations.append(cu_duration)
    print("CUDA:", cu_duration)

model = SKNN(**params)
for _ in range(3):
    start = time.perf_counter()
    model.fit(X)
    model.kneighbors(Y)
    end = time.perf_counter()
    sk_duration = end - start
    sk_durations.append(sk_duration)
    print("skl:", sk_duration)

# Compare the best run of each implementation. The minimum must be taken
# over the collected lists; using the scalar `sk_duration` / `cu_duration`
# would silently compare only the last iteration of each loop.
print("Ratio:", np.min(sk_durations) / np.min(cu_durations))
Author
jjerphan
commented
Oct 14, 2021
•
(rapids-21.10+intelex) β
[email protected] ~ π
$ srun --partition=gpu --gpus=1 -c 64 `where python` bench_knn.py --sklearnex
srun: job 300370 queued and waiting for resources
srun: job 300370 has been allocated resources
Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
System:
python: 3.8.12 | packaged by conda-forge | (default, Oct 12 2021, 21:59:51) [GCC 9.4.0]
executable: /home/parietal/jjerphan/.local/miniconda3/envs/rapids-21.10+intelex/bin/python
machine: Linux-4.18.0-240.1.1.el8_3.x86_64-x86_64-with-glibc2.10
Python dependencies:
pip: 21.3
setuptools: 58.2.0
sklearn: 0.24.2
numpy: 1.21.2
scipy: 1.7.1
Cython: None
pandas: 1.3.3
matplotlib: None
joblib: 1.1.0
threadpoolctl: 2.1.0
Built with OpenMP: True
CUDA: 1.3430722079938278
CUDA: 0.7992004080442712
CUDA: 0.801696153008379
CUDA: 0.9491368799936026
CUDA: 0.8242675189394504
skl: 2.288681124104187
skl: 2.2949441720265895
skl: 2.27677305706311
Ratio: 2.7621773329034442
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment