WARNING 09-28 19:47:27 preprocess.py:86] Falling back on <BOS> for decoder start token id because decoder start token id is not available.
ERROR 09-28 19:47:28 async_llm_engine.py:61] Engine background task failed
ERROR 09-28 19:47:28 async_llm_engine.py:61] Traceback (most recent call last):
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/async_llm_engine.py", line 51, in _log_task_completion
ERROR 09-28 19:47:28 async_llm_engine.py:61] return_value = task.result()
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/async_llm_engine.py", line 755, in run_engine_loop
ERROR 09-28 19:47:28 async_llm_engine.py:61] result = task.result()
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/async_llm_engine.py", line 678, in engine_step
ERROR 09-28 19:47:28 async_llm_engine.py:61] request_outputs = await self.engine.step_async(virtual_engine)
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/async_llm_engine.py", line 343, in step_async
ERROR 09-28 19:47:28 async_llm_engine.py:61] outputs = await self.model_executor.execute_model_async(
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/gpu_executor.py", line 185, in execute_model_async
ERROR 09-28 19:47:28 async_llm_engine.py:61] output = await make_async(self.driver_worker.execute_model
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/lib/python3.12/concurrent/futures/thread.py", line 58, in run
ERROR 09-28 19:47:28 async_llm_engine.py:61] result = self.fn(*self.args, **self.kwargs)
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/worker/worker_base.py", line 327, in execute_model
ERROR 09-28 19:47:28 async_llm_engine.py:61] output = self.model_runner.execute_model(
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
ERROR 09-28 19:47:28 async_llm_engine.py:61] return func(*args, **kwargs)
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/worker/enc_dec_model_runner.py", line 223, in execute_model
ERROR 09-28 19:47:28 async_llm_engine.py:61] output: SamplerOutput = self.model.sample(
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/mllama.py", line 940, in sample
ERROR 09-28 19:47:28 async_llm_engine.py:61] next_tokens = self.sampler(logits, sampling_metadata)
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-28 19:47:28 async_llm_engine.py:61] return self._call_impl(*args, **kwargs)
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-28 19:47:28 async_llm_engine.py:61] return forward_call(*args, **kwargs)
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/sampler.py", line 231, in forward
ERROR 09-28 19:47:28 async_llm_engine.py:61] self._init_sampling_tensors(logits, sampling_metadata)
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers/sampler.py", line 195, in _init_sampling_tensors
ERROR 09-28 19:47:28 async_llm_engine.py:61] do_min_p) = SamplingTensors.from_sampling_metadata(
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/sampling_metadata.py", line 470, in from_sampling_metadata
ERROR 09-28 19:47:28 async_llm_engine.py:61] sampling_tensors = SamplingTensors.from_lists(
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/sampling_metadata.py", line 528, in from_lists
ERROR 09-28 19:47:28 async_llm_engine.py:61] temperatures_t = torch.tensor(
ERROR 09-28 19:47:28 async_llm_engine.py:61] ^^^^^^^^^^^^^
ERROR 09-28 19:47:28 async_llm_engine.py:61] RuntimeError: CUDA error: an illegal memory access was encountered
ERROR 09-28 19:47:28 async_llm_engine.py:61] CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
ERROR 09-28 19:47:28 async_llm_engine.py:61] For debugging consider passing CUDA_LAUNCH_BLOCKING=1
ERROR 09-28 19:47:28 async_llm_engine.py:61] Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
ERROR 09-28 19:47:28 async_llm_engine.py:61]
Exception in callback functools.partial(<function _log_task_completion at 0x7dbfb52dae80>, error_callback=<bound method AsyncLLMEngine._error_callback of <vllm.engine.async_llm_engine.AsyncLLMEngine object at 0x7dbfb1737170>>)
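
The traceback shows a CUDA illegal memory access surfacing in vLLM's sampler (Sampler._init_sampling_tensors -> SamplingTensors.from_lists -> torch.tensor) while serving an mllama model through the async engine. As the log itself notes, CUDA errors are reported asynchronously, so the frame where the exception is raised is usually not the kernel that faulted; rerunning with CUDA_LAUNCH_BLOCKING=1 forces synchronous launches and moves the error to its real call site (the TORCH_USE_CUDA_DSA route needs a custom PyTorch build, so the environment variable is the cheaper first step). Below is a minimal sketch of such a rerun using vLLM's offline API, which exercises the same model runner and sampler path; the model name is an assumption inferred from the mllama.py frames, since the checkpoint that produced this log is not shown.

    import os

    # Force synchronous kernel launches. Must be set before CUDA is
    # initialized, i.e. before importing vllm/torch, so the faulting
    # kernel is reported where it actually launched.
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

    from vllm import LLM, SamplingParams

    # Assumed checkpoint: the mllama.py frames point at a Llama 3.2
    # Vision model, but the exact model behind this log is unknown.
    llm = LLM(model="meta-llama/Llama-3.2-11B-Vision-Instruct")

    outputs = llm.generate(
        ["Describe the weather in one sentence."],
        SamplingParams(temperature=0.7, max_tokens=64),
    )
    print(outputs[0].outputs[0].text)

If the crash reproduces under this setting, the new stack trace should point at the kernel that actually faulted rather than at the unrelated torch.tensor call in sampling_metadata.py.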