Skip to content

Instantly share code, notes, and snippets.

@crypdick
Created June 17, 2025 23:29
Show Gist options
  • Save crypdick/1c154b82a8c53fc5f5f2549456a30a7a to your computer and use it in GitHub Desktop.
Save crypdick/1c154b82a8c53fc5f5f2549456a30a7a to your computer and use it in GitHub Desktop.
Stack trace for a flaky Ray Data LLM workload
2025-06-17 16:26:19,406 DEBUG streaming_executor.py:546 -- 9: - MapBatches(vLLMEngineStageUDF): Tasks: 24; Actors: 3; Queued blocks: 13; Resources: 0.0 CPU, 3.0 GPU, 768.0MB object store; [8/24 objects local], Blocks Outputted: 0/None
2025-06-17 16:26:19,406 DEBUG streaming_executor.py:546 -- 10: - MapBatches(DetokenizeUDF): Tasks: 0; Actors: 1; Queued blocks: 0; Resources: 1.0 CPU, 0.0B object store; [all objects local], Blocks Outputted: 0/None
2025-06-17 16:26:19,406 DEBUG streaming_executor.py:546 -- 11: - Map(_postprocess)->Filter(NoneType)->Write: Tasks: 0; Actors: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store, Blocks Outputted: 0/None
2025-06-17 16:26:27,977 ERROR streaming_executor_state.py:519 -- An exception was raised from a task of operator "MapBatches(vLLMEngineStageUDF)". Dataset execution will now abort. To ignore this exception and continue, set DataContext.max_errored_blocks.
Traceback (most recent call last):
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/streaming_executor_state.py", line 487, in process_completed_tasks
bytes_read = task.on_data_ready(
^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/interfaces/physical_operator.py", line 156, in on_data_ready
raise ex from None
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/interfaces/physical_operator.py", line 152, in on_data_ready
ray.get(block_ref)
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/_private/worker.py", line 2851, in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/_private/worker.py", line 951, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(EngineDeadError): ray::MapBatches(vLLMEngineStageUDF).submit() (pid=3006, ip=10.0.190.249, actor_id=b69436f602ee168b444a002f02000000, repr=MapWorker(MapBatches(vLLMEngineStageUDF)))
yield from _map_task(
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/operators/map_operator.py", line 557, in _map_task
for b_out in map_transformer.apply_transform(iter(blocks), ctx):
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/operators/map_transformer.py", line 532, in __call__
for data in iter:
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/operators/map_transformer.py", line 211, in _udf_timed_iter
output = next(input)
^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/operators/map_transformer.py", line 327, in __call__
yield from self._batch_fn(input, ctx)
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/planner/plan_udf_map_op.py", line 485, in transform_fn
raise out_item
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 317, in generate_async
output = await self._generate_async(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 399, in generate_async_v1
async for request_output in stream:
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 315, in generate
out = q.get_nowait() or await q.get()
^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/output_processor.py", line 51, in get
raise output
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 317, in generate_async
output = await self._generate_async(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 399, in generate_async_v1
async for request_output in stream:
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 315, in generate
out = q.get_nowait() or await q.get()
^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/output_processor.py", line 51, in get
raise output
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 317, in generate_async
output = await self._generate_async(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 399, in generate_async_v1
async for request_output in stream:
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 315, in generate
out = q.get_nowait() or await q.get()
^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/output_processor.py", line 51, in get
raise output
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 366, in output_handler
outputs = await engine_core.get_output_async()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 806, in get_output_async
raise self._format_exception(outputs) from None
vllm.v1.engine.exceptions.EngineDeadError: EngineCore encountered an issue. See stack trace (above) for the root cause.
2025-06-17 16:26:27,985 DEBUG streaming_executor.py:189 -- Shutting down executor for dataset dataset_14_0 (failed with ray::MapBatches(vLLMEngineStageUDF).submit() (pid=3006, ip=10.0.190.249, actor_id=b69436f602ee168b444a002f02000000, repr=MapWorker(MapBatches(vLLMEngineStageUDF)))
yield from _map_task(
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/operators/map_operator.py", line 557, in _map_task
for b_out in map_transformer.apply_transform(iter(blocks), ctx):
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/operators/map_transformer.py", line 532, in __call__
for data in iter:
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/operators/map_transformer.py", line 211, in _udf_timed_iter
output = next(input)
^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/execution/operators/map_transformer.py", line 327, in __call__
yield from self._batch_fn(input, ctx)
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_internal/planner/plan_udf_map_op.py", line 485, in transform_fn
raise out_item
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 317, in generate_async
output = await self._generate_async(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 399, in generate_async_v1
async for request_output in stream:
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 315, in generate
out = q.get_nowait() or await q.get()
^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/output_processor.py", line 51, in get
raise output
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 317, in generate_async
output = await self._generate_async(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 399, in generate_async_v1
async for request_output in stream:
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 315, in generate
out = q.get_nowait() or await q.get()
^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/output_processor.py", line 51, in get
raise output
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 317, in generate_async
output = await self._generate_async(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 399, in generate_async_v1
async for request_output in stream:
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 315, in generate
out = q.get_nowait() or await q.get()
^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/output_processor.py", line 51, in get
raise output
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 366, in output_handler
outputs = await engine_core.get_output_async()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 806, in get_output_async
raise self._format_exception(outputs) from None
vllm.v1.engine.exceptions.EngineDeadError: EngineCore encountered an issue. See stack trace (above) for the root cause.)
2025-06-17 16:26:27,989 INFO streaming_executor.py:228 -- ⚠️ Dataset dataset_14_0 execution failed
2025-06-17 16:26:28,006 DEBUG streaming_executor.py:241 -- Shut down operator hierarchy for dataset dataset_14_0 (min/max/total=0.0/0.004/0.005s)
2025-06-17 16:26:28,007 DEBUG streaming_executor.py:257 -- Shut down executor for dataset dataset_14_0 (took 0.022s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment