This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyarrow.dataset as ds | |
def count_parquet_rows(dataset_path: str) -> int: | |
""" | |
Count the number of rows in a parquet file without reading the data into memory. | |
https://stackoverflow.com/a/79118602/4212158 | |
""" | |
dataset = ds.dataset(dataset_path, format="parquet") | |
row_count = sum(row_group.num_rows for fragment in dataset.get_fragments() for row_group in fragment.row_groups) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Traceback (most recent call last): | |
File "/tmp/ray/session_2025-08-08_19-13-17_305038_2243/runtime_resources/working_dir_files/s3_ray-release-automation-results_working_dirs_text_embeddings_benchmark_fixed_size_preemptible_gswrofihok__anyscale_pkg_aa4c368f375d6f6f25845bef969f1c00/dataset/text_embeddings_benchmark.py", line 257, in <module> | |
benchmark.run_fn("text-embeddings-benchmark", main, args) | |
File "/tmp/ray/session_2025-08-08_19-13-17_305038_2243/runtime_resources/working_dir_files/s3_ray-release-automation-results_working_dirs_text_embeddings_benchmark_fixed_size_preemptible_gswrofihok__anyscale_pkg_aa4c368f375d6f6f25845bef969f1c00/dataset/benchmark.py", line 154, in run_fn | |
fn_output = fn(*fn_args, **fn_kwargs) | |
^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/tmp/ray/session_2025-08-08_19-13-17_305038_2243/runtime_resources/working_dir_files/s3_ray-release-automation-results_working_dirs_text_embeddings_benchmark_fixed_size_preemptible_gswrofihok__anyscale_pkg_aa4c368f375d6f6f25845bef969f1c0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script | |
# requires-python = ">=3.12" | |
# dependencies = [ | |
# "pyarrow", | |
# ] | |
# /// | |
"""Convert .arrow shards to Parquet without loading entire dataset into memory. | |
- Discovers all .arrow files under a given source directory |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import threading | |
import time | |
import ray | |
@ray.remote(num_cpus=0) | |
class GPUHoursTracker: | |
"""Actor that integrates GPU capacity over time across a Ray cluster. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ray | |
from ray.data.llm import vLLMEngineProcessorConfig, build_llm_processor | |
config = vLLMEngineProcessorConfig(model_source="unsloth/Llama-3.2-1B-Instruct") | |
processor = build_llm_processor( | |
config, | |
preprocess=lambda row: { | |
"messages": [ | |
{"role": "system", "content": "You are a bot that responds with haikus."}, | |
{"role": "user", "content": row["item"]}, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
from mpl_toolkits.mplot3d import Axes3D | |
# Create a 3D plot showing the vectors and interpolations | |
fig = plt.figure(figsize=(10, 8)) | |
ax = fig.add_subplot(111, projection='3d') | |
# Plot original vectors | |
ax.quiver(0, 0, 0, vecs[0][0], vecs[0][1], vecs[0][2], | |
color='black', arrow_length_ratio=0.1, label='v0 [1,0,0]') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<deleted 50,000 lines of logs repeating the same thing> | |
[36m(MapWorker(MapBatches(vLLMEngineStageUDF)) pid=3858, ip=10.0.54.100)[0m File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/output_processor.py", line 51, in get | |
[36m(MapWorker(MapBatches(vLLMEngineStageUDF)) pid=3858, ip=10.0.54.100)[0m raise output | |
[36m(MapWorker(MapBatches(vLLMEngineStageUDF)) pid=3858, ip=10.0.54.100)[0m File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 317, in generate_async | |
[36m(MapWorker(MapBatches(vLLMEngineStageUDF)) pid=3858, ip=10.0.54.100)[0m output = await self._generate_async(request) | |
[36m(MapWorker(MapBatches(vLLMEngineStageUDF)) pid=3858, ip=10.0.54.100)[0m ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
[36m(MapWorker(MapBatches(vLLMEngineStageUDF)) pid=3858, ip=10.0.54.100)[0m File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/llm/_internal/batch/stages/vllm_engine_stage.py", line 399, in generate_as |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2025-06-17 16:26:19,406 DEBUG streaming_executor.py:546 -- 9: - MapBatches(vLLMEngineStageUDF): Tasks: 24; Actors: 3; Queued blocks: 13; Resources: 0.0 CPU, 3.0 GPU, 768.0MB object store; [8/24 objects local], Blocks Outputted: 0/None | |
2025-06-17 16:26:19,406 DEBUG streaming_executor.py:546 -- 10: - MapBatches(DetokenizeUDF): Tasks: 0; Actors: 1; Queued blocks: 0; Resources: 1.0 CPU, 0.0B object store; [all objects local], Blocks Outputted: 0/None | |
2025-06-17 16:26:19,406 DEBUG streaming_executor.py:546 -- 11: - Map(_postprocess)->Filter(NoneType)->Write: Tasks: 0; Actors: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store, Blocks Outputted: 0/None | |
2025-06-17 16:26:27,977 ERROR streaming_executor_state.py:519 -- An exception was raised from a task of operator "MapBatches(vLLMEngineStageUDF)". Dataset execution will now abort. To ignore this exception and continue, set DataContext.max_errored_blocks. | |
Traceback (most recent call last): | |
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/data/_inter |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "CalledProcessError", | |
"message": "Command 'b'\ | |
# Production batch job -- note that this is a bash cell\ | |
! anyscale job submit --name=train-xboost-breast-cancer-model \\\\\ | |
--containerfile=\"${WORKING_DIR}/containerfile\" \\\\\ | |
--working-dir=\"${WORKING_DIR}\" \\\\\ | |
--exclude=\"\" \\\\\ | |
--wait \\\\\ | |
--max-retries=0 \\\\\ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--------------------------------------------------------------------------- | |
SystemException Traceback (most recent call last) | |
SystemException: | |
The above exception was the direct cause of the following exception: | |
RayTaskError(TypeError) Traceback (most recent call last) | |
/home/ray/default/e2e-audio/e2e_audio/curation.ipynb Cell 16 line 1 | |
----> 1 print(ds.take(1)) |
NewerOlder