Skip to content

Instantly share code, notes, and snippets.

@astrofrog
Created May 11, 2020 16:41

Revisions

  1. astrofrog created this gist May 11, 2020.
    20 changes: 20 additions & 0 deletions sample_dask_chunks.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,20 @@
    def sample_dask_array_chunks(array, n_chunks):
        """
        Return a 1-d array with the values of randomly sampled chunks.

        Whole chunks of ``array`` are drawn at random *without replacement*
        (no chunk is returned twice), computed, flattened, and concatenated.

        Parameters
        ----------
        array : dask array
            Any object exposing ``numblocks``, ``chunks``, ``ndim`` and
            ``dtype``, and whose slices have a ``compute()`` method.
        n_chunks : int
            Number of chunks to sample. Values larger than the total number
            of chunks are clamped to that total, since sampling without
            replacement cannot return more chunks than exist.

        Returns
        -------
        numpy.ndarray
            1-d array holding the flattened values of the sampled chunks, in
            the order the chunks were drawn. Empty (with the dtype of
            ``array``) when no chunk is sampled.

        Raises
        ------
        ValueError
            If ``n_chunks`` is negative.
        """
        if n_chunks < 0:
            raise ValueError("n_chunks must be non-negative")

        numblocks = tuple(array.numblocks)
        n_total = int(np.prod(numblocks, dtype=np.intp))
        n_chunks = min(int(n_chunks), n_total)

        if n_chunks == 0:
            # np.hstack([]) would raise, so return an explicit empty result
            return np.empty(0, dtype=array.dtype)

        # Draw distinct flat block numbers, then convert them to per-dimension
        # block indices. Drawing each dimension independently (as with
        # np.random.randint) could select the same chunk more than once.
        flat_indices = np.random.choice(n_total, size=n_chunks, replace=False)
        indices = np.unravel_index(flat_indices, numblocks)

        # Determine the boundaries of chunks along each dimension
        chunk_indices = [np.hstack([0, np.cumsum(sizes)]) for sizes in array.chunks]

        data = []
        for ichunk in range(n_chunks):
            slices = tuple(
                slice(
                    chunk_indices[idim][indices[idim][ichunk]],
                    chunk_indices[idim][indices[idim][ichunk] + 1],
                )
                for idim in range(array.ndim)
            )
            data.append(array[slices].compute().ravel())

        return np.hstack(data)