在 dask 数据帧中,是否可以按不同于 groupby 键的列,对分组后(通过 groupby API 获得)的数据帧进行排序?

def func1(grouped_df):
    grouped_df = grouped_df.sort_values(by=["col2"])
    ….
    …
    return …

ddf.groupby("COL1").apply(func1).compute()

在我的情况下失败,出现以下错误:

Traceback (most recent call last):

  File "<stdin>", line 4, in <module>

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/dask/base.py", line 135, in compute

    (result,) = compute(self, traverse=False, **kwargs)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/dask/base.py", line 333, in compute

    results = get(dsk, keys, **kwargs)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/distributed/client.py", line 1999, in get

    results = self.gather(packed, asynchronous=asynchronous)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/distributed/client.py", line 1437, in gather

    asynchronous=asynchronous)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/distributed/client.py", line 592, in sync

    return sync(self.loop, func, *args, **kwargs)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/distributed/utils.py", line 254, in sync

    six.reraise(*error[0])

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/six.py", line 693, in reraise

    raise value

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/distributed/utils.py", line 238, in f

    result[0] = yield make_coro()

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/tornado/gen.py", line 1055, in run

    value = future.result()

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/tornado/concurrent.py", line 238, in result

    raise_exc_info(self._exc_info)

  File "<string>", line 4, in raise_exc_info

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/tornado/gen.py", line 1063, in run

    yielded = self.gen.throw(*exc_info)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/distributed/client.py", line 1315, in _gather

    traceback)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/six.py", line 692, in reraise

    raise value.with_traceback(tb)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/dask/dataframe/core.py", line 3236, in apply_and_enforce

    df = func(*args, **kwargs)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/dask/dataframe/groupby.py", line 147, in _groupby_slice_apply

    return g.apply(func)

  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/pandas/core/groupby.py", line 805, in apply

    return self._python_apply_general(f)
  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/pandas/core/groupby.py", line 809, in _python_apply_general
    self.axis)
  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/pandas/core/groupby.py", line 1969, in apply
    res = f(group)
  File "", line 5, in main_func3
  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/pandas/core/series.py", line 623, in __getitem__
    result = self.index.get_value(self, key)
  File "/user/agent/miniconda3/envs/dask-distributed/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 2560, in get_value
    tz=getattr(series.dtype, 'tz', None))
  File "pandas/_libs/index.pyx", line 83, in pandas._libs.index.IndexEngine.get_value
  File "pandas/_libs/index.pyx", line 91, in pandas._libs.index.IndexEngine.get_value
  File "pandas/_libs/index.pyx", line 139, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 811, in pandas._libs.hashtable.Int64HashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 817, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0