我下载了最新版本的 H2O(3.18.0.1),但 XGBoost 仍然运行失败。我不确定应该把这个问题提交到 JIRA,还是发布在这里。

# Minimal reproduction: train an H2O XGBoost model on the public HIGGS
# imbalanced 100k sample, using column 0 as a categorical response.
import h2o
from h2o.estimators import H2OXGBoostEstimator

# Connect to (or start) an H2O cluster before any other h2o call.
h2o.init()

# Check the XGBoost availability flag up front instead of ignoring it, so an
# unsupported backend fails here with a clear message rather than later in
# the middle of model training.
is_xgboost_available = H2OXGBoostEstimator.available()
if not is_xgboost_available:
    raise RuntimeError("H2OXGBoostEstimator.available() returned False; "
                       "XGBoost cannot be used with this H2O cluster")

train_path = 'https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/higgs_train_imbalance_100k.csv'
test_path = 'https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/higgs_test_imbalance_100k.csv'

df_train = h2o.import_file(train_path)
df_test = h2o.import_file(test_path)

# Transform first feature into categorical feature
# (column 0 is the response passed as y below, so the same conversion is
# applied to both frames).
df_train[0] = df_train[0].asfactor()
df_test[0] = df_test[0].asfactor()

# Estimator hyper-parameters; only the number of trees is overridden.
param = {
      "ntrees" : 500
}

model = H2OXGBoostEstimator(**param)
# Predictors are every column except column 0, which is the response.
model.train(x = list(range(1, df_train.shape[1])), y = 0, training_frame = df_train)

随机森林和 GBM 都可以正常运行,没有任何问题,但 XGBoost 一直失败。

我在 Ubuntu 16.04 上运行。Java 版本(`java -version` 的输出):java version "1.8.0_161"; Java(TM) SE Runtime Environment (build 1.8.0_161-b12); Java HotSpot(TM) 64-Bit Server VM (build 25.161-b12, mixed mode)。Python 环境:Anaconda Python 3.6。

我重新安装了 Anaconda 和 JRE,但仍然遇到同样的问题。

它一直给我以下错误:

xgboost Model Build progress: |████████████████████████████████████████
    ---------------------------------------------------------------------------
    ConnectionResetError                      Traceback (most recent call last)
    ~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
        600                                                   body=body, headers=headers,
    --> 601                                                   chunked=chunked)
        602 

    ~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
        386                     # otherwise it looks like a programming error was the cause.
    --> 387                     six.raise_from(e, None)
        388         except (SocketTimeout, BaseSSLError, SocketError) as e:

    ~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

    ~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
        382                 try:
    --> 383                     httplib_response = conn.getresponse()
        384                 except Exception as e:

    ~/anaconda3/lib/python3.6/http/client.py in getresponse(self)
       1330             try:
    -> 1331                 response.begin()
       1332             except ConnectionError:

    ~/anaconda3/lib/python3.6/http/client.py in begin(self)
        296         while True:
    --> 297             version, status, reason = self._read_status()
        298             if status != CONTINUE:

    ~/anaconda3/lib/python3.6/http/client.py in _read_status(self)
        257     def _read_status(self):
    --> 258         line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
        259         if len(line) > _MAXLINE:

    ~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
        585             try:
    --> 586                 return self._sock.recv_into(b)
        587             except timeout:

    ConnectionResetError: [Errno 104] Connection reset by peer

    During handling of the above exception, another exception occurred:

    ProtocolError                             Traceback (most recent call last)
    ~/anaconda3/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
        439                     retries=self.max_retries,
    --> 440                     timeout=timeout
        441                 )

    ~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
        638             retries = retries.increment(method, url, error=e, _pool=self,
    --> 639                                         _stacktrace=sys.exc_info()[2])
        640             retries.sleep()

    ~/anaconda3/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
        356             if read is False or not self._is_method_retryable(method):
    --> 357                 raise six.reraise(type(error), error, _stacktrace)
        358             elif read is not None:

    ~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
        684         if value.__traceback__ is not tb:
    --> 685             raise value.with_traceback(tb)
        686         raise value

    ~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
        600                                                   body=body, headers=headers,
    --> 601                                                   chunked=chunked)
        602 

    ~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
        386                     # otherwise it looks like a programming error was the cause.
    --> 387                     six.raise_from(e, None)
        388         except (SocketTimeout, BaseSSLError, SocketError) as e:

    ~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

    ~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
        382                 try:
    --> 383                     httplib_response = conn.getresponse()
        384                 except Exception as e:

    ~/anaconda3/lib/python3.6/http/client.py in getresponse(self)
       1330             try:
    -> 1331                 response.begin()
       1332             except ConnectionError:

    ~/anaconda3/lib/python3.6/http/client.py in begin(self)
        296         while True:
    --> 297             version, status, reason = self._read_status()
        298             if status != CONTINUE:

    ~/anaconda3/lib/python3.6/http/client.py in _read_status(self)
        257     def _read_status(self):
    --> 258         line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
        259         if len(line) > _MAXLINE:

    ~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
        585             try:
    --> 586                 return self._sock.recv_into(b)
        587             except timeout:

    ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

    During handling of the above exception, another exception occurred:

    ConnectionError                           Traceback (most recent call last)
    ~/anaconda3/lib/python3.6/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to)
        399                                     headers=headers, timeout=self._timeout, stream=stream,
    --> 400                                     auth=self._auth, verify=self._verify_ssl_cert, proxies=self._proxies)
        401             self._log_end_transaction(start_time, resp)

    ~/anaconda3/lib/python3.6/site-packages/requests/api.py in request(method, url, **kwargs)
         57     with sessions.Session() as session:
    ---> 58         return session.request(method=method, url=url, **kwargs)
         59 

    ~/anaconda3/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
        507         send_kwargs.update(settings)
    --> 508         resp = self.send(prep, **send_kwargs)
        509 

    ~/anaconda3/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
        617         # Send the request
    --> 618         r = adapter.send(request, **kwargs)
        619 

    ~/anaconda3/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
        489         except (ProtocolError, socket.error) as err:
    --> 490             raise ConnectionError(err, request=request)
        491 

    ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

    During handling of the above exception, another exception occurred:

    H2OConnectionError                        Traceback (most recent call last)
    <ipython-input-22-37b26d4dfbfd> in <module>()
          1 start = time.time()
    ----> 2 model.train(x = list(range(1, df_train.shape[1])), y = 0, training_frame = df_train)
          3 end = time.time()
          4 print(end - start)

    ~/anaconda3/lib/python3.6/site-packages/h2o/estimators/estimator_base.py in train(self, x, y, training_frame, offset_column, fold_column, weights_column, validation_frame, max_runtime_secs, ignored_columns, model_id, verbose)
        229             return
        230 
    --> 231         model.poll(verbose_model_scoring_history=verbose)
        232         model_json = h2o.api("GET /%d/Models/%s" % (rest_ver, model.dest_key))["models"][0]
        233         self._resolve_model(model.dest_key, model_json)

    ~/anaconda3/lib/python3.6/site-packages/h2o/job.py in poll(self, verbose_model_scoring_history)
         56                 pb.execute(self._refresh_job_status, print_verbose_info=lambda x: self._print_verbose_info() if int(x * 10) % 5 == 0  else " ")
         57             else:
    ---> 58                 pb.execute(self._refresh_job_status)
         59         except StopIteration as e:
         60             if str(e) == "cancelled":

    ~/anaconda3/lib/python3.6/site-packages/h2o/utils/progressbar.py in execute(self, progress_fn, print_verbose_info)
        167                 # Query the progress level, but only if it's time already
        168                 if self._next_poll_time <= now:
    --> 169                     res = progress_fn()  # may raise StopIteration
        170                     assert_is_type(res, (numeric, numeric), numeric)
        171                     if not isinstance(res, tuple):

    ~/anaconda3/lib/python3.6/site-packages/h2o/job.py in _refresh_job_status(self)
         91     def _refresh_job_status(self):
         92         if self._poll_count <= 0: raise StopIteration("")
    ---> 93         jobs = h2o.api("GET /3/Jobs/%s" % self.job_key)
         94         self.job = jobs["jobs"][0] if "jobs" in jobs else jobs["job"][0]
         95         self.status = self.job["status"]

    ~/anaconda3/lib/python3.6/site-packages/h2o/h2o.py in api(endpoint, data, json, filename, save_to)
        101     # type checks are performed in H2OConnection class
        102     _check_connection()
    --> 103     return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
        104 
        105 

    ~/anaconda3/lib/python3.6/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to)
        408             else:
        409                 self._log_end_exception(e)
    --> 410                 raise H2OConnectionError("Unexpected HTTP error: %s" % e)
        411         except requests.exceptions.Timeout as e:
        412             self._log_end_exception(e)

    H2OConnectionError: Unexpected HTTP error: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))