logo
Loading...

出現cannot reindex from a duplicate axis問題 - Cupoy

程式在跑到df_temp = pd.DataFrame()for c in object_featu...

出現cannot reindex from a duplicate axis問題

2020/03/31 04:36 下午
機器學習共學討論版
Ava Chen
觀看數:13
回答數:5
收藏數:1

程式在執行到以下這段程式碼時:

df_temp = pd.DataFrame()

for c in object_features:

    df_temp[c] = LabelEncoder().fit_transform(df[c])

df_temp['Cabin_Hash'] = df['Cabin'].map(lambda x:hash(x) % 10)

train_X = df_temp[:train_num]

estimator = LogisticRegression()

print(cross_val_score(estimator, train_X, train_Y, cv=5).mean())

df_temp.head()

出現 ValueError: cannot reindex from a duplicate axis

請問為什麼這一行指派欄位時會觸發 reindex?造成這個錯誤的原因是什麼?

======================================

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-34-55c94d744486> in <module>
     2 for c in object_features:
     3     df_temp[c] = LabelEncoder().fit_transform(df[c])
----> 4 df_temp['Cabin_Hash'] = df['Cabin'].map(lambda x:hash(x) % 10)
     5 train_X = df_temp[:train_num]
     6 estimator = LogisticRegression()

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
  3470         else:
  3471             # set column
-> 3472             self._set_item(key, value)
  3473
  3474     def _setitem_slice(self, key, value):

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _set_item(self, key, value)
  3547
  3548         self._ensure_valid_index(value)
-> 3549         value = self._sanitize_column(key, value)
  3550         NDFrame._set_item(self, key, value)
  3551

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _sanitize_column(self, key, value, broadcast)
  3709
  3710         if isinstance(value, Series):
-> 3711             value = reindexer(value)
  3712
  3713         elif isinstance(value, DataFrame):

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindexer(value)
  3700                     # duplicate axis
  3701                     if not value.index.is_unique:
-> 3702                         raise e
  3703
  3704                     # other

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindexer(value)
  3695                 # GH 4107
  3696                 try:
-> 3697                     value = value.reindex(self.index)._values
  3698                 except Exception as e:
  3699

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/series.py in reindex(self, index, **kwargs)
  4216     @Appender(generic.NDFrame.reindex.__doc__)
  4217     def reindex(self, index=None, **kwargs):
-> 4218         return super().reindex(index=index, **kwargs)
  4219
  4220     def drop(

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
  4512         # perform the reindex on the axes
  4513         return self._reindex_axes(
-> 4514             axes, level, limit, tolerance, method, fill_value, copy
  4515         ).__finalize__(self)
  4516

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
  4533                 fill_value=fill_value,
  4534                 copy=copy,
-> 4535                 allow_dups=False,
  4536             )
  4537

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
  4575                 fill_value=fill_value,
  4576                 allow_dups=allow_dups,
-> 4577                 copy=copy,
  4578             )
  4579

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
  1249         # some axes don't allow reindexing with dups
  1250         if not allow_dups:
-> 1251             self.axes[axis]._can_reindex(indexer)
  1252
  1253         if axis >= self.ndim:

/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in _can_reindex(self, indexer)
  3360         # trying to reindex on an axis with duplicates
  3361         if not self.is_unique and len(indexer):
-> 3362             raise ValueError("cannot reindex from a duplicate axis")
  3363
  3364     def reindex(self, target, method=None, level=None, limit=None, tolerance=None):

ValueError: cannot reindex from a duplicate axis

========================================

另外,我有嘗試改用經過 LabelEncoder 編碼後的 df_temp 欄位來做 hash 轉換,但這樣的結果會不會和原本直接對字串做 hash 有差別呢?