Multi-class classification with GBDT leaf encoding

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import OneHotEncoder

# Experiment: compare a plain multi-class GBDT against "GBDT + LR", where each
# sample is re-encoded as the one-hot vector of the leaves it reaches in the
# boosted trees and a logistic regression is trained on that encoding.

# One seed for data generation, the split, and the booster, so repeated runs
# print the same shapes and scores.
SEED = 9453

gdbt = GradientBoostingClassifier(random_state=SEED)
# One-vs-rest is expressed with OneVsRestClassifier rather than the
# `multi_class='ovr'` argument, which newer scikit-learn releases deprecate.
lr = OneVsRestClassifier(LogisticRegression(solver='lbfgs', max_iter=1000))
x, y = make_classification(n_samples=1000, n_classes=3, n_informative=3, random_state=SEED)
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.2, random_state=SEED)
gdbt.fit(train_x, train_y)
# handle_unknown='ignore': a leaf id that was not present when the encoder was
# fitted is encoded as zeros instead of making transform() raise.
onehot = OneHotEncoder(categories='auto', handle_unknown='ignore')

# Leaf indices are computed once per split and reused for printing and encoding.
train_leaves = gdbt.apply(train_x)
test_leaves = gdbt.apply(test_x)

print(f"gdbt.apply(train_x).shape: {train_leaves.shape}\n-")
# Flatten everything after the sample axis so each row is one sample's full
# list of leaf ids, which the encoder treats as categorical columns.
enc_train_x = onehot.fit_transform(train_leaves.reshape(train_x.shape[0], -1))
enc_test_x = onehot.transform(test_leaves.reshape(test_x.shape[0], -1))

print(f"train_x: {train_x.shape}, enc_train_x: {enc_train_x.shape},\n"
      f"test_x: {test_x.shape}, enc_test_x: {enc_test_x.shape}\n-")

# Fit the linear model on the leaf encoding and score both models on the same
# held-out split.
lr.fit(enc_train_x, train_y)
lr_score = lr.score(enc_test_x, test_y)
gdbt_score = gdbt.score(test_x, test_y)

print(f"Score: gdbt={gdbt_score}, gdbt_lr={lr_score}")

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# GBDT leaf-encoding experiment: fit a gradient-boosted classifier on a
# synthetic 3-class problem, one-hot encode the leaf each sample lands in, and
# train a one-vs-rest logistic regression on that encoding for comparison.

# Shared seed so the generated data, the split, and the booster are
# reproducible from run to run.
SEED = 9453

x, y = make_classification(n_samples=1000, n_classes=3, n_informative=3, random_state=SEED)
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.2, random_state=SEED)

gdbt = GradientBoostingClassifier(random_state=SEED)
gdbt.fit(train_x, train_y)

# Evaluate the trees once per split; the result feeds both the shape printout
# and the encoder.
train_leaves = gdbt.apply(train_x)
test_leaves = gdbt.apply(test_x)
print(f"gdbt.apply(train_x).shape: {train_leaves.shape}\n-")

# Unknown leaf ids at transform time are encoded as zeros rather than raising.
onehot = OneHotEncoder(categories='auto', handle_unknown='ignore')
# reshape(n_samples, -1) collapses all non-sample axes into feature columns.
enc_train_x = onehot.fit_transform(train_leaves.reshape(train_x.shape[0], -1))
enc_test_x = onehot.transform(test_leaves.reshape(test_x.shape[0], -1))

print(f"train_x: {train_x.shape}, enc_train_x: {enc_train_x.shape},\n"
      f"test_x: {test_x.shape}, enc_test_x: {enc_test_x.shape}\n-")

# OneVsRestClassifier replaces the `multi_class='ovr'` argument, which newer
# scikit-learn releases deprecate.
lr = OneVsRestClassifier(LogisticRegression(solver='lbfgs', max_iter=1000))
lr.fit(enc_train_x, train_y)

# Both scores are accuracies on the same held-out split.
lr_score = lr.score(enc_test_x, test_y)
gdbt_score = gdbt.score(test_x, test_y)

print(f"Score: gdbt={gdbt_score}, gdbt_lr={lr_score}")

Multi-class classification with GBDT leaf encoding

回答列表