Skip to content

Commit a20284d

Browse files
committed
Update Python scripts to use the LightGBM v4 API
1 parent fca59ef commit a20284d

File tree

8 files changed

+2491
-173
lines changed

8 files changed

+2491
-173
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
/tmp/
99
*.lock
1010
/vendor/lib_lightgbm.*
11+
/.venv/

test/support/booster.py

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,57 @@
1+
# Run this script to regenerate the test/support/model.txt and test/support/model_categorical.txt files
2+
13
import lightgbm as lgb
24
import pandas as pd
35

4-
df = pd.read_csv('test/support/data.csv')
6+
params = {'verbosity': -1}
7+
8+
def booster():
9+
df = pd.read_csv('test/support/data.csv')
10+
11+
X = df.drop(columns=['y'])
12+
y = df['y']
13+
14+
X_train = X[:300]
15+
y_train = y[:300]
16+
X_test = X[300:]
17+
y_test = y[300:]
18+
19+
train_data = lgb.Dataset(X_train, label=y_train)
20+
bst = lgb.train(params, train_data)
21+
bst.save_model('test/support/model.txt')
22+
23+
bst = lgb.Booster(model_file='test/support/model.txt')
24+
print('x', X_train[:2].to_numpy().tolist())
25+
print('predict', bst.predict(X_train)[:2].tolist())
26+
print('feature_importance', bst.feature_importance().tolist())
27+
print('feature_name', bst.feature_name())
28+
29+
def booster_categorical():
30+
df = pd.read_csv('test/support/data.csv', dtype={'x3': 'category'})
31+
32+
X = df.drop(columns=['y'])
33+
y = df['y']
34+
35+
X_train = X[:300]
36+
y_train = y[:300]
37+
X_test = X[300:]
38+
y_test = y[300:]
39+
40+
train_data = lgb.Dataset(X_train, label=y_train, categorical_feature='auto')
41+
bst = lgb.train(params, train_data)
42+
bst.save_model('test/support/model_categorical.txt')
43+
44+
bst = lgb.Booster(model_file='test/support/model_categorical.txt')
45+
print('x', X_train[:2].to_numpy().tolist())
46+
print('predict', bst.predict(X_train)[:2].tolist())
47+
print('feature_importance', bst.feature_importance().tolist())
48+
print('feature_name', bst.feature_name())
549

6-
X = df.drop(columns=['y'])
7-
y = df['y']
850

9-
X_train = X[:300]
10-
y_train = y[:300]
11-
X_test = X[300:]
12-
y_test = y[300:]
51+
print('booster -> model.txt')
52+
booster()
1353

14-
train_data = lgb.Dataset(X_train, label=y_train)
15-
bst = lgb.train({}, train_data)
16-
bst.save_model('test/support/model.txt')
54+
print('')
1755

18-
bst = lgb.Booster(model_file='test/support/model.txt')
19-
print('x', X_train[:2].to_numpy().tolist())
20-
print('predict', bst.predict(X_train)[:2].tolist())
21-
print('feature_importance', bst.feature_importance().tolist())
22-
print('feature_name', bst.feature_name())
56+
print('categorical')
57+
booster_categorical()

test/support/classifier.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,16 @@
2525
print()
2626
print('test_multiclass')
2727

28-
model = lgb.LGBMClassifier()
28+
model = lgb.LGBMClassifier(verbosity=-1)
2929
model.fit(X_train, ym_train)
3030
print(model.predict(X_test)[0:100].tolist())
3131
print(model.predict_proba(X_test)[0].tolist())
3232
print(model.feature_importances_.tolist())
3333

3434
print()
3535
print('test_early_stopping')
36-
model.fit(X_train, ym_train, eval_set=[(X_test, ym_test)], early_stopping_rounds=5, verbose=True)
36+
model = lgb.LGBMClassifier(early_stopping_round=5, verbosity=1)
37+
model.fit(X_train, ym_train, eval_set=[(X_test, ym_test)])
3738

3839
print()
3940
print('test_missing_numeric')

test/support/cv.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,42 +16,45 @@
1616
regression_params = {'objective': 'regression', 'verbosity': -1}
1717
regression_train = lgb.Dataset(X_train, label=y_train)
1818
eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False)
19-
print(eval_hist['l2-mean'][0])
20-
print(eval_hist['l2-mean'][-1])
21-
print(eval_hist['l2-stdv'][0])
22-
print(eval_hist['l2-stdv'][-1])
19+
print(eval_hist['valid l2-mean'][0])
20+
print(eval_hist['valid l2-mean'][-1])
21+
print(eval_hist['valid l2-stdv'][0])
22+
print(eval_hist['valid l2-stdv'][-1])
2323

2424
print()
2525
print('test_binary')
2626

2727
binary_params = {'objective': 'binary', 'verbosity': -1}
2828
binary_train = lgb.Dataset(X_train, label=y_train.replace(2, 1))
2929
eval_hist = lgb.cv(binary_params, binary_train, shuffle=False, stratified=False)
30-
print(eval_hist['binary_logloss-mean'][0])
31-
print(eval_hist['binary_logloss-mean'][-1])
32-
print(eval_hist['binary_logloss-stdv'][0])
33-
print(eval_hist['binary_logloss-stdv'][-1])
30+
print(eval_hist['valid binary_logloss-mean'][0])
31+
print(eval_hist['valid binary_logloss-mean'][-1])
32+
print(eval_hist['valid binary_logloss-stdv'][0])
33+
print(eval_hist['valid binary_logloss-stdv'][-1])
3434

3535
print()
3636
print('test_multiclass')
3737

3838
multiclass_params = {'objective': 'multiclass', 'num_class': 3, 'verbosity': -1}
3939
multiclass_train = lgb.Dataset(X_train, label=y_train)
4040
eval_hist = lgb.cv(multiclass_params, multiclass_train, shuffle=False, stratified=False)
41-
print(eval_hist['multi_logloss-mean'][0])
42-
print(eval_hist['multi_logloss-mean'][-1])
43-
print(eval_hist['multi_logloss-stdv'][0])
44-
print(eval_hist['multi_logloss-stdv'][-1])
41+
print(eval_hist['valid multi_logloss-mean'][0])
42+
print(eval_hist['valid multi_logloss-mean'][-1])
43+
print(eval_hist['valid multi_logloss-stdv'][0])
44+
print(eval_hist['valid multi_logloss-stdv'][-1])
4545

4646
print('')
4747
print('test_early_stopping_early')
4848

49-
eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, verbose_eval=True, early_stopping_rounds=5)
50-
print(len(eval_hist['l2-mean']))
49+
regression_params = {'objective': 'regression', 'verbosity': 1, 'early_stopping_round': 5}
50+
eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False)
51+
print(len(eval_hist['valid l2-mean']))
5152

5253
print('')
5354
print('test_early_stopping_not_early')
5455

55-
eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, verbose_eval=True, early_stopping_rounds=500)
56-
print(len(eval_hist['l2-mean']))
56+
regression_params = {'objective': 'regression', 'verbosity': 1, 'early_stopping_round': 500}
57+
eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False)
58+
print(len(eval_hist['valid l2-mean']))
59+
5760

0 commit comments

Comments
 (0)