Sklearn 0.23.2
更新时间:2023-01-18
Sklearn
在sklearn框架下,自定义作业支持将模型保存为pickle或joblib格式并发布,发布至模型仓库时需要选择相应的模型文件。使用下面的代码进行模型训练时,训练程序会自行加载数据,训练数据选择空文件夹即可。
pickle格式模型示例代码:
# -*- coding:utf-8 -*-
""" sklearn train demo """
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
def load_data():
    """Load the iris dataset and split it into training and test subsets.

    Returns:
        A 4-tuple ``(x_train, x_test, y_train, y_test)``.
    """
    # 150 samples in total: 120 for training and 30 for testing (80/20 split).
    # Each sample is an ndarray of four values, e.g. [6.4, 3.1, 5.5, 1.8].
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target

    # Hold out 20% for testing; the fixed seed keeps the split reproducible.
    splits = train_test_split(features, labels, test_size=0.2, random_state=0)
    train_x, test_x, train_y, test_y = splits
    return train_x, test_x, train_y, test_y
def save_model(model):
    """Serialize ``model`` to ``output/clf.pickle`` using pickle.

    The ``output`` directory is created if it does not exist yet, so the
    function no longer fails with FileNotFoundError on a fresh working
    directory.

    Args:
        model: Any picklable object (here, the trained estimator).
    """
    import os
    import pickle

    # Make sure the target directory exists before opening the file.
    os.makedirs('output', exist_ok=True)
    with open('output/clf.pickle', 'wb') as f:
        pickle.dump(model, f)
def save_model_joblib(model):
    """Serialize ``model`` to ``output/clf.pkl`` using joblib.

    The standalone ``joblib`` package is preferred; the copy bundled with
    older scikit-learn releases is used only when ``joblib`` itself cannot
    be imported. The ``output`` directory is created if it is missing.

    Args:
        model: The trained estimator to persist.
    """
    import os

    try:
        import joblib
    except ImportError:
        # Catch only a failed import; a bare ``except`` would also swallow
        # unrelated errors such as KeyboardInterrupt.
        from sklearn.externals import joblib

    # Make sure the target directory exists before writing the file.
    os.makedirs('output', exist_ok=True)
    joblib.dump(model, 'output/clf.pkl')
def main():
    """Train a logistic-regression classifier, save it with pickle, print metrics."""
    # Order: training features, test features, training labels, test labels.
    train_x, test_x, train_y, test_y = load_data()

    # Logistic regression with an L2 regularization penalty.
    classifier = LogisticRegression(penalty='l2')
    classifier.fit(train_x, train_y)
    save_model(classifier)

    # Learned coefficients and intercepts.
    print("w: %s" % (classifier.coef_,))
    print("b: %s" % (classifier.intercept_,))

    # score() on the held-out set (accuracy), printed under the label "precision".
    test_score = classifier.score(test_x, test_y)
    print("precision: %s" % (test_score,))

    # Mean of the squared differences between predicted and true labels.
    label_diff = classifier.predict(test_x) - test_y
    print("MSE: %s" % (np.mean(label_diff ** 2),))
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
joblib格式模型示例代码:
# -*- coding:utf-8 -*-
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
def load_data():
    """Load the iris dataset and split it into training and test subsets.

    Returns:
        A 4-tuple ``(x_train, x_test, y_train, y_test)``.
    """
    # 150 samples in total: 120 for training and 30 for testing (80/20 split).
    # Each sample is an ndarray of four values, e.g. [6.4, 3.1, 5.5, 1.8].
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target

    # Hold out 20% for testing; the fixed seed keeps the split reproducible.
    splits = train_test_split(features, labels, test_size=0.2, random_state=0)
    train_x, test_x, train_y, test_y = splits
    return train_x, test_x, train_y, test_y
def save_model(model):
    """Serialize ``model`` to ``output/clf.pickle`` using pickle.

    The ``output`` directory is created if it does not exist yet, so the
    function no longer fails with FileNotFoundError on a fresh working
    directory.

    Args:
        model: Any picklable object (here, the trained estimator).
    """
    import os
    import pickle

    # Make sure the target directory exists before opening the file.
    os.makedirs('output', exist_ok=True)
    with open('output/clf.pickle', 'wb') as f:
        pickle.dump(model, f)
def save_model_joblib(model):
    """Serialize ``model`` to ``output/clf.pkl`` using joblib.

    ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and
    removed in 0.23, so the standalone ``joblib`` package is imported first.
    The bundled copy is kept only as a fallback for older scikit-learn
    installations. The ``output`` directory is created if it is missing.

    Args:
        model: The trained estimator to persist.
    """
    import os

    try:
        import joblib
    except ImportError:
        # Legacy fallback for scikit-learn releases that still bundle joblib.
        from sklearn.externals import joblib

    # Make sure the target directory exists before writing the file.
    os.makedirs('output', exist_ok=True)
    joblib.dump(model, 'output/clf.pkl')
def main():
    """Train a logistic-regression classifier, save it with joblib, print metrics."""
    # Order: training features, test features, training labels, test labels.
    train_x, test_x, train_y, test_y = load_data()

    # Logistic regression with an L2 regularization penalty.
    classifier = LogisticRegression(penalty='l2')
    classifier.fit(train_x, train_y)
    save_model_joblib(classifier)

    # Learned coefficients and intercepts.
    print("w: %s" % (classifier.coef_,))
    print("b: %s" % (classifier.intercept_,))

    # score() on the held-out set (accuracy), printed under the label "precision".
    test_score = classifier.score(test_x, test_y)
    print("precision: %s" % (test_score,))

    # Mean of the squared differences between predicted and true labels.
    label_diff = classifier.predict(test_x) - test_y
    print("MSE: %s" % (np.mean(label_diff ** 2),))
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()