资讯 社区 文档
技术能力
语音技术
文字识别
人脸与人体
图像技术
语言与知识
视频技术

Sklearn 0.23.2

Sklearn

sklearn框架下,自定义作业支持将模型保存为pickle或joblib格式并发布,在发布至模型仓库时需要选择相应的模型文件。使用下面代码进行模型训练时,训练程序可以自行加载数据,训练数据选择空文件夹即可。

pickle格式模型示例代码:

# -*- coding:utf-8 -*-
""" sklearn train demo """
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
def load_data():
    """Load the iris dataset and split it into train and test parts.

    The dataset holds 150 samples; 20% (30 samples) are held out for
    testing and the remaining 120 are used for training. Each sample is a
    numpy ndarray of four features, e.g. ``[6.4, 3.1, 5.5, 1.8]``.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)``.
    """
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target
    # Fixed random_state keeps the 80/20 split reproducible between runs.
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=0)
    return x_train, x_test, y_train, y_test
def save_model(model):
    """Serialize ``model`` to ``output/clf.pickle`` using pickle.

    The ``output`` directory is created when it does not exist yet, so the
    function no longer fails with ``FileNotFoundError`` in a fresh working
    directory.

    Args:
        model: Any picklable object (here, the fitted estimator).
    """
    import os
    import pickle

    # open(..., 'wb') does not create missing parent directories.
    os.makedirs('output', exist_ok=True)
    with open('output/clf.pickle', 'wb') as f:
        pickle.dump(model, f)
def save_model_joblib(model):
    """Serialize ``model`` to ``output/clf.pkl`` using joblib.

    The standalone ``joblib`` package is preferred; the copy bundled with
    older scikit-learn releases is used only when the standalone package
    cannot be imported. The ``output`` directory is created if missing.

    Args:
        model: The object to persist (here, the fitted estimator).

    Raises:
        ImportError: If neither ``joblib`` nor ``sklearn.externals.joblib``
            is importable.
    """
    import os
    try:
        import joblib
    except ImportError:
        # Catch only ImportError: a bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and hide unrelated failures.
        from sklearn.externals import joblib
    # joblib.dump does not create missing parent directories.
    os.makedirs('output', exist_ok=True)
    joblib.dump(model, 'output/clf.pkl')
def main():
    """Train a logistic-regression classifier on iris and report metrics.

    The fitted model is saved in pickle format, then its coefficients,
    intercept, test-set score and mean squared label error are printed.
    """
    # Order of the split: train features, test features, train labels,
    # test labels.
    train_x, test_x, train_y, test_y = load_data()

    # 'l2' selects L2 regularisation.
    clf = LogisticRegression(penalty='l2')
    clf.fit(train_x, train_y)
    save_model(clf)

    print("w: %s" % (clf.coef_,))
    print("b: %s" % (clf.intercept_,))

    # score() gives the accuracy on the held-out test set.
    accuracy = clf.score(test_x, test_y)
    print("precision: %s" % accuracy)

    squared_errors = (clf.predict(test_x) - test_y) ** 2
    print("MSE: %s" % np.mean(squared_errors))
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

joblib格式模型示例代码:

# -*- coding:utf-8 -*-
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

def load_data():
    """Return an 80/20 train/test split of the iris dataset.

    The dataset holds 150 samples: 120 go to training and 30 to testing.
    Each sample is a numpy ndarray of four features, e.g.
    ``[6.4, 3.1, 5.5, 1.8]``.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)``.
    """
    iris = datasets.load_iris()
    # random_state is pinned so the split is the same on every run.
    parts = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=0)
    x_train, x_test, y_train, y_test = parts
    return x_train, x_test, y_train, y_test


def save_model(model):
    """Serialize ``model`` to ``output/clf.pickle`` using pickle.

    The ``output`` directory is created when it does not exist yet, so the
    function no longer fails with ``FileNotFoundError`` in a fresh working
    directory.

    Args:
        model: Any picklable object (here, the fitted estimator).
    """
    import os
    import pickle

    # open(..., 'wb') does not create missing parent directories.
    os.makedirs('output', exist_ok=True)
    with open('output/clf.pickle', 'wb') as f:
        pickle.dump(model, f)


def save_model_joblib(model):
    """Serialize ``model`` to ``output/clf.pkl`` using joblib.

    ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and
    removed in 0.23, so the standalone ``joblib`` package is imported
    first; the bundled copy is kept only as a fallback for old
    scikit-learn installations. The ``output`` directory is created if
    missing.

    Args:
        model: The object to persist (here, the fitted estimator).

    Raises:
        ImportError: If neither ``joblib`` nor ``sklearn.externals.joblib``
            is importable.
    """
    import os
    try:
        import joblib
    except ImportError:
        # Legacy fallback for scikit-learn releases that still bundle joblib.
        from sklearn.externals import joblib
    # joblib.dump does not create missing parent directories.
    os.makedirs('output', exist_ok=True)
    joblib.dump(model, 'output/clf.pkl')


def main():
    """Train a logistic-regression classifier on iris and report metrics.

    The fitted model is saved in joblib format, then its coefficients,
    intercept, test-set score and mean squared label error are printed.
    """
    # Order of the split: train features, test features, train labels,
    # test labels.
    train_x, test_x, train_y, test_y = load_data()

    # 'l2' selects L2 regularisation.
    clf = LogisticRegression(penalty='l2')
    clf.fit(train_x, train_y)
    save_model_joblib(clf)

    print("w: %s" % (clf.coef_,))
    print("b: %s" % (clf.intercept_,))

    # score() gives the accuracy on the held-out test set.
    accuracy = clf.score(test_x, test_y)
    print("precision: %s" % accuracy)

    squared_errors = (clf.predict(test_x) - test_y) ** 2
    print("MSE: %s" % np.mean(squared_errors))

# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
上一篇
Pytorch 1.7.1
下一篇
XGBoost 1.3.1