开放能力
开发平台
行业应用
生态合作
开发与教学
资讯 社区 控制台
技术能力
语音技术
图像技术
文字识别
人脸与人体识别
视频技术
AR与VR
自然语言处理
知识图谱
数据智能
场景方案
部署方案
行业应用
智能教育
智能医疗
智能零售
智能工业
企业服务
智能政务
智能农业
信息服务
智能园区
智能硬件
BML 全功能AI开发平台

    Sklearn 0.23.2

    Sklearn

    sklearn框架下,自定义作业支持将模型发布保存为pickle或joblib格式,并且在发布至模型仓库时需要选择相应的模型文件。使用下面代码进行模型训练时,训练程序可以自行加载数据,训练数据选择空文件夹即可。

    pickle格式模型示例代码:

    # -*- coding:utf-8 -*-
    """ sklearn train demo """
    import numpy as np
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    def load_data():
        """Load the iris dataset and split it into train and test subsets.

        The dataset has 150 samples; with ``test_size=0.2`` that gives 120
        training samples and 30 test samples. Each sample is a feature row
        such as ``[6.4, 3.1, 5.5, 1.8]``.

        Returns:
            A tuple ``(x_train, x_test, y_train, y_test)``.
        """
        iris = datasets.load_iris()
        features, labels = iris.data, iris.target
        # 80/20 train/test split; the fixed seed keeps the split reproducible.
        train_x, test_x, train_y, test_y = train_test_split(
            features, labels, test_size=0.2, random_state=0)
        return train_x, test_x, train_y, test_y
    def save_model(model):
        """Serialize ``model`` with pickle into ``output/clf.pickle``.

        Args:
            model: The object to persist (here, the fitted estimator).
        """
        import pickle
        target_path = 'output/clf.pickle'
        # Binary mode is required because pickle emits bytes.
        with open(target_path, 'wb') as sink:
            pickle.dump(model, sink)
    def save_model_joblib(model):
        """Persist ``model`` in joblib format to ``output/clf.pkl``.

        Args:
            model: The object to persist (here, the fitted estimator).
        """
        # Prefer the standalone joblib package and fall back to the copy that
        # older scikit-learn releases shipped under sklearn.externals.
        # Only ImportError is caught: a bare ``except`` would also swallow
        # unrelated failures such as KeyboardInterrupt or SystemExit.
        try:
            import joblib
        except ImportError:
            from sklearn.externals import joblib
        joblib.dump(model, 'output/clf.pkl')
    def main():
        """Train a logistic-regression model on iris, pickle it, and print metrics."""
        # Train features, test features, train labels, test labels.
        features_train, features_test, labels_train, labels_test = load_data()

        # 'l2' selects the L2 regularization term.
        classifier = LogisticRegression(penalty='l2')
        classifier.fit(features_train, labels_train)
        save_model(classifier)

        print("w: %s" % classifier.coef_)
        print("b: %s" % classifier.intercept_)

        # Accuracy on the held-out test split.
        test_score = classifier.score(features_test, labels_test)
        print("precision: %s" % test_score)

        # Mean squared difference between predicted and true class labels.
        residuals = classifier.predict(features_test) - labels_test
        print("MSE: %s" % np.mean(residuals ** 2))
    # Run the training demo only when this file is executed as a script.
    if __name__ == "__main__":
        main()

    joblib格式示例代码:

    # -*- coding:utf-8 -*-
    import numpy as np
    
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    
    def load_data():
        """Return an 80/20 train/test split of the iris dataset.

        Of the 150 samples, 120 go to training and 30 to testing. Each
        sample is a feature row such as ``[6.4, 3.1, 5.5, 1.8]``.

        Returns:
            A tuple ``(x_train, x_test, y_train, y_test)``.
        """
        dataset = datasets.load_iris()
        samples = dataset.data
        targets = dataset.target
        # random_state=0 makes the shuffle, and therefore the split, repeatable.
        split = train_test_split(samples, targets, test_size=0.2, random_state=0)
        x_tr, x_te, y_tr, y_te = split
        return x_tr, x_te, y_tr, y_te
    
    
    def save_model(model):
        """Write ``model`` to ``output/clf.pickle`` in pickle format.

        Args:
            model: The object to persist (here, the fitted estimator).
        """
        import pickle
        # Opened in binary write mode since pickle produces a byte stream.
        with open('output/clf.pickle', 'wb') as out_file:
            pickle.dump(model, out_file)
    
    
    def save_model_joblib(model):
        """Persist ``model`` in joblib format to ``output/clf.pkl``.

        Args:
            model: The object to persist (here, the fitted estimator).
        """
        # sklearn.externals.joblib was deprecated in scikit-learn 0.21 and
        # removed in 0.23, so importing it unconditionally raises ImportError
        # on current releases. Use the standalone joblib package first and
        # keep the vendored module only as a fallback for old installations.
        try:
            import joblib
        except ImportError:
            from sklearn.externals import joblib
        joblib.dump(model, 'output/clf.pkl')
    
    
    def main():
        """Train a logistic-regression model on iris, save it via joblib, and print metrics."""
        # Train features, test features, train labels, test labels.
        train_x, test_x, train_y, test_y = load_data()

        # 'l2' selects the L2 regularization term.
        estimator = LogisticRegression(penalty='l2')
        estimator.fit(train_x, train_y)

        save_model_joblib(estimator)

        print("w: %s" % estimator.coef_)
        print("b: %s" % estimator.intercept_)

        # Accuracy on the held-out test split.
        holdout_score = estimator.score(test_x, test_y)
        print("precision: %s" % holdout_score)

        # Mean squared difference between predicted and true class labels.
        label_diff = estimator.predict(test_x) - test_y
        print("MSE: %s" % np.mean(label_diff ** 2))
    
    # Entry point: run the training demo when executed directly as a script.
    if __name__ == "__main__":
        main()
    上一篇
    Pytorch 1.7.1
    下一篇
    XGBoost 1.3.1