3.2 决策树可视化

1147-柳同学

发表文章数:593

首页 » 算法 » 正文

引言

下面几段代码的主要区别在于 show 函数:它决定把决策树输出为图片还是 PDF。

1.生成图片

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import tree
import pydotplus
import matplotlib as mpl

# 加载数据
def loaddata():
    """Build the 15-sample toy data set and label-encode it.

    Every categorical string is mapped to an integer code with
    ``LabelEncoder`` (label encoding, not one-hot encoding). One encoder is
    fitted on all distinct values of the feature table and applied to each
    column, so a given string receives the same code in every column.

    Returns:
        tuple: ``(x_train, y_train, features, le_x, le_y)`` where
            ``x_train`` is the encoded feature ``DataFrame``,
            ``y_train`` is the 1-D array of encoded labels,
            ``features`` is the list of feature names, and
            ``le_x`` / ``le_y`` are the fitted feature and label encoders.
    """
    features = ["age", "work", "house", "credit"]
    x_train = pd.DataFrame([
        ["青年", "否", "否", "一般"],
        ["青年", "否", "否", "好"],
        ["青年", "是", "否", "好"],
        ["青年", "是", "是", "一般"],
        ["青年", "否", "否", "一般"],
        ["中年", "否", "否", "一般"],
        ["中年", "否", "否", "好"],
        ["中年", "是", "是", "好"],
        ["中年", "否", "是", "非常好"],
        ["中年", "否", "是", "非常好"],
        ["老年", "否", "是", "非常好"],
        ["老年", "否", "是", "好"],
        ["老年", "是", "否", "好"],
        ["老年", "是", "否", "非常好"],
        ["老年", "否", "否", "一般"]
    ])
    # Keep the labels one-dimensional: LabelEncoder expects a 1-D sequence
    # and warns when it is handed a single-column DataFrame instead.
    labels = ["否", "否", "是", "是", "否", "否", "否", "是",
              "是", "是", "是", "是", "是", "是", "否"]

    # Label-encode the features with one encoder shared by all columns
    le_x = LabelEncoder()
    le_x.fit(np.unique(x_train))
    x_train = x_train.apply(le_x.transform)

    # Label-encode the targets
    le_y = LabelEncoder()
    y_train = le_y.fit_transform(labels)
    return x_train, y_train, features, le_x, le_y


# 决策树可视化
def show(clf, feature, y_type, out_path='DT_show.png'):
    """Render a fitted decision tree to a PNG image.

    Args:
        clf: Fitted decision tree handed to ``tree.export_graphviz``.
        feature: Feature names used to label the split nodes.
        y_type: Class names used to label the nodes.
        out_path: Where the PNG is written. Defaults to ``'DT_show.png'``,
            the file name this function has always used.
    """
    # out_file=None asks export_graphviz for the DOT source as a string
    dot_data = tree.export_graphviz(clf, out_file=None,
                                    feature_names=feature,
                                    class_names=y_type, filled=True,
                                    rounded=True, special_characters=True)
    # Build the graph from the DOT source and write the image
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_png(out_path)






if __name__ == '__main__':
    # Matplotlib font configuration (SimHei font, ASCII minus sign)
    mpl.rcParams.update({
        "font.sans-serif": [u'simHei'],
        'axes.unicode_minus': False,
    })

    # Load the encoded training data together with the fitted encoders
    x_train, y_train, features, le_x, le_y = loaddata()

    # Train the classifier
    clf = DecisionTreeClassifier().fit(x_train, y_train)

    # Class names for the plot: the distinct encoded labels, as strings
    y_type = list(map(str, np.unique(y_train)))
    show(clf, features, y_type)

    # Predict a single sample, encoded with the same feature encoder
    X_show = pd.DataFrame([["青年", "否", "否", "一般"]])
    y_predict = clf.predict(X_show.apply(le_x.transform))

    # Report the sample as feature/value pairs next to the decoded label
    described = [{name: value}
                 for name, value in zip(features, X_show.values[0])]
    print("{0}被分类为{1}".format(described, le_y.inverse_transform(y_predict)))


[{'age': '青年'}, {'work': '否'}, {'house': '否'}, {'credit': '一般'}]被分类为['否']

3.2 决策树可视化

2.生成pdf(下面依次给出两种写法:先用 pydotplus 的 write_pdf 写出文件,再用 graphviz.Source 渲染并打开)

import pydotplus
import graphviz
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import tree
import matplotlib as mpl

# 加载数据
def loaddata():
    """Build the 15-sample toy data set and label-encode it.

    Every categorical string is mapped to an integer code with
    ``LabelEncoder`` (label encoding, not one-hot encoding). One encoder is
    fitted on all distinct values of the feature table and applied to each
    column, so a given string receives the same code in every column.

    Returns:
        tuple: ``(x_train, y_train, features, le_x, le_y)`` where
            ``x_train`` is the encoded feature ``DataFrame``,
            ``y_train`` is the 1-D array of encoded labels,
            ``features`` is the list of feature names, and
            ``le_x`` / ``le_y`` are the fitted feature and label encoders.
    """
    features = ["age", "work", "house", "credit"]
    x_train = pd.DataFrame([
        ["青年", "否", "否", "一般"],
        ["青年", "否", "否", "好"],
        ["青年", "是", "否", "好"],
        ["青年", "是", "是", "一般"],
        ["青年", "否", "否", "一般"],
        ["中年", "否", "否", "一般"],
        ["中年", "否", "否", "好"],
        ["中年", "是", "是", "好"],
        ["中年", "否", "是", "非常好"],
        ["中年", "否", "是", "非常好"],
        ["老年", "否", "是", "非常好"],
        ["老年", "否", "是", "好"],
        ["老年", "是", "否", "好"],
        ["老年", "是", "否", "非常好"],
        ["老年", "否", "否", "一般"]
    ])
    # Keep the labels one-dimensional: LabelEncoder expects a 1-D sequence
    # and warns when it is handed a single-column DataFrame instead.
    labels = ["否", "否", "是", "是", "否", "否", "否", "是",
              "是", "是", "是", "是", "是", "是", "否"]

    # Label-encode the features with one encoder shared by all columns
    le_x = LabelEncoder()
    le_x.fit(np.unique(x_train))
    x_train = x_train.apply(le_x.transform)

    # Label-encode the targets
    le_y = LabelEncoder()
    y_train = le_y.fit_transform(labels)
    return x_train, y_train, features, le_x, le_y


# 决策树可视化
def show(clf, feature, y_type, out_path='iris.pdf'):
    """Render a fitted decision tree to a PDF file.

    Args:
        clf: Fitted decision tree handed to ``tree.export_graphviz``.
        feature: Feature names used to label the split nodes.
        y_type: Class names used to label the nodes.
        out_path: Where the PDF is written. Defaults to ``'iris.pdf'``, the
            file name this function has always used, so existing callers
            keep producing the same file.
    """
    # out_file=None asks export_graphviz for the DOT source as a string
    dot_data = tree.export_graphviz(clf, out_file=None,
                                    feature_names=feature,
                                    class_names=y_type, filled=True,
                                    rounded=True, special_characters=True)
    # Build the graph from the DOT source and write the PDF
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf(out_path)


if __name__ == '__main__':
    # Matplotlib font configuration (SimHei font, ASCII minus sign)
    mpl.rcParams.update({
        "font.sans-serif": [u'simHei'],
        'axes.unicode_minus': False,
    })

    # Load the encoded training data together with the fitted encoders
    x_train, y_train, features, le_x, le_y = loaddata()

    # Train the classifier
    clf = DecisionTreeClassifier().fit(x_train, y_train)

    # Class names for the plot: the distinct encoded labels, as strings
    y_type = list(map(str, np.unique(y_train)))
    show(clf, features, y_type)

    # Predict a single sample, encoded with the same feature encoder
    X_show = pd.DataFrame([["青年", "否", "否", "一般"]])
    y_predict = clf.predict(X_show.apply(le_x.transform))

    # Report the sample as feature/value pairs next to the decoded label
    described = [{name: value}
                 for name, value in zip(features, X_show.values[0])]
    print("{0}被分类为{1}".format(described, le_y.inverse_transform(y_predict)))
import pydotplus
import graphviz
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import tree
import matplotlib as mpl

# 加载数据
def loaddata():
    """Build the 15-sample toy data set and label-encode it.

    Every categorical string is mapped to an integer code with
    ``LabelEncoder`` (label encoding, not one-hot encoding). One encoder is
    fitted on all distinct values of the feature table and applied to each
    column, so a given string receives the same code in every column.

    Returns:
        tuple: ``(x_train, y_train, features, le_x, le_y)`` where
            ``x_train`` is the encoded feature ``DataFrame``,
            ``y_train`` is the 1-D array of encoded labels,
            ``features`` is the list of feature names, and
            ``le_x`` / ``le_y`` are the fitted feature and label encoders.
    """
    features = ["age", "work", "house", "credit"]
    x_train = pd.DataFrame([
        ["青年", "否", "否", "一般"],
        ["青年", "否", "否", "好"],
        ["青年", "是", "否", "好"],
        ["青年", "是", "是", "一般"],
        ["青年", "否", "否", "一般"],
        ["中年", "否", "否", "一般"],
        ["中年", "否", "否", "好"],
        ["中年", "是", "是", "好"],
        ["中年", "否", "是", "非常好"],
        ["中年", "否", "是", "非常好"],
        ["老年", "否", "是", "非常好"],
        ["老年", "否", "是", "好"],
        ["老年", "是", "否", "好"],
        ["老年", "是", "否", "非常好"],
        ["老年", "否", "否", "一般"]
    ])
    # Keep the labels one-dimensional: LabelEncoder expects a 1-D sequence
    # and warns when it is handed a single-column DataFrame instead.
    labels = ["否", "否", "是", "是", "否", "否", "否", "是",
              "是", "是", "是", "是", "是", "是", "否"]

    # Label-encode the features with one encoder shared by all columns
    le_x = LabelEncoder()
    le_x.fit(np.unique(x_train))
    x_train = x_train.apply(le_x.transform)

    # Label-encode the targets
    le_y = LabelEncoder()
    y_train = le_y.fit_transform(labels)
    return x_train, y_train, features, le_x, le_y


# 决策树可视化
def show(clf, feature, y_type):
    """Export a fitted decision tree to ``tree.dot`` and open the rendering.

    The DOT description is written to ``tree.dot`` in the working
    directory, read back, and handed to ``graphviz.Source``, whose
    ``view()`` renders the graph and opens it in a viewer.

    Args:
        clf: Fitted decision tree handed to ``tree.export_graphviz``.
        feature: Feature names used to label the split nodes.
        y_type: Class names used to label the nodes.
    """
    # With out_file set, export_graphviz writes the file and returns None,
    # so there is no return value worth keeping.
    tree.export_graphviz(clf, out_file='tree.dot',
                         feature_names=feature,
                         class_names=y_type, filled=True,
                         rounded=True, special_characters=True)

    # export_graphviz writes the file as UTF-8; read it back with the same
    # encoding rather than the platform default.
    with open('tree.dot', encoding='utf-8') as f:
        dot_source = f.read()
    # Render the DOT source and open the result
    dot = graphviz.Source(dot_source)
    dot.view()






if __name__ == '__main__':
    # Matplotlib font configuration (SimHei font, ASCII minus sign)
    mpl.rcParams.update({
        "font.sans-serif": [u'simHei'],
        'axes.unicode_minus': False,
    })

    # Load the encoded training data together with the fitted encoders
    x_train, y_train, features, le_x, le_y = loaddata()

    # Train the classifier
    clf = DecisionTreeClassifier().fit(x_train, y_train)

    # Class names for the plot: the distinct encoded labels, as strings
    y_type = list(map(str, np.unique(y_train)))
    show(clf, features, y_type)

    # Predict a single sample, encoded with the same feature encoder
    X_show = pd.DataFrame([["青年", "否", "否", "一般"]])
    y_predict = clf.predict(X_show.apply(le_x.transform))

    # Report the sample as feature/value pairs next to the decoded label
    described = [{name: value}
                 for name, value in zip(features, X_show.values[0])]
    print("{0}被分类为{1}".format(described, le_y.inverse_transform(y_predict)))

未经允许不得转载:作者:1147-柳同学, 转载或复制请以 超链接形式 并注明出处 拜师资源博客
原文地址:《3.2 决策树可视化》 发布于2021-01-10

分享到:
赞(0) 打赏

评论 抢沙发

评论前必须登录!

  注册



长按图片转发给朋友

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏

Vieu3.3主题
专业打造轻量级个人企业风格博客主题!专注于前端开发,全站响应式布局自适应模板。

登录

忘记密码 ?

您也可以使用第三方帐号快捷登录

Q Q 登 录
微 博 登 录