
MengFanjun's Blog

Original video link

Import the basic libraries

import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler  # scale the features
from sklearn.model_selection import train_test_split  # split the data into training and test sets

Import the breast cancer dataset

Print the number of samples and features in the dataset

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target  # X holds the input data, y the known labels
n_samples, n_features = X.shape  # number of samples and number of features in X
print(n_samples, n_features)
Output:

569 30

There are 569 samples and 30 features.

Process the imported data

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)  # hold out 20% for testing; random_state acts as a fixed seed, so the same value gives the same split every time

sc = StandardScaler()  # scale features to zero mean and unit variance
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

X_train = torch.from_numpy(X_train.astype(np.float32))  # convert X_train to a float32 tensor
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

y_train = y_train.view(y_train.shape[0], 1)  # reshape the y tensors
y_test = y_test.view(y_test.shape[0], 1)  # from a 1-D row into a column vector

Build the model

Only a single linear layer is needed here.

class LogisticRegression(nn.Module):
    def __init__(self, n_input_features):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_input_features, 1)  # we only need a single output label

    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred

Set up the optimizer and loss function

model = LogisticRegression(n_features)  # 30 input features, one output feature
criterion = nn.BCELoss()  # binary cross-entropy loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # stochastic gradient descent

Compute the loss each epoch

epochs = 100
for epoch in range(epochs):
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    loss.backward()

    optimizer.step()

    optimizer.zero_grad()

    if (epoch + 1) % 10 == 0:
        print(f'epoch:{epoch+1},loss={loss.item():.4f}')

Evaluate the model's accuracy

with torch.no_grad():  # evaluate the model without tracking gradients
    y_pred = model(X_test)
    y_pred_cls = y_pred.round()  # sigmoid already maps outputs into (0, 1); round() thresholds them at 0.5 to give 0 or 1
    acc = y_pred_cls.eq(y_test).sum() / float(y_test.shape[0])  # count predictions that match the labels, divided by the number of test samples
    print(f'accuracy={acc:.4f}')

Output:

epoch:10,loss=0.5711
epoch:20,loss=0.4670
epoch:30,loss=0.4019
epoch:40,loss=0.3575
epoch:50,loss=0.3251
epoch:60,loss=0.3003
epoch:70,loss=0.2806
epoch:80,loss=0.2645
epoch:90,loss=0.2511
epoch:100,loss=0.2396
accuracy=0.9123

You can change the number of training epochs and the learning rate to improve the model's accuracy.
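As a rough illustration (not part of the original video), here is a minimal sketch that retrains the same model with more epochs and a different learning rate; the epoch count and lr below are assumed values to experiment with, and the code reuses the model class and data defined above.

# Minimal sketch: retrain with different hyperparameters (values are assumptions, tune as needed)
model = LogisticRegression(n_features)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)  # assumed learning rate, larger than the 0.01 used above

epochs = 500  # assumed value, more than the 100 epochs used above
for epoch in range(epochs):
    loss = criterion(model(X_train), y_train)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

with torch.no_grad():
    acc = model(X_test).round().eq(y_test).sum() / float(y_test.shape[0])
    print(f'accuracy={acc:.4f}')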
