Everyone has their preferred way of writing and debugging code. I like to write code in a Jupyter notebook, executing and debugging it cell by cell.
However, this approach can become visually cluttered and hard to follow as complexity grows, which makes it difficult for others to understand the code when collaborating. For that reason, I think writing code as .py files is the better practice.
Of course, debugging is still possible even when the code is written in .py files.
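For example (a generic illustration, not code from this post), Python's built-in breakpoint() pauses execution and drops you into the pdb debugger wherever you place it in a .py file:

def inspect_batch(loader):
    # hypothetical helper, used only for this illustration
    for inputs, labels in loader:
        breakpoint()  # execution pauses here; inspect inputs.shape, labels, etc. in pdb
        break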
Additionally, when writing code, it's useful to gather all the variables that need to be changed so they can be adjusted in one place.
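As a minimal sketch of that idea (the rest of this post builds it out properly with a main() function), the tunable values can simply be collected at the top of the .py file; the values here are only illustrative:

# hyperparameters gathered in one place
BATCH_SIZE = 64
N_LAYER = 5
FEATURE = 1024
EPOCHS = 10
DROPOUT = 0.3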
The entire code used in the previous post (102_Dropout Layers) is as follows.
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from tqdm import tqdm
import matplotlib.pyplot as plt
import time

torch.manual_seed(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_set = torchvision.datasets.MNIST(root='./data', train=True, download=True,
                                       transform=transforms.Compose([transforms.ToTensor()]))
training_size = int(0.8 * len(train_set))
test_size = len(train_set) - training_size
training_dataset, test_dataset = random_split(train_set, [training_size, test_size])
training_loader = DataLoader(training_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

class DNN(torch.nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        self.input = torch.nn.Linear(28*28, 512)
        self.hidden1 = torch.nn.Linear(512, 256)
        self.hidden2 = torch.nn.Linear(256, 128)
        self.hidden3 = torch.nn.Linear(128, 64)
        self.dropout = torch.nn.Dropout(0.3)
        self.output = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = torch.relu(self.input(x))
        x = self.dropout(x)
        x = torch.relu(self.hidden1(x))
        x = self.dropout(x)
        x = torch.relu(self.hidden2(x))
        x = self.dropout(x)
        x = torch.relu(self.hidden3(x))
        x = self.dropout(x)
        x = self.output(x)
        return x

def train(model, training_loader, test_loader, device, epochs=10):
    training_losses = []
    test_losses = []
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm(training_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        training_losses.append(running_loss / len(training_loader))

        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
        test_losses.append(test_loss / len(test_loader))
        print(f'Epoch {epoch+1}, Training Loss: {training_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}')
    return training_losses, test_losses

s = time.time()
model = DNN().to(device)
training_losses, test_losses = train(model, training_loader, test_loader, device)
print("total time : ", time.time() - s)

plt.plot(training_losses, label='Training loss')
plt.plot(test_losses, label='Test loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
If you want to change the number of layers in this code, you currently have to add each layer by hand. Since the model passes through linear, activation, and dropout layers in the same pattern, it is more convenient to specify the number of layers and then add them automatically in a loop.
This modifies the code so that the model's architecture adjusts dynamically based on the specified parameters.
class DNN(torch.nn.Module):
    def __init__(self, n_layer, feature):
        super().__init__()
        self.input = torch.nn.Linear(28*28, feature)
        self.act = torch.nn.ReLU()
        self.hidden = torch.nn.ModuleList()
        self.dropout = torch.nn.Dropout(0.3)
        steps = n_layer - 1
        # interval (in layers) between halvings of the feature size; at least every layer
        reduce = max(steps // feature // 64, 1)
        for i in range(n_layer - 1):
            # halve the width at the chosen interval, but never go below 64
            if (i+1) % reduce == 0 and feature // 2 >= 64:
                next_feature = feature // 2
            else:
                next_feature = feature
            self.hidden.append(torch.nn.Linear(feature, next_feature))
            feature = next_feature
        # last hidden layer maps down to 64 features for the output layer
        self.hidden.append(torch.nn.Linear(feature, 64))
        self.output = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.input(x)
        x = self.act(x)
        x = self.dropout(x)
        for layer in self.hidden:
            x = layer(x)
            x = self.act(x)
            x = self.dropout(x)
        x = self.output(x)
        return x
Now that it's written, it may look more complicated, and there is probably a more effective and concise way to express what I want in code, but I wrote it the way that first came to mind.
To explain the code: unlike before, the DNN class now takes the number of hidden layers, n_layer, and the number of neurons, feature, as arguments. The hidden layers are held in a ModuleList and are created and appended in a loop driven by n_layer.
The output size of the input layer is set to the specified feature value. For each subsequent hidden layer, an if statement decides whether to halve feature or keep it unchanged, depending on the number of layers.
The input size of the output layer is fixed at 64, so the last hidden layer's output size is 64.
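As one possible version of the "more concise" idea mentioned above (only a sketch under my own assumptions, not the code used in the rest of this post), the same Linear → ReLU → Dropout pattern could be assembled with torch.nn.Sequential, using a simplified rule that halves the width every layer until it reaches 64:

import torch

def build_dnn(n_layer, feature, dropout=0.3):
    # hypothetical helper, for illustration only
    layers = [torch.nn.Flatten(),
              torch.nn.Linear(28*28, feature),
              torch.nn.ReLU(),
              torch.nn.Dropout(dropout)]
    for _ in range(n_layer):
        next_feature = max(feature // 2, 64)  # halve the width, but never below 64
        layers += [torch.nn.Linear(feature, next_feature),
                   torch.nn.ReLU(),
                   torch.nn.Dropout(dropout)]
        feature = next_feature
    layers.append(torch.nn.Linear(feature, 10))
    return torch.nn.Sequential(*layers)

print(build_dnn(n_layer=5, feature=1024))  # inspect the generated architecture

With n_layer=5 and feature=1024, printing the model should show the same 1024 → 512 → 256 → 128 → 64 → 64 progression that the ModuleList version builds, though the two differ for other parameter choices.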
if __name__ == "__main__":
    batch_size = 128
    n_layer = 5
    feature = 1024
    epochs = 10
    dropout = 0.3
    main(batch_size, n_layer, feature, epochs, dropout)
The entire pipeline has been wrapped in a function that accepts the necessary parameters as arguments. With this code structure, parameters can be changed easily, and it is immediately clear which values are being passed in. This improves the readability and maintainability of the code, since model parameters and configuration can be adjusted in one place.
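If you later want to set these values from the command line instead of editing the file, argparse is one option. This is a minimal sketch, not part of the code below, and the argument names are just assumptions; it relies on the main() function from the full listing:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--n_layer", type=int, default=5)
    parser.add_argument("--feature", type=int, default=1024)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--dropout", type=float, default=0.3)
    args = parser.parse_args()
    main(args.batch_size, args.n_layer, args.feature, args.epochs, args.dropout)

You could then run, for example, python train.py --epochs 20, where train.py stands for whatever you name the file.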
Although it's just a modification of the existing code, the result is as described above. Looking at the code as a whole, it may not seem dramatically different or more efficient, but I'm reasonably satisfied with it in my own way. The entire code is as follows.
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from tqdm import tqdm
import matplotlib.pyplot as plt
import time

torch.manual_seed(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def main(batch_size, n_layer, feature, epochs, dropout):
    train_set = torchvision.datasets.MNIST(root='./data', train=True, download=True,
                                           transform=transforms.Compose([transforms.ToTensor()]))
    training_size = int(0.8 * len(train_set))
    test_size = len(train_set) - training_size
    training_dataset, test_dataset = random_split(train_set, [training_size, test_size])
    training_loader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    class DNN(torch.nn.Module):
        def __init__(self, n_layer, feature, dropout):
            super().__init__()
            self.input = torch.nn.Linear(28*28, feature)
            self.act = torch.nn.ReLU()
            self.hidden = torch.nn.ModuleList()
            self.dropout = torch.nn.Dropout(dropout)
            steps = n_layer - 1
            reduce = max(steps // feature // 64, 1)
            for i in range(n_layer - 1):
                if (i+1) % reduce == 0 and feature // 2 >= 64:
                    next_feature = feature // 2
                else:
                    next_feature = feature
                self.hidden.append(torch.nn.Linear(feature, next_feature))
                feature = next_feature
            self.hidden.append(torch.nn.Linear(feature, 64))
            self.output = torch.nn.Linear(64, 10)

        def forward(self, x):
            x = x.view(-1, 28*28)
            x = self.input(x)
            x = self.act(x)
            x = self.dropout(x)
            for layer in self.hidden:
                x = layer(x)
                x = self.act(x)
                x = self.dropout(x)
            x = self.output(x)
            return x

    def train(model, training_loader, test_loader, device, epochs=epochs):
        training_losses = []
        test_losses = []
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        for epoch in range(epochs):
            model.train()
            running_loss = 0.0
            for inputs, labels in tqdm(training_loader, desc=f"Epoch {epoch+1}/{epochs}"):
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            training_losses.append(running_loss / len(training_loader))

            model.eval()
            test_loss = 0.0
            with torch.no_grad():
                for inputs, labels in test_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    test_loss += loss.item()
            test_losses.append(test_loss / len(test_loader))
            print(f'Epoch {epoch+1}, Training Loss: {training_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}')
        return training_losses, test_losses

    s = time.time()
    model = DNN(n_layer, feature, dropout).to(device)
    training_losses, test_losses = train(model, training_loader, test_loader, device)
    print("total time : ", time.time() - s)

    plt.plot(training_losses, label='Training loss')
    plt.plot(test_losses, label='Test loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
if __name__ == "__main__":
    batch_size = 128
    n_layer = 5
    feature = 1024
    epochs = 10
    dropout = 0.3
    main(batch_size, n_layer, feature, epochs, dropout)