๐ Model
- Pytorch์์ ์ ๊ณตํ๋ ViT model ์ฌ์ฉ(Vit_b_16)
- Link : Pre-trained weights, github code
๐ Dataset
- Kaggle์ Animal-10 dataset์ ๊ฐ์์ง์ ๊ณ ์์ด ์ด๋ฏธ์ง๋ง ๋ฝ์์ ์ฌ์ฉ
๐ Task
- ๊ฐ์์ง์ ๊ณ ์์ด ์ด๋ฏธ์ง๋ฅผ ๋ถ๋ฅ
1. ์ ์ฒด ์คํ ํ๋ฆ (Main)
์ ์ฒด ์ฝ๋์ ํ๋ฆ์ ์๋์ ๊ฐ๋ค.
def main():
# ๋ฐ์ดํฐ ์ค๋น
FINE_TUNE_N = 1024 # ๊ฐ ํด๋์ค๋ณ๋ก fine tuning์ ์ฌ์ฉํ ์ด๋ฏธ์ง ์
PREDICT_N = 100 # ์์ธก์ ์ฌ์ฉํ ์ ์ฒด ์ด๋ฏธ์ง ์
prepare_fine_tuning_dataset(animal_dir, fine_tuning_dir, FINE_TUNE_N)
prepare_prediction_dataset(animal_dir, predict_dir, PREDICT_N)
# ๋ชจ๋ธ fine tuning
model = fine_tune_model(fine_tuning_dir, num_epochs=20, batch_size=32, learning_rate=1e-4)
# ์์ธก ๋ฐ ๋ก๊ทธ ์ ์ฅ
predict_model(model, predict_dir, batch_size=8)
# ๋ก๊ทธ ๋น๊ต ๋ฐ ํ๊ฐ
evaluate_predictions()
โ kaggle์ dataset์ ๋ฐ์์ ํ๋ จ์ฉ ๋ฐ์ดํฐ์ ํ ์คํธ์ฉ ๋ฐ์ดํฐ๋ฅผ ๋๋ค์ผ๋ก ๋ฝ์ ๋ถ๋ฆฌ
- ๊ธฐ์กด model์ Imagenet์ผ๋ก Pre-train ๋์ด ์๊ธฐ ๋๋ฌธ์ ๋ณธ ์ฝ๋์ task์ธ ๊ฐ์์ง์ ๊ณ ์์ด ์ด๋ฏธ์ง๋ฅผ ๋ถ๋ฅํ๊ธฐ ์ํด ํ์ํจ
- ๋ฐ์ดํฐ๋ basic data์์ ๋๋ค์ผ๋ก ๋ฝ์์ train๊ณผ test๋ฅผ ํ ์ ์๋๋ก ์ฝ๋ ๊ตฌ์ฑ
- basic data๋ cat ํด๋์ dog ํด๋๋ก ๊ตฌ์ฑ์ด ๋์ด์๊ธฐ ๋๋ฌธ์ ์ด๋ฏธ์ง์ ๋ํ ๋ผ๋ฒจ์ ์ ๋ log.txt๋ก ํ์ต ๋ฐ ํ๊ฐ ์งํ
โก ํ๋ จ์ฉ ๋ฐ์ดํฐ๋ก Fine tuning ์งํ
- Main์์ ์ค์ ํ ๊ฐ์ ๊ธฐ์ค์ผ๋ก ์ฌ์ ํ์ต๋ Vision Transformer๋ฅผ ๋ถ๋ฌ์ Fine-tuning ์งํ
โข ํ ์คํธ์ฉ ๋ฐ์ดํธ Test ์งํ ํ, ์ ํ๋ ์ธก์
2. ๊ฒฐ๊ณผ ํ์ธ
์๋์ ํ ์คํธ train data = 1024, test data = 100์ผ๋ก ์ค์ ํ์ฌ ๋ถ๋ฅ ๊ฒฐ๊ณผ๋ฅผ ํ์ธํ์๋ค.
๋ ๋ฒ์ ์ธก์ ๋ชจ๋ ๋ถ๋ฅ๊ฐ ์๋๋ ๊ฒ์ผ๋ก ํ์ธ๋๋ฉฐ, Fine tuning์ด ์ ๋จน์ฌ์ง ๊ฒ์ ํ์ธํ ์ ์๋ค.
(์ด๋ฐ์ ์ ํ๋๊ฐ ๋๋ฌด ๋ฎ์์ ํ๋ จ ๋ฐ์ดํฐ๋ฅผ ์ ์ฐจ ๋๋ ธ์)
3. ์ ์ฒด ์ฝ๋
import os
import glob
import random
import shutil
import re
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import vit_b_16, ViT_B_16_Weights
# ํด๋ ์์ผ๋ฉด ์์ฑํ๋ ํจ์
def create_dir(path):
    """Create directory `path` (including parents) if it does not already exist."""
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(path, exist_ok=True)
# [Step 3] Prepare the fine-tuning dataset (copy N images per class)
def prepare_fine_tuning_dataset(animal_dir, fine_tuning_dir, N):
    """Copy N randomly sampled .jpeg images per class from animal_dir into fine_tuning_dir.

    Images are taken from animal_dir/<class> for each class in ("cat", "dog")
    and renamed "<class>_Img<i>.jpeg" so FineTuneDataset can recover the label
    from the filename prefix. A class with fewer than N images is skipped with
    a warning.
    """
    categories = ["cat", "dog"]
    for category in categories:
        category_dir = os.path.join(animal_dir, category)
        images = glob.glob(os.path.join(category_dir, "*.jpeg"))
        if len(images) < N:
            # Not enough images for this class: warn and skip it.
            # (Message was mojibake-garbled and split across lines; restored.)
            print(f"Not enough images in {category_dir} (needed: {N}, found: {len(images)})")
            continue
        selected = random.sample(images, N)
        for i, img_path in enumerate(selected, start=1):
            # Encode the class label in the destination filename.
            filename = f"{category}_Img{i}.jpeg"
            dest_path = os.path.join(fine_tuning_dir, filename)
            shutil.copy(img_path, dest_path)
    print("Fine tuning dataset ready.")
# [Step 4] Prepare the prediction dataset and write DataLog.txt
def prepare_prediction_dataset(animal_dir, predict_dir, N):
    """Sample N images across both classes into predict_dir and log the truth labels.

    Copies a random selection of N images (from animal_dir/cat and
    animal_dir/dog) into predict_dir as "Img<i>.jpeg" and writes one
    "Img<i>.jpeg: <label>" line per image to Data/DataLog.txt — the exact
    format evaluate_predictions() parses. If fewer than N images exist,
    all available images are used.
    """
    categories = ["cat", "dog"]
    all_images = []
    for category in categories:
        category_dir = os.path.join(animal_dir, category)
        for img_path in glob.glob(os.path.join(category_dir, "*.jpeg")):
            all_images.append((img_path, category))
    if len(all_images) < N:
        print(f"Not enough images overall (needed: {N}, found: {len(all_images)})")
        N = len(all_images)  # fall back to everything we have
    selected = random.sample(all_images, N)
    random.shuffle(selected)  # randomize class order across the log
    data_log_path = os.path.join("Data", "DataLog.txt")
    with open(data_log_path, "w") as f:
        for i, (img_path, category) in enumerate(selected, start=1):
            filename = f"Img{i}.jpeg"
            dest_path = os.path.join(predict_dir, filename)
            shutil.copy(img_path, dest_path)
            # BUG FIX: record the actual filename (the original wrote a literal
            # placeholder), matching the "ImgX.jpeg: label" format that
            # evaluate_predictions() expects.
            f.write(f"{filename}: {category}\n")
    print("Prediction dataset ready; DataLog.txt written.")
# Custom dataset for fine-tuning (extracts the label from the filename)
class FineTuneDataset(Dataset):
    """Dataset over a flat folder of "<label>_Img<i>.jpeg" files.

    The class label is recovered from the filename prefix:
    "cat" -> 0, anything else (i.e. "dog") -> 1.
    """

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        # Only .jpeg files; prepare_fine_tuning_dataset writes exactly these.
        self.image_files = [f for f in os.listdir(root) if f.endswith(".jpeg")]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        filename = self.image_files[idx]
        # Filename format: "cat_Img{i}.jpeg" or "dog_Img{i}.jpeg".
        # (This comment was garbled and split across lines, breaking syntax.)
        label_str = filename.split("_")[0]
        label = 0 if label_str.lower() == "cat" else 1
        img_path = os.path.join(self.root, filename)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label
# Custom dataset for prediction (filenames sorted numerically)
class PredictDataset(Dataset):
    """Unlabeled dataset over a folder of "Img<i>.jpeg" files, ordered by <i>.

    __getitem__ yields (image, filename) so predictions can be logged per file.
    """

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        jpegs = (name for name in os.listdir(root) if name.endswith(".jpeg"))
        # Numeric sort on the first integer in each name: Img2 before Img10.
        self.image_files = sorted(
            jpegs, key=lambda name: int(re.search(r"(\d+)", name).group(1))
        )

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        name = self.image_files[idx]
        image = Image.open(os.path.join(self.root, name)).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, name
def fine_tune_model(fine_tuning_dir, num_epochs=10, batch_size=8, learning_rate=1e-4):
    """Fine-tune an ImageNet-pretrained ViT-B/16 for 2-class cat/dog classification.

    Args:
        fine_tuning_dir: folder of "<label>_Img<i>.jpeg" training images.
        num_epochs: maximum number of training epochs.
        batch_size: mini-batch size.
        learning_rate: Adam learning rate.

    Returns:
        The fine-tuned model, moved to GPU when available.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Standard ImageNet preprocessing, matching the pretrained weights.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    dataset = FineTuneDataset(fine_tuning_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    # Load a Vision Transformer pretrained on ImageNet.
    weights = ViT_B_16_Weights.IMAGENET1K_V1
    model = vit_b_16(weights=weights)
    # Replace the classification head with a 2-class head (cat: 0, dog: 1).
    num_features = model.heads.head.in_features
    model.heads.head = nn.Linear(num_features, 2)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by batch size so the epoch average is exact
            # even when the last batch is smaller.
            running_loss += loss.item() * images.size(0)
        epoch_loss = running_loss / len(dataset)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
        # Early stop once the training loss is essentially zero.
        if epoch_loss < 0.001:
            print("Loss threshold reached. Stopping training.")
            break
    print("Fine tuning complete.")  # fixed: message was mojibake-garbled
    return model
# [Step 4 ~ 5] ์์ธก ๋ฐ PredictLog.txt ๊ธฐ๋ก ํจ์
def predict_model(model, predict_dir, batch_size=8):
    """Classify every Img<i>.jpeg in predict_dir and write Data/PredictLog.txt.

    Each output line has the form "Img<i>.jpeg: <label>" with label "cat" or
    "dog", written in numeric filename order so the file lines up with
    Data/DataLog.txt for evaluate_predictions().

    Args:
        model: a trained classifier producing 2-class logits (cat: 0, dog: 1).
        predict_dir: folder of images to classify.
        batch_size: inference batch size.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Same preprocessing as training.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    dataset = PredictDataset(predict_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    model.eval()
    predictions = {}
    with torch.no_grad():
        for images, filenames in dataloader:
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            preds = preds.cpu().numpy()
            for filename, pred in zip(filenames, preds):
                # Map the numeric class index back to its string label.
                pred_label = "cat" if pred == 0 else "dog"
                predictions[filename] = pred_label
    predict_log_path = os.path.join("Data", "PredictLog.txt")
    with open(predict_log_path, "w") as f:
        # Record in filename order (Img1.jpeg, Img2.jpeg, ...).
        filenames_sorted = sorted(predictions.keys(),
                                  key=lambda x: int(re.search(r"(\d+)", x).group(1)))
        for filename in filenames_sorted:
            # BUG FIX: write the actual filename (the original wrote a literal
            # placeholder), so the line format matches DataLog.txt.
            f.write(f"{filename}: {predictions[filename]}\n")
    print("Prediction complete; PredictLog.txt written.")
# [Step 6] DataLog.txt์ PredictLog.txt ๋น๊ตํ์ฌ ์ ํ๋ ํ๊ฐ ๋ฐ ์๋ชป ์์ธก๋ ์ด๋ฏธ์ง ํ์ผ ์ถ๋ ฅ
def evaluate_predictions():
    """Compare Data/DataLog.txt (truth) against Data/PredictLog.txt (predictions).

    Both files are expected to contain one "ImgX.jpeg: label" line per image,
    in the same order. Prints the accuracy count and lists every misclassified
    file with its true and predicted labels.
    """
    data_log_path = os.path.join("Data", "DataLog.txt")
    predict_log_path = os.path.join("Data", "PredictLog.txt")
    with open(data_log_path, "r") as f:
        data_lines = f.readlines()
    with open(predict_log_path, "r") as f:
        predict_lines = f.readlines()
    if len(data_lines) != len(predict_lines):
        # Fixed: this warning string was previously split across lines (syntax error).
        print("Warning: DataLog and PredictLog have a different number of entries.")
    # Compare only the overlapping prefix when the files disagree in length.
    total = min(len(data_lines), len(predict_lines))
    correct = 0
    misclassified = []
    for i in range(total):
        # Each line: "ImgX.jpeg: label"
        data_parts = data_lines[i].strip().split(":")
        predict_parts = predict_lines[i].strip().split(":")
        filename = data_parts[0].strip()
        true_label = data_parts[1].strip()
        pred_label = predict_parts[1].strip()
        if true_label == pred_label:
            correct += 1
        else:
            misclassified.append((filename, true_label, pred_label))
    print(f"Accuracy: {correct} / {total} correct")
    if misclassified:
        print("\nMisclassified image files:")
        for filename, true_label, pred_label in misclassified:
            # BUG FIX: print the actual filename (was a literal placeholder).
            print(f"{filename}: true = {true_label}, predicted = {pred_label}")
    else:
        print("\nAll images were predicted correctly.")
# ์ ์ฒด ์คํ ํ๋ฆ
def main():
    """End-to-end pipeline: prepare data, fine-tune ViT, predict, evaluate."""
    # Directory layout rooted at "Data".
    base_dir = "Data"
    animal_dir = os.path.join(base_dir, "Animal-10")
    fine_tuning_dir = os.path.join(base_dir, "Fine_tuning")
    predict_dir = os.path.join(base_dir, "Predict")
    # Make sure the output folders exist.
    for target in (fine_tuning_dir, predict_dir):
        create_dir(target)
    # Sample sizes (adjust as needed).
    FINE_TUNE_N = 1024  # images per class used for fine tuning
    PREDICT_N = 100     # total images used for prediction
    # Stage 1: build the fine-tuning and prediction datasets.
    prepare_fine_tuning_dataset(animal_dir, fine_tuning_dir, FINE_TUNE_N)
    prepare_prediction_dataset(animal_dir, predict_dir, PREDICT_N)
    # Stage 2: fine-tune the pretrained model.
    model = fine_tune_model(fine_tuning_dir, num_epochs=20, batch_size=32, learning_rate=1e-4)
    # Stage 3: predict and log results.
    predict_model(model, predict_dir, batch_size=8)
    # Stage 4: compare logs and report accuracy.
    evaluate_predictions()


if __name__ == '__main__':
    main()
'AI > Deep Learning' ์นดํ ๊ณ ๋ฆฌ์ ๋ค๋ฅธ ๊ธ
[Transformer] C๋ก Transformer ๊ตฌํํ๊ธฐ (0) | 2025.02.26 |
---|---|
Attention is All You Need(Transformer) Pytorch๋ก ๊ตฌํ (0) | 2025.01.14 |
[Transformer ์ ๋ฆฌ] 03. Positional Encoding๊ณผ ํน์ ํ ํฐ (0) | 2025.01.13 |
[Transformer ์ ๋ฆฌ] 02. ํธ๋์คํฌ๋จธ ๊ธฐ๋ณธ ๊ตฌ์กฐ (0) | 2025.01.13 |
[Transformer ์ ๋ฆฌ] 01. ๊ฐ์ (2) | 2024.12.26 |