llm-arch-research/llm/tests/training/test_trainer.py
Sergey Penkovsky, commit d947b7beb3: update and expand scientific docstrings for optimizer, scheduler, trainer
- Expanded module-level and function/class docstrings in optimizer.py, scheduler.py, and trainer.py
- Described mathematical foundations, theoretical motivations, and provided detailed usage examples for students
- All docstrings are written in Russian in a clear scientific style

test(training): add comprehensive tests for optimizer, scheduler, and trainer modules

- Added new test files for get_optimizer, get_linear_schedule_with_warmup, and Trainer
- Tests cover parameter handling, edge cases, and expected learning dynamics (lr schedules and loss behavior); a sketch of the schedule check follows below
- Trainer now logs average epoch losses to self.loss_history for testability and analysis
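
A minimal sketch of the kind of schedule check described above. It assumes get_linear_schedule_with_warmup accepts optimizer, num_warmup_steps, and num_training_steps, mirroring the Hugging Face function of the same name; the actual signature in llm.training.scheduler may differ, and the step counts below are illustrative only:

import torch
from llm.training.scheduler import get_linear_schedule_with_warmup

def test_lr_warms_up_then_decays():
    model = torch.nn.Linear(4, 4)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    # 5 warmup steps out of 20 total training steps (illustrative values)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=5, num_training_steps=20)
    lrs = []
    for _ in range(20):
        optimizer.step()
        scheduler.step()
        lrs.append(optimizer.param_groups[0]["lr"])
    # the lr should peak at the end of warmup and then decay linearly towards zero
    assert max(lrs) == lrs[4]
    assert lrs[-1] < lrs[4]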

refactor(training/trainer): log epoch loss to loss_history for downstream analysis and tests

BREAKING CHANGE: Trainer.loss_history is a new attribute consolidating average losses per epoch, enabling robust learning dynamics assertions in tests
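
For context, here is a minimal standalone sketch of the per-epoch loss averaging this change describes. The real logic lives in Trainer.train inside llm/training/trainer.py; the helper name and the training loop below are illustrative assumptions, not the project's API:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

def train_with_loss_history(model, dataset, num_epochs=2, lr=1e-2, batch_size=4):
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    loss_history = []  # one averaged value per epoch, mirroring Trainer.loss_history
    for _ in range(num_epochs):
        epoch_losses = []
        for x, y in loader:
            optimizer.zero_grad()
            loss = loss_fn(model(x), y)
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())
        loss_history.append(sum(epoch_losses) / len(epoch_losses))
    return loss_history

# usage: a test can compare the first and last entries, as test_trainer_loss_decreases does below
xs, ys = torch.randn(32, 4), torch.randn(32, 1)
history = train_with_loss_history(nn.Linear(4, 1), TensorDataset(xs, ys))
assert len(history) == 2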

import torch
import torch.nn as nn
from torch.utils.data import Dataset

from llm.training.trainer import Trainer


# Small synthetic dataset for an autoregressive LM task
class ToyLMDataset(Dataset):
    def __init__(self, num_samples=16, seq_len=8, vocab_size=16):
        self.data = torch.randint(1, vocab_size, (num_samples, seq_len))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # labels == input_ids (identity task)
        return {"input_ids": self.data[idx], "labels": self.data[idx]}


# Simple dummy model: a single linear layer over the vocabulary
class TinyModel(nn.Module):
    def __init__(self, vocab_size=16, seq_len=8):
        super().__init__()
        self.linear = nn.Linear(seq_len, vocab_size)

    def forward(self, x):
        # logits: (batch, seq_len, vocab_size)
        # For simplicity the same logit vector is broadcast to every position
        return self.linear(x.float()).unsqueeze(1).expand(-1, x.shape[1], -1)


def test_train_runs_without_errors():
    train_data = ToyLMDataset(num_samples=16, seq_len=8, vocab_size=16)
    model = TinyModel(vocab_size=16, seq_len=8)
    trainer = Trainer(model, train_data, lr=1e-3, batch_size=4, num_epochs=1, warmup_steps=2)
    trainer.train()


def test_trainer_evaluate_runs():
    train_data = ToyLMDataset(num_samples=8)
    val_data = ToyLMDataset(num_samples=8)
    model = TinyModel()
    trainer = Trainer(model, train_data, val_data, lr=1e-3, batch_size=4, num_epochs=1, warmup_steps=2)
    trainer.train()
    trainer.evaluate()


def test_trainer_tuple_output():
    # Model that returns a tuple (logits, extra)
    class TupleModel(nn.Module):
        def __init__(self, vocab_size=16, seq_len=8):
            super().__init__()
            self.linear = nn.Linear(seq_len, vocab_size)

        def forward(self, x):
            logits = self.linear(x.float()).unsqueeze(1).expand(-1, x.shape[1], -1)
            extra = torch.zeros(1)
            return logits, extra

    train_data = ToyLMDataset(num_samples=8)
    model = TupleModel()
    trainer = Trainer(model, train_data, lr=1e-3, batch_size=2, num_epochs=1, warmup_steps=1)
    trainer.train()


def test_trainer_loss_decreases():
    train_data = ToyLMDataset(num_samples=32, seq_len=8, vocab_size=8)
    model = TinyModel(vocab_size=8, seq_len=8)
    trainer = Trainer(model, train_data, lr=0.05, batch_size=8, num_epochs=2, warmup_steps=1)
    trainer.train()
    avg_losses = trainer.loss_history
    # the last epoch's average loss should not exceed the first epoch's
    # (a small tolerance absorbs noise from the tiny random dataset)
    assert avg_losses[-1] <= avg_losses[0] or abs(avg_losses[-1] - avg_losses[0]) < 1e-3