feat: add LLaMA model implementation with RoPE positional encoding

- Added LLaMA model architecture with RMSNorm and SwiGLU activation
- Implemented Rotary Position Embeddings (RoPE), applied to queries and keys in attention
- Created training script for LLaMA with BPE tokenizer
- Pinned matplotlib to 3.10.6 in uv.lock
- Added LLaMA module initialization

The implementation includes:
- TokenEmbeddings, HeadAttention, MultiHeadAttention with RoPE support
- RMSNorm normalization layer
- SwiGLU feed-forward activation
- Cached decoder implementation for efficient generation
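
For orientation, a minimal PyTorch sketch of three of the named building blocks (RMSNorm, SwiGLU, and a RoPE helper) follows. All class, function, and parameter names in it are illustrative assumptions and do not necessarily match the identifiers used in this repository:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class RMSNorm(nn.Module):
    """Root-mean-square norm: rescale by the RMS of the features,
    with a learned gain and no mean subtraction or bias."""

    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return self.weight * (x * rms)


class SwiGLU(nn.Module):
    """Gated feed-forward block: silu(x @ W_gate) * (x @ W_up), projected back down."""

    def __init__(self, dim: int, hidden_dim: int):
        super().__init__()
        self.w_gate = nn.Linear(dim, hidden_dim, bias=False)
        self.w_up = nn.Linear(dim, hidden_dim, bias=False)
        self.w_down = nn.Linear(hidden_dim, dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.w_down(F.silu(self.w_gate(x)) * self.w_up(x))


def apply_rope(x: torch.Tensor, base: float = 10000.0) -> torch.Tensor:
    """Rotate each consecutive channel pair of a query or key tensor by a
    position-dependent angle. x: (batch, seq_len, n_heads, head_dim), head_dim even."""
    _, seq_len, _, head_dim = x.shape
    # One frequency per channel pair, decaying geometrically with the pair index.
    freqs = base ** (-torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim)
    angles = torch.arange(seq_len, dtype=torch.float32)[:, None] * freqs[None, :]
    cos = angles.cos()[None, :, None, :]  # broadcast over batch and heads
    sin = angles.sin()[None, :, None, :]
    x1, x2 = x[..., 0::2], x[..., 1::2]
    # 2-D rotation of each (x1, x2) pair, then re-interleave the channels.
    rotated = torch.stack((x1 * cos - x2 * sin, x1 * sin + x2 * cos), dim=-1)
    return rotated.flatten(-2)


if __name__ == "__main__":
    # Smoke test: every component preserves its input shape.
    q = torch.randn(2, 16, 4, 64)   # (batch, seq, heads, head_dim)
    assert apply_rope(q).shape == q.shape
    h = torch.randn(2, 16, 256)
    assert RMSNorm(256)(h).shape == h.shape
    assert SwiGLU(256, 688)(h).shape == h.shape
```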
Author: Sergey Penkovsky
Date:   2025-10-06 13:26:20 +03:00
Parent: 9898e8ee83
Commit: f30cd530a9

4 changed files with 626 additions and 3 deletions

uv.lock (generated)

@@ -1759,7 +1759,6 @@ dependencies = [
 dev = [
     { name = "black" },
     { name = "jupyter" },
-    { name = "matplotlib" },
     { name = "mypy" },
     { name = "pytest" },
     { name = "ruff" },
@@ -1777,8 +1776,7 @@ requires-dist = [
     { name = "ipykernel" },
     { name = "jupyter", marker = "extra == 'dev'", specifier = ">=1.0.0" },
     { name = "llm", editable = "llm" },
-    { name = "matplotlib" },
-    { name = "matplotlib", marker = "extra == 'dev'", specifier = ">=1.0.0" },
+    { name = "matplotlib", specifier = "==3.10.6" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
     { name = "pytest", marker = "extra == 'test'", specifier = ">=8.0.0" },