feat: add LLaMA model implementation with RoPE positional encoding

- Added LLaMA model architecture with RMSNorm and SwiGLU activation
- Implemented Rotary Position Embeddings (RoPE), applied to queries and keys in attention
- Created training script for LLaMA with BPE tokenizer
- Pinned matplotlib to 3.10.6 in uv.lock
- Added LLaMA module initialization

The implementation includes:
- TokenEmbeddings, HeadAttention, MultiHeadAttention with RoPE support
- RMSNorm normalization layer
- SwiGLU feed-forward activation
- Cached decoder implementation for efficient generation
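
For orientation, a minimal PyTorch sketch of three of the named building blocks (RMSNorm, SwiGLU, and a RoPE helper) follows. All class, function, and parameter names in it are illustrative assumptions and do not necessarily match the identifiers used in this repository:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class RMSNorm(nn.Module):
    """Root-mean-square norm: rescale by the RMS of the features,
    with a learned gain and no mean subtraction or bias."""

    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return self.weight * (x * rms)


class SwiGLU(nn.Module):
    """Gated feed-forward block: silu(x @ W_gate) * (x @ W_up), projected back down."""

    def __init__(self, dim: int, hidden_dim: int):
        super().__init__()
        self.w_gate = nn.Linear(dim, hidden_dim, bias=False)
        self.w_up = nn.Linear(dim, hidden_dim, bias=False)
        self.w_down = nn.Linear(hidden_dim, dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.w_down(F.silu(self.w_gate(x)) * self.w_up(x))


def apply_rope(x: torch.Tensor, base: float = 10000.0) -> torch.Tensor:
    """Rotate each consecutive channel pair of a query or key tensor by a
    position-dependent angle. x: (batch, seq_len, n_heads, head_dim), head_dim even."""
    _, seq_len, _, head_dim = x.shape
    # One frequency per channel pair, decaying geometrically with the pair index.
    freqs = base ** (-torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim)
    angles = torch.arange(seq_len, dtype=torch.float32)[:, None] * freqs[None, :]
    cos = angles.cos()[None, :, None, :]  # broadcast over batch and heads
    sin = angles.sin()[None, :, None, :]
    x1, x2 = x[..., 0::2], x[..., 1::2]
    # 2-D rotation of each (x1, x2) pair, then re-interleave the channels.
    rotated = torch.stack((x1 * cos - x2 * sin, x1 * sin + x2 * cos), dim=-1)
    return rotated.flatten(-2)


if __name__ == "__main__":
    # Smoke test: every component preserves its input shape.
    q = torch.randn(2, 16, 4, 64)   # (batch, seq, heads, head_dim)
    assert apply_rope(q).shape == q.shape
    h = torch.randn(2, 16, 256)
    assert RMSNorm(256)(h).shape == h.shape
    assert SwiGLU(256, 688)(h).shape == h.shape
```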
Author: Sergey Penkovsky
Date:   2025-10-06 13:26:20 +03:00
Parent: 9898e8ee83
Commit: f30cd530a9

4 changed files with 626 additions and 3 deletions

uv.lock (generated)

@@ -1759,7 +1759,6 @@ dependencies = [
 dev = [
     { name = "black" },
     { name = "jupyter" },
-    { name = "matplotlib" },
     { name = "mypy" },
     { name = "pytest" },
     { name = "ruff" },
@@ -1777,8 +1776,7 @@ requires-dist = [
     { name = "ipykernel" },
     { name = "jupyter", marker = "extra == 'dev'", specifier = ">=1.0.0" },
     { name = "llm", editable = "llm" },
-    { name = "matplotlib" },
-    { name = "matplotlib", marker = "extra == 'dev'", specifier = ">=1.0.0" },
+    { name = "matplotlib", specifier = "==3.10.6" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
     { name = "pytest", marker = "extra == 'test'", specifier = ">=8.0.0" },