mirror of
https://github.com/pese-git/llm-arch-research.git
synced 2026-01-23 21:10:54 +00:00
feat: add LLaMA model implementation with RoPE positional encoding
- Added LLaMA model architecture with RMSNorm and SwiGLU activation
- Implemented Rotary Positional Embeddings (RoPE) for better positional encoding
- Created training script for LLaMA with BPE tokenizer
- Fixed matplotlib dependency version in uv.lock
- Added LLaMA module initialization

The implementation includes:
- TokenEmbeddings, HeadAttention, MultiHeadAttention with RoPE support
- RMSNorm normalization layer
- SwiGLU feed-forward activation
- Cached decoder implementation for efficient generation
This commit is contained in:
4
uv.lock
generated
4
uv.lock
generated
@@ -1759,7 +1759,6 @@ dependencies = [
|
||||
dev = [
|
||||
{ name = "black" },
|
||||
{ name = "jupyter" },
|
||||
{ name = "matplotlib" },
|
||||
{ name = "mypy" },
|
||||
{ name = "pytest" },
|
||||
{ name = "ruff" },
|
||||
@@ -1777,8 +1776,7 @@ requires-dist = [
|
||||
{ name = "ipykernel" },
|
||||
{ name = "jupyter", marker = "extra == 'dev'", specifier = ">=1.0.0" },
|
||||
{ name = "llm", editable = "llm" },
|
||||
{ name = "matplotlib" },
|
||||
{ name = "matplotlib", marker = "extra == 'dev'", specifier = ">=1.0.0" },
|
||||
{ name = "matplotlib", specifier = "==3.10.6" },
|
||||
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8.0" },
|
||||
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
|
||||
{ name = "pytest", marker = "extra == 'test'", specifier = ">=8.0.0" },
|
||||
|
||||
Reference in New Issue
Block a user