mirror of
https://github.com/pese-git/llm-arch-research.git
synced 2026-01-23 21:10:54 +00:00
feat(mistral): add Mistral model implementation and configs
- implement Mistral model in llm/models/mistral/mistral.py with GroupedQueryAttention, SwiGLU, RoPE, and sliding window attention
- add __init__.py for module export
- add config files for Mistral training and generation
- update universal experiment runner to support Mistral model
- add notebook for Mistral experiments
This commit is contained in:
@@ -42,6 +42,9 @@ def load_model_class(model_name):
|
||||
elif model_name.lower() == 'llama':
|
||||
from llm.models.llama import Llama
|
||||
return Llama
|
||||
elif model_name.lower() == 'mistral':
|
||||
from llm.models.mistral import Mistral
|
||||
return Mistral
|
||||
else:
|
||||
raise ValueError(f"Модель '{model_name}' не поддерживается.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user