feat: implement bpe algorithm

This commit is contained in:
Sergey Penkovsky
2025-07-11 12:21:33 +03:00
parent 45eaaabd51
commit 362a7483e6
15 changed files with 714 additions and 1 deletions

13
tests/conftest.py Normal file
View File

@@ -0,0 +1,13 @@
import pytest
from simple_llm.tokenizer.simple_bpe import SimpleBPE
from simple_llm.tokenizer.optimize_bpe import OptimizeBPE
@pytest.fixture(scope="session")
def large_text():
    """Build a large text sample for the tests.

    The fixture is session-scoped, so the string is created once and
    shared by every test that requests it.

    Returns:
        A short Russian phrase repeated 1000 times, with the repetitions
        joined by single spaces.
    """
    phrase = "мама мыла раму"
    repetitions = 1000
    return " ".join(phrase for _ in range(repetitions))
@pytest.fixture(
    params=[SimpleBPE, OptimizeBPE],
)
def bpe_class(request):
    """Provide each BPE implementation class to the requesting test.

    The fixture is parametrized, so a test that uses it runs once per
    class listed in ``params``.

    Returns:
        The class (not an instance) selected for the current run.
    """
    selected_class = request.param
    return selected_class