fix: update PyTorch mask types and BPE tokenizer serialization

- Replace deprecated torch.uint8 and .byte() with torch.bool in GPT.generate
- Add save/load methods to BPETokenizer for proper merges and vocab_list serialization
- Update dependencies in pyproject.toml
2026-01-24 05:21:16 +00:00 · 2025-10-05 08:09:30 +03:00
parent ec07546ea8
commit f4bdc81829
4 changed files with 110 additions and 9 deletions
--- a/llm/pyproject.toml
+++ b/llm/pyproject.toml
@@ -12,6 +12,13 @@ dependencies = [
    "numpy>=1.24.0",
 ]

+[project.optional-dependencies]
+test = [
+    "pytest>=7.0.0",
+    "pytest-cov>=4.0.0",
+    "pytest-mock>=3.0.0",
+]
+
 [build-system]
 requires = ["uv_build>=0.8.22,<0.9.0"]
 build-backend = "uv_build"