Mirror of https://github.com/amazon-science/chronos-forecasting.git (synced 2024-11-25 16:51:05 +08:00)
ac6ee36ace
Fixes https://github.com/amazon-science/chronos-forecasting/issues/181. Chronos' tokenizer has a vocabulary size of `n_tokens`. Among these, `n_special_tokens` are reserved for EOS, PAD, etc., and `n_tokens - n_special_tokens` are allocated to numerical values. However, the provided `MeanScaleUniformBins` tokenizer creates `n_tokens - n_special_tokens + 1` different buckets, resulting in a total of `n_tokens + 1` possible tokens. This causes training and inference errors whenever a data point falls into the largest bucket, since the model requires `0 <= token_id < n_tokens`. This PR modifies the `MeanScaleUniformBins` tokenizer so that it creates one less bucket for numerical values.

---

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

---------

Co-authored-by: Lorenzo Stella <lorenzostella@gmail.com>
pyproject.toml · 20 lines · 522 B · TOML
[project]
name = "chronos"
version = "1.2.1"
requires-python = ">=3.8"
license = { file = "LICENSE" }
dependencies = [
  "torch~=2.0",  # package was tested on 2.2
  "transformers~=4.30",
  "accelerate",
]

[project.optional-dependencies]
test = ["pytest~=8.0", "numpy~=1.21"]
typecheck = ["mypy~=1.9"]
training = ["gluonts[pro]", "numpy", "tensorboard", "typer", "typer-config", "joblib", "scikit-learn"]
evaluation = ["gluonts[pro]", "datasets", "numpy", "typer"]

[tool.mypy]
ignore_missing_imports = true