update docs & fix bug (#1926)

Jintao 2024-09-04 11:32:59 +08:00 committed by GitHub
parent 8c41771e9f
commit 4b72dc834d
22 changed files with 72 additions and 79 deletions


@@ -69,8 +69,8 @@ You can contact us and communicate with us by adding our group:
- 🔥2024.08.07: Support for using vLLM for accelerating inference and deployment of multimodal large models such as the llava series and phi3-vision models. You can refer to the [Multimodal & vLLM Inference Acceleration Documentation](docs/source_en/Multi-Modal/vllm-inference-acceleration.md) for more information.
- 2024.08.06: Support for minicpm-v-v2_6-chat is available. You can use `swift infer --model_type minicpm-v-v2_6-chat` for inference experience. Best practices can be found [here](https://github.com/modelscope/swift/issues/1613).
- 2024.08.06: Supports internlm2.5 series of 1.8b and 20b. Experience it using `swift infer --model_type internlm2_5-1_8b-chat`.
- 🔥2024.08.05: Support evaluation for multi-modal models! Same command with [new datasets](https://swift.readthedocs.io/en/latest/LLM/LLM-eval.html#introduction).
- 🔥2024.08.02: Support FourierFT. Use `--sft_type fourierft` to begin. Check the parameter documentation [here](https://swift.readthedocs.io/en/latest/LLM/Command-line-parameters.html).
- 🔥2024.08.05: Support evaluation for multi-modal models! Same command with [new datasets](https://swift.readthedocs.io/en/latest/Instruction/LLM-eval.html#introduction).
- 🔥2024.08.02: Support FourierFT. Use `--sft_type fourierft` to begin. Check the parameter documentation [here](https://swift.readthedocs.io/en/latest/Instruction/Command-line-parameters.html).
- 🔥2024.07.29: Support the use of lmdeploy for inference acceleration of LLM and VLM models. Documentation can be found [here](docs/source_en/Multi-Modal/LmDeploy-inference-acceleration.md).
- 🔥2024.07.24: Support DPO/ORPO/SimPO/CPO alignment algorithms for vision MLLMs; training scripts can be found in the [document](docs/source_en/Multi-Modal/human-preference-alignment-training-documentation.md). Supports the RLAIF-V dataset.
- 🔥2024.07.24: Support using Megatron for CPT and SFT on the Qwen2 series. You can refer to the [Megatron training documentation](docs/source_en/LLM/Megatron-training.md).
@@ -89,7 +89,7 @@ You can contact us and communicate with us by adding our group:
- 2024.07.06: Support codegeex4-9b-chat.
- 2024.07.04: Support internlm2_5-7b series: internlm2_5-7b, internlm2_5-7b-chat, internlm2_5-7b-chat-1m.
- 2024.07.02: Support for `llava1_6-vicuna-7b-instruct`, `llava1_6-vicuna-13b-instruct` and other llava-hf models. For best practices, refer to [here](docs/source_en/Multi-Modal/llava-best-practice.md).
- 🔥2024.06.29: Support [eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass) for evaluation! We now support over 50 eval datasets such as `BoolQ, ocnli, humaneval, math, ceval, mmlu, gsm8k, ARC_e`; please check our [Eval Doc](https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/LLM-eval.md) to begin! Next sprint we will support multi-modal and Agent evaluation, so remember to follow us : )
- 🔥2024.06.29: Support [eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass) for evaluation! We now support over 50 eval datasets such as `BoolQ, ocnli, humaneval, math, ceval, mmlu, gsm8k, ARC_e`; please check our [Eval Doc](https://github.com/modelscope/swift/blob/main/docs/source_en/Instruction/LLM-eval.md) to begin! Next sprint we will support multi-modal and Agent evaluation, so remember to follow us : )
- 🔥2024.06.28: Support for **Florence** series models! See the [document](docs/source_en/Multi-Modal/florence-best-pratice.md)
- 🔥2024.06.28: Support for Gemma2 series models: gemma2-9b, gemma2-9b-instruct, gemma2-27b, gemma2-27b-instruct.
@@ -109,7 +109,7 @@ You can contact us and communicate with us by adding our group:
- 🔥2024.05.17: Support peft=0.11.0. Meanwhile, 3 new tuners are supported: `BOFT`, `Vera` and `Pissa`. Use `--sft_type boft/vera` to use BOFT or Vera, and use `--init_lora_weights pissa` with `--sft_type lora` to use Pissa.
- 2024.05.16: Supports Llava-Next (Stronger) series models. For best practice, you can refer to [here](https://github.com/modelscope/swift/tree/main/docs/source_en/Multi-Modal/llava-best-practice.md).
- 🔥2024.05.13: Support Yi-1.5 series models, use `--model_type yi-1_5-9b-chat` to begin!
- 2024.05.11: Support for qlora training and quantized inference using [hqq](https://github.com/mobiusml/hqq) and [eetq](https://github.com/NetEase-FuXi/EETQ). For more information, see the [LLM Quantization Documentation](https://github.com/modelscope/swift/tree/main/docs/source_en/LLM/LLM-quantization-and-export.md).
- 2024.05.11: Support for qlora training and quantized inference using [hqq](https://github.com/mobiusml/hqq) and [eetq](https://github.com/NetEase-FuXi/EETQ). For more information, see the [LLM Quantization Documentation](https://github.com/modelscope/swift/tree/main/docs/source_en/Instruction/LLM-quantization-and-export.md).
- 2024.05.10: Support splitting a sequence across multiple GPUs to reduce memory usage. To use this feature, run `pip install .[seq_parallel]`, then add `--sequence_parallel_size n` to your DDP script to begin!
- 2024.05.08: Support the DeepSeek-V2-Chat model; you can refer to [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/deepseek-v2-chat/lora_ddp_ds3/sft.sh). Support the InternVL-Chat-V1.5-Int8 model; for best practices, you can refer to [here](https://github.com/modelscope/swift/tree/main/docs/source_en/Multi-Modal/internvl-best-practice.md).
- 🔥2024.05.07: Supports **ORPO** training! See [document](https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/ORPO.md) to start training!
@@ -154,7 +154,7 @@ You can contact us and communicate with us by adding our group:
- 2024.02.25: Support `swift export` to quantize models using **AWQ/GPTQ** and push to ModelScope Hub. See documentation: [LLM Quantization](docs/source_en/Instruction/LLM-quantization-and-export.md).
- 2024.02.22: Support gemma series: gemma-2b, [gemma-2b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/gemma_2b_instruct), gemma-7b, gemma-7b-instruct.
- 2024.02.16: Support deepseek-math series: deepseek-math-7b, deepseek-math-7b-instruct, deepseek-math-7b-chat.
- 🔥2024.02.05: Support **Qwen1.5** series models, see [model list](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B) for all supported Qwen1.5 models. Provide fine-tuning scripts for [qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8).
- 🔥2024.02.05: Support **Qwen1.5** series models, see [model list](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B) for all supported Qwen1.5 models. Provide fine-tuning scripts for [qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8).
- 2024.02.05: Support training of diffusion models such as **SDXL**, **SD**, **ControlNet**, as well as **DreamBooth** training. See corresponding [training scripts](https://github.com/modelscope/swift/tree/main/examples/pytorch/sdxl/scripts) for details.
- 2024.02.01: Support minicpm series: [minicpm-2b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/minicpm_2b_sft_chat), minicpm-2b-chat.
- 🔥2024.02.01: Support dataset mixing to reduce **catastrophic forgetting**. Use `--train_dataset_mix_ratio 2.0` to enable training! We also open sourced the general knowledge dataset [ms-bench](https://www.modelscope.cn/datasets/iic/ms_bench/summary).
@@ -182,9 +182,9 @@ You can contact us and communicate with us by adding our group:
- 2023.12.18: Support VLLM for inference acceleration.
- 2023.12.15: Support deepseek, deepseek-coder series: deepseek-7b, deepseek-7b-chat, deepseek-67b, deepseek-67b-chat, openbuddy-deepseek-67b-chat, deepseek-coder-1_3b, deepseek-coder-1_3b-instruct, deepseek-coder-6_7b, deepseek-coder-6_7b-instruct, deepseek-coder-33b, deepseek-coder-33b-instruct.
- 2023.12.13: Support mistral-7b-instruct-v2, [mixtral-moe-7b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe), [mixtral-moe-7b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe_instruct).
- 2023.12.09: Support `freeze_parameters_ratio` parameter as a compromise between lora and full-parameter training. Corresponding sh can be found in [full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). Support `disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc` parameters, see [command line arguments](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.md) for details.
- 2023.12.09: Support `freeze_parameters_ratio` parameter as a compromise between lora and full-parameter training. Corresponding sh can be found in [full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). Support `disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc` parameters, see [command line arguments](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.md) for details.
- 2023.12.08: Support [sus-34b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/sus_34b_chat), support yi-6b-200k, yi-34b-200k.
- 2023.12.07: Support [Multi-Node DDP training](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
- 2023.12.07: Support [Multi-Node DDP training](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
- 2023.12.05: Support models: zephyr-7b-beta-chat, openbuddy-zephyr-7b-chat. Support datasets: hc3-zh, hc3-en.
- 🔥2023.12.02: [Self-cognition fine-tuning best practices](docs/source_en/LLM/Self-cognition-best-practice.md), **10 minutes to fine-tune a large model for self-cognition**, create your own unique large model.
- 🔥2023.11.30: Support training and inference of **qwen-1_8b**, **qwen-72b**, **qwen-audio** series models. Corresponding sh scripts can be found in [qwen_1_8b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_1_8b_chat), [qwen_72b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_72b_chat), [qwen_audio_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_audio_chat)


@@ -70,8 +70,8 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 🔥2024.08.07: 支持使用vllm对多模态大模型: llava系列, internvl2系列, phi3-vision, minicpm-v2.5进行推理加速和部署. 可以查看[多模态&vLLM推理加速文档](docs/source/Multi-Modal/vLLM推理加速文档.md)获取更多信息.
- 2024.08.06: 支持minicpm-v-v2_6-chat, 使用`swift infer --model_type minicpm-v-v2_6-chat`进行推理体验, 最佳实践可以查看[这里](https://github.com/modelscope/swift/issues/1613).
- 2024.08.06: 支持internlm2.5的1.8b和20b系列. 使用`swift infer --model_type internlm2_5-1_8b-chat`进行体验.
- 🔥2024.08.05: 支持多模态数据集的评测!命令行完全一致,新增了许多[多模态数据集](https://swift.readthedocs.io/zh-cn/latest/LLM/LLM%E8%AF%84%E6%B5%8B%E6%96%87%E6%A1%A3.html#id2).
- 🔥2024.08.02: 支持Fourier Ft训练. 使用方式为`--sft_type fourierft`, 参数可以参考[这里](https://swift.readthedocs.io/zh-cn/latest/LLM/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html).
- 🔥2024.08.05: 支持多模态数据集的评测!命令行完全一致,新增了许多[多模态数据集](https://swift.readthedocs.io/zh-cn/latest/Instruction/LLM%E8%AF%84%E6%B5%8B%E6%96%87%E6%A1%A3.html#id2).
- 🔥2024.08.02: 支持Fourier Ft训练. 使用方式为`--sft_type fourierft`, 参数可以参考[这里](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html).
- 🔥2024.07.29: 支持使用lmdeploy对LLM和VLM模型进行推理加速. 文档可以查看[这里](docs/source/Multi-Modal/LmDeploy推理加速文档.md).
- 🔥2024.07.24: 人类偏好对齐算法支持视觉多模态大模型, 包括DPO/ORPO/SimPO/CPO, 训练参考[文档](docs/source/Multi-Modal/人类偏好对齐训练文档.md). 支持数据集RLAIF-V.
- 🔥2024.07.24: 支持使用megatron对qwen2系列进行CPT和SFT. 可以查看[megatron训练文档](docs/source/LLM/Megatron训练文档.md).
@@ -90,7 +90,7 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 2024.07.06: 支持codegeex4-9b-chat.
- 2024.07.04: 支持internlm2_5-7b系列: internlm2_5-7b, internlm2_5-7b-chat, internlm2_5-7b-chat-1m.
- 2024.07.02: 支持`llava1_6-vicuna-7b-instruct`, `llava1_6-vicuna-13b-instruct`等llava-hf模型. 最佳实践可以查看[这里](docs/source/Multi-Modal/llava最佳实践.md).
- 🔥2024.06.29: 支持[eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass)评测! 我们支持了包含`BoolQ, ocnli, humaneval, math, ceval, mmlu, gsk8k, ARC_e`等50+标准数据集在内的评测流程, 请查看我们的[评测文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM评测文档.md)来使用。下个迭代我们会支持多模态评测和Agent评测记得持续关注我们: )
- 🔥2024.06.29: 支持[eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass)评测! 我们支持了包含`BoolQ, ocnli, humaneval, math, ceval, mmlu, gsk8k, ARC_e`等50+标准数据集在内的评测流程, 请查看我们的[评测文档](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM评测文档.md)来使用。下个迭代我们会支持多模态评测和Agent评测记得持续关注我们: )
- 🔥2024.06.28: 支持**Florence**系列模型: 可以查看[Florence最佳实践](docs/source/Multi-Modal/florence最佳实践.md).
- 🔥2024.06.28: 支持**Gemma2**系列模型: gemma2-9b, gemma2-9b-instruct, gemma2-27b, gemma2-27b-instruct.
- 🔥2024.06.18: 支持**DeepSeek-Coder-v2**系列模型! 使用model_type`deepseek-coder-v2-instruct`和`deepseek-coder-v2-lite-instruct`来开启训练和推理.
@@ -109,7 +109,7 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 🔥2024.05.17: 支持peft=0.11.0. 同时支持了三个新的tuner方法 `BOFT`, `Vera` 和 `Pissa`. 使用 `--sft_type boft/vera` 开启BOFT或者Vera, 使用 `--init_lora_weights pissa` 以及 `--sft_type lora` 来使用 Pissa.
- 2024.05.16: 支持Llava-Next (Stronger)系列模型,最佳实践可以查看[这里](https://github.com/modelscope/swift/tree/main/docs/source/Multi-Modal/llava最佳实践.md).
- 🔥2024.05.13: 支持Yi-1.5系列模型,使用`--model_type yi-1_5-9b-chat`等开始体验
- 2024.05.11: 支持使用[hqq](https://github.com/mobiusml/hqq)和[eetq](https://github.com/NetEase-FuXi/EETQ)进行qlora训练和量化推理可以查看[LLM量化与导出文档](https://github.com/modelscope/swift/tree/main/docs/source/LLM/LLM量化与导出文档.md)
- 2024.05.11: 支持使用[hqq](https://github.com/mobiusml/hqq)和[eetq](https://github.com/NetEase-FuXi/EETQ)进行qlora训练和量化推理可以查看[LLM量化与导出文档](https://github.com/modelscope/swift/tree/main/docs/source/Instruction/LLM量化与导出文档.md)
- 2024.05.10: 支持序列并行. 先安装`pip install .[seq_parallel]`, 之后在DDP环境中添加`--sequence_parallel_size n`即可使用!
- 2024.05.08: 支持DeepSeek-V2-Chat模型, 训练参考[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/deepseek-v2-chat/lora_ddp_ds3/sft.sh)。支持InternVL-Chat-V1.5-Int8模型最佳实践参考[这里](https://github.com/modelscope/swift/tree/main/docs/source/Multi-Modal/internvl最佳实践.md).
- 🔥2024.05.07: 支持**ORPO**训练,使用`swift orpo`来开始训练, 最佳实践可以查看[这里](https://github.com/modelscope/swift/tree/main/docs/source/LLM/ORPO算法最佳实践.md)
@@ -151,10 +151,10 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 2024.03.06: 支持AWQ量化模型的训练和推理, 使用[这个Qwen1.5-AWQ模型脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_awq/lora/sft.sh)开始训练, 并支持[yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_zero3)的训练和推理.
- 🔥2024.02.29: 支持[LLaMA PRO](https://arxiv.org/pdf/2401.02415.pdf), 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh)即可开始训练.
- 🔥2024.02.29: 支持[LoRA+](https://arxiv.org/pdf/2402.12354.pdf), 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh)即可开始训练.
- 2024.02.25: 支持`swift export`, 对模型进行**AWQ/GPTQ**量化导出, 以及推送ModelScope Hub. 具体可以查看: [LLM量化与导出文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM量化与导出文档.md).
- 2024.02.25: 支持`swift export`, 对模型进行**AWQ/GPTQ**量化导出, 以及推送ModelScope Hub. 具体可以查看: [LLM量化与导出文档](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM量化与导出文档.md).
- 2024.02.22: 支持gemma系列: gemma-2b, [gemma-2b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/gemma_2b_instruct), gemma-7b, gemma-7b-instruct.
- 2024.02.16: 支持deepseek-math系列: deepseek-math-7b, deepseek-math-7b-instruct, deepseek-math-7b-chat.
- 🔥2024.02.05: 支持**Qwen1.5**系列模型, 支持的所有Qwen1.5系列模型请查看[模型列表](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B). 提供了[qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8)微调的脚本.
- 🔥2024.02.05: 支持**Qwen1.5**系列模型, 支持的所有Qwen1.5系列模型请查看[模型列表](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B). 提供了[qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8)微调的脚本.
- 2024.02.05: 支持扩散模型如**SDXL**, **SD**, **ControlNet**的训练, 同时也支持**DreamBooth**的训练, 详情可以查看对应的[训练脚本](https://github.com/modelscope/swift/tree/main/examples/pytorch/sdxl/scripts).
- 2024.02.01: 支持minicpm系列: [minicpm-2b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/minicpm_2b_sft_chat), minicpm-2b-chat.
- 🔥2024.02.01: 支持数据集打混来减少 **灾难性遗忘问题**. 使用`--train_dataset_mix_ratio 2.0`开启训练!同时我们也开源了通用知识数据集 [ms-bench](https://www.modelscope.cn/datasets/iic/ms_bench/summary).
@@ -182,9 +182,9 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 2023.12.18: 支持VLLM进行推理加速.
- 2023.12.15: 支持deepseek, deepseek-coder系列: deepseek-7b, deepseek-7b-chat, deepseek-67b, deepseek-67b-chat, openbuddy-deepseek-67b-chat, deepseek-coder-1_3b, deepseek-coder-1_3b-instruct, deepseek-coder-6_7b, deepseek-coder-6_7b-instruct, deepseek-coder-33b, deepseek-coder-33b-instruct.
- 2023.12.13: 支持mistral-7b-instruct-v2, [mixtral-moe-7b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe), [mixtral-moe-7b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe_instruct).
- 2023.12.09: 支持`freeze_parameters_ratio`参数, 作为lora和全参数训练的折中方案. 对应的sh可以查看[full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). 支持`disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc`参数, 具体可以查看[命令行参数](https://github.com/modelscope/swift/blob/main/docs/source/LLM/命令行参数.md).
- 2023.12.09: 支持`freeze_parameters_ratio`参数, 作为lora和全参数训练的折中方案. 对应的sh可以查看[full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). 支持`disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc`参数, 具体可以查看[命令行参数](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/命令行参数.md).
- 2023.12.08: 支持[sus-34b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/sus_34b_chat), 支持yi-6b-200k, yi-34b-200k.
- 2023.12.07: 支持[Multi-Node DDP训练](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
- 2023.12.07: 支持[Multi-Node DDP训练](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
- 2023.12.05: 支持模型: zephyr-7b-beta-chat, openbuddy-zephyr-7b-chat. 支持数据集: hc3-zh, hc3-en.
- 🔥 2023.12.02: [自我认知微调最佳实践](https://github.com/modelscope/swift/blob/main/docs/source/LLM/自我认知微调最佳实践.md), **10分钟对大模型进行自我认知微调**, 创建专属于自己的大模型.
- 🔥 2023.11.30: 支持**qwen-1_8b**, **qwen-72b**, **qwen-audio**系列模型的训练的推理. 对应的sh脚本可以查看[qwen_1_8b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_1_8b_chat), [qwen_72b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_72b_chat), [qwen_audio_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_audio_chat)


@@ -24,7 +24,7 @@ pip install -e '.[llm]'
pip install deepspeed -U
# 如果你想要使用基于auto_gptq的qlora训练. (推荐, 效果优于bnb)
# 支持auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型`
# 支持auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/支持的模型和数据集.md#模型`
# auto_gptq和cuda版本有对应关系请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本
pip install auto_gptq -U
@@ -83,7 +83,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
--output_dir output \
# 使用自己的数据集
# 自定义数据集格式查看: https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E8%87%AA%E5%AE%9A%E4%B9%89%E4%B8%8E%E6%8B%93%E5%B1%95.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86
# 自定义数据集格式查看: https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E8%87%AA%E5%AE%9A%E4%B9%89%E4%B8%8E%E6%8B%93%E5%B1%95.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86
CUDA_VISIBLE_DEVICES=0 swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset chatml.jsonl \


@@ -15,7 +15,7 @@ pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
pip install 'ms-swift[llm]' -U
# 如果你想要使用基于auto_gptq的模型进行推理.
# 使用auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型`
# 使用auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/支持的模型和数据集.md#模型`
# auto_gptq和cuda版本有对应关系请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本
pip install auto_gptq -U


@@ -298,7 +298,7 @@ RLHF参数继承了sft参数, 除此之外增加了以下参数:
- `--model_revision`: 默认值为`None`. 具体的参数介绍可以在`sft命令行参数`中查看. 如果`model_id_or_path`为None或者是本地的模型目录, 则该参数失效.
- `--🔥sft_type`: 默认值为`'lora'`, 具体的参数介绍可以在`sft命令行参数`中查看.
- `--🔥template_type`: 默认值为`'AUTO'`, 具体的参数介绍可以在`sft命令行参数`中查看.
- `--🔥infer_backend`: 你可以选择'AUTO', 'vllm', 'pt'. 默认使用'AUTO', 进行智能选择, 即如果没有传入`ckpt_dir`或使用全参数微调, 并且安装了vllm且模型支持vllm则使用vllm引擎, 否则使用原生torch进行推理. vllm环境准备可以参考[VLLM推理加速与部署](VLLM推理加速与部署.md#环境准备), vllm支持的模型可以查看[支持的模型](../LLM/支持的模型和数据集.md#模型).
- `--🔥infer_backend`: 你可以选择'AUTO', 'vllm', 'pt'. 默认使用'AUTO', 进行智能选择, 即如果没有传入`ckpt_dir`或使用全参数微调, 并且安装了vllm且模型支持vllm则使用vllm引擎, 否则使用原生torch进行推理. vllm环境准备可以参考[VLLM推理加速与部署](VLLM推理加速与部署.md#环境准备), vllm支持的模型可以查看[支持的模型](../Instruction/支持的模型和数据集.md#模型).
- `--🔥ckpt_dir`: 必填项, 值为SFT阶段保存的checkpoint路径, e.g. `'/path/to/your/vx-xxx/checkpoint-xxx'`.
- `--load_args_from_ckpt_dir`: 是否从`ckpt_dir`的`sft_args.json`文件中读取模型配置信息. 默认是`True`.
- `--🔥load_dataset_config`: 该参数只有在`--load_args_from_ckpt_dir true`时才生效. 即是否从`ckpt_dir`的`sft_args.json`文件中读取数据集相关的配置信息. 默认为`False`.
@@ -426,7 +426,7 @@ app-ui参数继承了infer参数, 除此之外增加了以下参数:
deploy参数继承了infer参数, 除此之外增加了以下参数:
- `--🔥host`: 默认为`'127.0.0.1`. 要使其在非本机上可访问, 可设置为'0.0.0.0'.
- `--host`: 默认为`'0.0.0.0'`.
- `--port`: 默认为`8000`.
- `--api_key`: 默认为`None`, 即不对请求进行api_key验证.
- `--ssl_keyfile`: 默认为`None`.


@@ -379,8 +379,8 @@
|qwen2-vl-7b-instruct-awq|[qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
|glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|✘|✘|✘|✘|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|idefics3-8b-llama3|[AI-ModelScope/Idefics3-8B-Llama3](https://modelscope.cn/models/AI-ModelScope/Idefics3-8B-Llama3/summary)|^(model.text_model\|model.connector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|idefics3|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision|[HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|llava1_6-mistral-7b-instruct|[swift/llava-v1.6-mistral-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-mistral-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-mistral|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)|
|llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
|llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
@@ -391,16 +391,16 @@
|llava-onevision-qwen2-0_5b-ov|[AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-0.5b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)|
|llava-onevision-qwen2-7b-ov|[AI-ModelScope/llava-onevision-qwen2-7b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-7b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-7b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-7b-ov-hf)|
|llava-onevision-qwen2-72b-ov|[AI-ModelScope/llava-onevision-qwen2-72b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-72b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-72b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-72b-ov-hf)|
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✘|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
|llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
|llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|
|llava-next-video-34b-instruct|[swift/LLaVA-NeXT-Video-34B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-34B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video-yi|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-34B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-34B-hf)|
|yi-vl-6b-chat|[01ai/Yi-VL-6B](https://modelscope.cn/models/01ai/Yi-VL-6B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B)|
|yi-vl-34b-chat|[01ai/Yi-VL-34B](https://modelscope.cn/models/01ai/Yi-VL-34B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B)|
|llava-llama-3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✘|✔|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|llava-llama3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✔|✘|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|internlm-xcomposer2-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b)|
|internlm-xcomposer2-4khd-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2-4khd|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-4khd-7b](https://huggingface.co/internlm/internlm-xcomposer2-4khd-7b)|
|internlm-xcomposer2_5-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2_5|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2d5-7b](https://huggingface.co/internlm/internlm-xcomposer2d5-7b)|


@@ -267,7 +267,7 @@ The following parameters take effect when the `sft_type` is set to `reft`.
### Liger Parameters
- `--use_liger`: Use liger-kernel to train.
-
## PT Parameters
PT parameters inherit from the SFT parameters with some modifications to the default values:
@@ -426,7 +426,7 @@ app-ui parameters inherit from infer parameters, with the following added parame
deploy parameters inherit from infer parameters, with the following added parameters:
- `--🔥host`: Default is `'127.0.0.1`. To make it accessible on the local network, you can set it to '0.0.0.0'.
- `--host`: Default is `'0.0.0.0'`.
- `--port`: Default is `8000`.
- `--api_key`: The default is `None`, meaning that the request will not be subjected to api_key verification.
- `--ssl_keyfile`: Default is `None`.
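
Since `DeployArguments.host` now defaults to `'0.0.0.0'` (see the change further down in this diff), a deployed model is reachable from other machines by default. Below is a minimal client sketch, assuming a server started with `swift deploy` is listening on the default port 8000 and exposes the OpenAI-compatible `/v1` route that the client code in this PR targets; `'qwen-7b-chat'` is only a placeholder model_type.

```python
# Minimal sketch: query a `swift deploy` server through its OpenAI-compatible endpoint.
# Assumes the server runs locally on the default port 8000; 'qwen-7b-chat' is a placeholder.
import requests

resp = requests.post(
    'http://127.0.0.1:8000/v1/chat/completions',
    json={
        'model': 'qwen-7b-chat',  # placeholder model_type
        'messages': [{'role': 'user', 'content': 'Hello!'}],
        'max_tokens': 64,
    },
    timeout=60,
)
print(resp.json()['choices'][0]['message']['content'])
```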


@@ -20,7 +20,7 @@ pip install -e '.[llm]'
pip install deepspeed -U
# If you want to use qlora training based on auto_gptq. (Recommended, better than bnb)
# Models supporting auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/supported-models-and-datasets.md#models`
# Models supporting auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/supported-models-and-datasets.md#models`
# auto_gptq and cuda versions are related, please choose the version according to `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
pip install auto_gptq -U
@@ -79,7 +79,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
--output_dir output \
# Using your own dataset
# custom dataset format: https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/Customization.md#custom-datasets
# custom dataset format: https://github.com/modelscope/swift/blob/main/docs/source_en/Instruction/Customization.md#custom-datasets
CUDA_VISIBLE_DEVICES=0 swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset chatml.jsonl \
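
The `swift sft` invocation above (truncated by the hunk) also has a programmatic equivalent. A hedged sketch, assuming `SftArguments`/`sft_main` are importable from `swift.llm` as in the rest of the docs and that `chatml.jsonl` exists locally in the custom dataset format linked above:

```python
# Hedged sketch of the programmatic equivalent of the `swift sft` command above.
# Assumes ms-swift[llm] is installed and chatml.jsonl follows the custom dataset format.
import os

from swift.llm import SftArguments, sft_main

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
result = sft_main(
    SftArguments(
        model_id_or_path='qwen/Qwen-7B-Chat',
        dataset=['chatml.jsonl'],
        output_dir='output',
    ))
# Assumption: sft_main returns a dict containing the best checkpoint path.
print(result['best_model_checkpoint'])
```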


@@ -13,7 +13,7 @@ GPU devices: A10, 3090, V100, A100 are all supported.
pip install 'ms-swift[llm]' -U
# If you want to use models based on auto_gptq for inference.
# Models using auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/Supported Models and Datasets.md#Models`
# Models using auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/Supported Models and Datasets.md#Models`
# auto_gptq and cuda versions have a correspondence, please select the version according to `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
pip install auto_gptq -U


@@ -379,8 +379,8 @@ The table below introduces all models supported by SWIFT:
|qwen2-vl-7b-instruct-awq|[qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
|glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|✘|✘|✘|✘|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|idefics3-8b-llama3|[AI-ModelScope/Idefics3-8B-Llama3](https://modelscope.cn/models/AI-ModelScope/Idefics3-8B-Llama3/summary)|^(model.text_model\|model.connector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|idefics3|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision|[HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|llava1_6-mistral-7b-instruct|[swift/llava-v1.6-mistral-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-mistral-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-mistral|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)|
|llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
|llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
@@ -391,16 +391,16 @@ The table below introduces all models supported by SWIFT:
|llava-onevision-qwen2-0_5b-ov|[AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-0.5b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)|
|llava-onevision-qwen2-7b-ov|[AI-ModelScope/llava-onevision-qwen2-7b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-7b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-7b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-7b-ov-hf)|
|llava-onevision-qwen2-72b-ov|[AI-ModelScope/llava-onevision-qwen2-72b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-72b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-72b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-72b-ov-hf)|
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✘|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
|llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
|llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|
|llava-next-video-34b-instruct|[swift/LLaVA-NeXT-Video-34B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-34B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video-yi|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-34B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-34B-hf)|
|yi-vl-6b-chat|[01ai/Yi-VL-6B](https://modelscope.cn/models/01ai/Yi-VL-6B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B)|
|yi-vl-34b-chat|[01ai/Yi-VL-34B](https://modelscope.cn/models/01ai/Yi-VL-34B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B)|
|llava-llama-3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✘|✔|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|llava-llama3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✔|✘|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|internlm-xcomposer2-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b)|
|internlm-xcomposer2-4khd-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2-4khd|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-4khd-7b](https://huggingface.co/internlm/internlm-xcomposer2-4khd-7b)|
|internlm-xcomposer2_5-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2_5|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2d5-7b](https://huggingface.co/internlm/internlm-xcomposer2d5-7b)|


@@ -1,4 +1,4 @@
def test_benchmark(infer_backend):
def test_benchmark(infer_backend: str) -> None:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TIMEOUT'] = '-1'


@@ -8,7 +8,7 @@ from swift.utils import stat_array
def write_dataset_info() -> None:
fpaths = ['docs/source/LLM/支持的模型和数据集.md', 'docs/source_en/LLM/Supported-models-datasets.md']
fpaths = ['docs/source/Instruction/支持的模型和数据集.md', 'docs/source_en/Instruction/Supported-models-datasets.md']
pre_texts = []
for fpath in fpaths:
if os.path.exists(fpath):


@@ -4,7 +4,7 @@ from swift.llm import MODEL_MAPPING, ModelType, get_default_lora_target_modules
def get_model_info_table():
fpaths = ['docs/source/LLM/支持的模型和数据集.md', 'docs/source_en/LLM/Supported-models-datasets.md']
fpaths = ['docs/source/Instruction/支持的模型和数据集.md', 'docs/source_en/Instruction/Supported-models-datasets.md']
end_words = [['### 多模态大模型', '## 数据集'], ['### MLLM', '## Datasets']]
model_name_list = ModelType.get_model_name_list()
result = [


@@ -457,8 +457,7 @@ async def inference_lmdeploy_async(request: Union[ChatCompletionRequest, Complet
usage_info = UsageInfo(
prompt_tokens=num_prompt_tokens,
completion_tokens=num_generated_tokens,
total_tokens=num_prompt_tokens + num_generated_tokens,
)
total_tokens=num_prompt_tokens + num_generated_tokens)
finish_reason = None
if output.status.name == 'FINISH':
finish_reason = 'stop'
@@ -477,17 +476,12 @@ async def inference_lmdeploy_async(request: Union[ChatCompletionRequest, Complet
ChatCompletionResponseChoice(
index=0,
message=ChatMessage(role='assistant', content=response, tool_calls=toolcall),
finish_reason=finish_reason,
)
finish_reason=finish_reason)
]
response = ChatCompletionResponse(
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
else:
choices = [CompletionResponseChoice(
index=0,
text=response,
finish_reason=finish_reason,
)]
choices = [CompletionResponseChoice(index=0, text=response, finish_reason=finish_reason)]
response = CompletionResponse(
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
if _args.log_interval > 0:
@@ -676,17 +670,12 @@ async def inference_pt_async(request: Union[ChatCompletionRequest, CompletionReq
ChatCompletionResponseChoice(
index=0,
message=ChatMessage(role='assistant', content=response, tool_calls=toolcall),
finish_reason=None,
)
finish_reason=None)
]
response = ChatCompletionResponse(
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
else:
choices = [CompletionResponseChoice(
index=0,
text=response,
finish_reason=None,
)]
choices = [CompletionResponseChoice(index=0, text=response, finish_reason=None)]
response = CompletionResponse(
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
if _args.log_interval > 0:


@@ -1567,7 +1567,7 @@ class AppUIArguments(InferArguments):
@dataclass
class DeployArguments(InferArguments):
host: str = '127.0.0.1'
host: str = '0.0.0.0'
port: int = 8000
api_key: Optional[str] = None
ssl_keyfile: Optional[str] = None


@@ -230,7 +230,12 @@ def _pre_inference_client(model_type: str,
else:
raise ValueError(f'model_type: {model_type}, model_list: {[model.id for model in model_list.data]}')
assert is_chat_request is not None and is_multimodal is not None
data = {k: v for k, v in request_config.__dict__.items() if not k.startswith('__')}
data = {}
request_config_origin = XRequestConfig()
for k, v in request_config.__dict__.items():
v_origin = getattr(request_config_origin, k)
if v != v_origin:
data[k] = v
url = kwargs.pop('url', None)
if url is None:
url = f'http://{host}:{port}/v1'
@@ -253,9 +258,9 @@
if medias:
medias = convert_to_base64(images=medias)['images']
data[media_key] = medias
if tools and len(tools) > 0:
if tools:
data['tools'] = tools
if tool_choice:
if tool_choice and tool_choice != 'auto':
data['tool_choice'] = tool_choice
return url, data, is_chat_request
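
The rewritten client logic above serializes only the fields whose values differ from a freshly constructed `XRequestConfig`, so client-side defaults no longer override whatever the server would choose, and `tools`/`tool_choice` are sent only when they carry real information. A self-contained illustration of that pattern, using a stand-in dataclass rather than swift's real `XRequestConfig`:

```python
# Stand-in illustration of the "send only non-default fields" pattern used above.
from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeRequestConfig:  # stand-in, not swift's XRequestConfig
    max_tokens: Optional[int] = None
    temperature: float = 1.0
    seed: Optional[int] = None

def non_default_fields(cfg: FakeRequestConfig) -> dict:
    defaults = FakeRequestConfig()
    return {k: v for k, v in cfg.__dict__.items() if v != getattr(defaults, k)}

print(non_default_fields(FakeRequestConfig(temperature=0.3)))  # {'temperature': 0.3}
```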


@@ -129,6 +129,7 @@ class LmdeployGenerationConfig(_LmdeployGenerationConfig):
*,
n: int = 1,
stop_words: Optional[List[int]] = None,
logprobs: Optional[int] = None,
random_seed: Optional[int] = None,
skip_special_tokens: bool = False,
**kwargs,
@@ -146,6 +147,7 @@ class LmdeployGenerationConfig(_LmdeployGenerationConfig):
repetition_penalty=repetition_penalty,
n=n,
stop_words=stop_words,
logprobs=logprobs,
random_seed=random_seed,
skip_special_tokens=skip_special_tokens,
**kwargs)
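
The two hunks above thread a new `logprobs` field through `LmdeployGenerationConfig` into the underlying lmdeploy generation config. A hedged construction sketch, assuming lmdeploy is installed and that the class is importable from the module path shown (the exact path may differ between swift versions):

```python
# Hedged sketch: request per-token log-probabilities via the new `logprobs` field.
# The import path below is an assumption, and lmdeploy must be installed.
from swift.llm.utils.lmdeploy_utils import LmdeployGenerationConfig  # assumed path

gen_config = LmdeployGenerationConfig(
    n=1,
    logprobs=5,               # top-5 log-probabilities per generated token
    random_seed=42,
    skip_special_tokens=False,
)
print(gen_config.logprobs)
```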


@@ -286,7 +286,7 @@ class ModelType:
yi_vl_6b_chat = 'yi-vl-6b-chat'
yi_vl_34b_chat = 'yi-vl-34b-chat'
# llava-llama (xtuner)
llava_llama3_8b_v1_1 = 'llava-llama-3-8b-v1_1'
llava_llama3_8b_v1_1 = 'llava-llama3-8b-v1_1'
# internlm
internlm_7b = 'internlm-7b'
internlm_7b_chat = 'internlm-7b-chat'
@@ -989,7 +989,7 @@ def get_model_tokenizer_cogvlm2(*args, **kwargs):
LoRATM.llava,
TemplateType.llava_llama_instruct,
support_flash_attn=True,
support_lmdeploy=True,
support_vllm=True,
requires=['transformers>=4.36'],
tags=['multi-modal', 'vision'],
hf_model_id='xtuner/llava-llama-3-8b-v1_1-transformers')
@@ -6046,7 +6046,6 @@ def get_model_tokenizer_llava_hf(model_dir: str, *args, **kwargs):
eos_token='</s>',
support_flash_attn=True,
support_vllm=True,
support_lmdeploy=True,
requires=['transformers>=4.36'],
tags=['multi-modal', 'vision'],
hf_model_id='llava-hf/llava-1.5-13b-hf')
@@ -6058,7 +6057,6 @@ def get_model_tokenizer_llava_hf(model_dir: str, *args, **kwargs):
eos_token='</s>',
support_flash_attn=True,
support_vllm=True,
support_lmdeploy=True,
requires=['transformers>=4.36'],
tags=['multi-modal', 'vision'],
hf_model_id='llava-hf/llava-1.5-7b-hf')
@@ -6245,7 +6243,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
LoRATM.llava_llama,
TemplateType.llama3_llava_next,
support_flash_attn=True,
support_lmdeploy=True,
tags=['multi-modal', 'vision'],
function_kwargs={'llm_model_type': 'next_llama'},
hf_model_id='lmms-lab/llama3-llava-next-8b')
@@ -6255,7 +6252,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
LoRATM.llava,
TemplateType.llava_qwen,
support_flash_attn=True,
support_lmdeploy=True,
tags=['multi-modal', 'vision'],
function_kwargs={'llm_model_type': 'next_qwen'},
hf_model_id='lmms-lab/llava-next-72b')
@@ -6265,7 +6261,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
LoRATM.llava,
TemplateType.llava_qwen,
support_flash_attn=True,
support_lmdeploy=True,
tags=['multi-modal', 'vision'],
function_kwargs={'llm_model_type': 'next_qwen'},
hf_model_id='lmms-lab/llava-next-110b')


@@ -12,7 +12,7 @@ def random_uuid() -> str:
@dataclass
class Model:
id: str # model_type
is_chat: Optional[bool] = None # chat model or generation model
is_chat: bool = True # chat model or generation model
is_multimodal: bool = False
object: str = 'model'
@@ -151,8 +151,8 @@ class CompletionResponse:
@dataclass
class DeltaMessage:
role: Literal['system', 'user', 'assistant']
content: str
role: Literal['system', 'user', 'assistant', None] = None
content: Optional[str] = None
tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
@@ -167,7 +167,7 @@ class ChatCompletionResponseStreamChoice:
class ChatCompletionStreamResponse:
model: str
choices: List[ChatCompletionResponseStreamChoice]
usage: UsageInfo
usage: Optional[UsageInfo] = None
id: str = field(default_factory=lambda: f'chatcmpl-{random_uuid()}')
object: str = 'chat.completion.chunk'
created: int = field(default_factory=lambda: int(time.time()))
@@ -184,7 +184,7 @@ class CompletionResponseStreamChoice:
class CompletionStreamResponse:
model: str
choices: List[CompletionResponseStreamChoice]
usage: UsageInfo
usage: Optional[UsageInfo] = None
id: str = field(default_factory=lambda: f'cmpl-{random_uuid()}')
object: str = 'text_completion.chunk'
created: int = field(default_factory=lambda: int(time.time()))
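
These protocol changes relax the streaming types: `usage` may be omitted on a chunk, and a `DeltaMessage` may carry only a role, only a content fragment, or nothing at all on the final chunk. A self-contained illustration with a stand-in dataclass (not swift's own class):

```python
# Stand-in illustration of why the delta fields became optional: stream chunks are sparse.
from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeDeltaMessage:  # stand-in, not swift's DeltaMessage
    role: Optional[str] = None
    content: Optional[str] = None

chunks = [
    FakeDeltaMessage(role='assistant'),   # first chunk: role only
    FakeDeltaMessage(content='Hello'),    # middle chunks: content only
    FakeDeltaMessage(),                   # final chunk: empty delta
]
print(''.join(c.content or '' for c in chunks))  # 'Hello'
```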


@@ -1785,7 +1785,7 @@ class InternLMXComposer2Template(Template):
while i < len(input_ids):
if input_ids[i] == 2: # replace_token
res_input_ids = torch.tensor([1] + input_ids[pre_i:i], device=device)
res_inputs_embeds.append(tok_embeddings(res_input_ids))
res_inputs_embeds.append(tok_embeddings(res_input_ids[None])[0])
wrap_im_mask += [0] * len(res_input_ids)
res_labels += [-100] + labels[pre_i:i]
if len(images) > 0 and idx < images.shape[0]:
@@ -1894,7 +1894,7 @@ class InternvlTemplate(Template):
embedding = self.model.get_input_embeddings()
device = embedding.weight.device
input_ids = data['input_ids']
inputs_embeds = embedding(input_ids).to(device=device)
inputs_embeds = embedding(input_ids[None])[0].to(device=device)
pixel_values = data['pixel_values']
if pixel_values is not None:
pixel_values = pixel_values.to(device=device)
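
Both template fixes above call the embedding layer with `input_ids[None]` and then index `[0]`: a leading batch axis is added for the embedding call (which some wrapped implementations appear to require) and stripped again afterwards. A tiny PyTorch sketch of just that indexing:

```python
# Tiny sketch of the batch-dimension handling used in the template fixes above.
import torch

embedding = torch.nn.Embedding(num_embeddings=100, embedding_dim=8)
input_ids = torch.tensor([3, 14, 15, 92])        # shape: [seq_len]
inputs_embeds = embedding(input_ids[None])[0]    # [1, seq_len, 8] -> [seq_len, 8]
print(inputs_embeds.shape)                       # torch.Size([4, 8])
```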


@@ -714,13 +714,11 @@ def inference_stream(model: PreTrainedModel,
except StopIteration:
is_finished = True
res = {}
generate_ids = template.get_generate_ids(torch.tensor(raw_generate_ids)[None], token_len)
if return_dict and is_finished:
thread.join()
res = dict(result_queue.get())
if res['sequences'][0].tolist() != raw_generate_ids:
logger.warning(f"res['sequences'][0].tolist(): {res['sequences'][0].tolist()}\n"
f'raw_generate_ids: {raw_generate_ids}')
generate_ids = template.get_generate_ids(torch.tensor(raw_generate_ids)[None], token_len)
res['sequences'] = generate_ids
generation_info['num_generated_tokens'] = len(generate_ids)
response = template.generate_ids_to_response(
generate_ids,
@@ -834,6 +832,7 @@ def inference(model: PreTrainedModel,
generation_info['samples/s'] = 1 / runtime
generation_info['tokens/s'] = generation_info['num_generated_tokens'] / runtime
if return_dict:
res['sequences'] = generate_ids
res.update({'response': response, 'history': history})
return res
else:


@@ -184,6 +184,7 @@ if version.parse(vllm.__version__) < version.parse('0.5.5'):
num_beams: int = 1,
*,
n: int = 1,
logprobs: Optional[int] = None,
seed: Optional[int] = None,
length_penalty: float = 1.,
stop: Optional[List[str]] = None,
@@ -214,6 +215,7 @@ if version.parse(vllm.__version__) < version.parse('0.5.5'):
kwargs['use_beam_search'] = True
kwargs['best_of'] = num_beams
kwargs['n'] = n
kwargs['logprobs'] = logprobs
kwargs['seed'] = seed
kwargs['length_penalty'] = length_penalty
kwargs['stop'] = stop
@@ -236,6 +238,7 @@ else:
repetition_penalty: float = 1.
num_beams: int = 1
n: int = 1
logprobs: Optional[int] = None
seed: Optional[int] = None
length_penalty: float = 1.
stop: Optional[List[str]] = None
@@ -254,7 +257,7 @@ else:
'the output of num_beams in transformers.')
assert self.best_of is None
self.use_beam_search = True
self.best_of = num_beams
self.best_of = self.num_beams
if self.top_k == 0:
self.top_k = -1
if self.stop is None:
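
The final hunk is a genuine bug fix: in the newer-vLLM branch the generation config is a dataclass, so this logic runs in `__post_init__`, where there is no local `num_beams` argument; referencing it bare would presumably fail, so `best_of` has to be read from the field. A self-contained stand-in (not vLLM's or swift's real class) showing the corrected pattern:

```python
# Stand-in dataclass showing the corrected __post_init__ pattern: read the field
# via self.num_beams, since no local `num_beams` variable exists in this branch.
from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeGenerationConfig:
    num_beams: int = 1
    best_of: Optional[int] = None
    use_beam_search: bool = False

    def __post_init__(self) -> None:
        if self.num_beams > 1:
            assert self.best_of is None
            self.use_beam_search = True
            self.best_of = self.num_beams  # the fixed line

print(FakeGenerationConfig(num_beams=4).best_of)  # 4
```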