update docs & fix bug (#1926)

Jintao 2024-09-04 11:32:59 +08:00 committed by GitHub
parent 8c41771e9f
commit 4b72dc834d
22 changed files with 72 additions and 79 deletions


@@ -69,8 +69,8 @@ You can contact us and communicate with us by adding our group:
- 🔥2024.08.07: Support for using vLLM for accelerating inference and deployment of multimodal large models such as the llava series and phi3-vision models. You can refer to the [Multimodal & vLLM Inference Acceleration Documentation](docs/source_en/Multi-Modal/vllm-inference-acceleration.md) for more information.
- 2024.08.06: Support for minicpm-v-v2_6-chat is available. You can use `swift infer --model_type minicpm-v-v2_6-chat` for inference experience. Best practices can be found [here](https://github.com/modelscope/swift/issues/1613).
- 2024.08.06: Supports internlm2.5 series of 1.8b and 20b. Experience it using `swift infer --model_type internlm2_5-1_8b-chat`.
- 🔥2024.08.05: Support evaluation for multi-modal models! Same command with [new datasets](https://swift.readthedocs.io/en/latest/LLM/LLM-eval.html#introduction).
- 🔥2024.08.02: Support FourierFT. Use `--sft_type fourierft` to begin. Check the parameter documentation [here](https://swift.readthedocs.io/en/latest/LLM/Command-line-parameters.html).
- 🔥2024.08.05: Support evaluation for multi-modal models! Same command with [new datasets](https://swift.readthedocs.io/en/latest/Instruction/LLM-eval.html#introduction).
- 🔥2024.08.02: Support FourierFT. Use `--sft_type fourierft` to begin. Check the parameter documentation [here](https://swift.readthedocs.io/en/latest/Instruction/Command-line-parameters.html).
- 🔥2024.07.29: Support the use of lmdeploy for inference acceleration of LLM and VLM models. Documentation can be found [here](docs/source_en/Multi-Modal/LmDeploy-inference-acceleration.md).
- 🔥2024.07.24: Support DPO/ORPO/SimPO/CPO alignment algorithms for vision MLLMs; training scripts can be found in the [document](docs/source_en/Multi-Modal/human-preference-alignment-training-documentation.md). Supports the RLAIF-V dataset.
- 🔥2024.07.24: Support using Megatron for CPT and SFT on the Qwen2 series. You can refer to the [Megatron training documentation](docs/source_en/LLM/Megatron-training.md).
@@ -89,7 +89,7 @@ You can contact us and communicate with us by adding our group:
- 2024.07.06: Support codegeex4-9b-chat.
- 2024.07.04: Support internlm2_5-7b series: internlm2_5-7b, internlm2_5-7b-chat, internlm2_5-7b-chat-1m.
- 2024.07.02: Support for `llava1_6-vicuna-7b-instruct`, `llava1_6-vicuna-13b-instruct` and other llava-hf models. For best practices, refer to [here](docs/source_en/Multi-Modal/llava-best-practice.md).
- 🔥2024.06.29: Support [eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass) for evaluation! We now support over 50 eval datasets such as `BoolQ, ocnli, humaneval, math, ceval, mmlu, gsm8k, ARC_e`; please check our [Eval Doc](https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/LLM-eval.md) to begin! Next sprint we will support multi-modal and Agent evaluation, so remember to follow us : )
- 🔥2024.06.29: Support [eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass) for evaluation! We now support over 50 eval datasets such as `BoolQ, ocnli, humaneval, math, ceval, mmlu, gsm8k, ARC_e`; please check our [Eval Doc](https://github.com/modelscope/swift/blob/main/docs/source_en/Instruction/LLM-eval.md) to begin! Next sprint we will support multi-modal and Agent evaluation, so remember to follow us : )
- 🔥2024.06.28: Support for **Florence** series models! See the [document](docs/source_en/Multi-Modal/florence-best-pratice.md)
- 🔥2024.06.28: Support for Gemma2 series models: gemma2-9b, gemma2-9b-instruct, gemma2-27b, gemma2-27b-instruct.
@@ -109,7 +109,7 @@ You can contact us and communicate with us by adding our group:
- 🔥2024.05.17: Support peft=0.11.0. Meanwhile, 3 new tuners are supported: `BOFT`, `Vera` and `Pissa`. Use `--sft_type boft/vera` to use BOFT or Vera, and use `--init_lora_weights pissa` with `--sft_type lora` to use Pissa.
- 2024.05.16: Supports Llava-Next (Stronger) series models. For best practice, you can refer to [here](https://github.com/modelscope/swift/tree/main/docs/source_en/Multi-Modal/llava-best-practice.md).
- 🔥2024.05.13: Support Yi-1.5 series models, use `--model_type yi-1_5-9b-chat` to begin!
- 2024.05.11: Support for qlora training and quantized inference using [hqq](https://github.com/mobiusml/hqq) and [eetq](https://github.com/NetEase-FuXi/EETQ). For more information, see the [LLM Quantization Documentation](https://github.com/modelscope/swift/tree/main/docs/source_en/LLM/LLM-quantization-and-export.md).
- 2024.05.11: Support for qlora training and quantized inference using [hqq](https://github.com/mobiusml/hqq) and [eetq](https://github.com/NetEase-FuXi/EETQ). For more information, see the [LLM Quantization Documentation](https://github.com/modelscope/swift/tree/main/docs/source_en/Instruction/LLM-quantization-and-export.md).
- 2024.05.10: Support splitting a sequence across multiple GPUs to reduce memory usage. To use this feature, run `pip install .[seq_parallel]`, then add `--sequence_parallel_size n` to your DDP script to begin!
- 2024.05.08: Support the DeepSeek-V2-Chat model; you can refer to [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/deepseek-v2-chat/lora_ddp_ds3/sft.sh). Support the InternVL-Chat-V1.5-Int8 model; for best practices, you can refer to [here](https://github.com/modelscope/swift/tree/main/docs/source_en/Multi-Modal/internvl-best-practice.md).
- 🔥2024.05.07: Supports **ORPO** training! See [document](https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/ORPO.md) to start training!
@@ -154,7 +154,7 @@ You can contact us and communicate with us by adding our group:
- 2024.02.25: Support `swift export` to quantize models using **AWQ/GPTQ** and push to ModelScope Hub. See documentation: [LLM Quantization](docs/source_en/Instruction/LLM-quantization-and-export.md).
- 2024.02.22: Support gemma series: gemma-2b, [gemma-2b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/gemma_2b_instruct), gemma-7b, gemma-7b-instruct.
- 2024.02.16: Support deepseek-math series: deepseek-math-7b, deepseek-math-7b-instruct, deepseek-math-7b-chat.
- 🔥2024.02.05: Support **Qwen1.5** series models, see [model list](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B) for all supported Qwen1.5 models. Provide fine-tuning scripts for [qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8).
- 🔥2024.02.05: Support **Qwen1.5** series models, see [model list](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B) for all supported Qwen1.5 models. Provide fine-tuning scripts for [qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8).
- 2024.02.05: Support training of diffusion models such as **SDXL**, **SD**, **ControlNet**, as well as **DreamBooth** training. See corresponding [training scripts](https://github.com/modelscope/swift/tree/main/examples/pytorch/sdxl/scripts) for details.
- 2024.02.01: Support minicpm series: [minicpm-2b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/minicpm_2b_sft_chat), minicpm-2b-chat.
- 🔥2024.02.01: Support dataset mixing to reduce **catastrophic forgetting**. Use `--train_dataset_mix_ratio 2.0` to enable training! We also open sourced the general knowledge dataset [ms-bench](https://www.modelscope.cn/datasets/iic/ms_bench/summary).
@@ -182,9 +182,9 @@ You can contact us and communicate with us by adding our group:
- 2023.12.18: Support VLLM for inference acceleration.
- 2023.12.15: Support deepseek, deepseek-coder series: deepseek-7b, deepseek-7b-chat, deepseek-67b, deepseek-67b-chat, openbuddy-deepseek-67b-chat, deepseek-coder-1_3b, deepseek-coder-1_3b-instruct, deepseek-coder-6_7b, deepseek-coder-6_7b-instruct, deepseek-coder-33b, deepseek-coder-33b-instruct.
- 2023.12.13: Support mistral-7b-instruct-v2, [mixtral-moe-7b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe), [mixtral-moe-7b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe_instruct).
- 2023.12.09: Support `freeze_parameters_ratio` parameter as a compromise between lora and full-parameter training. Corresponding sh can be found in [full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). Support `disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc` parameters, see [command line arguments](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.md) for details.
- 2023.12.09: Support `freeze_parameters_ratio` parameter as a compromise between lora and full-parameter training. Corresponding sh can be found in [full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). Support `disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc` parameters, see [command line arguments](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.md) for details.
- 2023.12.08: Support [sus-34b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/sus_34b_chat), support yi-6b-200k, yi-34b-200k.
- 2023.12.07: Support [Multi-Node DDP training](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
- 2023.12.07: Support [Multi-Node DDP training](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
- 2023.12.05: Support models: zephyr-7b-beta-chat, openbuddy-zephyr-7b-chat. Support datasets: hc3-zh, hc3-en.
- 🔥2023.12.02: [Self-cognition fine-tuning best practices](docs/source_en/LLM/Self-cognition-best-practice.md), **10 minutes to fine-tune a large model for self-cognition**, create your own unique large model.
- 🔥2023.11.30: Support training and inference of **qwen-1_8b**, **qwen-72b**, **qwen-audio** series models. Corresponding sh scripts can be found in [qwen_1_8b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_1_8b_chat), [qwen_72b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_72b_chat), [qwen_audio_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_audio_chat)


@@ -70,8 +70,8 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 🔥2024.08.07: 支持使用vllm对多模态大模型: llava系列, internvl2系列, phi3-vision, minicpm-v2.5进行推理加速和部署. 可以查看[多模态&vLLM推理加速文档](docs/source/Multi-Modal/vLLM推理加速文档.md)获取更多信息.
- 2024.08.06: 支持minicpm-v-v2_6-chat, 使用`swift infer --model_type minicpm-v-v2_6-chat`进行推理体验, 最佳实践可以查看[这里](https://github.com/modelscope/swift/issues/1613).
- 2024.08.06: 支持internlm2.5的1.8b和20b系列. 使用`swift infer --model_type internlm2_5-1_8b-chat`进行体验.
- 🔥2024.08.05: 支持多模态数据集的评测!命令行完全一致,新增了许多[多模态数据集](https://swift.readthedocs.io/zh-cn/latest/LLM/LLM%E8%AF%84%E6%B5%8B%E6%96%87%E6%A1%A3.html#id2).
- 🔥2024.08.02: 支持Fourier Ft训练. 使用方式为`--sft_type fourierft`, 参数可以参考[这里](https://swift.readthedocs.io/zh-cn/latest/LLM/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html).
- 🔥2024.08.05: 支持多模态数据集的评测!命令行完全一致,新增了许多[多模态数据集](https://swift.readthedocs.io/zh-cn/latest/Instruction/LLM%E8%AF%84%E6%B5%8B%E6%96%87%E6%A1%A3.html#id2).
- 🔥2024.08.02: 支持Fourier Ft训练. 使用方式为`--sft_type fourierft`, 参数可以参考[这里](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html).
- 🔥2024.07.29: 支持使用lmdeploy对LLM和VLM模型进行推理加速. 文档可以查看[这里](docs/source/Multi-Modal/LmDeploy推理加速文档.md).
- 🔥2024.07.24: 人类偏好对齐算法支持视觉多模态大模型, 包括DPO/ORPO/SimPO/CPO, 训练参考[文档](docs/source/Multi-Modal/人类偏好对齐训练文档.md). 支持数据集RLAIF-V.
- 🔥2024.07.24: 支持使用megatron对qwen2系列进行CPT和SFT. 可以查看[megatron训练文档](docs/source/LLM/Megatron训练文档.md).
@@ -90,7 +90,7 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 2024.07.06: 支持codegeex4-9b-chat.
- 2024.07.04: 支持internlm2_5-7b系列: internlm2_5-7b, internlm2_5-7b-chat, internlm2_5-7b-chat-1m.
- 2024.07.02: 支持`llava1_6-vicuna-7b-instruct`, `llava1_6-vicuna-13b-instruct`等llava-hf模型. 最佳实践可以查看[这里](docs/source/Multi-Modal/llava最佳实践.md).
- 🔥2024.06.29: 支持[eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass)评测! 我们支持了包含`BoolQ, ocnli, humaneval, math, ceval, mmlu, gsk8k, ARC_e`等50+标准数据集在内的评测流程, 请查看我们的[评测文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM评测文档.md)来使用。下个迭代我们会支持多模态评测和Agent评测记得持续关注我们: )
- 🔥2024.06.29: 支持[eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass)评测! 我们支持了包含`BoolQ, ocnli, humaneval, math, ceval, mmlu, gsk8k, ARC_e`等50+标准数据集在内的评测流程, 请查看我们的[评测文档](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM评测文档.md)来使用。下个迭代我们会支持多模态评测和Agent评测记得持续关注我们: )
- 🔥2024.06.28: 支持**Florence**系列模型: 可以查看[Florence最佳实践](docs/source/Multi-Modal/florence最佳实践.md).
- 🔥2024.06.28: 支持**Gemma2**系列模型: gemma2-9b, gemma2-9b-instruct, gemma2-27b, gemma2-27b-instruct.
- 🔥2024.06.18: 支持**DeepSeek-Coder-v2**系列模型! 使用model_type`deepseek-coder-v2-instruct`和`deepseek-coder-v2-lite-instruct`来开启训练和推理.
@@ -109,7 +109,7 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 🔥2024.05.17: 支持peft=0.11.0. 同时支持了三个新的tuner方法 `BOFT`, `Vera` 和 `Pissa`. 使用 `--sft_type boft/vera` 开启BOFT或者Vera, 使用 `--init_lora_weights pissa` 以及 `--sft_type lora` 来使用 Pissa.
- 2024.05.16: 支持Llava-Next (Stronger)系列模型,最佳实践可以查看[这里](https://github.com/modelscope/swift/tree/main/docs/source/Multi-Modal/llava最佳实践.md).
- 🔥2024.05.13: 支持Yi-1.5系列模型,使用`--model_type yi-1_5-9b-chat`等开始体验
- 2024.05.11: 支持使用[hqq](https://github.com/mobiusml/hqq)和[eetq](https://github.com/NetEase-FuXi/EETQ)进行qlora训练和量化推理可以查看[LLM量化与导出文档](https://github.com/modelscope/swift/tree/main/docs/source/LLM/LLM量化与导出文档.md)
- 2024.05.11: 支持使用[hqq](https://github.com/mobiusml/hqq)和[eetq](https://github.com/NetEase-FuXi/EETQ)进行qlora训练和量化推理可以查看[LLM量化与导出文档](https://github.com/modelscope/swift/tree/main/docs/source/Instruction/LLM量化与导出文档.md)
- 2024.05.10: 支持序列并行. 先安装`pip install .[seq_parallel]`, 之后在DDP环境中添加`--sequence_parallel_size n`即可使用!
- 2024.05.08: 支持DeepSeek-V2-Chat模型, 训练参考[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/deepseek-v2-chat/lora_ddp_ds3/sft.sh)。支持InternVL-Chat-V1.5-Int8模型最佳实践参考[这里](https://github.com/modelscope/swift/tree/main/docs/source/Multi-Modal/internvl最佳实践.md).
- 🔥2024.05.07: 支持**ORPO**训练,使用`swift orpo`来开始训练, 最佳实践可以查看[这里](https://github.com/modelscope/swift/tree/main/docs/source/LLM/ORPO算法最佳实践.md)
@@ -151,10 +151,10 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 2024.03.06: 支持AWQ量化模型的训练和推理, 使用[这个Qwen1.5-AWQ模型脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_awq/lora/sft.sh)开始训练, 并支持[yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_zero3)的训练和推理.
- 🔥2024.02.29: 支持[LLaMA PRO](https://arxiv.org/pdf/2401.02415.pdf), 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh)即可开始训练.
- 🔥2024.02.29: 支持[LoRA+](https://arxiv.org/pdf/2402.12354.pdf), 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh)即可开始训练.
- 2024.02.25: 支持`swift export`, 对模型进行**AWQ/GPTQ**量化导出, 以及推送ModelScope Hub. 具体可以查看: [LLM量化与导出文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM量化与导出文档.md).
- 2024.02.25: 支持`swift export`, 对模型进行**AWQ/GPTQ**量化导出, 以及推送ModelScope Hub. 具体可以查看: [LLM量化与导出文档](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM量化与导出文档.md).
- 2024.02.22: 支持gemma系列: gemma-2b, [gemma-2b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/gemma_2b_instruct), gemma-7b, gemma-7b-instruct.
- 2024.02.16: 支持deepseek-math系列: deepseek-math-7b, deepseek-math-7b-instruct, deepseek-math-7b-chat.
- 🔥2024.02.05: 支持**Qwen1.5**系列模型, 支持的所有Qwen1.5系列模型请查看[模型列表](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B). 提供了[qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8)微调的脚本.
- 🔥2024.02.05: 支持**Qwen1.5**系列模型, 支持的所有Qwen1.5系列模型请查看[模型列表](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B). 提供了[qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8)微调的脚本.
- 2024.02.05: 支持扩散模型如**SDXL**, **SD**, **ControlNet**的训练, 同时也支持**DreamBooth**的训练, 详情可以查看对应的[训练脚本](https://github.com/modelscope/swift/tree/main/examples/pytorch/sdxl/scripts).
- 2024.02.01: 支持minicpm系列: [minicpm-2b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/minicpm_2b_sft_chat), minicpm-2b-chat.
- 🔥2024.02.01: 支持数据集打混来减少 **灾难性遗忘问题**. 使用`--train_dataset_mix_ratio 2.0`开启训练!同时我们也开源了通用知识数据集 [ms-bench](https://www.modelscope.cn/datasets/iic/ms_bench/summary).
@@ -182,9 +182,9 @@ SWIFT具有丰富全面的文档请查看我们的文档网站:
- 2023.12.18: 支持VLLM进行推理加速.
- 2023.12.15: 支持deepseek, deepseek-coder系列: deepseek-7b, deepseek-7b-chat, deepseek-67b, deepseek-67b-chat, openbuddy-deepseek-67b-chat, deepseek-coder-1_3b, deepseek-coder-1_3b-instruct, deepseek-coder-6_7b, deepseek-coder-6_7b-instruct, deepseek-coder-33b, deepseek-coder-33b-instruct.
- 2023.12.13: 支持mistral-7b-instruct-v2, [mixtral-moe-7b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe), [mixtral-moe-7b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe_instruct).
- 2023.12.09: 支持`freeze_parameters_ratio`参数, 作为lora和全参数训练的折中方案. 对应的sh可以查看[full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). 支持`disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc`参数, 具体可以查看[命令行参数](https://github.com/modelscope/swift/blob/main/docs/source/LLM/命令行参数.md).
- 2023.12.09: 支持`freeze_parameters_ratio`参数, 作为lora和全参数训练的折中方案. 对应的sh可以查看[full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). 支持`disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc`参数, 具体可以查看[命令行参数](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/命令行参数.md).
- 2023.12.08: 支持[sus-34b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/sus_34b_chat), 支持yi-6b-200k, yi-34b-200k.
- 2023.12.07: 支持[Multi-Node DDP训练](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
- 2023.12.07: 支持[Multi-Node DDP训练](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
- 2023.12.05: 支持模型: zephyr-7b-beta-chat, openbuddy-zephyr-7b-chat. 支持数据集: hc3-zh, hc3-en.
- 🔥 2023.12.02: [自我认知微调最佳实践](https://github.com/modelscope/swift/blob/main/docs/source/LLM/自我认知微调最佳实践.md), **10分钟对大模型进行自我认知微调**, 创建专属于自己的大模型.
- 🔥 2023.11.30: 支持**qwen-1_8b**, **qwen-72b**, **qwen-audio**系列模型的训练的推理. 对应的sh脚本可以查看[qwen_1_8b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_1_8b_chat), [qwen_72b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_72b_chat), [qwen_audio_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_audio_chat)


@@ -24,7 +24,7 @@ pip install -e '.[llm]'
pip install deepspeed -U
# 如果你想要使用基于auto_gptq的qlora训练. (推荐, 效果优于bnb)
# 支持auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型`
# 支持auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/支持的模型和数据集.md#模型`
# auto_gptq和cuda版本有对应关系请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本
pip install auto_gptq -U
@@ -83,7 +83,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
--output_dir output \
# 使用自己的数据集
# 自定义数据集格式查看: https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E8%87%AA%E5%AE%9A%E4%B9%89%E4%B8%8E%E6%8B%93%E5%B1%95.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86
# 自定义数据集格式查看: https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E8%87%AA%E5%AE%9A%E4%B9%89%E4%B8%8E%E6%8B%93%E5%B1%95.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86
CUDA_VISIBLE_DEVICES=0 swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset chatml.jsonl \


@@ -15,7 +15,7 @@ pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
pip install 'ms-swift[llm]' -U
# 如果你想要使用基于auto_gptq的模型进行推理.
# 使用auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型`
# 使用auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/支持的模型和数据集.md#模型`
# auto_gptq和cuda版本有对应关系请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本
pip install auto_gptq -U


@@ -298,7 +298,7 @@ RLHF参数继承了sft参数, 除此之外增加了以下参数:
- `--model_revision`: 默认值为`None`. 具体的参数介绍可以在`sft命令行参数`中查看. 如果`model_id_or_path`为None或者是本地的模型目录, 则该参数失效.
- `--🔥sft_type`: 默认值为`'lora'`, 具体的参数介绍可以在`sft命令行参数`中查看.
- `--🔥template_type`: 默认值为`'AUTO'`, 具体的参数介绍可以在`sft命令行参数`中查看.
- `--🔥infer_backend`: 你可以选择'AUTO', 'vllm', 'pt'. 默认使用'AUTO', 进行智能选择, 即如果没有传入`ckpt_dir`或使用全参数微调, 并且安装了vllm且模型支持vllm则使用vllm引擎, 否则使用原生torch进行推理. vllm环境准备可以参考[VLLM推理加速与部署](VLLM推理加速与部署.md#环境准备), vllm支持的模型可以查看[支持的模型](../LLM/支持的模型和数据集.md#模型).
- `--🔥infer_backend`: 你可以选择'AUTO', 'vllm', 'pt'. 默认使用'AUTO', 进行智能选择, 即如果没有传入`ckpt_dir`或使用全参数微调, 并且安装了vllm且模型支持vllm则使用vllm引擎, 否则使用原生torch进行推理. vllm环境准备可以参考[VLLM推理加速与部署](VLLM推理加速与部署.md#环境准备), vllm支持的模型可以查看[支持的模型](../Instruction/支持的模型和数据集.md#模型).
- `--🔥ckpt_dir`: 必填项, 值为SFT阶段保存的checkpoint路径, e.g. `'/path/to/your/vx-xxx/checkpoint-xxx'`.
- `--load_args_from_ckpt_dir`: 是否从`ckpt_dir`的`sft_args.json`文件中读取模型配置信息. 默认是`True`.
- `--🔥load_dataset_config`: 该参数只有在`--load_args_from_ckpt_dir true`时才生效. 即是否从`ckpt_dir`的`sft_args.json`文件中读取数据集相关的配置信息. 默认为`False`.
@@ -426,7 +426,7 @@ app-ui参数继承了infer参数, 除此之外增加了以下参数:
deploy参数继承了infer参数, 除此之外增加了以下参数:
- `--🔥host`: 默认为`'127.0.0.1`. 要使其在非本机上可访问, 可设置为'0.0.0.0'.
- `--host`: 默认为`'0.0.0.0'`.
- `--port`: 默认为`8000`.
- `--api_key`: 默认为`None`, 即不对请求进行api_key验证.
- `--ssl_keyfile`: 默认为`None`.


@@ -379,8 +379,8 @@
|qwen2-vl-7b-instruct-awq|[qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
|glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|✘|✘|✘|✘|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|idefics3-8b-llama3|[AI-ModelScope/Idefics3-8B-Llama3](https://modelscope.cn/models/AI-ModelScope/Idefics3-8B-Llama3/summary)|^(model.text_model\|model.connector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|idefics3|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision|[HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|llava1_6-mistral-7b-instruct|[swift/llava-v1.6-mistral-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-mistral-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-mistral|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)|
|llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
|llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
@@ -391,16 +391,16 @@
|llava-onevision-qwen2-0_5b-ov|[AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-0.5b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)|
|llava-onevision-qwen2-7b-ov|[AI-ModelScope/llava-onevision-qwen2-7b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-7b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-7b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-7b-ov-hf)|
|llava-onevision-qwen2-72b-ov|[AI-ModelScope/llava-onevision-qwen2-72b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-72b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-72b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-72b-ov-hf)|
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✘|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
|llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
|llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|
|llava-next-video-34b-instruct|[swift/LLaVA-NeXT-Video-34B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-34B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video-yi|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-34B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-34B-hf)|
|yi-vl-6b-chat|[01ai/Yi-VL-6B](https://modelscope.cn/models/01ai/Yi-VL-6B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B)|
|yi-vl-34b-chat|[01ai/Yi-VL-34B](https://modelscope.cn/models/01ai/Yi-VL-34B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B)|
|llava-llama-3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✘|✔|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|llava-llama3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✔|✘|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|internlm-xcomposer2-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b)|
|internlm-xcomposer2-4khd-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2-4khd|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-4khd-7b](https://huggingface.co/internlm/internlm-xcomposer2-4khd-7b)|
|internlm-xcomposer2_5-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2_5|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2d5-7b](https://huggingface.co/internlm/internlm-xcomposer2d5-7b)|


@@ -267,7 +267,7 @@ The following parameters take effect when the `sft_type` is set to `reft`.
### Liger Parameters
- `--use_liger`: Use liger-kernel to train.
-
## PT Parameters
PT parameters inherit from the SFT parameters with some modifications to the default values:
@@ -426,7 +426,7 @@ app-ui parameters inherit from infer parameters, with the following added parame
deploy parameters inherit from infer parameters, with the following added parameters:
- `--🔥host`: Default is `'127.0.0.1`. To make it accessible on the local network, you can set it to '0.0.0.0'.
- `--host`: Default is `'0.0.0.0'`.
- `--port`: Default is `8000`.
- `--api_key`: The default is `None`, meaning that the request will not be subjected to api_key verification.
- `--ssl_keyfile`: Default is `None`.
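
Since `DeployArguments.host` now defaults to `'0.0.0.0'` (see the change further down in this diff), a deployed model is reachable from other machines by default. Below is a minimal client sketch, assuming a server started with `swift deploy` is listening on the default port 8000 and exposes the OpenAI-compatible `/v1` route that the client code in this PR targets; `'qwen-7b-chat'` is only a placeholder model_type.

```python
# Minimal sketch: query a `swift deploy` server through its OpenAI-compatible endpoint.
# Assumes the server runs locally on the default port 8000; 'qwen-7b-chat' is a placeholder.
import requests

resp = requests.post(
    'http://127.0.0.1:8000/v1/chat/completions',
    json={
        'model': 'qwen-7b-chat',  # placeholder model_type
        'messages': [{'role': 'user', 'content': 'Hello!'}],
        'max_tokens': 64,
    },
    timeout=60,
)
print(resp.json()['choices'][0]['message']['content'])
```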


@@ -20,7 +20,7 @@ pip install -e '.[llm]'
pip install deepspeed -U
# If you want to use qlora training based on auto_gptq. (Recommended, better than bnb)
# Models supporting auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/supported-models-and-datasets.md#models`
# Models supporting auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/supported-models-and-datasets.md#models`
# auto_gptq and cuda versions are related, please choose the version according to `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
pip install auto_gptq -U
@@ -79,7 +79,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
--output_dir output \
# Using your own dataset
# custom dataset format: https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/Customization.md#custom-datasets
# custom dataset format: https://github.com/modelscope/swift/blob/main/docs/source_en/Instruction/Customization.md#custom-datasets
CUDA_VISIBLE_DEVICES=0 swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset chatml.jsonl \
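
The `swift sft` invocation above (truncated by the hunk) also has a programmatic equivalent. A hedged sketch, assuming `SftArguments`/`sft_main` are importable from `swift.llm` as in the rest of the docs and that `chatml.jsonl` exists locally in the custom dataset format linked above:

```python
# Hedged sketch of the programmatic equivalent of the `swift sft` command above.
# Assumes ms-swift[llm] is installed and chatml.jsonl follows the custom dataset format.
import os

from swift.llm import SftArguments, sft_main

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
result = sft_main(
    SftArguments(
        model_id_or_path='qwen/Qwen-7B-Chat',
        dataset=['chatml.jsonl'],
        output_dir='output',
    ))
# Assumption: sft_main returns a dict containing the best checkpoint path.
print(result['best_model_checkpoint'])
```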


@@ -13,7 +13,7 @@ GPU devices: A10, 3090, V100, A100 are all supported.
pip install 'ms-swift[llm]' -U
# If you want to use models based on auto_gptq for inference.
# Models using auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/Supported Models and Datasets.md#Models`
# Models using auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/Supported Models and Datasets.md#Models`
# auto_gptq and cuda versions have a correspondence, please select the version according to `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
pip install auto_gptq -U


@@ -379,8 +379,8 @@ The table below introduces all models supported by SWIFT:
|qwen2-vl-7b-instruct-awq|[qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
|glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|✘|✘|✘|✘|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|idefics3-8b-llama3|[AI-ModelScope/Idefics3-8B-Llama3](https://modelscope.cn/models/AI-ModelScope/Idefics3-8B-Llama3/summary)|^(model.text_model\|model.connector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|idefics3|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision|[HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|llava1_6-mistral-7b-instruct|[swift/llava-v1.6-mistral-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-mistral-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-mistral|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)|
|llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
|llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
@@ -391,16 +391,16 @@ The table below introduces all models supported by SWIFT:
|llava-onevision-qwen2-0_5b-ov|[AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-0.5b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)|
|llava-onevision-qwen2-7b-ov|[AI-ModelScope/llava-onevision-qwen2-7b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-7b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-7b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-7b-ov-hf)|
|llava-onevision-qwen2-72b-ov|[AI-ModelScope/llava-onevision-qwen2-72b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-72b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-72b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-72b-ov-hf)|
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✘|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
|llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
|llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|
|llava-next-video-34b-instruct|[swift/LLaVA-NeXT-Video-34B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-34B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video-yi|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-34B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-34B-hf)|
|yi-vl-6b-chat|[01ai/Yi-VL-6B](https://modelscope.cn/models/01ai/Yi-VL-6B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B)|
|yi-vl-34b-chat|[01ai/Yi-VL-34B](https://modelscope.cn/models/01ai/Yi-VL-34B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B)|
|llava-llama-3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✘|✔|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|llava-llama3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✔|✘|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|internlm-xcomposer2-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b)|
|internlm-xcomposer2-4khd-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2-4khd|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-4khd-7b](https://huggingface.co/internlm/internlm-xcomposer2-4khd-7b)|
|internlm-xcomposer2_5-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2_5|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2d5-7b](https://huggingface.co/internlm/internlm-xcomposer2d5-7b)|


@@ -1,4 +1,4 @@
def test_benchmark(infer_backend):
def test_benchmark(infer_backend: str) -> None:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TIMEOUT'] = '-1'


@@ -8,7 +8,7 @@ from swift.utils import stat_array
def write_dataset_info() -> None:
fpaths = ['docs/source/LLM/支持的模型和数据集.md', 'docs/source_en/LLM/Supported-models-datasets.md']
fpaths = ['docs/source/Instruction/支持的模型和数据集.md', 'docs/source_en/Instruction/Supported-models-datasets.md']
pre_texts = []
for fpath in fpaths:
if os.path.exists(fpath):


@@ -4,7 +4,7 @@ from swift.llm import MODEL_MAPPING, ModelType, get_default_lora_target_modules
def get_model_info_table():
fpaths = ['docs/source/LLM/支持的模型和数据集.md', 'docs/source_en/LLM/Supported-models-datasets.md']
fpaths = ['docs/source/Instruction/支持的模型和数据集.md', 'docs/source_en/Instruction/Supported-models-datasets.md']
end_words = [['### 多模态大模型', '## 数据集'], ['### MLLM', '## Datasets']]
model_name_list = ModelType.get_model_name_list()
result = [


@@ -457,8 +457,7 @@ async def inference_lmdeploy_async(request: Union[ChatCompletionRequest, Complet
usage_info = UsageInfo(
prompt_tokens=num_prompt_tokens,
completion_tokens=num_generated_tokens,
total_tokens=num_prompt_tokens + num_generated_tokens,
)
total_tokens=num_prompt_tokens + num_generated_tokens)
finish_reason = None
if output.status.name == 'FINISH':
finish_reason = 'stop'
@@ -477,17 +476,12 @@ async def inference_lmdeploy_async(request: Union[ChatCompletionRequest, Complet
ChatCompletionResponseChoice(
index=0,
message=ChatMessage(role='assistant', content=response, tool_calls=toolcall),
finish_reason=finish_reason,
)
finish_reason=finish_reason)
]
response = ChatCompletionResponse(
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
else:
choices = [CompletionResponseChoice(
index=0,
text=response,
finish_reason=finish_reason,
)]
choices = [CompletionResponseChoice(index=0, text=response, finish_reason=finish_reason)]
response = CompletionResponse(
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
if _args.log_interval > 0:
@@ -676,17 +670,12 @@ async def inference_pt_async(request: Union[ChatCompletionRequest, CompletionReq
ChatCompletionResponseChoice(
index=0,
message=ChatMessage(role='assistant', content=response, tool_calls=toolcall),
finish_reason=None,
)
finish_reason=None)
]
response = ChatCompletionResponse(
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
else:
choices = [CompletionResponseChoice(
index=0,
text=response,
finish_reason=None,
)]
choices = [CompletionResponseChoice(index=0, text=response, finish_reason=None)]
response = CompletionResponse(
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
if _args.log_interval > 0:


@@ -1567,7 +1567,7 @@ class AppUIArguments(InferArguments):
@dataclass
class DeployArguments(InferArguments):
host: str = '127.0.0.1'
host: str = '0.0.0.0'
port: int = 8000
api_key: Optional[str] = None
ssl_keyfile: Optional[str] = None


@@ -230,7 +230,12 @@ def _pre_inference_client(model_type: str,
else:
raise ValueError(f'model_type: {model_type}, model_list: {[model.id for model in model_list.data]}')
assert is_chat_request is not None and is_multimodal is not None
data = {k: v for k, v in request_config.__dict__.items() if not k.startswith('__')}
data = {}
request_config_origin = XRequestConfig()
for k, v in request_config.__dict__.items():
v_origin = getattr(request_config_origin, k)
if v != v_origin:
data[k] = v
url = kwargs.pop('url', None)
if url is None:
url = f'http://{host}:{port}/v1'
@@ -253,9 +258,9 @@
if medias:
medias = convert_to_base64(images=medias)['images']
data[media_key] = medias
if tools and len(tools) > 0:
if tools:
data['tools'] = tools
if tool_choice:
if tool_choice and tool_choice != 'auto':
data['tool_choice'] = tool_choice
return url, data, is_chat_request
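
The rewritten client logic above serializes only the fields whose values differ from a freshly constructed `XRequestConfig`, so client-side defaults no longer override whatever the server would choose, and `tools`/`tool_choice` are sent only when they carry real information. A self-contained illustration of that pattern, using a stand-in dataclass rather than swift's real `XRequestConfig`:

```python
# Stand-in illustration of the "send only non-default fields" pattern used above.
from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeRequestConfig:  # stand-in, not swift's XRequestConfig
    max_tokens: Optional[int] = None
    temperature: float = 1.0
    seed: Optional[int] = None

def non_default_fields(cfg: FakeRequestConfig) -> dict:
    defaults = FakeRequestConfig()
    return {k: v for k, v in cfg.__dict__.items() if v != getattr(defaults, k)}

print(non_default_fields(FakeRequestConfig(temperature=0.3)))  # {'temperature': 0.3}
```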


@@ -129,6 +129,7 @@ class LmdeployGenerationConfig(_LmdeployGenerationConfig):
*,
n: int = 1,
stop_words: Optional[List[int]] = None,
logprobs: Optional[int] = None,
random_seed: Optional[int] = None,
skip_special_tokens: bool = False,
**kwargs,
@@ -146,6 +147,7 @@ class LmdeployGenerationConfig(_LmdeployGenerationConfig):
repetition_penalty=repetition_penalty,
n=n,
stop_words=stop_words,
logprobs=logprobs,
random_seed=random_seed,
skip_special_tokens=skip_special_tokens,
**kwargs)
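
The two hunks above thread a new `logprobs` field through `LmdeployGenerationConfig` into the underlying lmdeploy generation config. A hedged construction sketch, assuming lmdeploy is installed and that the class is importable from the module path shown (the exact path may differ between swift versions):

```python
# Hedged sketch: request per-token log-probabilities via the new `logprobs` field.
# The import path below is an assumption, and lmdeploy must be installed.
from swift.llm.utils.lmdeploy_utils import LmdeployGenerationConfig  # assumed path

gen_config = LmdeployGenerationConfig(
    n=1,
    logprobs=5,               # top-5 log-probabilities per generated token
    random_seed=42,
    skip_special_tokens=False,
)
print(gen_config.logprobs)
```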


@@ -286,7 +286,7 @@ class ModelType:
yi_vl_6b_chat = 'yi-vl-6b-chat'
yi_vl_34b_chat = 'yi-vl-34b-chat'
# llava-llama (xtuner)
llava_llama3_8b_v1_1 = 'llava-llama-3-8b-v1_1'
llava_llama3_8b_v1_1 = 'llava-llama3-8b-v1_1'
# internlm
internlm_7b = 'internlm-7b'
internlm_7b_chat = 'internlm-7b-chat'
@@ -989,7 +989,7 @@ def get_model_tokenizer_cogvlm2(*args, **kwargs):
LoRATM.llava,
TemplateType.llava_llama_instruct,
support_flash_attn=True,
support_lmdeploy=True,
support_vllm=True,
requires=['transformers>=4.36'],
tags=['multi-modal', 'vision'],
hf_model_id='xtuner/llava-llama-3-8b-v1_1-transformers')
@@ -6046,7 +6046,6 @@ def get_model_tokenizer_llava_hf(model_dir: str, *args, **kwargs):
eos_token='</s>',
support_flash_attn=True,
support_vllm=True,
support_lmdeploy=True,
requires=['transformers>=4.36'],
tags=['multi-modal', 'vision'],
hf_model_id='llava-hf/llava-1.5-13b-hf')
@@ -6058,7 +6057,6 @@ def get_model_tokenizer_llava_hf(model_dir: str, *args, **kwargs):
eos_token='</s>',
support_flash_attn=True,
support_vllm=True,
support_lmdeploy=True,
requires=['transformers>=4.36'],
tags=['multi-modal', 'vision'],
hf_model_id='llava-hf/llava-1.5-7b-hf')
@@ -6245,7 +6243,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
LoRATM.llava_llama,
TemplateType.llama3_llava_next,
support_flash_attn=True,
support_lmdeploy=True,
tags=['multi-modal', 'vision'],
function_kwargs={'llm_model_type': 'next_llama'},
hf_model_id='lmms-lab/llama3-llava-next-8b')
@@ -6255,7 +6252,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
LoRATM.llava,
TemplateType.llava_qwen,
support_flash_attn=True,
support_lmdeploy=True,
tags=['multi-modal', 'vision'],
function_kwargs={'llm_model_type': 'next_qwen'},
hf_model_id='lmms-lab/llava-next-72b')
@@ -6265,7 +6261,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
LoRATM.llava,
TemplateType.llava_qwen,
support_flash_attn=True,
support_lmdeploy=True,
tags=['multi-modal', 'vision'],
function_kwargs={'llm_model_type': 'next_qwen'},
hf_model_id='lmms-lab/llava-next-110b')


@@ -12,7 +12,7 @@ def random_uuid() -> str:
@dataclass
class Model:
id: str # model_type
is_chat: Optional[bool] = None # chat model or generation model
is_chat: bool = True # chat model or generation model
is_multimodal: bool = False
object: str = 'model'
@@ -151,8 +151,8 @@ class CompletionResponse:
@dataclass
class DeltaMessage:
role: Literal['system', 'user', 'assistant']
content: str
role: Literal['system', 'user', 'assistant', None] = None
content: Optional[str] = None
tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
@@ -167,7 +167,7 @@ class ChatCompletionResponseStreamChoice:
class ChatCompletionStreamResponse:
model: str
choices: List[ChatCompletionResponseStreamChoice]
usage: UsageInfo
usage: Optional[UsageInfo] = None
id: str = field(default_factory=lambda: f'chatcmpl-{random_uuid()}')
object: str = 'chat.completion.chunk'
created: int = field(default_factory=lambda: int(time.time()))
@@ -184,7 +184,7 @@ class CompletionResponseStreamChoice:
class CompletionStreamResponse:
model: str
choices: List[CompletionResponseStreamChoice]
usage: UsageInfo
usage: Optional[UsageInfo] = None
id: str = field(default_factory=lambda: f'cmpl-{random_uuid()}')
object: str = 'text_completion.chunk'
created: int = field(default_factory=lambda: int(time.time()))
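
These protocol changes relax the streaming types: `usage` may be omitted on a chunk, and a `DeltaMessage` may carry only a role, only a content fragment, or nothing at all on the final chunk. A self-contained illustration with a stand-in dataclass (not swift's own class):

```python
# Stand-in illustration of why the delta fields became optional: stream chunks are sparse.
from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeDeltaMessage:  # stand-in, not swift's DeltaMessage
    role: Optional[str] = None
    content: Optional[str] = None

chunks = [
    FakeDeltaMessage(role='assistant'),   # first chunk: role only
    FakeDeltaMessage(content='Hello'),    # middle chunks: content only
    FakeDeltaMessage(),                   # final chunk: empty delta
]
print(''.join(c.content or '' for c in chunks))  # 'Hello'
```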


@@ -1785,7 +1785,7 @@ class InternLMXComposer2Template(Template):
while i < len(input_ids):
if input_ids[i] == 2: # replace_token
res_input_ids = torch.tensor([1] + input_ids[pre_i:i], device=device)
res_inputs_embeds.append(tok_embeddings(res_input_ids))
res_inputs_embeds.append(tok_embeddings(res_input_ids[None])[0])
wrap_im_mask += [0] * len(res_input_ids)
res_labels += [-100] + labels[pre_i:i]
if len(images) > 0 and idx < images.shape[0]:
@@ -1894,7 +1894,7 @@ class InternvlTemplate(Template):
embedding = self.model.get_input_embeddings()
device = embedding.weight.device
input_ids = data['input_ids']
inputs_embeds = embedding(input_ids).to(device=device)
inputs_embeds = embedding(input_ids[None])[0].to(device=device)
pixel_values = data['pixel_values']
if pixel_values is not None:
pixel_values = pixel_values.to(device=device)
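
Both template fixes above call the embedding layer with `input_ids[None]` and then index `[0]`: a leading batch axis is added for the embedding call (which some wrapped implementations appear to require) and stripped again afterwards. A tiny PyTorch sketch of just that indexing:

```python
# Tiny sketch of the batch-dimension handling used in the template fixes above.
import torch

embedding = torch.nn.Embedding(num_embeddings=100, embedding_dim=8)
input_ids = torch.tensor([3, 14, 15, 92])        # shape: [seq_len]
inputs_embeds = embedding(input_ids[None])[0]    # [1, seq_len, 8] -> [seq_len, 8]
print(inputs_embeds.shape)                       # torch.Size([4, 8])
```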


@@ -714,13 +714,11 @@ def inference_stream(model: PreTrainedModel,
except StopIteration:
is_finished = True
res = {}
generate_ids = template.get_generate_ids(torch.tensor(raw_generate_ids)[None], token_len)
if return_dict and is_finished:
thread.join()
res = dict(result_queue.get())
if res['sequences'][0].tolist() != raw_generate_ids:
logger.warning(f"res['sequences'][0].tolist(): {res['sequences'][0].tolist()}\n"
f'raw_generate_ids: {raw_generate_ids}')
generate_ids = template.get_generate_ids(torch.tensor(raw_generate_ids)[None], token_len)
res['sequences'] = generate_ids
generation_info['num_generated_tokens'] = len(generate_ids)
response = template.generate_ids_to_response(
generate_ids,
@@ -834,6 +832,7 @@ def inference(model: PreTrainedModel,
generation_info['samples/s'] = 1 / runtime
generation_info['tokens/s'] = generation_info['num_generated_tokens'] / runtime
if return_dict:
res['sequences'] = generate_ids
res.update({'response': response, 'history': history})
return res
else:


@@ -184,6 +184,7 @@ if version.parse(vllm.__version__) < version.parse('0.5.5'):
num_beams: int = 1,
*,
n: int = 1,
logprobs: Optional[int] = None,
seed: Optional[int] = None,
length_penalty: float = 1.,
stop: Optional[List[str]] = None,
@@ -214,6 +215,7 @@ if version.parse(vllm.__version__) < version.parse('0.5.5'):
kwargs['use_beam_search'] = True
kwargs['best_of'] = num_beams
kwargs['n'] = n
kwargs['logprobs'] = logprobs
kwargs['seed'] = seed
kwargs['length_penalty'] = length_penalty
kwargs['stop'] = stop
@@ -236,6 +238,7 @@ else:
repetition_penalty: float = 1.
num_beams: int = 1
n: int = 1
logprobs: Optional[int] = None
seed: Optional[int] = None
length_penalty: float = 1.
stop: Optional[List[str]] = None
@@ -254,7 +257,7 @@ else:
'the output of num_beams in transformers.')
assert self.best_of is None
self.use_beam_search = True
self.best_of = num_beams
self.best_of = self.num_beams
if self.top_k == 0:
self.top_k = -1
if self.stop is None:
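
The final hunk is a genuine bug fix: in the newer-vLLM branch the generation config is a dataclass, so this logic runs in `__post_init__`, where there is no local `num_beams` argument; referencing it bare would presumably fail, so `best_of` has to be read from the field. A self-contained stand-in (not vLLM's or swift's real class) showing the corrected pattern:

```python
# Stand-in dataclass showing the corrected __post_init__ pattern: read the field
# via self.num_beams, since no local `num_beams` variable exists in this branch.
from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeGenerationConfig:
    num_beams: int = 1
    best_of: Optional[int] = None
    use_beam_search: bool = False

    def __post_init__(self) -> None:
        if self.num_beams > 1:
            assert self.best_of is None
            self.use_beam_search = True
            self.best_of = self.num_beams  # the fixed line

print(FakeGenerationConfig(num_beams=4).best_of)  # 4
```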