Mirror of https://github.com/modelscope/ms-swift.git
update docs & fix bug (#1926)
This commit is contained in: parent 8c41771e9f, commit 4b72dc834d
README.md (14 changed lines)
@ -69,8 +69,8 @@ You can contact us and communicate with us by adding our group:
|
||||
- 🔥2024.08.07: Support for using vLLM for accelerating inference and deployment of multimodal large models such as the llava series and phi3-vision models. You can refer to the [Multimodal & vLLM Inference Acceleration Documentation](docs/source_en/Multi-Modal/vllm-inference-acceleration.md) for more information.
|
||||
- 2024.08.06: Support for minicpm-v-v2_6-chat is available. You can use `swift infer --model_type minicpm-v-v2_6-chat` for inference experience. Best practices can be found [here](https://github.com/modelscope/swift/issues/1613).
|
||||
- 2024.08.06: Supports internlm2.5 series of 1.8b and 20b. Experience it using `swift infer --model_type internlm2_5-1_8b-chat`.
|
||||
- 🔥2024.08.05: Support evaluation for multi-modal models! Same command with [new datasets](https://swift.readthedocs.io/en/latest/LLM/LLM-eval.html#introduction).
|
||||
- 🔥2024.08.02: Support FourierFT. Use `--sft_type fourierft` to begin; check the parameter documentation [here](https://swift.readthedocs.io/en/latest/LLM/Command-line-parameters.html).
- 🔥2024.08.05: Support evaluation for multi-modal models! Same command with [new datasets](https://swift.readthedocs.io/en/latest/Instruction/LLM-eval.html#introduction).
|
||||
- 🔥2024.08.02: Support FourierFT. Use `--sft_type fourierft` to begin; check the parameter documentation [here](https://swift.readthedocs.io/en/latest/Instruction/Command-line-parameters.html).
- 🔥2024.07.29: Support the use of lmdeploy for inference acceleration of LLM and VLM models. Documentation can be found [here](docs/source_en/Multi-Modal/LmDeploy-inference-acceleration.md).
|
||||
- 🔥2024.07.24: Support DPO/ORPO/SimPO/CPO alignment algorithms for vision MLLMs; training scripts can be found in the [document](docs/source_en/Multi-Modal/human-preference-alignment-training-documentation.md). Supports the RLAIF-V dataset.
- 🔥2024.07.24: Support using Megatron for CPT and SFT on the Qwen2 series. You can refer to the [Megatron training documentation](docs/source_en/LLM/Megatron-training.md).
|
||||
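For illustration, a minimal FourierFT fine-tuning invocation might look like the sketch below. Only `--sft_type fourierft` comes from the 2024.08.02 entry above; the model type, dataset and output directory are placeholder values patterned on the quick-start examples elsewhere in these docs.

```bash
# Hedged sketch: FourierFT fine-tuning. Model/dataset/output values are
# placeholders; see the linked command-line parameter documentation for details.
CUDA_VISIBLE_DEVICES=0 swift sft \
    --model_type qwen-7b-chat \
    --sft_type fourierft \
    --dataset alpaca-zh \
    --output_dir output
```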
@ -89,7 +89,7 @@ You can contact us and communicate with us by adding our group:
|
||||
- 2024.07.06: Support codegeex4-9b-chat.
|
||||
- 2024.07.04: Support internlm2_5-7b series: internlm2_5-7b, internlm2_5-7b-chat, internlm2_5-7b-chat-1m.
|
||||
- 2024.07.02: Support for `llava1_6-vicuna-7b-instruct`, `llava1_6-vicuna-13b-instruct` and other llava-hf models. For best practices, refer to [here](docs/source_en/Multi-Modal/llava-best-practice.md).
|
||||
- 🔥2024.06.29: Support [eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass) for evaluation! Now we have supported over 50 eval datasets like `BoolQ, ocnli, humaneval, math, ceval, mmlu, gsk8k, ARC_e`, please check our [Eval Doc](https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/LLM-eval.md) to begin! Next sprint we will support Multi-modal and Agent evaluation, remember to follow us : )
|
||||
- 🔥2024.06.29: Support [eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass) for evaluation! Now we have supported over 50 eval datasets like `BoolQ, ocnli, humaneval, math, ceval, mmlu, gsk8k, ARC_e`, please check our [Eval Doc](https://github.com/modelscope/swift/blob/main/docs/source_en/Instruction/LLM-eval.md) to begin! Next sprint we will support Multi-modal and Agent evaluation, remember to follow us : )
|
||||
|
||||
- 🔥2024.06.28: Support for **Florence** series model! See [document](docs/source_en/Multi-Modal/florence-best-pratice.md)
|
||||
- 🔥2024.06.28: Support for Gemma2 series models: gemma2-9b, gemma2-9b-instruct, gemma2-27b, gemma2-27b-instruct.
|
||||
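To make the evaluation entries above (2024.06.29 and 2024.08.05) concrete, a run might look roughly like the sketch below. This is an assumption-heavy illustration: the `swift eval` sub-command and the `--eval_dataset` flag name should be verified against the linked Eval Doc.

```bash
# Hypothetical sketch of an evaluation run; flag names are assumptions,
# consult the Eval Doc linked above for the exact interface.
CUDA_VISIBLE_DEVICES=0 swift eval \
    --model_type qwen-7b-chat \
    --eval_dataset ceval mmlu
```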
@ -109,7 +109,7 @@ You can contact us and communicate with us by adding our group:
|
||||
- 🔥2024.05.17: Support peft=0.11.0. Meanwhile support 3 new tuners: `BOFT`, `Vera` and `Pissa`. use `--sft_type boft/vera` to use BOFT or Vera, use `--init_lora_weights pissa` with `--sft_type lora` to use Pissa.
|
||||
- 2024.05.16: Supports Llava-Next (Stronger) series models. For best practice, you can refer to [here](https://github.com/modelscope/swift/tree/main/docs/source_en/Multi-Modal/llava-best-practice.md).
|
||||
- 🔥2024.05.13: Support Yi-1.5 series models,use `--model_type yi-1_5-9b-chat` to begin!
|
||||
- 2024.05.11: Support for qlora training and quantized inference using [hqq](https://github.com/mobiusml/hqq) and [eetq](https://github.com/NetEase-FuXi/EETQ). For more information, see the [LLM Quantization Documentation](https://github.com/modelscope/swift/tree/main/docs/source_en/LLM/LLM-quantization-and-export.md).
|
||||
- 2024.05.11: Support for qlora training and quantized inference using [hqq](https://github.com/mobiusml/hqq) and [eetq](https://github.com/NetEase-FuXi/EETQ). For more information, see the [LLM Quantization Documentation](https://github.com/modelscope/swift/tree/main/docs/source_en/Instruction/LLM-quantization-and-export.md).
|
||||
- 2024.05.10: Support split a sequence to multiple GPUs to reduce memory usage. Use this feature by `pip install .[seq_parallel]`, then add `--sequence_parallel_size n` to your DDP script to begin!
|
||||
- 2024.05.08: Support the DeepSeek-V2-Chat model; you can refer to [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/deepseek-v2-chat/lora_ddp_ds3/sft.sh). Support the InternVL-Chat-V1.5-Int8 model; for best practice, you can refer to [here](https://github.com/modelscope/swift/tree/main/docs/source_en/Multi-Modal/internvl-best-practice.md).
- 🔥2024.05.07: Supports **ORPO** training! See the [document](https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/ORPO.md) to start training!
|
||||
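Reading the 2024.05.10 sequence-parallel entry above as a recipe, a DDP run could be sketched as follows; the model, dataset and degree of parallelism are placeholders, and the `NPROC_PER_NODE` environment variable is assumed to follow the convention of the repository's DDP scripts.

```bash
# Sketch: split each sequence across 2 GPUs to reduce memory usage
# (placeholder model/dataset; NPROC_PER_NODE assumed to drive the DDP launch).
pip install .[seq_parallel]
NPROC_PER_NODE=2 CUDA_VISIBLE_DEVICES=0,1 swift sft \
    --model_type qwen-7b-chat \
    --dataset alpaca-zh \
    --sequence_parallel_size 2
```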
@ -154,7 +154,7 @@ You can contact us and communicate with us by adding our group:
|
||||
- 2024.02.25: Support `swift export` to quantize models using **AWQ/GPTQ** and push to ModelScope Hub. See documentation: [LLM Quantization](docs/source_en/Instruction/LLM-quantization-and-export.md).
|
||||
- 2024.02.22: Support gemma series: gemma-2b, [gemma-2b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/gemma_2b_instruct), gemma-7b, gemma-7b-instruct.
|
||||
- 2024.02.16: Support deepseek-math series: deepseek-math-7b, deepseek-math-7b-instruct, deepseek-math-7b-chat.
|
||||
- 🔥2024.02.05: Support **Qwen1.5** series models, see [model list](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B) for all supported Qwen1.5 models. Provide fine-tuning scripts for [qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8).
|
||||
- 🔥2024.02.05: Support **Qwen1.5** series models, see [model list](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B) for all supported Qwen1.5 models. Provide fine-tuning scripts for [qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8).
|
||||
- 2024.02.05: Support training of diffusion models such as **SDXL**, **SD**, **ControlNet**, as well as **DreamBooth** training. See corresponding [training scripts](https://github.com/modelscope/swift/tree/main/examples/pytorch/sdxl/scripts) for details.
|
||||
- 2024.02.01: Support minicpm series: [minicpm-2b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/minicpm_2b_sft_chat), minicpm-2b-chat.
|
||||
- 🔥2024.02.01: Support dataset mixing to reduce **catastrophic forgetting**. Use `--train_dataset_mix_ratio 2.0` to enable training! We also open sourced the general knowledge dataset [ms-bench](https://www.modelscope.cn/datasets/iic/ms_bench/summary).
|
||||
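As a concrete reading of the 2024.02.01 entry above, dataset mixing can be sketched as below; the local dataset file and model type are placeholders, and the mixed-in general data is presumably the ms-bench dataset mentioned in that entry.

```bash
# Sketch: mix general-knowledge data into fine-tuning to reduce catastrophic
# forgetting. The local dataset file is a placeholder.
CUDA_VISIBLE_DEVICES=0 swift sft \
    --model_type qwen-7b-chat \
    --dataset my-domain-data.jsonl \
    --train_dataset_mix_ratio 2.0
```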
@ -182,9 +182,9 @@ You can contact us and communicate with us by adding our group:
|
||||
- 2023.12.18: Support VLLM for inference acceleration.
|
||||
- 2023.12.15: Support deepseek, deepseek-coder series: deepseek-7b, deepseek-7b-chat, deepseek-67b, deepseek-67b-chat, openbuddy-deepseek-67b-chat, deepseek-coder-1_3b, deepseek-coder-1_3b-instruct, deepseek-coder-6_7b, deepseek-coder-6_7b-instruct, deepseek-coder-33b, deepseek-coder-33b-instruct.
|
||||
- 2023.12.13: Support mistral-7b-instruct-v2, [mixtral-moe-7b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe), [mixtral-moe-7b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe_instruct).
|
||||
- 2023.12.09: Support `freeze_parameters_ratio` parameter as a compromise between lora and full-parameter training. Corresponding sh can be found in [full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). Support `disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc` parameters, see [command line arguments](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.md) for details.
|
||||
- 2023.12.09: Support `freeze_parameters_ratio` parameter as a compromise between lora and full-parameter training. Corresponding sh can be found in [full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). Support `disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc` parameters, see [command line arguments](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.md) for details.
|
||||
- 2023.12.08: Support [sus-34b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/sus_34b_chat), support yi-6b-200k, yi-34b-200k.
|
||||
- 2023.12.07: Support [Multi-Node DDP training](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
|
||||
- 2023.12.07: Support [Multi-Node DDP training](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
|
||||
- 2023.12.05: Support models: zephyr-7b-beta-chat, openbuddy-zephyr-7b-chat. Support datasets: hc3-zh, hc3-en.
|
||||
- 🔥2023.12.02: [Self-cognition fine-tuning best practices](docs/source_en/LLM/Self-cognition-best-practice.md), **10 minutes to fine-tune a large model for self-cognition**, create your own unique large model.
|
||||
- 🔥2023.11.30: Support training and inference of **qwen-1_8b**, **qwen-72b**, **qwen-audio** series models. Corresponding sh scripts can be found in [qwen_1_8b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_1_8b_chat), [qwen_72b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_72b_chat), [qwen_audio_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_audio_chat)
README_CN.md (16 changed lines)
@ -70,8 +70,8 @@ SWIFT具有丰富全面的文档,请查看我们的文档网站:
|
||||
- 🔥2024.08.07: 支持使用vllm对多模态大模型: llava系列, internvl2系列, phi3-vision, minicpm-v2.5进行推理加速和部署. 可以查看[多模态&vLLM推理加速文档](docs/source/Multi-Modal/vLLM推理加速文档.md)获取更多信息.
|
||||
- 2024.08.06: 支持minicpm-v-v2_6-chat, 使用`swift infer --model_type minicpm-v-v2_6-chat`进行推理体验, 最佳实践可以查看[这里](https://github.com/modelscope/swift/issues/1613).
|
||||
- 2024.08.06: 支持internlm2.5的1.8b和20b系列. 使用`swift infer --model_type internlm2_5-1_8b-chat`进行体验.
|
||||
- 🔥2024.08.05: 支持多模态数据集的评测!命令行完全一致,新增了许多[多模态数据集](https://swift.readthedocs.io/zh-cn/latest/LLM/LLM%E8%AF%84%E6%B5%8B%E6%96%87%E6%A1%A3.html#id2).
|
||||
- 🔥2024.08.02: 支持Fourier Ft训练. 使用方式为`--sft_type fourierft`, 参数可以参考[这里](https://swift.readthedocs.io/zh-cn/latest/LLM/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html).
|
||||
- 🔥2024.08.05: 支持多模态数据集的评测!命令行完全一致,新增了许多[多模态数据集](https://swift.readthedocs.io/zh-cn/latest/Instruction/LLM%E8%AF%84%E6%B5%8B%E6%96%87%E6%A1%A3.html#id2).
|
||||
- 🔥2024.08.02: 支持Fourier Ft训练. 使用方式为`--sft_type fourierft`, 参数可以参考[这里](https://swift.readthedocs.io/zh-cn/latest/Instruction/%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%8F%82%E6%95%B0.html).
|
||||
- 🔥2024.07.29: 支持使用lmdeploy对LLM和VLM模型进行推理加速. 文档可以查看[这里](docs/source/Multi-Modal/LmDeploy推理加速文档.md).
|
||||
- 🔥2024.07.24: 人类偏好对齐算法支持视觉多模态大模型, 包括DPO/ORPO/SimPO/CPO, 训练参考[文档](docs/source/Multi-Modal/人类偏好对齐训练文档.md). 支持数据集RLAIF-V.
|
||||
- 🔥2024.07.24: 支持使用megatron对qwen2系列进行CPT和SFT. 可以查看[megatron训练文档](docs/source/LLM/Megatron训练文档.md).
|
||||
@ -90,7 +90,7 @@ SWIFT具有丰富全面的文档,请查看我们的文档网站:
|
||||
- 2024.07.06: 支持codegeex4-9b-chat.
|
||||
- 2024.07.04: 支持internlm2_5-7b系列: internlm2_5-7b, internlm2_5-7b-chat, internlm2_5-7b-chat-1m.
|
||||
- 2024.07.02: 支持`llava1_6-vicuna-7b-instruct`, `llava1_6-vicuna-13b-instruct`等llava-hf模型. 最佳实践可以查看[这里](docs/source/Multi-Modal/llava最佳实践.md).
|
||||
- 🔥2024.06.29: 支持[eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass)评测! 我们支持了包含`BoolQ, ocnli, humaneval, math, ceval, mmlu, gsk8k, ARC_e`等50+标准数据集在内的评测流程, 请查看我们的[评测文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM评测文档.md)来使用。下个迭代我们会支持多模态评测和Agent评测,记得持续关注我们: )
|
||||
- 🔥2024.06.29: 支持[eval-scope](https://github.com/modelscope/eval-scope)&[open-compass](https://github.com/open-compass/opencompass)评测! 我们支持了包含`BoolQ, ocnli, humaneval, math, ceval, mmlu, gsk8k, ARC_e`等50+标准数据集在内的评测流程, 请查看我们的[评测文档](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM评测文档.md)来使用。下个迭代我们会支持多模态评测和Agent评测,记得持续关注我们: )
|
||||
- 🔥2024.06.28: 支持**Florence**系列模型: 可以查看[Florence最佳实践](docs/source/Multi-Modal/florence最佳实践.md).
|
||||
- 🔥2024.06.28: 支持**Gemma2**系列模型: gemma2-9b, gemma2-9b-instruct, gemma2-27b, gemma2-27b-instruct.
|
||||
- 🔥2024.06.18: 支持**DeepSeek-Coder-v2**系列模型! 使用model_type`deepseek-coder-v2-instruct`和`deepseek-coder-v2-lite-instruct`来开启训练和推理.
|
||||
@ -109,7 +109,7 @@ SWIFT具有丰富全面的文档,请查看我们的文档网站:
|
||||
- 🔥2024.05.17: 支持peft=0.11.0. 同时支持了三个新的tuner方法: `BOFT`, `Vera` 和 `Pissa`. 使用 `--sft_type boft/vera` 开启BOFT或者Vera, 使用 `--init_lora_weights pissa` 以及 `--sft_type lora` 来使用 Pissa.
|
||||
- 2024.05.16: 支持Llava-Next (Stronger)系列模型,最佳实践可以查看[这里](https://github.com/modelscope/swift/tree/main/docs/source/Multi-Modal/llava最佳实践.md).
|
||||
- 🔥2024.05.13: 支持Yi-1.5系列模型,使用`--model_type yi-1_5-9b-chat`等开始体验
|
||||
- 2024.05.11: 支持使用[hqq](https://github.com/mobiusml/hqq)和[eetq](https://github.com/NetEase-FuXi/EETQ)进行qlora训练和量化推理,可以查看[LLM量化与导出文档](https://github.com/modelscope/swift/tree/main/docs/source/LLM/LLM量化与导出文档.md)
|
||||
- 2024.05.11: 支持使用[hqq](https://github.com/mobiusml/hqq)和[eetq](https://github.com/NetEase-FuXi/EETQ)进行qlora训练和量化推理,可以查看[LLM量化与导出文档](https://github.com/modelscope/swift/tree/main/docs/source/Instruction/LLM量化与导出文档.md)
|
||||
- 2024.05.10: 支持序列并行. 先安装`pip install .[seq_parallel]`, 之后在DDP环境中添加`--sequence_parallel_size n`即可使用!
|
||||
- 2024.05.08: 支持DeepSeek-V2-Chat模型, 训练参考[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/deepseek-v2-chat/lora_ddp_ds3/sft.sh)。支持InternVL-Chat-V1.5-Int8模型,最佳实践参考[这里](https://github.com/modelscope/swift/tree/main/docs/source/Multi-Modal/internvl最佳实践.md).
|
||||
- 🔥2024.05.07: 支持**ORPO**训练,使用`swift orpo`来开始训练, 最佳实践可以查看[这里](https://github.com/modelscope/swift/tree/main/docs/source/LLM/ORPO算法最佳实践.md)
|
||||
@ -151,10 +151,10 @@ SWIFT具有丰富全面的文档,请查看我们的文档网站:
|
||||
- 2024.03.06: 支持AWQ量化模型的训练和推理, 使用[这个Qwen1.5-AWQ模型脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_awq/lora/sft.sh)开始训练, 并支持[yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_zero3)的训练和推理.
|
||||
- 🔥2024.02.29: 支持[LLaMA PRO](https://arxiv.org/pdf/2401.02415.pdf), 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh)即可开始训练.
|
||||
- 🔥2024.02.29: 支持[LoRA+](https://arxiv.org/pdf/2402.12354.pdf), 使用[这个脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh)即可开始训练.
|
||||
- 2024.02.25: 支持`swift export`, 对模型进行**AWQ/GPTQ**量化导出, 以及推送ModelScope Hub. 具体可以查看: [LLM量化与导出文档](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM量化与导出文档.md).
|
||||
- 2024.02.25: 支持`swift export`, 对模型进行**AWQ/GPTQ**量化导出, 以及推送ModelScope Hub. 具体可以查看: [LLM量化与导出文档](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM量化与导出文档.md).
|
||||
- 2024.02.22: 支持gemma系列: gemma-2b, [gemma-2b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/gemma_2b_instruct), gemma-7b, gemma-7b-instruct.
|
||||
- 2024.02.16: 支持deepseek-math系列: deepseek-math-7b, deepseek-math-7b-instruct, deepseek-math-7b-chat.
|
||||
- 🔥2024.02.05: 支持**Qwen1.5**系列模型, 支持的所有Qwen1.5系列模型请查看[模型列表](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B). 提供了[qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8)微调的脚本.
|
||||
- 🔥2024.02.05: 支持**Qwen1.5**系列模型, 支持的所有Qwen1.5系列模型请查看[模型列表](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md#%E6%A8%A1%E5%9E%8B). 提供了[qwen1half-7b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat), [qwen1half-7b-chat-int8](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_int8)微调的脚本.
|
||||
- 2024.02.05: 支持扩散模型如**SDXL**, **SD**, **ControlNet**的训练, 同时也支持**DreamBooth**的训练, 详情可以查看对应的[训练脚本](https://github.com/modelscope/swift/tree/main/examples/pytorch/sdxl/scripts).
|
||||
- 2024.02.01: 支持minicpm系列: [minicpm-2b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/minicpm_2b_sft_chat), minicpm-2b-chat.
|
||||
- 🔥2024.02.01: 支持数据集打混来减少 **灾难性遗忘问题**. 使用`--train_dataset_mix_ratio 2.0`开启训练!同时我们也开源了通用知识数据集 [ms-bench](https://www.modelscope.cn/datasets/iic/ms_bench/summary).
|
||||
@ -182,9 +182,9 @@ SWIFT具有丰富全面的文档,请查看我们的文档网站:
|
||||
- 2023.12.18: 支持VLLM进行推理加速.
|
||||
- 2023.12.15: 支持deepseek, deepseek-coder系列: deepseek-7b, deepseek-7b-chat, deepseek-67b, deepseek-67b-chat, openbuddy-deepseek-67b-chat, deepseek-coder-1_3b, deepseek-coder-1_3b-instruct, deepseek-coder-6_7b, deepseek-coder-6_7b-instruct, deepseek-coder-33b, deepseek-coder-33b-instruct.
|
||||
- 2023.12.13: 支持mistral-7b-instruct-v2, [mixtral-moe-7b](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe), [mixtral-moe-7b-instruct](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/mixtral_7b_moe_instruct).
|
||||
- 2023.12.09: 支持`freeze_parameters_ratio`参数, 作为lora和全参数训练的折中方案. 对应的sh可以查看[full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). 支持`disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc`参数, 具体可以查看[命令行参数](https://github.com/modelscope/swift/blob/main/docs/source/LLM/命令行参数.md).
|
||||
- 2023.12.09: 支持`freeze_parameters_ratio`参数, 作为lora和全参数训练的折中方案. 对应的sh可以查看[full_freeze_ddp](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_7b_chat/full_freeze_ddp). 支持`disable_tqdm`, `lazy_tokenize`, `preprocess_num_proc`参数, 具体可以查看[命令行参数](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/命令行参数.md).
|
||||
- 2023.12.08: 支持[sus-34b-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/sus_34b_chat), 支持yi-6b-200k, yi-34b-200k.
|
||||
- 2023.12.07: 支持[Multi-Node DDP训练](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
|
||||
- 2023.12.07: 支持[Multi-Node DDP训练](https://github.com/modelscope/swift/blob/main/docs/source/Instruction/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E4%BD%BF%E7%94%A8cli).
|
||||
- 2023.12.05: 支持模型: zephyr-7b-beta-chat, openbuddy-zephyr-7b-chat. 支持数据集: hc3-zh, hc3-en.
|
||||
- 🔥 2023.12.02: [自我认知微调最佳实践](https://github.com/modelscope/swift/blob/main/docs/source/LLM/自我认知微调最佳实践.md), **10分钟对大模型进行自我认知微调**, 创建专属于自己的大模型.
|
||||
- 🔥 2023.11.30: 支持**qwen-1_8b**, **qwen-72b**, **qwen-audio**系列模型的训练的推理. 对应的sh脚本可以查看[qwen_1_8b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_1_8b_chat), [qwen_72b_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_72b_chat), [qwen_audio_chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/qwen_audio_chat)
|
||||
|
@ -24,7 +24,7 @@ pip install -e '.[llm]'
|
||||
pip install deepspeed -U
|
||||
|
||||
# 如果你想要使用基于auto_gptq的qlora训练. (推荐, 效果优于bnb)
|
||||
# 支持auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型`
|
||||
# 支持auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/支持的模型和数据集.md#模型`
|
||||
# auto_gptq和cuda版本有对应关系,请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本
|
||||
pip install auto_gptq -U
|
||||
|
||||
@ -83,7 +83,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
|
||||
--output_dir output \
|
||||
|
||||
# 使用自己的数据集
|
||||
# 自定义数据集格式查看: https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E8%87%AA%E5%AE%9A%E4%B9%89%E4%B8%8E%E6%8B%93%E5%B1%95.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86
|
||||
# 自定义数据集格式查看: https://github.com/modelscope/swift/blob/main/docs/source/Instruction/%E8%87%AA%E5%AE%9A%E4%B9%89%E4%B8%8E%E6%8B%93%E5%B1%95.md#%E8%87%AA%E5%AE%9A%E4%B9%89%E6%95%B0%E6%8D%AE%E9%9B%86
|
||||
CUDA_VISIBLE_DEVICES=0 swift sft \
|
||||
--model_id_or_path qwen/Qwen-7B-Chat \
|
||||
--dataset chatml.jsonl \
|
||||
|
@ -15,7 +15,7 @@ pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
|
||||
pip install 'ms-swift[llm]' -U
|
||||
|
||||
# 如果你想要使用基于auto_gptq的模型进行推理.
|
||||
# 使用auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型`
|
||||
# 使用auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/支持的模型和数据集.md#模型`
|
||||
# auto_gptq和cuda版本有对应关系,请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本
|
||||
pip install auto_gptq -U
|
||||
|
||||
|
@ -298,7 +298,7 @@ RLHF参数继承了sft参数, 除此之外增加了以下参数:
|
||||
- `--model_revision`: 默认值为`None`. 具体的参数介绍可以在`sft命令行参数`中查看. 如果`model_id_or_path`为None或者是本地的模型目录, 则该参数失效.
|
||||
- `--🔥sft_type`: 默认值为`'lora'`, 具体的参数介绍可以在`sft命令行参数`中查看.
|
||||
- `--🔥template_type`: 默认值为`'AUTO'`, 具体的参数介绍可以在`sft命令行参数`中查看.
|
||||
- `--🔥infer_backend`: 你可以选择'AUTO', 'vllm', 'pt'. 默认使用'AUTO', 进行智能选择, 即如果没有传入`ckpt_dir`或使用全参数微调, 并且安装了vllm且模型支持vllm则使用vllm引擎, 否则使用原生torch进行推理. vllm环境准备可以参考[VLLM推理加速与部署](VLLM推理加速与部署.md#环境准备), vllm支持的模型可以查看[支持的模型](../LLM/支持的模型和数据集.md#模型).
|
||||
- `--🔥infer_backend`: 你可以选择'AUTO', 'vllm', 'pt'. 默认使用'AUTO', 进行智能选择, 即如果没有传入`ckpt_dir`或使用全参数微调, 并且安装了vllm且模型支持vllm则使用vllm引擎, 否则使用原生torch进行推理. vllm环境准备可以参考[VLLM推理加速与部署](VLLM推理加速与部署.md#环境准备), vllm支持的模型可以查看[支持的模型](../Instruction/支持的模型和数据集.md#模型).
|
||||
- `--🔥ckpt_dir`: 必填项, 值为SFT阶段保存的checkpoint路径, e.g. `'/path/to/your/vx-xxx/checkpoint-xxx'`.
|
||||
- `--load_args_from_ckpt_dir`: 是否从`ckpt_dir`的`sft_args.json`文件中读取模型配置信息. 默认是`True`.
|
||||
- `--🔥load_dataset_config`: 该参数只有在`--load_args_from_ckpt_dir true`时才生效. 即是否从`ckpt_dir`的`sft_args.json`文件中读取数据集相关的配置信息. 默认为`False`.
|
||||
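A minimal sketch combining the parameters above: inference from a saved SFT checkpoint with the vLLM backend. The checkpoint path is the placeholder already used in the `--ckpt_dir` description; the other flags are taken verbatim from this parameter list.

```bash
# Sketch: infer from an SFT checkpoint using the vLLM backend; the checkpoint
# path is a placeholder.
CUDA_VISIBLE_DEVICES=0 swift infer \
    --ckpt_dir '/path/to/your/vx-xxx/checkpoint-xxx' \
    --infer_backend vllm \
    --load_dataset_config true
```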
@ -426,7 +426,7 @@ app-ui参数继承了infer参数, 除此之外增加了以下参数:
|
||||
|
||||
deploy参数继承了infer参数, 除此之外增加了以下参数:
|
||||
|
||||
- `--🔥host`: 默认为`'127.0.0.1'`. 要使其在非本机上可访问, 可设置为`'0.0.0.0'`.
- `--host`: 默认为`'0.0.0.0'`.
|
||||
- `--port`: 默认为`8000`.
|
||||
- `--api_key`: 默认为`None`, 即不对请求进行api_key验证.
|
||||
- `--ssl_keyfile`: 默认为`None`.
|
||||
|
@ -379,8 +379,8 @@
|
||||
|qwen2-vl-7b-instruct-awq|[qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
|
||||
|glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|✘|✘|✘|✘|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|
||||
|idefics3-8b-llama3|[AI-ModelScope/Idefics3-8B-Llama3](https://modelscope.cn/models/AI-ModelScope/Idefics3-8B-Llama3/summary)|^(model.text_model\|model.connector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|idefics3|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision|[HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3)|
|
||||
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|
||||
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|
||||
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|
||||
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|
||||
|llava1_6-mistral-7b-instruct|[swift/llava-v1.6-mistral-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-mistral-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-mistral|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)|
|
||||
|llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
|
||||
|llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
|
||||
@ -391,16 +391,16 @@
|
||||
|llava-onevision-qwen2-0_5b-ov|[AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-0.5b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)|
|
||||
|llava-onevision-qwen2-7b-ov|[AI-ModelScope/llava-onevision-qwen2-7b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-7b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-7b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-7b-ov-hf)|
|
||||
|llava-onevision-qwen2-72b-ov|[AI-ModelScope/llava-onevision-qwen2-72b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-72b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-72b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-72b-ov-hf)|
|
||||
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|
||||
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|
||||
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|
||||
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✘|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|
||||
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|
||||
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|
||||
|llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
|
||||
|llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
|
||||
|llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|
|
||||
|llava-next-video-34b-instruct|[swift/LLaVA-NeXT-Video-34B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-34B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video-yi|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-34B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-34B-hf)|
|
||||
|yi-vl-6b-chat|[01ai/Yi-VL-6B](https://modelscope.cn/models/01ai/Yi-VL-6B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B)|
|
||||
|yi-vl-34b-chat|[01ai/Yi-VL-34B](https://modelscope.cn/models/01ai/Yi-VL-34B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B)|
|
||||
|llava-llama-3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✘|✔|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|
||||
|llava-llama3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✔|✘|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|
||||
|internlm-xcomposer2-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b)|
|
||||
|internlm-xcomposer2-4khd-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2-4khd|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-4khd-7b](https://huggingface.co/internlm/internlm-xcomposer2-4khd-7b)|
|
||||
|internlm-xcomposer2_5-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2_5|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2d5-7b](https://huggingface.co/internlm/internlm-xcomposer2d5-7b)|
|
||||
|
@ -267,7 +267,7 @@ The following parameters take effect when the `sft_type` is set to `reft`.
|
||||
### Liger Parameters
|
||||
|
||||
- `--use_liger`: Use liger-kernel to train.
|
||||
-
|
||||
|
||||
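As a hedged illustration of the `--use_liger` switch above, it would be passed alongside an ordinary SFT command; the boolean-style `true` value and the surrounding flags are assumptions based on the rest of these parameter docs.

```bash
# Sketch: enable the liger-kernel during SFT (flag value format assumed;
# model/dataset are placeholders).
CUDA_VISIBLE_DEVICES=0 swift sft \
    --model_type qwen-7b-chat \
    --dataset alpaca-zh \
    --use_liger true
```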
## PT Parameters
|
||||
|
||||
PT parameters inherit from the SFT parameters with some modifications to the default values:
|
||||
@ -426,7 +426,7 @@ app-ui parameters inherit from infer parameters, with the following added parame
|
||||
|
||||
deploy parameters inherit from infer parameters, with the following added parameters:
|
||||
|
||||
- `--🔥host`: Default is `'127.0.0.1'`. To make it accessible on the local network, you can set it to `'0.0.0.0'`.
- `--host`: Default is `'0.0.0.0'`.
|
||||
- `--port`: Default is `8000`.
|
||||
- `--api_key`: The default is `None`, meaning that the request will not be subjected to api_key verification.
|
||||
- `--ssl_keyfile`: Default is `None`.
|
||||
|
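To ground the deploy parameters above, a server could be started and queried roughly as follows. The model type and request body are placeholders, and the `/v1/chat/completions` path assumes the OpenAI-compatible protocol used elsewhere in this commit (the client code builds `http://{host}:{port}/v1` URLs).

```bash
# Sketch: launch the OpenAI-compatible server on the defaults above and send
# one chat request (placeholder model/prompt).
CUDA_VISIBLE_DEVICES=0 swift deploy --model_type qwen-7b-chat &

# Wait for the server to finish loading the model before issuing the request.
curl http://127.0.0.1:8000/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"model": "qwen-7b-chat", "messages": [{"role": "user", "content": "Hello!"}]}'
```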
@ -20,7 +20,7 @@ pip install -e '.[llm]'
|
||||
pip install deepspeed -U
|
||||
|
||||
# If you want to use qlora training based on auto_gptq. (Recommended, better than bnb)
|
||||
# Models supporting auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/supported-models-and-datasets.md#models`
|
||||
# Models supporting auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/supported-models-and-datasets.md#models`
|
||||
# auto_gptq and cuda versions are related, please choose the version according to `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
|
||||
pip install auto_gptq -U
|
||||
|
||||
@ -79,7 +79,7 @@ CUDA_VISIBLE_DEVICES=0 swift sft \
|
||||
--output_dir output \
|
||||
|
||||
# Using your own dataset
|
||||
# custom dataset format: https://github.com/modelscope/swift/blob/main/docs/source_en/LLM/Customization.md#custom-datasets
|
||||
# custom dataset format: https://github.com/modelscope/swift/blob/main/docs/source_en/Instruction/Customization.md#custom-datasets
|
||||
CUDA_VISIBLE_DEVICES=0 swift sft \
|
||||
--model_id_or_path qwen/Qwen-7B-Chat \
|
||||
--dataset chatml.jsonl \
|
||||
|
@ -13,7 +13,7 @@ GPU devices: A10, 3090, V100, A100 are all supported.
|
||||
pip install 'ms-swift[llm]' -U
|
||||
|
||||
# If you want to use models based on auto_gptq for inference.
|
||||
# Models using auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/Supported Models and Datasets.md#Models`
|
||||
# Models using auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/Instruction/Supported Models and Datasets.md#Models`
|
||||
# auto_gptq and cuda versions have a correspondence, please select the version according to `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
|
||||
pip install auto_gptq -U
|
||||
|
||||
|
@ -379,8 +379,8 @@ The table below introduces all models supported by SWIFT:
|
||||
|qwen2-vl-7b-instruct-awq|[qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
|
||||
|glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|✘|✘|✘|✘|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|
||||
|idefics3-8b-llama3|[AI-ModelScope/Idefics3-8B-Llama3](https://modelscope.cn/models/AI-ModelScope/Idefics3-8B-Llama3/summary)|^(model.text_model\|model.connector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|idefics3|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision|[HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3)|
|
||||
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|
||||
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|
||||
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
|
||||
|llava1_5-13b-instruct|[swift/llava-1.5-13b-hf](https://modelscope.cn/models/swift/llava-1.5-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✘|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf)|
|
||||
|llava1_6-mistral-7b-instruct|[swift/llava-v1.6-mistral-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-mistral-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-mistral|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)|
|
||||
|llava1_6-vicuna-7b-instruct|[swift/llava-v1.6-vicuna-7b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)|
|
||||
|llava1_6-vicuna-13b-instruct|[swift/llava-v1.6-vicuna-13b-hf](https://modelscope.cn/models/swift/llava-v1.6-vicuna-13b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-vicuna|✔|✔|✘|✘|transformers>=4.39|vision|[llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)|
|
||||
@ -391,16 +391,16 @@ The table below introduces all models supported by SWIFT:
|
||||
|llava-onevision-qwen2-0_5b-ov|[AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-0.5b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-0.5b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)|
|
||||
|llava-onevision-qwen2-7b-ov|[AI-ModelScope/llava-onevision-qwen2-7b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-7b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-7b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-7b-ov-hf)|
|
||||
|llava-onevision-qwen2-72b-ov|[AI-ModelScope/llava-onevision-qwen2-72b-ov-hf](https://modelscope.cn/models/AI-ModelScope/llava-onevision-qwen2-72b-ov-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-onevision-qwen|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision, video|[llava-hf/llava-onevision-qwen2-72b-ov-hf](https://huggingface.co/llava-hf/llava-onevision-qwen2-72b-ov-hf)|
|
||||
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✔|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|
||||
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|
||||
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✔|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|
||||
|llama3-llava-next-8b|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3-llava-next|✔|✘|✘|✘||vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
|
||||
|llava-next-72b|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
|
||||
|llava-next-110b|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-qwen|✔|✘|✘|✘||vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
|
||||
|llava-next-video-7b-instruct|[swift/LLaVA-NeXT-Video-7B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-hf)|
|
||||
|llava-next-video-7b-32k-instruct|[swift/LLaVA-NeXT-Video-7B-32K-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-32K-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-32K-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-32K-hf)|
|
||||
|llava-next-video-7b-dpo-instruct|[swift/LLaVA-NeXT-Video-7B-DPO-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-7B-DPO-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-7B-DPO-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-7B-DPO-hf)|
|
||||
|llava-next-video-34b-instruct|[swift/LLaVA-NeXT-Video-34B-hf](https://modelscope.cn/models/swift/LLaVA-NeXT-Video-34B-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-next-video-yi|✔|✘|✘|✘|transformers>=4.42, av|video|[llava-hf/LLaVA-NeXT-Video-34B-hf](https://huggingface.co/llava-hf/LLaVA-NeXT-Video-34B-hf)|
|
||||
|yi-vl-6b-chat|[01ai/Yi-VL-6B](https://modelscope.cn/models/01ai/Yi-VL-6B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B)|
|
||||
|yi-vl-34b-chat|[01ai/Yi-VL-34B](https://modelscope.cn/models/01ai/Yi-VL-34B/summary)|^(model.layers\|model.mm_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|yi-vl|✔|✘|✘|✘|transformers>=4.34|vision|[01-ai/Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B)|
|
||||
|llava-llama-3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✘|✔|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|
||||
|llava-llama3-8b-v1_1|[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava-llama-instruct|✔|✔|✘|✘|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
|
||||
|internlm-xcomposer2-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b)|
|
||||
|internlm-xcomposer2-4khd-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2-4khd-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2-4khd|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2-4khd-7b](https://huggingface.co/internlm/internlm-xcomposer2-4khd-7b)|
|
||||
|internlm-xcomposer2_5-7b-chat|[Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-xcomposer2d5-7b/summary)|attention.wqkv, attention.wo, feed_forward.w1, feed_forward.w2, feed_forward.w3|internlm-xcomposer2_5|✔|✘|✔|✘||vision|[internlm/internlm-xcomposer2d5-7b](https://huggingface.co/internlm/internlm-xcomposer2d5-7b)|
|
||||
|
@ -1,4 +1,4 @@
|
||||
def test_benchmark(infer_backend):
|
||||
def test_benchmark(infer_backend: str) -> None:
|
||||
import os
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
|
||||
os.environ['TIMEOUT'] = '-1'
|
||||
|
@ -8,7 +8,7 @@ from swift.utils import stat_array
|
||||
|
||||
|
||||
def write_dataset_info() -> None:
|
||||
fpaths = ['docs/source/LLM/支持的模型和数据集.md', 'docs/source_en/LLM/Supported-models-datasets.md']
|
||||
fpaths = ['docs/source/Instruction/支持的模型和数据集.md', 'docs/source_en/Instruction/Supported-models-datasets.md']
|
||||
pre_texts = []
|
||||
for fpath in fpaths:
|
||||
if os.path.exists(fpath):
|
||||
|
@ -4,7 +4,7 @@ from swift.llm import MODEL_MAPPING, ModelType, get_default_lora_target_modules
|
||||
|
||||
|
||||
def get_model_info_table():
|
||||
fpaths = ['docs/source/LLM/支持的模型和数据集.md', 'docs/source_en/LLM/Supported-models-datasets.md']
|
||||
fpaths = ['docs/source/Instruction/支持的模型和数据集.md', 'docs/source_en/Instruction/Supported-models-datasets.md']
|
||||
end_words = [['### 多模态大模型', '## 数据集'], ['### MLLM', '## Datasets']]
|
||||
model_name_list = ModelType.get_model_name_list()
|
||||
result = [
|
||||
|
@ -457,8 +457,7 @@ async def inference_lmdeploy_async(request: Union[ChatCompletionRequest, Complet
|
||||
usage_info = UsageInfo(
|
||||
prompt_tokens=num_prompt_tokens,
|
||||
completion_tokens=num_generated_tokens,
|
||||
total_tokens=num_prompt_tokens + num_generated_tokens,
|
||||
)
|
||||
total_tokens=num_prompt_tokens + num_generated_tokens)
|
||||
finish_reason = None
|
||||
if output.status.name == 'FINISH':
|
||||
finish_reason = 'stop'
|
||||
@ -477,17 +476,12 @@ async def inference_lmdeploy_async(request: Union[ChatCompletionRequest, Complet
|
||||
ChatCompletionResponseChoice(
|
||||
index=0,
|
||||
message=ChatMessage(role='assistant', content=response, tool_calls=toolcall),
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
finish_reason=finish_reason)
|
||||
]
|
||||
response = ChatCompletionResponse(
|
||||
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
|
||||
else:
|
||||
choices = [CompletionResponseChoice(
|
||||
index=0,
|
||||
text=response,
|
||||
finish_reason=finish_reason,
|
||||
)]
|
||||
choices = [CompletionResponseChoice(index=0, text=response, finish_reason=finish_reason)]
|
||||
response = CompletionResponse(
|
||||
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
|
||||
if _args.log_interval > 0:
|
||||
@ -676,17 +670,12 @@ async def inference_pt_async(request: Union[ChatCompletionRequest, CompletionReq
|
||||
ChatCompletionResponseChoice(
|
||||
index=0,
|
||||
message=ChatMessage(role='assistant', content=response, tool_calls=toolcall),
|
||||
finish_reason=None,
|
||||
)
|
||||
finish_reason=None)
|
||||
]
|
||||
response = ChatCompletionResponse(
|
||||
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
|
||||
else:
|
||||
choices = [CompletionResponseChoice(
|
||||
index=0,
|
||||
text=response,
|
||||
finish_reason=None,
|
||||
)]
|
||||
choices = [CompletionResponseChoice(index=0, text=response, finish_reason=None)]
|
||||
response = CompletionResponse(
|
||||
model=request.model, choices=choices, usage=usage_info, id=request_id, created=created_time)
|
||||
if _args.log_interval > 0:
|
||||
|
@ -1567,7 +1567,7 @@ class AppUIArguments(InferArguments):
|
||||
|
||||
@dataclass
|
||||
class DeployArguments(InferArguments):
|
||||
host: str = '127.0.0.1'
|
||||
host: str = '0.0.0.0'
|
||||
port: int = 8000
|
||||
api_key: Optional[str] = None
|
||||
ssl_keyfile: Optional[str] = None
|
||||
|
@ -230,7 +230,12 @@ def _pre_inference_client(model_type: str,
|
||||
else:
|
||||
raise ValueError(f'model_type: {model_type}, model_list: {[model.id for model in model_list.data]}')
|
||||
assert is_chat_request is not None and is_multimodal is not None
|
||||
data = {k: v for k, v in request_config.__dict__.items() if not k.startswith('__')}
|
||||
data = {}
|
||||
request_config_origin = XRequestConfig()
|
||||
for k, v in request_config.__dict__.items():
|
||||
v_origin = getattr(request_config_origin, k)
|
||||
if v != v_origin:
|
||||
data[k] = v
|
||||
url = kwargs.pop('url', None)
|
||||
if url is None:
|
||||
url = f'http://{host}:{port}/v1'
|
||||
@ -253,9 +258,9 @@ def _pre_inference_client(model_type: str,
|
||||
if medias:
|
||||
medias = convert_to_base64(images=medias)['images']
|
||||
data[media_key] = medias
|
||||
if tools and len(tools) > 0:
|
||||
if tools:
|
||||
data['tools'] = tools
|
||||
if tool_choice:
|
||||
if tool_choice and tool_choice != 'auto':
|
||||
data['tool_choice'] = tool_choice
|
||||
return url, data, is_chat_request
|
||||
|
||||
|
@ -129,6 +129,7 @@ class LmdeployGenerationConfig(_LmdeployGenerationConfig):
|
||||
*,
|
||||
n: int = 1,
|
||||
stop_words: Optional[List[int]] = None,
|
||||
logprobs: Optional[int] = None,
|
||||
random_seed: Optional[int] = None,
|
||||
skip_special_tokens: bool = False,
|
||||
**kwargs,
|
||||
@ -146,6 +147,7 @@ class LmdeployGenerationConfig(_LmdeployGenerationConfig):
|
||||
repetition_penalty=repetition_penalty,
|
||||
n=n,
|
||||
stop_words=stop_words,
|
||||
logprobs=logprobs,
|
||||
random_seed=random_seed,
|
||||
skip_special_tokens=skip_special_tokens,
|
||||
**kwargs)
|
||||
|
@ -286,7 +286,7 @@ class ModelType:
|
||||
yi_vl_6b_chat = 'yi-vl-6b-chat'
|
||||
yi_vl_34b_chat = 'yi-vl-34b-chat'
|
||||
# llava-llama (xtuner)
|
||||
llava_llama3_8b_v1_1 = 'llava-llama-3-8b-v1_1'
|
||||
llava_llama3_8b_v1_1 = 'llava-llama3-8b-v1_1'
|
||||
# internlm
|
||||
internlm_7b = 'internlm-7b'
|
||||
internlm_7b_chat = 'internlm-7b-chat'
|
||||
@ -989,7 +989,7 @@ def get_model_tokenizer_cogvlm2(*args, **kwargs):
|
||||
LoRATM.llava,
|
||||
TemplateType.llava_llama_instruct,
|
||||
support_flash_attn=True,
|
||||
support_lmdeploy=True,
|
||||
support_vllm=True,
|
||||
requires=['transformers>=4.36'],
|
||||
tags=['multi-modal', 'vision'],
|
||||
hf_model_id='xtuner/llava-llama-3-8b-v1_1-transformers')
|
||||
@ -6046,7 +6046,6 @@ def get_model_tokenizer_llava_hf(model_dir: str, *args, **kwargs):
|
||||
eos_token='</s>',
|
||||
support_flash_attn=True,
|
||||
support_vllm=True,
|
||||
support_lmdeploy=True,
|
||||
requires=['transformers>=4.36'],
|
||||
tags=['multi-modal', 'vision'],
|
||||
hf_model_id='llava-hf/llava-1.5-13b-hf')
|
||||
@ -6058,7 +6057,6 @@ def get_model_tokenizer_llava_hf(model_dir: str, *args, **kwargs):
|
||||
eos_token='</s>',
|
||||
support_flash_attn=True,
|
||||
support_vllm=True,
|
||||
support_lmdeploy=True,
|
||||
requires=['transformers>=4.36'],
|
||||
tags=['multi-modal', 'vision'],
|
||||
hf_model_id='llava-hf/llava-1.5-7b-hf')
|
||||
@ -6245,7 +6243,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
|
||||
LoRATM.llava_llama,
|
||||
TemplateType.llama3_llava_next,
|
||||
support_flash_attn=True,
|
||||
support_lmdeploy=True,
|
||||
tags=['multi-modal', 'vision'],
|
||||
function_kwargs={'llm_model_type': 'next_llama'},
|
||||
hf_model_id='lmms-lab/llama3-llava-next-8b')
|
||||
@ -6255,7 +6252,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
|
||||
LoRATM.llava,
|
||||
TemplateType.llava_qwen,
|
||||
support_flash_attn=True,
|
||||
support_lmdeploy=True,
|
||||
tags=['multi-modal', 'vision'],
|
||||
function_kwargs={'llm_model_type': 'next_qwen'},
|
||||
hf_model_id='lmms-lab/llava-next-72b')
|
||||
@ -6265,7 +6261,6 @@ def get_model_tokenizer_llava_next_video_yi(*args, **kwargs):
|
||||
LoRATM.llava,
|
||||
TemplateType.llava_qwen,
|
||||
support_flash_attn=True,
|
||||
support_lmdeploy=True,
|
||||
tags=['multi-modal', 'vision'],
|
||||
function_kwargs={'llm_model_type': 'next_qwen'},
|
||||
hf_model_id='lmms-lab/llava-next-110b')
|
||||
|
@ -12,7 +12,7 @@ def random_uuid() -> str:
|
||||
@dataclass
|
||||
class Model:
|
||||
id: str # model_type
|
||||
is_chat: Optional[bool] = None # chat model or generation model
|
||||
is_chat: bool = True # chat model or generation model
|
||||
is_multimodal: bool = False
|
||||
|
||||
object: str = 'model'
|
||||
@ -151,8 +151,8 @@ class CompletionResponse:
|
||||
|
||||
@dataclass
|
||||
class DeltaMessage:
|
||||
role: Literal['system', 'user', 'assistant']
|
||||
content: str
|
||||
role: Literal['system', 'user', 'assistant', None] = None
|
||||
content: Optional[str] = None
|
||||
tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
|
||||
|
||||
|
||||
@ -167,7 +167,7 @@ class ChatCompletionResponseStreamChoice:
|
||||
class ChatCompletionStreamResponse:
|
||||
model: str
|
||||
choices: List[ChatCompletionResponseStreamChoice]
|
||||
usage: UsageInfo
|
||||
usage: Optional[UsageInfo] = None
|
||||
id: str = field(default_factory=lambda: f'chatcmpl-{random_uuid()}')
|
||||
object: str = 'chat.completion.chunk'
|
||||
created: int = field(default_factory=lambda: int(time.time()))
|
||||
@ -184,7 +184,7 @@ class CompletionResponseStreamChoice:
|
||||
class CompletionStreamResponse:
|
||||
model: str
|
||||
choices: List[CompletionResponseStreamChoice]
|
||||
usage: UsageInfo
|
||||
usage: Optional[UsageInfo] = None
|
||||
id: str = field(default_factory=lambda: f'cmpl-{random_uuid()}')
|
||||
object: str = 'text_completion.chunk'
|
||||
created: int = field(default_factory=lambda: int(time.time()))
|
||||
|
@ -1785,7 +1785,7 @@ class InternLMXComposer2Template(Template):
|
||||
while i < len(input_ids):
|
||||
if input_ids[i] == 2: # replace_token
|
||||
res_input_ids = torch.tensor([1] + input_ids[pre_i:i], device=device)
|
||||
res_inputs_embeds.append(tok_embeddings(res_input_ids))
|
||||
res_inputs_embeds.append(tok_embeddings(res_input_ids[None])[0])
|
||||
wrap_im_mask += [0] * len(res_input_ids)
|
||||
res_labels += [-100] + labels[pre_i:i]
|
||||
if len(images) > 0 and idx < images.shape[0]:
|
||||
@ -1894,7 +1894,7 @@ class InternvlTemplate(Template):
|
||||
embedding = self.model.get_input_embeddings()
|
||||
device = embedding.weight.device
|
||||
input_ids = data['input_ids']
|
||||
inputs_embeds = embedding(input_ids).to(device=device)
|
||||
inputs_embeds = embedding(input_ids[None])[0].to(device=device)
|
||||
pixel_values = data['pixel_values']
|
||||
if pixel_values is not None:
|
||||
pixel_values = pixel_values.to(device=device)
|
||||
|
@ -714,13 +714,11 @@ def inference_stream(model: PreTrainedModel,
|
||||
except StopIteration:
|
||||
is_finished = True
|
||||
res = {}
|
||||
generate_ids = template.get_generate_ids(torch.tensor(raw_generate_ids)[None], token_len)
|
||||
if return_dict and is_finished:
|
||||
thread.join()
|
||||
res = dict(result_queue.get())
|
||||
if res['sequences'][0].tolist() != raw_generate_ids:
|
||||
logger.warning(f"res['sequences'][0].tolist(): {res['sequences'][0].tolist()}\n"
|
||||
f'raw_generate_ids: {raw_generate_ids}')
|
||||
generate_ids = template.get_generate_ids(torch.tensor(raw_generate_ids)[None], token_len)
|
||||
res['sequences'] = generate_ids
|
||||
generation_info['num_generated_tokens'] = len(generate_ids)
|
||||
response = template.generate_ids_to_response(
|
||||
generate_ids,
|
||||
@ -834,6 +832,7 @@ def inference(model: PreTrainedModel,
|
||||
generation_info['samples/s'] = 1 / runtime
|
||||
generation_info['tokens/s'] = generation_info['num_generated_tokens'] / runtime
|
||||
if return_dict:
|
||||
res['sequences'] = generate_ids
|
||||
res.update({'response': response, 'history': history})
|
||||
return res
|
||||
else:
|
||||
|
@ -184,6 +184,7 @@ if version.parse(vllm.__version__) < version.parse('0.5.5'):
|
||||
num_beams: int = 1,
|
||||
*,
|
||||
n: int = 1,
|
||||
logprobs: Optional[int] = None,
|
||||
seed: Optional[int] = None,
|
||||
length_penalty: float = 1.,
|
||||
stop: Optional[List[str]] = None,
|
||||
@ -214,6 +215,7 @@ if version.parse(vllm.__version__) < version.parse('0.5.5'):
|
||||
kwargs['use_beam_search'] = True
|
||||
kwargs['best_of'] = num_beams
|
||||
kwargs['n'] = n
|
||||
kwargs['logprobs'] = logprobs
|
||||
kwargs['seed'] = seed
|
||||
kwargs['length_penalty'] = length_penalty
|
||||
kwargs['stop'] = stop
|
||||
@ -236,6 +238,7 @@ else:
|
||||
repetition_penalty: float = 1.
|
||||
num_beams: int = 1
|
||||
n: int = 1
|
||||
logprobs: Optional[int] = None
|
||||
seed: Optional[int] = None
|
||||
length_penalty: float = 1.
|
||||
stop: Optional[List[str]] = None
|
||||
@ -254,7 +257,7 @@ else:
|
||||
'the output of num_beams in transformers.')
|
||||
assert self.best_of is None
|
||||
self.use_beam_search = True
|
||||
self.best_of = num_beams
|
||||
self.best_of = self.num_beams
|
||||
if self.top_k == 0:
|
||||
self.top_k = -1
|
||||
if self.stop is None: