diff --git a/.gitignore b/.gitignore
index c40424b7fa0a6cb5e9e3cc30b9c52f2ba6512f64..aa14bce105aea8a096ecadef09361833ef6deb72 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 dist/
 migration_scripts/
 venv/
+.idea
 .venv/
 .ipynb_checkpoints
 .__pycache__
diff --git a/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py b/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
index e08543fb629be20a3e3bd0506b10687b5ec6ae4d..5b34ca956f4b22fffca9dc9cd3dd94e643972701 100644
--- a/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
+++ b/llama-index-integrations/llms/llama-index-llms-vllm/llama_index/llms/vllm/base.py
@@ -214,13 +214,14 @@ class Vllm(LLM):
 
     def __del__(self) -> None:
         import torch
-        from vllm.model_executor.parallel_utils.parallel_state import (
-            destroy_model_parallel,
-        )
 
-        destroy_model_parallel()
-        del self._client
         if torch.cuda.is_available():
+            from vllm.model_executor.parallel_utils.parallel_state import (
+                destroy_model_parallel,
+            )
+
+            destroy_model_parallel()
+            del self._client
             torch.cuda.synchronize()
 
     def _get_all_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
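
For reviewers skimming the hunk above, this is how `Vllm.__del__` reads once the patch is applied: the `vllm.model_executor.parallel_utils.parallel_state` import, the `destroy_model_parallel()` call, and the `del self._client` cleanup all move inside the `torch.cuda.is_available()` guard, so the destructor no longer touches vLLM's model-parallel state on CPU-only hosts (where that teardown path would otherwise run, and presumably fail, during garbage collection).

```python
# Vllm.__del__ as it reads after this diff (excerpt of base.py);
# the vLLM import and teardown now run only when CUDA is present.
def __del__(self) -> None:
    import torch

    if torch.cuda.is_available():
        # Deferred import: only resolved on CUDA machines.
        from vllm.model_executor.parallel_utils.parallel_state import (
            destroy_model_parallel,
        )

        destroy_model_parallel()
        del self._client
        torch.cuda.synchronize()
```

One side effect worth noting: on machines without CUDA, `self._client` is now left to the garbage collector rather than deleted explicitly, which is presumably acceptable since the whole object is being finalized at that point anyway.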