From c014ae7cb89f45f2dc90a7877eaf8c1f7bb59a65 Mon Sep 17 00:00:00 2001 From: Hamid Shojanazeri <hamid.nazeri2010@gmail.com> Date: Sun, 27 Aug 2023 20:03:02 +0000 Subject: [PATCH] setting BT option to true --- inference/code-llama/code_completion_example.py | 2 +- inference/code-llama/code_infilling_example.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inference/code-llama/code_completion_example.py b/inference/code-llama/code_completion_example.py index 348dadee..d8ba7b23 100644 --- a/inference/code-llama/code_completion_example.py +++ b/inference/code-llama/code_completion_example.py @@ -34,7 +34,7 @@ def main( enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5 max_padding_length: int=None, # the max padding length to be used with tokenizer padding the prompts. - use_fast_kernels: bool = False, # Enable using SDPA from PyTroch Accelerated Transformers, make use Flash Attention and Xformer memory-efficient kernels + use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, make use of Flash Attention and Xformer memory-efficient kernels **kwargs ): if prompt_file is not None: diff --git a/inference/code-llama/code_infilling_example.py b/inference/code-llama/code_infilling_example.py index 601ea7c3..9c1a6585 100644 --- a/inference/code-llama/code_infilling_example.py +++ b/inference/code-llama/code_infilling_example.py @@ -34,7 +34,7 @@ def main( enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5 max_padding_length: int=None, # the max padding length to be used with tokenizer padding the prompts. 
- use_fast_kernels: bool = False, # Enable using SDPA from PyTroch Accelerated Transformers, make use Flash Attention and Xformer memory-efficient kernels + use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, make use of Flash Attention and Xformer memory-efficient kernels **kwargs ): if prompt_file is not None: -- GitLab