diff --git a/inference/code-llama/code_completion_example.py b/inference/code-llama/code_completion_example.py
index 348dadee228992f2f4c34b4ad157504ea423bbe2..d8ba7b2303f7f393e1499072d5e36e1d39ec5be4 100644
--- a/inference/code-llama/code_completion_example.py
+++ b/inference/code-llama/code_completion_example.py
@@ -34,7 +34,7 @@ def main(
     enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs
     enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5
     max_padding_length: int=None, # the max padding length to be used with tokenizer padding the prompts.
-    use_fast_kernels: bool = False, # Enable using SDPA from PyTroch Accelerated Transformers, make use Flash Attention and Xformer memory-efficient kernels
+    use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, making use of Flash Attention and Xformer memory-efficient kernels
     **kwargs
 ):
     if prompt_file is not None:
diff --git a/inference/code-llama/code_infilling_example.py b/inference/code-llama/code_infilling_example.py
index 601ea7c35df75bf87be9506c0bd98291e1c1cb63..9c1a6585c17a3ef57ac2cf4d5f1ee41a5ae425bf 100644
--- a/inference/code-llama/code_infilling_example.py
+++ b/inference/code-llama/code_infilling_example.py
@@ -34,7 +34,7 @@ def main(
     enable_sensitive_topics: bool=False, # Enable check for sensitive topics using AuditNLG APIs
     enable_salesforce_content_safety: bool=True, # Enable safety check with Salesforce safety flan t5
     max_padding_length: int=None, # the max padding length to be used with tokenizer padding the prompts.
-    use_fast_kernels: bool = False, # Enable using SDPA from PyTroch Accelerated Transformers, make use Flash Attention and Xformer memory-efficient kernels
+    use_fast_kernels: bool = True, # Enable using SDPA from PyTorch Accelerated Transformers, making use of Flash Attention and Xformer memory-efficient kernels
     **kwargs
 ):
     if prompt_file is not None: