diff --git a/.changeset/kind-beans-sit.md b/.changeset/kind-beans-sit.md
new file mode 100644
index 0000000000000000000000000000000000000000..05c562ec7ebefcdcfc74ecefdbefbee614835861
--- /dev/null
+++ b/.changeset/kind-beans-sit.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Add error handling for generating the next questions
diff --git a/.changeset/ten-worms-pump.md b/.changeset/ten-worms-pump.md
new file mode 100644
index 0000000000000000000000000000000000000000..c2c6471147b05be0a0b7f66dbbaf47b56201e92d
--- /dev/null
+++ b/.changeset/ten-worms-pump.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Fix wrong API key variable in Azure OpenAI provider
diff --git a/helpers/env-variables.ts b/helpers/env-variables.ts
index e3f041cedd1c14bd27a9cf932840f98ddd30a1ca..1ddd269c101eb0d2d4662050e66d71e9f188e985 100644
--- a/helpers/env-variables.ts
+++ b/helpers/env-variables.ts
@@ -311,7 +311,7 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
     ...(modelConfig.provider === "azure-openai"
       ? [
           {
-            name: "AZURE_OPENAI_KEY",
+            name: "AZURE_OPENAI_API_KEY",
             description: "The Azure OpenAI key to use.",
             value: modelConfig.apiKey,
           },
diff --git a/helpers/providers/azure.ts b/helpers/providers/azure.ts
index 74f901423568bf9151301e4127648d27de9fb64d..e450715f543d35622e111c32ae9829cc69978ec0 100644
--- a/helpers/providers/azure.ts
+++ b/helpers/providers/azure.ts
@@ -9,6 +9,7 @@ const ALL_AZURE_OPENAI_CHAT_MODELS: Record<string, { openAIModel: string }> = {
     openAIModel: "gpt-3.5-turbo-16k",
   },
   "gpt-4o": { openAIModel: "gpt-4o" },
+  "gpt-4o-mini": { openAIModel: "gpt-4o-mini" },
   "gpt-4": { openAIModel: "gpt-4" },
   "gpt-4-32k": { openAIModel: "gpt-4-32k" },
   "gpt-4-turbo": {
@@ -26,6 +27,9 @@ const ALL_AZURE_OPENAI_CHAT_MODELS: Record<string, { openAIModel: string }> = {
   "gpt-4o-2024-05-13": {
     openAIModel: "gpt-4o-2024-05-13",
   },
+  "gpt-4o-mini-2024-07-18": {
+    openAIModel: "gpt-4o-mini-2024-07-18",
+  },
 };
 
 const ALL_AZURE_OPENAI_EMBEDDING_MODELS: Record<
@@ -35,10 +39,6 @@ const ALL_AZURE_OPENAI_EMBEDDING_MODELS: Record<
     openAIModel: string;
   }
 > = {
-  "text-embedding-ada-002": {
-    dimensions: 1536,
-    openAIModel: "text-embedding-ada-002",
-  },
   "text-embedding-3-small": {
     dimensions: 1536,
     openAIModel: "text-embedding-3-small",
diff --git a/templates/components/llamaindex/typescript/streaming/suggestion.ts b/templates/components/llamaindex/typescript/streaming/suggestion.ts
index 835671113e1ab2ddafaa95b6efaba692f16f9f62..0dacaeadd828bd14fada2d5ff82e8ac6603f1458 100644
--- a/templates/components/llamaindex/typescript/streaming/suggestion.ts
+++ b/templates/components/llamaindex/typescript/streaming/suggestion.ts
@@ -33,8 +33,9 @@ export async function generateNextQuestions(
     const questions = extractQuestions(response.text);
     return questions;
   } catch (error) {
-    console.error("Error: ", error);
-    throw error;
+    console.error("Error when generating the next questions: ", error);
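+    // Suggestions are a non-critical feature, so return an empty list instead of rethrowing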
+    return [];
   }
 }
 
diff --git a/templates/components/settings/python/settings.py b/templates/components/settings/python/settings.py
index b723bf3e04ed98891a0bd28d25267a88aad5f165..bb8287059c948b00b5b3af5e812794872795d9b9 100644
--- a/templates/components/settings/python/settings.py
+++ b/templates/components/settings/python/settings.py
@@ -82,7 +82,8 @@ def init_azure_openai():
     dimensions = os.getenv("EMBEDDING_DIM")
 
     azure_config = {
-        "api_key": os.environ["AZURE_OPENAI_KEY"],
+        "api_key": os.environ["AZURE_OPENAI_API_KEY"],
         "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
         "api_version": os.getenv("AZURE_OPENAI_API_VERSION")
         or os.getenv("OPENAI_API_VERSION"),
diff --git a/templates/types/streaming/fastapi/app/api/services/suggestion.py b/templates/types/streaming/fastapi/app/api/services/suggestion.py
index 406b0aec02796be37f776e15a9b4d78f008a48c1..ea563b17e8f901352932d7306b154ab4d0bf8521 100644
--- a/templates/types/streaming/fastapi/app/api/services/suggestion.py
+++ b/templates/types/streaming/fastapi/app/api/services/suggestion.py
@@ -1,3 +1,4 @@
+import logging
 from typing import List
 
 from app.api.routers.models import Message
@@ -9,11 +10,14 @@ NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
     "You're a helpful assistant! Your task is to suggest the next question that user might ask. "
     "\nHere is the conversation history"
     "\n---------------------\n{conversation}\n---------------------"
-    "Given the conversation history, please give me $number_of_questions questions that you might ask next!"
+    "Given the conversation history, please give me {number_of_questions} questions that you might ask next!"
 )
 N_QUESTION_TO_GENERATE = 3
 
 
+logger = logging.getLogger("uvicorn")
+
+
 class NextQuestions(BaseModel):
     """A list of questions that user might ask next"""
 
@@ -21,28 +25,38 @@
 
 
 class NextQuestionSuggestion:
+
     @staticmethod
     async def suggest_next_questions(
         messages: List[Message],
         number_of_questions: int = N_QUESTION_TO_GENERATE,
     ) -> List[str]:
-        # Reduce the cost by only using the last two messages
-        last_user_message = None
-        last_assistant_message = None
-        for message in reversed(messages):
-            if message.role == "user":
-                last_user_message = f"User: {message.content}"
-            elif message.role == "assistant":
-                last_assistant_message = f"Assistant: {message.content}"
-            if last_user_message and last_assistant_message:
-                break
-        conversation: str = f"{last_user_message}\n{last_assistant_message}"
-
-        output: NextQuestions = await Settings.llm.astructured_predict(
-            NextQuestions,
-            prompt=NEXT_QUESTIONS_SUGGESTION_PROMPT,
-            conversation=conversation,
-            nun_questions=number_of_questions,
-        )
-
-        return output.questions
+        """
+        Suggest the next questions that user might ask based on the conversation history
+        Return as empty list if there is an error
+        """
+        try:
+            # Reduce the cost by only using the last two messages
+            last_user_message = None
+            last_assistant_message = None
+            for message in reversed(messages):
+                if message.role == "user":
+                    last_user_message = f"User: {message.content}"
+                elif message.role == "assistant":
+                    last_assistant_message = f"Assistant: {message.content}"
+                if last_user_message and last_assistant_message:
+                    break
+            conversation: str = f"{last_user_message}\n{last_assistant_message}"
+
+            output: NextQuestions = await Settings.llm.astructured_predict(
+                NextQuestions,
+                prompt=NEXT_QUESTIONS_SUGGESTION_PROMPT,
+                conversation=conversation,
+                number_of_questions=number_of_questions,
+            )
+
+            return output.questions
+        except Exception as e:
+            logger.error(f"Error when generating the next questions: {e}")
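+            # Suggestions are a non-critical feature, so fall back to an empty list instead of raising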
+            return []