diff --git a/llama-index-core/llama_index/core/evaluation/batch_runner.py b/llama-index-core/llama_index/core/evaluation/batch_runner.py
index e7ce75b5c45c687785b66b427a00f0f79961092d..53a7d72a881f7e404b18f77749866159869fec57 100644
--- a/llama-index-core/llama_index/core/evaluation/batch_runner.py
+++ b/llama-index-core/llama_index/core/evaluation/batch_runner.py
@@ -1,4 +1,5 @@
 import asyncio
+from tenacity import retry, stop_after_attempt, wait_exponential
 from typing import Any, Dict, List, Optional, Sequence, Tuple, cast
 
 from llama_index.core.async_utils import asyncio_module
@@ -7,6 +8,11 @@ from llama_index.core.base.response.schema import RESPONSE_TYPE, Response
 from llama_index.core.evaluation.base import BaseEvaluator, EvaluationResult
 
 
+@retry(
+    reraise=True,
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+)
 async def eval_response_worker(
     semaphore: asyncio.Semaphore,
     evaluator: BaseEvaluator,
@@ -26,6 +32,11 @@ async def eval_response_worker(
         )
 
 
+@retry(
+    reraise=True,
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+)
 async def eval_worker(
     semaphore: asyncio.Semaphore,
     evaluator: BaseEvaluator,
@@ -46,6 +57,11 @@ async def eval_worker(
         )
 
 
+@retry(
+    reraise=True,
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+)
 async def response_worker(
     semaphore: asyncio.Semaphore,
     query_engine: BaseQueryEngine,