diff --git a/gpt_index/data_structs/node_v2.py b/gpt_index/data_structs/node_v2.py
index 3face7f5c9f87480b5750944f70f9162ba79d193..d5e78be1da3db9af60863bed7c6a58d8d5ef3cff 100644
--- a/gpt_index/data_structs/node_v2.py
+++ b/gpt_index/data_structs/node_v2.py
@@ -94,8 +94,9 @@ class Node(BaseDocument):
     def get_text(self) -> str:
         """Get text."""
         text = super().get_text()
+        extra_info_exists = self.extra_info is not None and len(self.extra_info) > 0
         result_text = (
-            text if self.extra_info_str is None else f"{self.extra_info_str}\n\n{text}"
+            text if not extra_info_exists else f"{self.extra_info_str}\n\n{text}"
         )
         return result_text
 
diff --git a/gpt_index/indices/vector_store/base.py b/gpt_index/indices/vector_store/base.py
index 360d776b127604eafdb5d054fcd3edb60348460f..46999a20049614b4d4d6cc0226d9070e17ee708e 100644
--- a/gpt_index/indices/vector_store/base.py
+++ b/gpt_index/indices/vector_store/base.py
@@ -17,7 +17,6 @@ from gpt_index.indices.vector_store.base_query import GPTVectorStoreIndexQuery
 from gpt_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT
 from gpt_index.prompts.prompts import QuestionAnswerPrompt
 from gpt_index.token_counter.token_counter import llm_token_counter
-from gpt_index.utils import get_new_id
 from gpt_index.vector_stores.simple import SimpleVectorStore
 from gpt_index.vector_stores.types import NodeEmbeddingResult, VectorStore
 
@@ -83,7 +82,7 @@ class GPTVectorStoreIndex(BaseGPTIndex[IndexDict]):
         id_to_embed_map: Dict[str, List[float]] = {}
 
         for n in nodes:
-            new_id = get_new_id(existing_node_ids.union(id_to_node_map.keys()))
+            new_id = n.get_doc_id()
             if n.embedding is None:
                 self._service_context.embed_model.queue_text_for_embeddding(
                     new_id, n.get_text()
@@ -127,7 +126,7 @@ class GPTVectorStoreIndex(BaseGPTIndex[IndexDict]):
 
         text_queue: List[Tuple[str, str]] = []
         for n in nodes:
-            new_id = get_new_id(existing_node_ids.union(id_to_node_map.keys()))
+            new_id = n.get_doc_id()
             if n.embedding is None:
                 text_queue.append((new_id, n.get_text()))
             else:
diff --git a/gpt_index/vector_stores/chatgpt_plugin.py b/gpt_index/vector_stores/chatgpt_plugin.py
index 383cae9ea5b88529fa1f0333a89ae93e7a03851e..6055087e70f11f1f40fcb9616db02e6ece7fa314 100644
--- a/gpt_index/vector_stores/chatgpt_plugin.py
+++ b/gpt_index/vector_stores/chatgpt_plugin.py
@@ -7,7 +7,7 @@ import requests
 from requests.adapters import HTTPAdapter, Retry
 from tqdm.auto import tqdm
 
-from gpt_index.data_structs.data_structs_v2 import Node
+from gpt_index.data_structs.node_v2 import Node, DocumentRelationship
 from gpt_index.vector_stores.types import (
     NodeEmbeddingResult,
     VectorStore,
@@ -26,7 +26,8 @@ def convert_docs_to_json(embedding_results: List[NodeEmbeddingResult]) -> List[D
             "id": embedding_result.id,
             "text": embedding_result.node.get_text(),
             # "source": embedding_result.node.source,
-            # "source_id": ...,
+            # NOTE: this is the doc_id of the source document being referenced
+            "source_id": embedding_result.doc_id,
             # "url": "...",
             # "created_at": ...,
             # "author": "..."",
@@ -142,9 +143,11 @@ class ChatGPTRetrievalPluginClient(VectorStore):
                 result_id = result["id"]
                 result_txt = result["text"]
                 result_score = result["score"]
+                result_ref_doc_id = result["source_id"]
                 node = Node(
                     doc_id=result_id,
                     text=result_txt,
+                    relationships={DocumentRelationship.SOURCE: result_ref_doc_id},
                 )
                 nodes.append(node)
                 similarities.append(result_score)
diff --git a/gpt_index/vector_stores/pinecone.py b/gpt_index/vector_stores/pinecone.py
index e3929b58dcee676644aed4398fc03f240ccfbdcb..d0d56e98f89579e0b6c6bc72de8a4a9487d7d143 100644
--- a/gpt_index/vector_stores/pinecone.py
+++ b/gpt_index/vector_stores/pinecone.py
@@ -6,7 +6,7 @@ An index that that is built on top of an existing vector store.
 
 from typing import Any, Dict, List, Optional, cast
 
-from gpt_index.data_structs.node_v2 import Node
+from gpt_index.data_structs.node_v2 import Node, DocumentRelationship
 from gpt_index.vector_stores.types import (
     NodeEmbeddingResult,
     VectorStore,
@@ -120,7 +120,9 @@ class PineconeVectorStore(VectorStore):
 
             metadata = {
                 "text": node.get_text(),
+                # NOTE: "doc_id" here refers to the source document
                 "doc_id": result.doc_id,
+                "id": new_id,
             }
             if node.extra_info:
                 # TODO: check if overlap with default metadata keys
@@ -197,9 +199,14 @@ class PineconeVectorStore(VectorStore):
             extra_info = get_node_info_from_metadata(match.metadata, "extra_info")
             node_info = get_node_info_from_metadata(match.metadata, "node_info")
             doc_id = match.metadata["doc_id"]
+            id = match.metadata["id"]
 
             node = Node(
-                text=text, extra_info=extra_info, node_info=node_info, doc_id=doc_id
+                text=text,
+                extra_info=extra_info,
+                node_info=node_info,
+                doc_id=id,
+                relationships={DocumentRelationship.SOURCE: doc_id},
             )
             top_k_ids.append(match.id)
             top_k_nodes.append(node)
diff --git a/gpt_index/vector_stores/qdrant.py b/gpt_index/vector_stores/qdrant.py
index 1ba6c366ae064650299b9736c9e6f4ddfb379cd7..0b2fd8efdc63a93e41c8eaddb3f60b279c6e702d 100644
--- a/gpt_index/vector_stores/qdrant.py
+++ b/gpt_index/vector_stores/qdrant.py
@@ -207,6 +207,7 @@ class QdrantVectorStore(VectorStore):
         for point in response:
             payload = cast(Payload, point.payload)
             node = Node(
+                doc_id=str(point.id),
                 text=payload.get("text"),
                 extra_info=payload.get("extra_info"),
                 relationships={
diff --git a/gpt_index/vector_stores/types.py b/gpt_index/vector_stores/types.py
index 5e1c84409a944f0c7931f6ea471d4c73bfb65be5..9b7bad5a258573aed0604259da112632e8b4899b 100644
--- a/gpt_index/vector_stores/types.py
+++ b/gpt_index/vector_stores/types.py
@@ -15,6 +15,7 @@ class NodeEmbeddingResult:
         id (str): Node id
         node (Node): Node
         embedding (List[float]): Embedding
+        doc_id (str): Source document id
 
     """
 
diff --git a/tests/indices/vector_store/test_base.py b/tests/indices/vector_store/test_base.py
index a47f6808a9b5df7fe3d2a877914b7b4ad6927118..e2c729bcb1f898616bb1d555f5c74b0d45fa5170 100644
--- a/tests/indices/vector_store/test_base.py
+++ b/tests/indices/vector_store/test_base.py
@@ -7,6 +7,7 @@ from unittest.mock import MagicMock, patch
 import numpy as np
 import pytest
 
+from gpt_index.data_structs.node_v2 import Node
 from gpt_index.embeddings.openai import OpenAIEmbedding
 from gpt_index.indices.vector_store.vector_indices import (
     GPTFaissIndex,
@@ -16,6 +17,7 @@ from gpt_index.readers.schema.base import Document
 from gpt_index.vector_stores.simple import SimpleVectorStore
 from tests.mock_utils.mock_decorator import patch_common
 from tests.mock_utils.mock_prompts import MOCK_REFINE_PROMPT, MOCK_TEXT_QA_PROMPT
+from gpt_index.data_structs.node_v2 import DocumentRelationship
 
 
 @pytest.fixture
@@ -632,3 +634,100 @@ def test_simple_async(
         vector_store = cast(SimpleVectorStore, index._vector_store)
         embedding = vector_store.get(text_id)
         assert (node.text, embedding) in actual_node_tups
+
+
+@patch_common
+@patch.object(
+    OpenAIEmbedding, "_get_text_embedding", side_effect=mock_get_text_embedding
+)
+@patch.object(
+    OpenAIEmbedding, "_get_text_embeddings", side_effect=mock_get_text_embeddings
+)
+@patch.object(
+    OpenAIEmbedding, "get_query_embedding", side_effect=mock_get_query_embedding
+)
+def test_simple_check_ids(
+    _mock_query_embed: Any,
+    _mock_text_embeds: Any,
+    _mock_text_embed: Any,
+    _mock_init: Any,
+    _mock_predict: Any,
+    _mock_total_tokens_used: Any,
+    _mock_split_text_overlap: Any,
+    _mock_split_text: Any,
+    documents: List[Document],
+    struct_kwargs: Dict,
+) -> None:
+    """Test that GPTSimpleVectorIndex preserves node ids."""
+    index_kwargs, query_kwargs = struct_kwargs
+
+    ref_doc_id = "ref_doc_id_test"
+    source_rel = {DocumentRelationship.SOURCE: ref_doc_id}
+    nodes = [
+        Node("Hello world.", doc_id="node1", relationships=source_rel),
+        Node("This is a test.", doc_id="node2", relationships=source_rel),
+        Node("This is another test.", doc_id="node3", relationships=source_rel),
+        Node("This is a test v2.", doc_id="node4", relationships=source_rel),
+    ]
+    index = GPTSimpleVectorIndex(nodes, **index_kwargs)
+
+    # test query
+    query_str = "What is?"
+    response = index.query(query_str, **query_kwargs)
+    assert str(response) == ("What is?:This is another test.")
+    assert len(response.source_nodes) == 1
+    assert response.source_nodes[0].node.ref_doc_id == "ref_doc_id_test"
+    assert response.source_nodes[0].node.doc_id == "node3"
+    vector_store = cast(SimpleVectorStore, index._vector_store)
+    assert "node3" in vector_store._data.embedding_dict
+    assert "node3" in vector_store._data.text_id_to_doc_id
+
+
+@patch_common
+@patch.object(
+    OpenAIEmbedding, "_get_text_embedding", side_effect=mock_get_text_embedding
+)
+@patch.object(
+    OpenAIEmbedding, "_get_text_embeddings", side_effect=mock_get_text_embeddings
+)
+@patch.object(
+    OpenAIEmbedding, "get_query_embedding", side_effect=mock_get_query_embedding
+)
+def test_faiss_check_ids(
+    _mock_query_embed: Any,
+    _mock_texts_embed: Any,
+    _mock_text_embed: Any,
+    _mock_init: Any,
+    _mock_predict: Any,
+    _mock_total_tokens_used: Any,
+    _mock_split_text_overlap: Any,
+    _mock_split_text: Any,
+    documents: List[Document],
+    struct_kwargs: Dict,
+) -> None:
+    """Test that GPTFaissIndex preserves node ids."""
+    # NOTE: mock faiss import
+    sys.modules["faiss"] = MagicMock()
+    # NOTE: mock faiss index
+    faiss_index = MockFaissIndex()
+
+    index_kwargs, query_kwargs = struct_kwargs
+
+    ref_doc_id = "ref_doc_id_test"
+    source_rel = {DocumentRelationship.SOURCE: ref_doc_id}
+    nodes = [
+        Node("Hello world.", doc_id="node1", relationships=source_rel),
+        Node("This is a test.", doc_id="node2", relationships=source_rel),
+        Node("This is another test.", doc_id="node3", relationships=source_rel),
+        Node("This is a test v2.", doc_id="node4", relationships=source_rel),
+    ]
+
+    index = GPTFaissIndex(nodes, faiss_index=faiss_index, **index_kwargs)
+
+    # test query
+    query_str = "What is?"
+    response = index.query(query_str, **query_kwargs)
+    assert str(response) == ("What is?:This is another test.")
+    assert len(response.source_nodes) == 1
+    assert response.source_nodes[0].node.ref_doc_id == "ref_doc_id_test"
+    assert response.source_nodes[0].node.doc_id == "node3"