From a89a4c793a2c2cb916b53cecb695fd08df41c827 Mon Sep 17 00:00:00 2001
From: Andrei Fajardo <92402603+nerdai@users.noreply.github.com>
Date: Sat, 2 Dec 2023 15:23:32 -0500
Subject: [PATCH] make reference_context optional (#9266)

* make reference_context optional

* lint

* make entry to chlog
---
 CHANGELOG.md                      |  2 ++
 llama_index/llama_dataset/base.py | 20 +++++++++++++++++---
 llama_index/llama_dataset/rag.py  | 18 ++++++++++--------
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b0812b7a3ee0..af99da4f35f9f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 ### New Features
 
+- Make `reference_contexts` optional in `LabelledRagDataset` (#9266)
+- Re-organize `download` module (#9253)
 - Added document management to ingestion pipeline (#9135)
 - Add docs for `LabelledRagDataset` (#9228)
 
diff --git a/llama_index/llama_dataset/base.py b/llama_index/llama_dataset/base.py
index 353bb52f98bf8..5e25b4b7bf676 100644
--- a/llama_index/llama_dataset/base.py
+++ b/llama_index/llama_dataset/base.py
@@ -3,7 +3,7 @@
 import json
 from abc import abstractmethod
 from enum import Enum
-from typing import List, Optional, Type
+from typing import List, Optional, Type, Union
 
 import tqdm
 from pandas import DataFrame as PandasDataFrame
@@ -58,10 +58,17 @@ def class_name(self) -> str:
 
 class BaseLlamaPredictionDataset(BaseModel):
     _prediction_type: Type[BaseLlamaExamplePrediction] = BaseLlamaExamplePrediction  # type: ignore[misc]
-    predictions: Optional[List[BaseLlamaExamplePrediction]] = Field(
-        default=None, description="Predictions on train_examples."
+    predictions: List[BaseLlamaExamplePrediction] = Field(
+        default=list, description="Predictions on train_examples."
     )
 
+    def __getitem__(self, val: Union[slice, int]) -> List[BaseLlamaExamplePrediction]:
+        """Enable slicing and indexing.
+
+        Returns the desired slice on `predictions`.
+        """
+        return self.predictions[val]
+
     @abstractmethod
     def to_pandas(self) -> PandasDataFrame:
         """Create pandas dataframe."""
@@ -99,6 +106,13 @@ class BaseLlamaDataset(BaseModel):
         default=[], description="Data examples of this dataset."
     )
 
+    def __getitem__(self, val: Union[slice, int]) -> List[BaseLlamaDataExample]:
+        """Enable slicing and indexing.
+
+        Returns the desired slice on `examples`.
+        """
+        return self.examples[val]
+
     @abstractmethod
     def to_pandas(self) -> PandasDataFrame:
         """Create pandas dataframe."""
diff --git a/llama_index/llama_dataset/rag.py b/llama_index/llama_dataset/rag.py
index 14dd504b4413d..1d96a3d56b4a2 100644
--- a/llama_index/llama_dataset/rag.py
+++ b/llama_index/llama_dataset/rag.py
@@ -19,16 +19,17 @@ class RagExamplePrediction(BaseLlamaExamplePrediction):
     """RAG example prediction class.
 
     Args:
-        response: str
-        contexts: List[str]
+        response (str): The response generated by the LLM.
+        contexts (Optional[List[str]]): The retrieved context (text) for generating
+            response.
     """
 
     response: str = Field(
         default_factory=str,
         description="The generated (predicted) response that can be compared to a reference (ground-truth) answer.",
     )
-    contexts: List[str] = Field(
-        default_factory=List,
+    contexts: Optional[List[str]] = Field(
+        default_factory=None,
         description="The contexts in raw text form used to generate the response.",
     )
 
@@ -45,10 +46,11 @@ class LabelledRagDataExample(BaseLlamaDataExample):
 
     Args:
         query (str): The user query
-        kind (LlamaRagDataExampleKind): The example is generated by human or ai
-        reference_contexts (List[str] or List[TextNode]): The contexts used for response
+        query_by (CreatedBy): Query generated by human or ai (model-name)
+        reference_contexts (Optional[List[str]]): The contexts used for response
         reference_answer ([str]): Reference answer to the query. An answer that
             would receive full marks upon evaluation.
+        reference_answer_by: The reference answer generated by human or ai (model-name).
     """
 
     query: str = Field(
@@ -57,8 +59,8 @@ class LabelledRagDataExample(BaseLlamaDataExample):
     query_by: Optional[CreatedBy] = Field(
         default=None, description="What generated the query."
     )
-    reference_contexts: List[str] = Field(
-        default_factory=List,
+    reference_contexts: Optional[List[str]] = Field(
+        default_factory=None,
         description="The contexts used to generate the reference answer.",
     )
     reference_answer: str = Field(