This metric evaluates whether the ground-truth relevant items present in the contexts are ranked near the top of the retrieved contexts. Higher scores indicate better precision.
POST /legacy/ragas_context_precision/evaluate
import langwatch
# Fetch the dataset and convert it to a pandas DataFrame.
df = langwatch.datasets.get_dataset("dataset-id").to_pandas()

# Initialize the experiment named "my-experiment".
experiment = langwatch.experiment.init("my-experiment")

# Iterate over the DataFrame rows through the experiment's loop wrapper.
for index, row in experiment.loop(df.iterrows()):
    # your execution code here

    # Evaluate this row with the context-precision evaluator, passing the
    # row's "input", "contexts" and "expected_output" columns.
    experiment.evaluate(
        "legacy/ragas_context_precision",
        index=index,
        data={
            "input": row["input"],
            "contexts": row["contexts"],
            "expected_output": row["expected_output"],
        },
        settings={},
    )