from playgent import Playgent

# Build the client; replace the placeholder with a real API key.
client = Playgent(api_key="your-api-key")

# Create the agent that the test case below is attached to.
agent = client.agents.create(
    name="Customer Support Agent",
    provider="openai",
    system_prompt="You are a helpful customer support agent...",
    default_scorers=["relevance", "faithfulness"],
)

# Define a single-turn test case for that agent. The turn lists its own
# scorers in addition to the agent's default_scorers above.
test_case = client.test_cases.create(
    name="Refund Request",
    agent_id=agent.id,
    turns=[
        {
            "input": {"text": "I want a refund for order #1234"},
            "expected_behavior": "Agent should ask for order details",
            "scorers": ["relevance", "completeness"],
        }
    ],
)

# Create a run for the test case.
run = client.runs.create(test_case_id=test_case.id)

# Get results
result = client.runs.get(run.id)
print(f"Pass rate: {result.summary.turns_passed}/{len(result.turns)}")
Run comprehensive evaluations using built-in RAG, safety, agentic, and multi-turn metrics:
# Evaluate a single response
evaluation = client.evaluate(
    input="What is your refund policy?",
    output="Returns accepted within 30 days for full refund.",
    context=["Policy: 30 day returns for full refund"],
    scorers=[
        "answer_relevancy",  # RAG metric
        "faithfulness",  # RAG metric
        "bias",  # Safety metric
        "toxicity",  # Safety metric
    ],
)

# Report the overall verdict, then each scorer's score to two decimals.
print(f"Overall pass: {evaluation.overall_pass}")
for scorer, result in evaluation.results.items():
    print(f"{scorer}: {result.score:.2f}")
Playgent provides 27 built-in metrics covering RAG (RAGAS), safety
checks, agentic workflows, and multi-turn conversations. See all
metrics →