Documentation Index
Fetch the complete documentation index at: https://docs.keywordsai.co/llms.txt
Use this file to discover all available pages before exploring further.
Overview
The Datasets API allows you to create, manage, and organize collections of logs for analysis, evaluation, and machine learning workflows. Datasets serve as containers for grouping related conversations and interactions.
Key Features
- Create and manage datasets for organizing logs
- Add and remove logs from datasets
- Run evaluations on dataset contents
- Generate evaluation reports and analytics
- List dataset contents with filtering
- Update dataset metadata and descriptions
Quick Start
# Minimal end-to-end example: authenticate, create a dataset, attach logs.
from keywordsai import KeywordsAI
# NOTE: replace "your-api-key" with a real key before running.
client = KeywordsAI(api_key="your-api-key")
# Create a dataset
dataset = client.datasets.create(
    name="Customer Support Conversations",
    description="Collection of customer support interactions for analysis"
)
# Add logs to the dataset (log ids must reference existing logs)
client.datasets.add_logs_to_dataset(
    dataset_id=dataset.id,
    log_ids=["log_123", "log_456", "log_789"]
)
print(f"Created dataset {dataset.id} with logs")
Available Methods
Core Dataset Operations
| Method | Description |
| --- | --- |
create() | Create a new dataset |
list() | List all datasets |
get() | Retrieve a specific dataset |
update() | Update dataset information |
delete() | Delete a dataset |
Log Management
| Method | Description |
| --- | --- |
add_logs_to_dataset() | Add logs to a dataset |
remove_logs_from_dataset() | Remove logs from a dataset |
list_dataset_logs() | List logs in a dataset |
Evaluation Operations
| Method | Description |
| --- | --- |
run_dataset_evaluation() | Run evaluation on dataset |
get_evaluation_report() | Get evaluation results |
list_evaluation_reports() | List all evaluation reports |
Asynchronous Methods
All methods have asynchronous counterparts prefixed with `a`:
acreate(), alist(), aget(), aupdate(), adelete()
aadd_logs_to_dataset(), aremove_logs_from_dataset(), alist_dataset_logs()
arun_dataset_evaluation(), aget_evaluation_report(), alist_evaluation_reports()
Dataset Structure
A dataset contains the following information:
{
"id": "dataset_123456",
"name": "Customer Support Dataset",
"description": "Collection of customer support conversations",
"log_count": 150,
"created_at": "2024-01-15T10:30:00Z",
"updated_at": "2024-01-16T14:20:00Z",
"metadata": {
"category": "support",
"version": "1.0",
"tags": ["customer-service", "qa"]
}
}
Common Workflows
1. Dataset Creation and Population
# Workflow 1: create a dataset, then bulk-attach logs matched by metadata.
# Create a dataset
dataset = client.datasets.create(
    name="Product Q&A Dataset",
    description="Questions and answers about our products",
    metadata={
        "category": "product_support",
        "version": "1.0"
    }
)
# Get relevant logs
# limit=100 caps how many logs are pulled in a single call.
logs = client.logs.list(
    metadata_filter={"category": "product_questions"},
    limit=100
)
# Add logs to dataset
log_ids = [log.id for log in logs]
client.datasets.add_logs_to_dataset(
    dataset_id=dataset.id,
    log_ids=log_ids
)
print(f"Added {len(log_ids)} logs to dataset {dataset.name}")
2. Dataset Evaluation
# Workflow 2: run evaluators over a dataset and fetch the report.
# Run evaluation on the dataset
evaluation = client.datasets.run_dataset_evaluation(
    dataset_id=dataset.id,
    evaluator_ids=["evaluator_123", "evaluator_456"]
)
print(f"Started evaluation {evaluation.id}")
# Get evaluation results
# NOTE(review): "Started" suggests the evaluation runs asynchronously —
# the report may not be complete immediately; confirm polling semantics.
report = client.datasets.get_evaluation_report(
    dataset_id=dataset.id,
    evaluation_id=evaluation.id
)
print(f"Evaluation score: {report.overall_score}")
3. Dataset Analysis
# Workflow 3: pull a dataset's logs and compute simple aggregate stats.
# Get dataset logs for analysis
dataset_logs = client.datasets.list_dataset_logs(
    dataset_id=dataset.id,
    limit=500
)
# Analyze the dataset
total_tokens = sum(log.total_tokens or 0 for log in dataset_logs)
# Bug fix: guard against an empty dataset so the average does not
# divide by zero.
average_cost = (
    sum(log.cost or 0 for log in dataset_logs) / len(dataset_logs)
    if dataset_logs else 0
)
print("Dataset Analysis:")
print(f" Total logs: {len(dataset_logs)}")
print(f" Total tokens: {total_tokens:,}")
print(f" Average cost per log: ${average_cost:.4f}")
Advanced Use Cases
Batch Dataset Operations
import asyncio
async def create_multiple_datasets(dataset_configs):
    """Create several datasets concurrently and return them in input order.

    Each entry in *dataset_configs* is a dict of keyword arguments for
    ``client.datasets.acreate``.
    """
    # Start every acreate() call first so the API requests overlap,
    # then await them all together.
    pending = [client.datasets.acreate(**cfg) for cfg in dataset_configs]
    return await asyncio.gather(*pending)

# Create multiple datasets
configs = [
    {"name": "Training Set", "description": "Training data"},
    {"name": "Validation Set", "description": "Validation data"},
    {"name": "Test Set", "description": "Test data"},
]
datasets = asyncio.run(create_multiple_datasets(configs))
print(f"Created {len(datasets)} datasets")
Dataset Versioning
def create_dataset_version(base_dataset_id, version_name):
    """Clone an existing dataset as a new, version-tagged dataset.

    Inherits the base dataset's metadata (stamping *version_name* and a
    pointer back to the parent on top), copies up to 1000 of its logs
    into the clone, and returns the newly created dataset.
    """
    base_dataset = client.datasets.get(dataset_id=base_dataset_id)
    # Inherit the parent's metadata, then record the version lineage.
    version_metadata = dict(base_dataset.metadata)
    version_metadata["version"] = version_name
    version_metadata["parent_dataset_id"] = base_dataset_id
    # Create new version
    new_dataset = client.datasets.create(
        name=f"{base_dataset.name} - {version_name}",
        description=f"Version {version_name} of {base_dataset.name}",
        metadata=version_metadata,
    )
    # NOTE(review): only the first 1000 logs are copied — confirm this
    # limit covers the datasets this helper is used on.
    source_logs = client.datasets.list_dataset_logs(
        dataset_id=base_dataset_id,
        limit=1000,
    )
    if source_logs:
        client.datasets.add_logs_to_dataset(
            dataset_id=new_dataset.id,
            log_ids=[entry.id for entry in source_logs],
        )
    return new_dataset

# Create a new version
v2_dataset = create_dataset_version("dataset_123", "v2.0")
Dataset Quality Monitoring
def monitor_dataset_quality(dataset_id):
    """Compute quality metrics and a 0-100 quality score for a dataset.

    Fetches up to 1000 logs and counts empty assistant responses,
    high-latency logs (> 5.0s), and high-cost logs (> $0.01), plus the
    average tokens and cost per log. The score starts at 100 and loses
    weighted points per issue category (empty responses weigh most).

    Returns:
        dict with the issue counts, averages, and "quality_score".
    """
    logs = client.datasets.list_dataset_logs(
        dataset_id=dataset_id,
        limit=1000
    )
    quality_metrics = {
        "total_logs": len(logs),
        "empty_responses": 0,
        "high_latency_logs": 0,
        "high_cost_logs": 0,
        "average_tokens": 0,
        "average_cost": 0
    }
    total_tokens = 0
    total_cost = 0
    for log in logs:
        # Count the log once if any assistant turn has a blank body.
        for msg in log.messages:
            if msg["role"] == "assistant" and not msg["content"].strip():
                quality_metrics["empty_responses"] += 1
                break
        # Latency over 5 seconds counts as an outlier.
        if log.latency and log.latency > 5.0:
            quality_metrics["high_latency_logs"] += 1
        # Cost over one cent per log is flagged.
        if log.cost and log.cost > 0.01:
            quality_metrics["high_cost_logs"] += 1
        # Accumulate totals for the averages below.
        total_tokens += log.total_tokens or 0
        total_cost += log.cost or 0
    quality_score = 100
    # Bug fix: the penalty divisions previously ran even when the dataset
    # was empty, raising ZeroDivisionError. An empty dataset now scores 100.
    if logs:
        quality_metrics["average_tokens"] = total_tokens / len(logs)
        quality_metrics["average_cost"] = total_cost / len(logs)
        quality_score -= (quality_metrics["empty_responses"] / len(logs)) * 50
        quality_score -= (quality_metrics["high_latency_logs"] / len(logs)) * 20
        quality_score -= (quality_metrics["high_cost_logs"] / len(logs)) * 10
    quality_metrics["quality_score"] = max(0, quality_score)
    return quality_metrics

# Monitor dataset quality
quality = monitor_dataset_quality("dataset_123")
print(f"Dataset Quality Score: {quality['quality_score']:.1f}/100")
Best Practices
1. Organize Datasets by Purpose
# Good - clear purpose and naming
# Purpose/domain metadata makes the dataset filterable later.
training_dataset = client.datasets.create(
    name="GPT-4 Training Data - Customer Support",
    description="High-quality customer support conversations for training",
    metadata={
        "purpose": "training",
        "domain": "customer_support",
        "quality_threshold": "high"
    }
)
2. Use Rich Metadata
# Rich metadata for better organization
# Descriptive metadata (category, language, version, owner, tags)
# supports discovery and filtering across many datasets.
dataset = client.datasets.create(
    name="Product Documentation Q&A",
    description="Questions and answers about product documentation",
    metadata={
        "category": "documentation",
        "language": "english",
        "product_version": "2.1",
        "created_by": "data_team",
        "tags": ["documentation", "qa", "product"]
    }
)
3. Implement Dataset Validation
def validate_dataset_logs(dataset_id):
    """Check every log in a dataset for basic structural soundness.

    A log is valid when it has at least two messages and contains both a
    "user" and an "assistant" role. Returns counts of valid and invalid
    logs plus a human-readable list of the issues found.
    """
    dataset_logs = client.datasets.list_dataset_logs(dataset_id=dataset_id)
    report = {
        "valid_logs": 0,
        "invalid_logs": 0,
        "issues": []
    }
    for entry in dataset_logs:
        problems = []
        # A usable conversation needs at least a prompt and a reply.
        if not entry.messages or len(entry.messages) < 2:
            problems.append(f"Log {entry.id}: Insufficient messages")
        # Both sides of the conversation must be present.
        present_roles = [message["role"] for message in entry.messages]
        if "user" not in present_roles or "assistant" not in present_roles:
            problems.append(f"Log {entry.id}: Missing required roles")
        if problems:
            report["issues"].extend(problems)
            report["invalid_logs"] += 1
        else:
            report["valid_logs"] += 1
    return report

# Validate dataset
validation = validate_dataset_logs("dataset_123")
print(f"Validation: {validation['valid_logs']} valid, {validation['invalid_logs']} invalid")
4. Regular Dataset Maintenance
def cleanup_dataset(dataset_id):
    """Remove logs with empty assistant responses from a dataset.

    Scans every log, collects the ids of logs where any assistant
    message has a blank body, removes them in one batch call, and
    returns the number of logs removed.
    """
    flagged = []
    for entry in client.datasets.list_dataset_logs(dataset_id=dataset_id):
        # Flag the log if any assistant turn is empty or whitespace-only.
        blank_reply = any(
            message["role"] == "assistant" and not message["content"].strip()
            for message in entry.messages
        )
        if blank_reply:
            flagged.append(entry.id)
    if flagged:
        client.datasets.remove_logs_from_dataset(
            dataset_id=dataset_id,
            log_ids=flagged
        )
        print(f"Removed {len(flagged)} problematic logs")
    return len(flagged)

# Clean up dataset
removed_count = cleanup_dataset("dataset_123")
Error Handling
from keywordsai.exceptions import (
NotFoundError,
ValidationError,
RateLimitError
)
def safe_dataset_operation(operation, **kwargs):
    """Invoke *operation* with **kwargs, converting known API errors to None.

    Each error category is reported on stdout; callers get the
    operation's result on success and None on any failure.
    """
    try:
        result = operation(**kwargs)
    except NotFoundError as err:
        print(f"Resource not found: {err}")
        return None
    except ValidationError as err:
        print(f"Validation error: {err}")
        return None
    except RateLimitError:
        print("Rate limit exceeded. Please retry later.")
        return None
    except Exception as err:
        print(f"Unexpected error: {err}")
        return None
    else:
        # Success path: hand back whatever the operation produced.
        return result
# Use safe operations
# Any handled API error is printed and swallowed: `dataset` is None on failure.
dataset = safe_dataset_operation(
    client.datasets.create,
    name="Test Dataset",
    description="Test description"
)
Next Steps