This guide shows how to compare AI API performance across providers using Locust. It is useful for measuring response times, testing reliability under load, and choosing the provider that best fits your needs.
## Use Cases
- Compare AI API response times
- Test different AI providers under load
- Validate API reliability and uptime
- Measure cost-effectiveness of different providers
## Simple Implementation
The Locust script below sends the same set of prompts to OpenAI, Anthropic, and Google endpoints and logs response length, token usage, and latency for each.
```python
from locust import HttpUser, task, between
import random
import time

class AIComparisonUser(HttpUser):
    # Locust requires a base host even though every task below uses an absolute URL
    host = "https://api.openai.com"
    # Pause between tasks so a handful of users doesn't immediately hit rate limits
    wait_time = between(1, 3)

    def on_start(self):
        # API configurations -- replace with your actual keys
        self.openai_key = "your-openai-api-key"
        self.claude_key = "your-claude-api-key"
        self.gemini_key = "your-gemini-api-key"

        # Test prompts for comparison
        self.test_prompts = [
            "Write a short product description for a wireless headphone.",
            "Explain quantum computing in simple terms.",
            "Create a brief email response thanking a customer.",
            "Summarize the benefits of renewable energy.",
            "Write a creative story opening in 2 sentences.",
        ]
    @task(2)
    def test_openai_gpt35(self):
        """Test OpenAI GPT-3.5 Turbo"""
        prompt = random.choice(self.test_prompts)
        headers = {
            "Authorization": f"Bearer {self.openai_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 150,
            "temperature": 0.7,
        }
        start_time = time.time()
        # catch_response=True is required for response.success()/failure() to work
        with self.client.post(
            "https://api.openai.com/v1/chat/completions",
            json=payload,
            headers=headers,
            name="OpenAI GPT-3.5",
            catch_response=True,
        ) as response:
            response_time = (time.time() - start_time) * 1000
            if response.status_code == 200:
                data = response.json()
                if "choices" in data and data["choices"]:
                    content = data["choices"][0]["message"]["content"]
                    tokens = data.get("usage", {}).get("total_tokens", 0)
                    print(f"OpenAI: {len(content)} chars, {tokens} tokens, {response_time:.0f}ms")
                    response.success()
                else:
                    response.failure("No choices in OpenAI response")
            elif response.status_code == 429:
                response.failure("OpenAI rate limit exceeded")
            else:
                response.failure(f"OpenAI error: {response.status_code}")
    @task(2)
    def test_claude(self):
        """Test Anthropic Claude"""
        prompt = random.choice(self.test_prompts)
        headers = {
            "x-api-key": self.claude_key,
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01",
        }
        payload = {
            "model": "claude-3-haiku-20240307",
            "max_tokens": 150,
            "messages": [{"role": "user", "content": prompt}],
        }
        start_time = time.time()
        with self.client.post(
            "https://api.anthropic.com/v1/messages",
            json=payload,
            headers=headers,
            name="Claude Haiku",
            catch_response=True,
        ) as response:
            response_time = (time.time() - start_time) * 1000
            if response.status_code == 200:
                data = response.json()
                if "content" in data and data["content"]:
                    content = data["content"][0]["text"]
                    usage = data.get("usage", {})
                    tokens = usage.get("input_tokens", 0) + usage.get("output_tokens", 0)
                    print(f"Claude: {len(content)} chars, {tokens} tokens, {response_time:.0f}ms")
                    response.success()
                else:
                    response.failure("No content in Claude response")
            elif response.status_code == 429:
                response.failure("Claude rate limit exceeded")
            else:
                response.failure(f"Claude error: {response.status_code}")
    @task(2)
    def test_gemini(self):
        """Test Google Gemini"""
        prompt = random.choice(self.test_prompts)
        params = {"key": self.gemini_key}
        payload = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {
                "maxOutputTokens": 150,
                "temperature": 0.7,
            },
        }
        start_time = time.time()
        with self.client.post(
            "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent",
            json=payload,
            params=params,
            name="Gemini Pro",
            catch_response=True,
        ) as response:
            response_time = (time.time() - start_time) * 1000
            if response.status_code == 200:
                data = response.json()
                if "candidates" in data and data["candidates"]:
                    content = data["candidates"][0]["content"]["parts"][0]["text"]
                    # Token usage is reported under usageMetadata
                    # (may be absent on older API versions)
                    tokens = data.get("usageMetadata", {}).get("totalTokenCount", 0)
                    print(f"Gemini: {len(content)} chars, {tokens} tokens, {response_time:.0f}ms")
                    response.success()
                else:
                    response.failure("No candidates in Gemini response")
            elif response.status_code == 429:
                response.failure("Gemini rate limit exceeded")
            else:
                response.failure(f"Gemini error: {response.status_code}")
    @task(1)
    def test_openai_gpt4(self):
        """Test OpenAI GPT-4 (if available)"""
        prompt = random.choice(self.test_prompts)
        headers = {
            "Authorization": f"Bearer {self.openai_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "gpt-4",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 150,
            "temperature": 0.7,
        }
        start_time = time.time()
        with self.client.post(
            "https://api.openai.com/v1/chat/completions",
            json=payload,
            headers=headers,
            name="OpenAI GPT-4",
            catch_response=True,
        ) as response:
            response_time = (time.time() - start_time) * 1000
            if response.status_code == 200:
                data = response.json()
                if "choices" in data and data["choices"]:
                    content = data["choices"][0]["message"]["content"]
                    tokens = data.get("usage", {}).get("total_tokens", 0)
                    print(f"GPT-4: {len(content)} chars, {tokens} tokens, {response_time:.0f}ms")
                    response.success()
                else:
                    response.failure("No choices in GPT-4 response")
            elif response.status_code == 429:
                response.failure("GPT-4 rate limit exceeded")
            elif response.status_code == 404:
                response.failure("GPT-4 not available (need API access)")
            else:
                response.failure(f"GPT-4 error: {response.status_code}")
```
## Setup Instructions
1. Get API keys from each provider (OpenAI, Anthropic, and Google).
2. Replace the API key placeholders with your actual keys.
3. Start with low user counts to avoid hitting rate limits (see the example run command below).
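To run the test, save the script (here assumed to be named `ai_comparison.py`; the filename is arbitrary) and launch Locust headless with a small user count:

```bash
# 2 users, spawned at 1 user/sec, running for 2 minutes without the web UI
locust -f ai_comparison.py --headless -u 2 -r 1 -t 2m
```

Drop `--headless` to use Locust's web UI instead and adjust user counts interactively.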
## What This Tests
- Response Times: Compare how fast each AI responds
- Rate Limits: Test how each provider handles concurrent requests
- Reliability: Check which APIs are most stable under load (a per-provider tally sketch follows this list)
- Output Quality: Compare response length and coherence
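Locust already aggregates latency and failure statistics under the `name=` labels used above. If you want a custom per-provider summary printed at the end of a run, one option is to hook Locust's event listeners. A minimal sketch you can append to the same locustfile (the `stats` dictionary and function names are illustrative, not part of Locust):

```python
from collections import defaultdict
from locust import events

# In-memory tally per request name ("OpenAI GPT-3.5", "Claude Haiku", ...)
stats = defaultdict(lambda: {"ok": 0, "fail": 0, "total_ms": 0.0})

@events.request.add_listener
def track_request(request_type, name, response_time, response_length, exception, **kwargs):
    entry = stats[name]
    if exception is None:
        entry["ok"] += 1
        entry["total_ms"] += response_time
    else:
        entry["fail"] += 1

@events.test_stop.add_listener
def report(environment, **kwargs):
    for name, entry in stats.items():
        avg = entry["total_ms"] / entry["ok"] if entry["ok"] else 0
        print(f"{name}: {entry['ok']} ok, {entry['fail']} failed, avg {avg:.0f}ms")
```

Locust fires the `request` event for every call and `test_stop` once at shutdown, so the summary prints after the run completes.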
## Performance Comparison
Typical results you might see (a quick throughput sanity check follows the list):
- GPT-3.5: Fast responses (~1-3 seconds), good for high volume
- Claude: Moderate speed (~2-4 seconds), excellent quality
- Gemini: Variable speed (~1-5 seconds), good for creative tasks
- GPT-4: Slower (~3-8 seconds), highest quality but expensive
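These latencies cap your throughput. By Little's law, steady-state requests per second is roughly the user count divided by the average time per task iteration (response time plus wait time). A back-of-the-envelope check, using the GPT-3.5 mid-range figure above and the `wait_time` from the script:

```python
users = 10
avg_response_s = 2.0  # mid-range of the ~1-3s GPT-3.5 estimate above
avg_wait_s = 2.0      # midpoint of wait_time = between(1, 3)

# Little's law: throughput = concurrency / time per iteration
rps = users / (avg_response_s + avg_wait_s)
print(f"~{rps:.1f} requests/sec")  # ~2.5 requests/sec across all providers
```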
## Rate Limits & Costs
- OpenAI: Tier-based limits, pay per token
- Claude: Message-based limits, pay per token
- Gemini: Requests-per-minute limits, generous free tier
For the pay-per-token providers, the token counts printed by the script can be turned into rough cost estimates, as sketched below.
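A minimal sketch; the per-1K-token prices below are placeholders, not current rates -- check each provider's pricing page before relying on them:

```python
# Illustrative only: these per-1K-token prices are placeholder values.
PRICE_PER_1K_TOKENS = {
    "gpt-3.5-turbo": 0.002,              # placeholder USD per 1K tokens
    "claude-3-haiku-20240307": 0.00075,  # placeholder USD per 1K tokens
}

def estimate_cost(model: str, total_tokens: int) -> float:
    """Rough cost estimate from a total token count."""
    return PRICE_PER_1K_TOKENS[model] / 1000 * total_tokens

# e.g. a 150-token request/response on gpt-3.5-turbo:
print(f"${estimate_cost('gpt-3.5-turbo', 150):.5f}")
```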
## Common Issues
- API Keys: Ensure all keys are valid and have sufficient credits (a quick pre-flight check is sketched below)
- Rate Limits: Each provider has different limits
- Model Availability: GPT-4 requires special access
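Before launching a full load test, it can save time to verify each key with a single cheap request. A sketch using the `requests` library against the same endpoints as the script above (function names are illustrative):

```python
import requests

def check_openai(key):
    """One minimal chat completion to confirm the key is accepted."""
    r = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {key}"},
        json={"model": "gpt-3.5-turbo",
              "messages": [{"role": "user", "content": "ping"}],
              "max_tokens": 1},
        timeout=30,
    )
    return r.status_code == 200

def check_claude(key):
    r = requests.post(
        "https://api.anthropic.com/v1/messages",
        headers={"x-api-key": key, "anthropic-version": "2023-06-01"},
        json={"model": "claude-3-haiku-20240307", "max_tokens": 1,
              "messages": [{"role": "user", "content": "ping"}]},
        timeout=30,
    )
    return r.status_code == 200

def check_gemini(key):
    r = requests.post(
        "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent",
        params={"key": key},
        json={"contents": [{"parts": [{"text": "ping"}]}]},
        timeout=30,
    )
    return r.status_code == 200
```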