Architecture
- Search: Query web for relevant content
- Extract: Get key information from results
- Generate: Create comprehensive answer with citations
Example
from cheapestinference import CheapestInference
# Module-level API client shared by ai_search(); constructed with no explicit
# arguments (how credentials are resolved is not shown in this example).
client = CheapestInference()
def ai_search(query: str) -> None:
    """Answer ``query`` from web search results and stream the reply to stdout.

    The function runs three steps: fetch results with ``search_web``, join
    them into a single context string, and ask the chat model for an answer
    that cites those sources. The answer is printed piece by piece as it
    arrives; nothing is returned.

    Args:
        query: The question to research and answer.

    Note:
        ``search_web`` is not defined in this example -- supply your own.
        It must return an iterable of mappings that each provide a ``"url"``
        and a ``"content"`` key, because both are read when the context is
        built.
    """
    # 1. Search web (use your preferred search API)
    search_results = search_web(query)

    # 2. Create context from results: each result becomes a "Source: <url>"
    #    header followed by its content, with blank lines between results.
    context = "\n\n".join(
        f"Source: {r['url']}\n{r['content']}" for r in search_results
    )

    # 3. Generate answer
    response = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        messages=[
            {
                "role": "system",
                "content": "Answer based on provided sources. Include citations.",
            },
            {
                "role": "user",
                "content": f"Query: {query}\n\nSources:\n{context}",
            },
        ],
        stream=True,
    )

    for chunk in response:
        # A streamed chunk can arrive with an empty ``choices`` list (seen in
        # OpenAI-compatible APIs, e.g. a trailing usage-only chunk; assumed
        # possible here too). Indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            # No newline is written, so flush explicitly; otherwise buffered
            # stdout may hold the text back instead of showing it live.
            print(piece, end="", flush=True)
# Example invocation: runs the search-and-answer pipeline for one question and
# prints the streamed answer.
ai_search("What is quantum computing?")