Skip to main content

Architecture

  1. Search: Query the web for content relevant to the question
  2. Extract: Pull the key information out of the search results
  3. Generate: Create a comprehensive answer with citations

Example

from cheapestinference import CheapestInference

# Module-level client used by ai_search below; constructed with no arguments.
# NOTE(review): presumably picks up API credentials from the environment — confirm.
client = CheapestInference()

def ai_search(query):
    """Answer ``query`` from web search results, streaming the reply to stdout.

    The function runs three steps: fetch results with ``search_web``, join
    them into one source-labelled context string, then ask the chat model
    for an answer with citations and print the streamed text as it arrives.

    Args:
        query: The question to answer. It is passed to ``search_web`` and
            embedded in the user message sent to the model.

    Returns:
        None. The answer is written to stdout without a trailing newline.
    """
    # 1. Search web (use your preferred search API).
    # ``search_web`` is not defined in this snippet; each result it returns
    # is indexed below with the 'url' and 'content' keys.
    search_results = search_web(query)

    # 2. Create context from results: one "Source: <url>" header per result,
    # with results separated by a blank line.
    context = "\n\n".join(
        f"Source: {r['url']}\n{r['content']}" for r in search_results
    )

    # 3. Generate answer
    response = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        messages=[
            {
                "role": "system",
                "content": "Answer based on provided sources. Include citations.",
            },
            {
                "role": "user",
                "content": f"Query: {query}\n\nSources:\n{context}",
            },
        ],
        stream=True,
    )

    for chunk in response:
        # OpenAI-compatible streams may emit chunks with no choices (for
        # example a trailing usage chunk); indexing [0] on those would raise.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            # Flush so each piece shows up immediately even though no
            # newline is written (stdout is line-buffered on a terminal).
            print(piece, end="", flush=True)

# Example run: prints the streamed answer for a sample question.
ai_search("What is quantum computing?")