Setup

Install the required packages:

pip install orgo anthropic

Set up your API keys:

# Export as environment variables
export ORGO_API_KEY=your_orgo_api_key
export ANTHROPIC_API_KEY=your_anthropic_api_key

# Or in Python: set both keys in the current process's environment
# (the values below are placeholders; substitute your real keys)
import os
os.environ["ORGO_API_KEY"] = "your_orgo_api_key"
os.environ["ANTHROPIC_API_KEY"] = "your_anthropic_api_key"

Simple Integration

The simplest way to use Orgo with Claude is through the built-in prompt() method:

from orgo import Computer

# Initialize a computer
# (presumably authenticated via the ORGO_API_KEY set during Setup; confirm against the Orgo docs)
computer = Computer()

# Let Claude control the computer with natural language.
# No api_key is passed here, so the Anthropic key is expected to come from
# the ANTHROPIC_API_KEY environment variable.
computer.prompt("Open Firefox and search for pictures of cats")

This approach handles all the complexity of the agent loop automatically, making it easy to get started.

Customizing the Prompt Method

You can customize the prompt experience with various parameters:

# Create a progress callback
def progress_callback(event_type, event_data):
    """Print one line describing an agent progress event.

    ``"text"`` and ``"thinking"`` events print their payload as-is, and
    ``"tool_use"`` events print the payload's ``"action"`` entry. Any other
    event type is ignored and nothing is printed.
    """
    if event_type == "text":
        line = f"Claude: {event_data}"
    elif event_type == "tool_use":
        line = f"Action: {event_data['action']}"
    elif event_type == "thinking":
        line = f"Thinking: {event_data}"
    else:
        return  # Not an event this callback reports.
    print(line)

# Run a customized prompt; the call's return value is kept in `messages`
messages = computer.prompt(
    # What to do, and which Claude model does it
    instruction="Find and download the latest Claude paper from Anthropic's website",
    model="claude-3-7-sonnet-20250219",
    api_key="your_anthropic_api_key",  # Used instead of the environment variable
    # Screen resolution
    display_width=1280,
    display_height=800,
    # Agent-loop limits
    max_iterations=15,  # Upper bound on the number of agent loops
    max_tokens=4096,  # Upper bound on tokens per Claude response
    # Progress reporting
    callback=progress_callback,  # Invoked to track progress
    thinking_enabled=True,  # Claude's "thinking" capability (Claude 3.7)
)

Advanced Integration

For more control, you can implement your own agent loop using the Anthropic API directly:

import anthropic
from orgo import Computer

def create_agent_loop(
    instruction,
    model="claude-3-7-sonnet-20250219",
    max_iterations=20,
    max_tokens=4096,
):
    """Drive an Orgo computer with Claude until Claude stops requesting tools.

    Sends ``instruction`` to Claude with the computer-use tool enabled, runs
    every ``tool_use`` block of each reply through ``execute_tool_action``,
    and feeds the results back as the next user message.

    Args:
        instruction: The task, sent as the first user message.
        model: Claude model identifier passed to the Messages API.
        max_iterations: Maximum number of tool-result rounds sent back to
            Claude after its first reply.
        max_tokens: ``max_tokens`` value used for every request.

    Returns:
        The full conversation as a list of message dicts. If the iteration
        limit is reached, the final assistant message may still contain
        tool requests that were never executed.
    """
    # Build the client first: if its construction fails, no computer has
    # been started yet, so nothing is left running.
    client = anthropic.Anthropic()
    computer = Computer()

    try:
        # Tool definition and beta flag for Claude 3.7 Sonnet
        tools = [
            {
                "type": "computer_20250124",
                "name": "computer",
                "display_width_px": 1024,
                "display_height_px": 768,
                "display_number": 1,
            }
        ]
        betas = ["computer-use-2025-01-24"]

        def ask_claude(history):
            # Single place for the request so every turn uses the same settings.
            return client.beta.messages.create(
                model=model,
                messages=history,
                tools=tools,
                betas=betas,
                max_tokens=max_tokens,
            )

        # Start the conversation and record Claude's first reply
        messages = [{"role": "user", "content": instruction}]
        response = ask_claude(messages)
        messages.append({"role": "assistant", "content": response.content})

        for _ in range(max_iterations):
            # Execute every tool request in Claude's latest reply
            tool_results = [
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": [execute_tool_action(computer, block)],
                }
                for block in response.content
                if block.type == "tool_use"
            ]

            # If no tools were requested, Claude is done
            if not tool_results:
                break

            # Send the tool results back and record Claude's next reply
            messages.append({"role": "user", "content": tool_results})
            response = ask_claude(messages)
            messages.append({"role": "assistant", "content": response.content})

        return messages

    finally:
        # Always clean up, whether the loop finished or raised
        computer.shutdown()

def execute_tool_action(computer, tool_block):
    """Run one tool request from Claude and return a content block for it.

    The ``"action"`` entry of ``tool_block.input`` selects which method of
    ``computer`` is called. A screenshot yields an image block carrying the
    base64 data; every other outcome yields a text block: a confirmation,
    an "Unsupported action" notice, or an error description. Exceptions
    raised while handling the action are caught and reported as text rather
    than propagated.
    """
    params = tool_block.input
    action = params.get("action")

    def text_block(message):
        # Shape shared by every non-image reply.
        return {"type": "text", "text": message}

    # For clicks, the method on `computer` has the same name as the action;
    # only the verb in the confirmation differs.
    click_verbs = (
        ("left_click", "Clicked"),
        ("right_click", "Right-clicked"),
        ("double_click", "Double-clicked"),
    )

    try:
        if action == "screenshot":
            # Capture the screen as base64 and wrap it as an image block
            encoded = computer.screenshot_base64()
            source = {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": encoded,
            }
            return {"type": "image", "source": source}

        for click_name, verb in click_verbs:
            if action == click_name:
                x, y = params["coordinate"]
                getattr(computer, click_name)(x, y)
                return text_block(f"{verb} at ({x}, {y})")

        if action == "type":
            typed = params["text"]
            computer.type(typed)
            return text_block(f"Typed: {typed}")

        if action == "key":
            pressed = params["text"]
            computer.key(pressed)
            return text_block(f"Pressed: {pressed}")

        if action == "scroll":
            direction = params.get("scroll_direction", "down")
            amount = params.get("scroll_amount", 1)
            computer.scroll(direction, amount)
            return text_block(f"Scrolled {direction} by {amount}")

        if action == "wait":
            duration = params.get("duration", 1)
            computer.wait(duration)
            return text_block(f"Waited for {duration} seconds")

        return text_block(f"Unsupported action: {action}")

    except Exception as exc:
        # Report the failure as text so the caller can pass it back to Claude
        return text_block(f"Error executing {action}: {exc!s}")

Using Claude’s Thinking Capability

Claude 3.7 Sonnet can provide its reasoning process through the thinking parameter:

import anthropic
from orgo import Computer

# Initialize components (client first, so a client failure leaves no computer running)
client = anthropic.Anthropic()
computer = Computer()

try:
    # Start a conversation with thinking enabled
    response = client.beta.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=4096,  # Required by the Messages API; must exceed the thinking budget
        messages=[{"role": "user", "content": "Find an image of a cat on the web"}],
        tools=[{
            "type": "computer_20250124",
            "name": "computer",
            "display_width_px": 1024,
            "display_height_px": 768,
            "display_number": 1
        }],
        betas=["computer-use-2025-01-24"],
        thinking={"type": "enabled", "budget_tokens": 1024}  # Enable thinking
    )

    # Access the thinking content
    for block in response.content:
        if block.type == "thinking":
            print("Claude's reasoning:")
            print(block.thinking)
finally:
    # Always clean up, matching the agent-loop example
    computer.shutdown()

Tool Compatibility

Orgo provides a complete set of methods corresponding to Claude’s computer use tools:

| Claude Tool Action | Orgo Method | Description |
| --- | --- | --- |
| screenshot | computer.screenshot() | Capture the screen (returns PIL Image) |
| screenshot | computer.screenshot_base64() | Capture the screen (returns base64 string) |
| left_click | computer.left_click(x, y) | Left click at coordinates |
| right_click | computer.right_click(x, y) | Right click at coordinates |
| double_click | computer.double_click(x, y) | Double click at coordinates |
| type | computer.type(text) | Type text |
| key | computer.key(key_sequence) | Press keys (e.g., “Enter”, “ctrl+c”) |
| scroll | computer.scroll(direction, amount) | Scroll in specified direction |
| wait | computer.wait(seconds) | Wait for specified seconds |

Claude 3.7 vs 3.5 Sonnet

When using different Claude models, make sure to use the appropriate tool type:

  • For Claude 3.7 Sonnet: "type": "computer_20250124"
  • For Claude 3.5 Sonnet: "type": "computer_20241022"

And use the corresponding beta flag:

  • For Claude 3.7 Sonnet: betas=["computer-use-2025-01-24"]
  • For Claude 3.5 Sonnet: betas=["computer-use-2024-10-22"]