Setup
Install the required packages:
pip install orgo anthropic
Set up your API keys:
# Export as environment variables
export ORGO_API_KEY=your_orgo_api_key
export ANTHROPIC_API_KEY=your_anthropic_api_key
# Or in Python
# Set the API keys programmatically instead of exporting them in the shell.
import os

os.environ.update({
    "ORGO_API_KEY": "your_orgo_api_key",
    "ANTHROPIC_API_KEY": "your_anthropic_api_key",
})
Simple Integration
The simplest way to use Orgo with Claude is through the built-in `prompt()` method:
from orgo import Computer

# Spin up a virtual desktop and hand control of it to Claude in one call.
computer = Computer()

# The built-in prompt() method runs the whole agent loop for you.
computer.prompt("Open Firefox and search for pictures of cats")
This approach handles all the complexity of the agent loop automatically, making it easy to get started.
Customizing the Prompt Method
You can customize the prompt experience with various parameters:
# Create a progress callback
def progress_callback(event_type, event_data):
    """Print a human-readable line for each agent-loop progress event.

    ``text`` and ``thinking`` events carry plain strings; ``tool_use``
    events carry a dict from which only the action name is shown.
    Unknown event types are silently ignored.
    """
    prefix_by_type = {"text": "Claude", "thinking": "Thinking"}
    if event_type == "tool_use":
        # Tool events are dicts; surface just the requested action.
        print(f"Action: {event_data['action']}")
    elif event_type in prefix_by_type:
        print(f"{prefix_by_type[event_type]}: {event_data}")
# Use Claude with custom parameters
# Drive computer.prompt() with explicit configuration, collected in one place.
prompt_options = {
    "model": "claude-3-7-sonnet-20250219",  # the model to use
    "display_width": 1280,                  # virtual screen resolution
    "display_height": 800,
    "callback": progress_callback,          # progress-reporting hook
    "thinking_enabled": True,               # Claude's "thinking" capability (Claude 3.7)
    "max_iterations": 15,                   # cap on agent-loop turns
    "max_tokens": 4096,                     # maximum tokens per Claude response
    "api_key": "your_anthropic_api_key",    # overrides the environment variable
}

messages = computer.prompt(
    instruction="Find and download the latest Claude paper from Anthropic's website",
    **prompt_options,
)
Advanced Integration
For more control, you can implement your own agent loop using the Anthropic API directly:
import anthropic
from orgo import Computer
def create_agent_loop(instruction, model="claude-3-7-sonnet-20250219",
                      max_iterations=20, max_tokens=4096):
    """Run a manual computer-use agent loop against the Anthropic API.

    Args:
        instruction: Natural-language task for Claude to carry out.
        model: Anthropic model id (defaults to Claude 3.7 Sonnet).
        max_iterations: Safety cap on tool-use round trips.
        max_tokens: Token budget for each Claude response.

    Returns:
        The full conversation history as a list of message dicts.
    """
    # Build the API client first so a client-construction failure can
    # never leak a provisioned computer.
    client = anthropic.Anthropic()
    computer = Computer()
    try:
        tools = [
            {
                "type": "computer_20250124",  # tool schema for Claude 3.7 Sonnet
                "name": "computer",
                "display_width_px": 1024,
                "display_height_px": 768,
                "display_number": 1,
            }
        ]
        messages = [{"role": "user", "content": instruction}]

        def ask_claude():
            # One round trip; the beta flag must match the tool type above.
            return client.beta.messages.create(
                model=model,
                messages=messages,
                tools=tools,
                betas=["computer-use-2025-01-24"],
                max_tokens=max_tokens,
            )

        response = ask_claude()
        messages.append({"role": "assistant", "content": response.content})

        for _ in range(max_iterations):
            # Execute every tool action Claude requested this turn and
            # package the results in the tool_result format it expects.
            tool_results = [
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": [execute_tool_action(computer, block)],
                }
                for block in response.content
                if block.type == "tool_use"
            ]
            # No tool requests means Claude considers the task finished.
            if not tool_results:
                break
            messages.append({"role": "user", "content": tool_results})
            response = ask_claude()
            messages.append({"role": "assistant", "content": response.content})
        return messages
    finally:
        # Always release the remote desktop, even on error.
        computer.shutdown()
def execute_tool_action(computer, tool_block):
    """Execute a tool action based on Claude's request.

    Dispatches on ``tool_block.input["action"]`` to the matching Orgo
    method and returns a Claude-compatible content block: an image block
    for screenshots, a text block for everything else. Any exception is
    captured and reported back as a text block rather than raised.
    """
    params = tool_block.input
    action = params.get("action")
    try:
        if action == "screenshot":
            # Screenshots go back to Claude as base64-encoded image content.
            return {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": computer.screenshot_base64(),
                },
            }
        if action in ("left_click", "right_click", "double_click"):
            # All click variants share the same coordinate handling;
            # the Orgo method name matches the action name exactly.
            x, y = params["coordinate"]
            getattr(computer, action)(x, y)
            verb = {
                "left_click": "Clicked",
                "right_click": "Right-clicked",
                "double_click": "Double-clicked",
            }[action]
            return {"type": "text", "text": f"{verb} at ({x}, {y})"}
        if action == "type":
            text = params["text"]
            computer.type(text)
            return {"type": "text", "text": f"Typed: {text}"}
        if action == "key":
            key = params["text"]
            computer.key(key)
            return {"type": "text", "text": f"Pressed: {key}"}
        if action == "scroll":
            direction = params.get("scroll_direction", "down")
            amount = params.get("scroll_amount", 1)
            computer.scroll(direction, amount)
            return {"type": "text", "text": f"Scrolled {direction} by {amount}"}
        if action == "wait":
            duration = params.get("duration", 1)
            computer.wait(duration)
            return {"type": "text", "text": f"Waited for {duration} seconds"}
        return {"type": "text", "text": f"Unsupported action: {action}"}
    except Exception as e:
        # Report failures to Claude instead of breaking the agent loop.
        return {"type": "text", "text": f"Error executing {action}: {str(e)}"}
Using Claude’s Thinking Capability
Claude 3.7 Sonnet can provide its reasoning process through the thinking parameter:
import anthropic
from orgo import Computer

# Initialize components
computer = Computer()
client = anthropic.Anthropic()

# Start a conversation with extended thinking enabled.
# NOTE: max_tokens is required by the Messages API and must be larger
# than the thinking budget_tokens, otherwise the request is rejected.
response = client.beta.messages.create(
    model="claude-3-7-sonnet-20250219",
    max_tokens=4096,
    messages=[{"role": "user", "content": "Find an image of a cat on the web"}],
    tools=[{
        "type": "computer_20250124",
        "name": "computer",
        "display_width_px": 1024,
        "display_height_px": 768,
        "display_number": 1
    }],
    betas=["computer-use-2025-01-24"],
    thinking={"type": "enabled", "budget_tokens": 1024}  # Enable thinking
)

# Thinking blocks carry Claude's step-by-step reasoning.
for block in response.content:
    if block.type == "thinking":
        print("Claude's reasoning:")
        print(block.thinking)
Orgo provides a complete set of methods corresponding to Claude’s computer use tools:
Claude Tool Action | Orgo Method | Description
---|---|---
screenshot | computer.screenshot() | Capture the screen (returns PIL Image) |
screenshot | computer.screenshot_base64() | Capture the screen (returns base64 string) |
left_click | computer.left_click(x, y) | Left click at coordinates |
right_click | computer.right_click(x, y) | Right click at coordinates |
double_click | computer.double_click(x, y) | Double click at coordinates |
type | computer.type(text) | Type text |
key | computer.key(key_sequence) | Press keys (e.g., “Enter”, “ctrl+c”) |
scroll | computer.scroll(direction, amount) | Scroll in specified direction |
wait | computer.wait(seconds) | Wait for specified seconds |
Claude 3.7 vs 3.5 Sonnet
When using different Claude models, make sure to use the appropriate tool type:
- For Claude 3.7 Sonnet:
"type": "computer_20250124"
- For Claude 3.5 Sonnet:
"type": "computer_20241022"
And use the corresponding beta flag:
- For Claude 3.7 Sonnet:
betas=["computer-use-2025-01-24"]
- For Claude 3.5 Sonnet:
betas=["computer-use-2024-10-22"]