import anthropic
from orgo import Computer
def create_agent_loop(instruction, model="claude-sonnet-4-20250514", *, max_iterations=20):
    """Drive a Claude computer-use conversation until Claude stops requesting tools.

    The instruction is sent as the first user message. Every ``tool_use``
    block in Claude's reply is executed through ``execute_tool_action`` and
    the outcomes are sent back as ``tool_result`` blocks, repeating until a
    reply contains no tool requests or the round limit is reached.

    Args:
        instruction: Text of the initial user message.
        model: Model identifier passed to the Messages API.
        max_iterations: Upper bound on tool-execution rounds (default 20,
            the previously hard-coded value). One request is always made
            before the first round, so at most ``max_iterations + 1``
            requests are sent.

    Returns:
        The full conversation as a list of message dicts. If the round limit
        is hit, the final entry is an assistant message whose tool requests
        were not executed.

    Raises:
        Whatever the Anthropic client or ``Computer`` raise; the computer is
        destroyed before the exception propagates.
    """
    # Build the client first: if its construction fails (e.g. bad
    # configuration) no Computer exists yet, so nothing is left undestroyed.
    client = anthropic.Anthropic()
    computer = Computer()
    try:
        messages = [{"role": "user", "content": instruction}]

        # Tool version paired with the "computer-use-2025-01-24" beta flag
        # used in the request below.
        tools = [
            {
                "type": "computer_20250124",
                "name": "computer",
                "display_width_px": 1024,
                "display_height_px": 768,
                "display_number": 1,
            }
        ]

        def request_turn():
            """Send the conversation so far and record Claude's reply."""
            reply = client.beta.messages.create(
                model=model,
                messages=messages,
                tools=tools,
                betas=["computer-use-2025-01-24"],
                max_tokens=4096,
            )
            messages.append({"role": "assistant", "content": reply.content})
            return reply

        response = request_turn()

        for _ in range(max_iterations):
            # Run every tool request from the latest reply, in order.
            tool_results = [
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": [execute_tool_action(computer, block)],
                }
                for block in response.content
                if block.type == "tool_use"
            ]

            # No tool requests means Claude considers the task finished.
            if not tool_results:
                break

            messages.append({"role": "user", "content": tool_results})
            response = request_turn()

        return messages
    finally:
        # Always release the computer, on success and on error alike.
        computer.destroy()
def execute_tool_action(computer, tool_block):
    """Perform one tool request from Claude and return a content block for it.

    Args:
        computer: Object exposing the methods called below
            (``screenshot_base64``, ``left_click``, ``right_click``,
            ``double_click``, ``type``, ``key``, ``scroll``, ``wait``).
        tool_block: Tool-use block whose ``input`` mapping holds ``"action"``
            and that action's arguments.

    Returns:
        An ``image`` block for a screenshot; otherwise a ``text`` block that
        describes what was done, reports an unsupported action, or carries
        the message of any exception raised while executing the action.
    """
    params = tool_block.input
    action = params.get("action")

    def as_text(message):
        return {"type": "text", "text": message}

    # The three click variants differ only in the method called and the verb
    # used in the confirmation text.
    click_verbs = (
        ("left_click", "Clicked"),
        ("right_click", "Right-clicked"),
        ("double_click", "Double-clicked"),
    )

    try:
        if action == "screenshot":
            encoded = computer.screenshot_base64()
            source = {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": encoded,
            }
            return {"type": "image", "source": source}

        for click_name, verb in click_verbs:
            if action == click_name:
                # Read the coordinate before touching the computer so a
                # missing key is reported ahead of any device error.
                x, y = params["coordinate"]
                getattr(computer, click_name)(x, y)
                return as_text(f"{verb} at ({x}, {y})")

        if action == "type":
            typed = params["text"]
            computer.type(typed)
            return as_text(f"Typed: {typed}")

        if action == "key":
            pressed = params["text"]
            computer.key(pressed)
            return as_text(f"Pressed: {pressed}")

        if action == "scroll":
            direction = params.get("scroll_direction", "down")
            amount = params.get("scroll_amount", 1)
            computer.scroll(direction, amount)
            return as_text(f"Scrolled {direction} by {amount}")

        if action == "wait":
            duration = params.get("duration", 1)
            computer.wait(duration)
            return as_text(f"Waited for {duration} seconds")

        return as_text(f"Unsupported action: {action}")
    except Exception as e:
        # Best-effort: report the failure as text instead of aborting the
        # caller's loop.
        return as_text(f"Error executing {action}: {str(e)}")