import time
import base64
from openai import OpenAI
from orgo import Computer
from dotenv import load_dotenv
load_dotenv()
def run_computer_task(task, project_id=None):
    """Execute a task using OpenAI Computer Use with Orgo.

    Sends ``task`` to the ``computer-use-preview`` model, then loops:
    execute the first ``computer_call`` of each response on the Orgo
    computer, capture a screenshot, and send it back as the call output.
    The loop ends when a response contains no ``computer_call`` item.

    Args:
        task: Natural-language description of what the model should do.
        project_id: Passed straight to ``Computer(project_id=...)``.

    Returns:
        The ``Computer`` instance used for the run. The caller is
        responsible for calling ``destroy()`` on it.

    Raises:
        Whatever the OpenAI client or the Orgo computer raises. When
        ``project_id`` was not supplied, the computer is destroyed before
        the exception propagates, because the caller never receives a
        handle to it.
    """
    # Initialize OpenAI client and Orgo computer
    client = OpenAI()
    computer = Computer(project_id=project_id)
    print(f"🖥️ Computer ID: {computer.project_id}")

    # Options shared by the initial request and every follow-up request
    request_options = {
        "model": "computer-use-preview",
        "tools": [{
            "type": "computer_use_preview",
            "display_width": 1024,
            "display_height": 768,
            "environment": "linux",  # Orgo provides Linux desktops
        }],
        "reasoning": {"summary": "concise"},  # Show reasoning steps
        "truncation": "auto",  # Required for computer use
    }

    try:
        # Create initial request with the task
        response = client.responses.create(
            input=[{
                "role": "user",
                "content": [{
                    "type": "input_text",
                    "text": f"""IMPORTANT: You are controlling a Linux desktop.
- Always double-click desktop icons to open applications
- Use keyboard shortcuts as single commands (e.g., 'ctrl+c' not separate keys)
Task: {task}"""
                }]
            }],
            **request_options,
        )

        # Main agent loop
        while True:
            # Display progress
            for item in response.output:
                if item.type == "reasoning" and hasattr(item, "summary"):
                    for summary in item.summary:
                        if hasattr(summary, "text"):
                            print(f"💭 {summary.text}")
                elif item.type == "message":
                    # Assistant text arrives as a message whose content
                    # parts carry the text.
                    for part in getattr(item, "content", None) or []:
                        part_text = getattr(part, "text", None)
                        if part_text:
                            print(f"💬 {part_text}")
                elif item.type == "text" and hasattr(item, "text"):
                    print(f"💬 {item.text}")

            # Get computer actions from response
            actions = [
                item for item in response.output
                if item.type == "computer_call"
            ]

            # If no actions, task is complete
            if not actions:
                print("✓ Task completed")
                break

            # Execute the action (only the first computer_call is handled)
            # NOTE(review): computer_call items may carry pending safety
            # checks that the API expects to be acknowledged; that is not
            # handled here — confirm against the OpenAI computer-use guide.
            action = actions[0]
            print(f"→ {action.action.type}")
            execute_action(computer, action.action)
            time.sleep(1)  # Allow UI to update

            # Capture screenshot and continue
            screenshot = computer.screenshot_base64()
            response = client.responses.create(
                previous_response_id=response.id,  # Link to previous response
                input=[{
                    "call_id": action.call_id,
                    "type": "computer_call_output",
                    "output": {
                        "type": "input_image",
                        "image_url": f"data:image/png;base64,{screenshot}"
                    }
                }],
                **request_options,
            )
    except BaseException:
        # Without a project_id the caller has no way to reach this computer
        # after a failure, so release it here instead of leaking it. A
        # caller-supplied project_id is left alone: the caller can reconnect.
        if project_id is None:
            computer.destroy()
        raise

    return computer
def execute_action(computer, action):
"""Execute computer actions using Orgo."""
match action.type:
case "click":
# Handle left/right clicks
if getattr(action, 'button', 'left') == "right":
computer.right_click(action.x, action.y)
else:
computer.left_click(action.x, action.y)
case "double_click":
computer.double_click(action.x, action.y)
case "type":
computer.type(action.text)
case "key" | "keypress":
# Handle single keys or key combinations
keys = getattr(action, 'keys', [getattr(action, 'key', [])])
if len(keys) > 1:
# Multiple keys = keyboard shortcut
computer.key('+'.join(keys).lower())
else:
# Single key press
for key in keys:
computer.key(key)
case "scroll":
# Convert scroll amount to direction
scroll_y = getattr(action, 'scroll_y', 0)
direction = "down" if scroll_y > 0 else "up"
computer.scroll(direction, abs(scroll_y) // 100)
case "wait":
computer.wait(getattr(action, 'seconds', 2))
case "screenshot":
# Screenshot is taken automatically in the loop
pass
if __name__ == "__main__":
    # Demo: run one sample task when the file is executed as a script
    desktop = run_computer_task("Open a terminal and list files")
    # Release the Orgo computer once the task has finished
    desktop.destroy()