Overview

Add persistent memory to OpenAI’s Computer Use agents. Your agents will remember user preferences, learn from interactions, and improve over time.

Getting Started

1

Install dependencies

Install Mem0 alongside Orgo and OpenAI:
pip install mem0ai orgo openai python-dotenv
2

Set up environment

Create a .env file with your API keys:
ORGO_API_KEY=your_orgo_api_key
OPENAI_API_KEY=your_openai_api_key  # Used by both OpenAI and Mem0
Mem0 uses OpenAI by default for its LLM and embedding calls, so the same OPENAI_API_KEY covers both and you only need two API keys in total.
3

Copy and run

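Save this complete example as memory_agent.py and run it with `python memory_agent.py` (Python 3.10 or newer is required because the example uses a match statement):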
import time
import base64
from mem0 import Memory
from orgo import Computer
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

class MemoryComputerAgent:
    """OpenAI Computer Use agent with memory."""
    
    def __init__(self, user_id="GigabrainAgent"):
        self.user_id = user_id
        self.memory = Memory()
        self.client = OpenAI()
        self.computer = None
        
    def __enter__(self):
        self.computer = Computer()
        print(f"🖥️  Computer ID: {self.computer.project_id}")
        return self
        
    def __exit__(self, *args):
        if self.computer:
            self.computer.destroy()
            
    def run(self, task):
        """Execute task with memory context."""
        # Get memories
        memories = self._get_relevant_memories(task)
        
        # Build task with memory context
        enhanced_task = self._build_task_with_memory(task, memories)
        
        # Execute using OpenAI Computer Use
        self._execute_computer_task(enhanced_task)
        
        # Store interaction
        self._store_memory(task)
        
    def _get_relevant_memories(self, task):
        """Search for relevant memories."""
        try:
            results = self.memory.search(
                query=task,
                user_id=self.user_id,
                limit=5
            )
            return [m['memory'] for m in results.get('results', [])]
        except:
            return []
    
    def _build_task_with_memory(self, task, memories):
        """Enhance task with memory context."""
        if not memories:
            return task
            
        context = "\n".join(f"- {m}" for m in memories)
        return f"""Remember these user preferences:
{context}

Current task: {task}"""
    
    def _execute_computer_task(self, task):
        """Execute task using OpenAI Computer Use."""
        response = self.client.responses.create(
            model="computer-use-preview",
            tools=[{
                "type": "computer_use_preview",
                "display_width": 1024,
                "display_height": 768,
                "environment": "linux"
            }],
            input=[{
                "role": "user",
                "content": [{
                    "type": "input_text", 
                    "text": f"""IMPORTANT: You are controlling a Linux desktop. 
- Always double-click desktop icons to open applications
- Use keyboard shortcuts as single commands (e.g., 'ctrl+c' not separate keys)
Task: {task}"""
                }]
            }],
            reasoning={"summary": "concise"},
            truncation="auto"
        )
        
        # Execute actions in loop
        while True:
            # Display progress
            for item in response.output:
                if item.type == "reasoning" and hasattr(item, "summary"):
                    for summary in item.summary:
                        if hasattr(summary, "text"):
                            print(f"💭 {summary.text}")
                elif item.type == "text" and hasattr(item, "text"):
                    print(f"💬 {item.text}")
            
            # Get computer actions
            actions = [item for item in response.output if item.type == "computer_call"]
            if not actions:
                print("✓ Task completed")
                break
                
            # Execute action
            action = actions[0]
            print(f"→ {action.action.type}")
            self._execute_action(action.action)
            time.sleep(1)
            
            # Get screenshot and continue
            screenshot = self.computer.screenshot_base64()
            response = self.client.responses.create(
                model="computer-use-preview",
                previous_response_id=response.id,
                tools=[{
                    "type": "computer_use_preview",
                    "display_width": 1024,
                    "display_height": 768,
                    "environment": "linux"
                }],
                input=[{
                    "call_id": action.call_id,
                    "type": "computer_call_output",
                    "output": {
                        "type": "input_image",
                        "image_url": f"data:image/png;base64,{screenshot}"
                    }
                }],
                reasoning={"summary": "concise"},
                truncation="auto"
            )
    
    def _execute_action(self, action):
        """Execute computer action."""
        match action.type:
            case "click":
                if getattr(action, 'button', 'left') == "right":
                    self.computer.right_click(action.x, action.y)
                else:
                    self.computer.left_click(action.x, action.y)
                    
            case "double_click":
                self.computer.double_click(action.x, action.y)
                
            case "type":
                self.computer.type(action.text)
                
            case "key" | "keypress":
                keys = getattr(action, 'keys', [getattr(action, 'key', [])])
                if len(keys) > 1:
                    self.computer.key('+'.join(keys).lower())
                else:
                    for key in keys:
                        self.computer.key(key)
                        
            case "scroll":
                scroll_y = getattr(action, 'scroll_y', 0)
                direction = "down" if scroll_y > 0 else "up"
                self.computer.scroll(direction, abs(scroll_y) // 100)
                
            case "wait":
                self.computer.wait(getattr(action, 'seconds', 2))
                
            case "screenshot":
                pass
    
    def _store_memory(self, task):
        """Store interaction in memory."""
        try:
            self.memory.add(
                messages=[
                    {"role": "user", "content": task},
                    {"role": "assistant", "content": "Task executed successfully"}
                ],
                user_id=self.user_id
            )
        except:
            pass

# Demo entry point: store a few preferences, then run a task that needs them
if __name__ == "__main__":
    preference_statements = (
        "I prefer Firefox over Chrome",
        "I like dark themes",
        "Spencer Kinney is the stinkiest person I know",
    )

    with MemoryComputerAgent("GigabrainAgent") as agent:
        # Each statement goes through agent.run, which also stores it in memory
        print("📝 Teaching preferences...")
        for statement in preference_statements:
            agent.run(statement)

        # This task only names the preferences indirectly
        print("\n🚀 Running task with memory...")
        agent.run("Open my current preferred browser and search for the stinkiest person I know")

How It Works

The MemoryComputerAgent combines OpenAI’s Computer Use API with Mem0’s memory system:
  1. Memory Search: Before executing any task, it searches for relevant memories
  2. Context Enhancement: Memories are added to the task prompt
  3. Computer Use Execution: Uses OpenAI’s Computer-Using Agent (CUA) model to control the Orgo computer
  4. Memory Storage: After execution, the interaction is saved for future use

Usage Examples

Basic Usage

# Minimal session: one agent, one task
quick_task = "Open Firefox and go to GitHub"

with MemoryComputerAgent("GigabrainAgent") as agent:
    agent.run(quick_task)

Building Preferences

# Statements that tell the agent about the user
alice_preferences = (
    "I prefer VS Code for coding",
    "I use dark themes everywhere",
    "My GitHub username is alice123",
)

with MemoryComputerAgent("alice") as agent:
    for statement in alice_preferences:
        agent.run(statement)

    # A later task can refer to those preferences indirectly
    agent.run("Open my code editor and check my GitHub")

Morning Routine

def morning_setup(user_id="GigabrainAgent"):
    """Describe the morning routine step by step, then ask for it by name."""
    routine_steps = (
        "I check Gmail first thing in the morning",
        "Then I open Slack",
        "Finally I check my calendar",
    )

    with MemoryComputerAgent(user_id) as agent:
        # Run each step once so it is stored as a memory
        for step in routine_steps:
            agent.run(step)

        # With the routine stored, one short request is enough
        agent.run("Do my morning routine")


# Run daily
morning_setup()

Manual Management

# Manual lifecycle: create the computer yourself and always destroy it
manual_tasks = (
    # Teach
    "My favorite news site is Hacker News",
    "I use DuckDuckGo for searching",
    # Use
    "Open my favorite news site",
)

agent = MemoryComputerAgent("bob")
agent.computer = Computer()

try:
    for manual_task in manual_tasks:
        agent.run(manual_task)
finally:
    # Runs even when a task raises, so the computer is not left behind
    agent.computer.destroy()

Memory Management

View All Memories

from mem0 import Memory

memory = Memory()
response = memory.get_all(user_id="GigabrainAgent")

# get_all may wrap the list as {"results": [...]} (the same shape the search
# example on this page reads); unwrap it, but still accept a bare list.
memories = response.get("results", []) if isinstance(response, dict) else response

print(f"📚 Total memories: {len(memories)}")
# Use a distinct loop name so the Memory instance above is not overwritten.
for item in memories:
    print(f"  • {item['memory']}")

Search Specific Memories

from mem0 import Memory

memory = Memory()

# Ask for at most five memories matching the query for this user
response = memory.search(query="browser preferences", user_id="GigabrainAgent", limit=5)
hits = response['results']

for result in hits:
    print(f"Found: {result['memory']}")

Clear Memories

from mem0 import Memory

# Remove every memory stored for one user
target_user = "GigabrainAgent"

memory = Memory()
memory.delete_all(user_id=target_user)
print("All memories cleared")

Advanced Patterns

Multiple Users

# Each user_id gets its own agent, and therefore its own memories
users = ["alice", "bob", "charlie"]

for user in users:
    browser_task = f"Open browser for {user}"
    with MemoryComputerAgent(user) as agent:
        agent.run(browser_task)

Session-Based Memory

# One user_id per context keeps work and personal memories apart
session_tasks = {
    # Work context
    "work_gigabrain": (
        "I use Chrome for work",
        "Our code is on GitHub Enterprise",
        "Open work browser",
    ),
    # Personal context
    "personal_gigabrain": (
        "I use Firefox for personal browsing",
        "My code is on regular GitHub",
        "Open personal browser",
    ),
}

for session_user, tasks in session_tasks.items():
    with MemoryComputerAgent(session_user) as session:
        for session_task in tasks:
            session.run(session_task)

Error Handling

def safe_run(task, user_id="GigabrainAgent"):
    """Run one task and report the outcome as a dict instead of raising.

    Returns ``{"success": True}`` when the task finishes, otherwise
    ``{"success": False, "error": <message>}``.
    """
    try:
        with MemoryComputerAgent(user_id) as agent:
            agent.run(task)
    except Exception as e:
        print(f"❌ Error: {e}")
        return {"success": False, "error": str(e)}
    # Reached only when the task and the agent cleanup both succeeded
    return {"success": True}


# Usage
outcome = safe_run("Open browser")
if outcome["success"]:
    print("✅ Task completed")

Batch Operations

def setup_workspace(user_id="GigabrainAgent"):
    """Run a fixed sequence of workspace tasks in a single agent session."""
    workspace_tasks = (
        "Open VS Code",
        "Open terminal",
        "Navigate to ~/projects",
        "Start development server",
        "Open browser at localhost:3000",
    )

    with MemoryComputerAgent(user_id) as agent:
        for task in workspace_tasks:
            print(f"⚡ {task}")
            agent.run(task)
            # Two-second pause after every task
            time.sleep(2)


setup_workspace()

Production Example

import os
from datetime import datetime

class ProductionMemoryAgent(MemoryComputerAgent):
    """Production agent with logging.

    Memories are stored under ``prod_<user_id>``. Every task is appended to
    ``logs/<user_id>_<YYYYMMDD>.log`` with a start line and a completion or
    failure line, each stamped with the time it was written.
    """

    def __init__(self, user_id="GigabrainAgent"):
        super().__init__(f"prod_{user_id}")
        self.log_file = f"logs/{user_id}_{datetime.now().strftime('%Y%m%d')}.log"
        # open(..., "a") creates the file but not its parent directory.
        os.makedirs(os.path.dirname(self.log_file), exist_ok=True)

    def _log(self, message):
        """Append one timestamped line to the log file."""
        # Take the time at write, so start and completion lines differ.
        timestamp = datetime.now().strftime("%H:%M:%S")
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(f"[{timestamp}] {message}\n")

    def run(self, task):
        """Run with logging.

        Logs the task, executes it, and logs completion. If execution raises,
        the failure is logged and the exception is re-raised unchanged.
        """
        # Log task
        self._log(f"Task: {task}")

        # Execute
        try:
            super().run(task)
        except Exception as exc:
            self._log(f"Failed: {exc}")
            raise

        # Log completion
        self._log("Completed")

# Usage
with ProductionMemoryAgent("alice") as agent:
    agent.run("Check email")

Tips

  1. Memory Persistence: Memories persist across sessions for each user_id until you delete them (see Clear Memories)
  2. Context Building: The agent automatically adds relevant memories to each task
  3. Error Resilience: Memory operations fail gracefully without breaking execution
  4. Performance: Allow 1-2 seconds between actions for stability

Next Steps