Build AI agents that remember user preferences with OpenAI Computer Use
Install dependencies
pip install mem0ai orgo openai python-dotenv
Set up environment
Create a .env file with your API keys:
ORGO_API_KEY=your_orgo_api_key
OPENAI_API_KEY=your_openai_api_key # Used by both OpenAI and Mem0
Copy and run
Create memory_agent.py with the following code and run it:
import time
import base64
from mem0 import Memory
from orgo import Computer
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
class MemoryComputerAgent:
"""OpenAI Computer Use agent with memory."""
def __init__(self, user_id="GigabrainAgent"):
self.user_id = user_id
self.memory = Memory()
self.client = OpenAI()
self.computer = None
def __enter__(self):
self.computer = Computer()
print(f"🖥️ Computer ID: {self.computer.project_id}")
return self
def __exit__(self, *args):
if self.computer:
self.computer.destroy()
def run(self, task):
"""Execute task with memory context."""
# Get memories
memories = self._get_relevant_memories(task)
# Build task with memory context
enhanced_task = self._build_task_with_memory(task, memories)
# Execute using OpenAI Computer Use
self._execute_computer_task(enhanced_task)
# Store interaction
self._store_memory(task)
def _get_relevant_memories(self, task):
"""Search for relevant memories."""
try:
results = self.memory.search(
query=task,
user_id=self.user_id,
limit=5
)
return [m['memory'] for m in results.get('results', [])]
except:
return []
def _build_task_with_memory(self, task, memories):
"""Enhance task with memory context."""
if not memories:
return task
context = "\n".join(f"- {m}" for m in memories)
return f"""Remember these user preferences:
{context}
Current task: {task}"""
def _execute_computer_task(self, task):
"""Execute task using OpenAI Computer Use."""
response = self.client.responses.create(
model="computer-use-preview",
tools=[{
"type": "computer_use_preview",
"display_width": 1024,
"display_height": 768,
"environment": "linux"
}],
input=[{
"role": "user",
"content": [{
"type": "input_text",
"text": f"""IMPORTANT: You are controlling a Linux desktop.
- Always double-click desktop icons to open applications
- Use keyboard shortcuts as single commands (e.g., 'ctrl+c' not separate keys)
Task: {task}"""
}]
}],
reasoning={"summary": "concise"},
truncation="auto"
)
# Execute actions in loop
while True:
# Display progress
for item in response.output:
if item.type == "reasoning" and hasattr(item, "summary"):
for summary in item.summary:
if hasattr(summary, "text"):
print(f"💭 {summary.text}")
elif item.type == "text" and hasattr(item, "text"):
print(f"💬 {item.text}")
# Get computer actions
actions = [item for item in response.output if item.type == "computer_call"]
if not actions:
print("✓ Task completed")
break
# Execute action
action = actions[0]
print(f"→ {action.action.type}")
self._execute_action(action.action)
time.sleep(1)
# Get screenshot and continue
screenshot = self.computer.screenshot_base64()
response = self.client.responses.create(
model="computer-use-preview",
previous_response_id=response.id,
tools=[{
"type": "computer_use_preview",
"display_width": 1024,
"display_height": 768,
"environment": "linux"
}],
input=[{
"call_id": action.call_id,
"type": "computer_call_output",
"output": {
"type": "input_image",
"image_url": f"data:image/png;base64,{screenshot}"
}
}],
reasoning={"summary": "concise"},
truncation="auto"
)
def _execute_action(self, action):
"""Execute computer action."""
match action.type:
case "click":
if getattr(action, 'button', 'left') == "right":
self.computer.right_click(action.x, action.y)
else:
self.computer.left_click(action.x, action.y)
case "double_click":
self.computer.double_click(action.x, action.y)
case "type":
self.computer.type(action.text)
case "key" | "keypress":
keys = getattr(action, 'keys', [getattr(action, 'key', [])])
if len(keys) > 1:
self.computer.key('+'.join(keys).lower())
else:
for key in keys:
self.computer.key(key)
case "scroll":
scroll_y = getattr(action, 'scroll_y', 0)
direction = "down" if scroll_y > 0 else "up"
self.computer.scroll(direction, abs(scroll_y) // 100)
case "wait":
self.computer.wait(getattr(action, 'seconds', 2))
case "screenshot":
pass
def _store_memory(self, task):
"""Store interaction in memory."""
try:
self.memory.add(
messages=[
{"role": "user", "content": task},
{"role": "assistant", "content": "Task executed successfully"}
],
user_id=self.user_id
)
except:
pass
# Demo entry point: teach a few preferences, then run a task that needs them.
if __name__ == "__main__":
    preference_statements = (
        "I prefer Firefox over Chrome",
        "I like dark themes",
        "Spencer Kinney is the stinkiest person I know",
    )
    with MemoryComputerAgent("GigabrainAgent") as agent:
        # Each statement is run as its own task so it ends up in memory.
        print("📝 Teaching preferences...")
        for statement in preference_statements:
            agent.run(statement)

        # This task can only be resolved with the memories stored above.
        print("\n🚀 Running task with memory...")
        agent.run("Open my current preferred browser and search for the stinkiest person I know")
MemoryComputerAgent combines OpenAI’s Computer Use API with Mem0’s memory system:
# Quick example: run one task inside a managed computer session
with MemoryComputerAgent("GigabrainAgent") as agent:
    quick_task = "Open Firefox and go to GitHub"
    agent.run(quick_task)
# Teach "alice" her preferences, then run a task that depends on them
with MemoryComputerAgent("alice") as agent:
    for preference in (
        "I prefer VS Code for coding",
        "I use dark themes everywhere",
        "My GitHub username is alice123",
    ):
        agent.run(preference)

    # Later, it remembers
    agent.run("Open my code editor and check my GitHub")
def morning_setup(user_id="GigabrainAgent"):
    """Automated morning workflow.

    Runs three statements describing the routine (so they are stored in
    memory for ``user_id``) and then asks the agent to perform the routine.
    """
    routine_statements = (
        "I check Gmail first thing in the morning",
        "Then I open Slack",
        "Finally I check my calendar",
    )
    with MemoryComputerAgent(user_id) as agent:
        # First time - teach routine
        for statement in routine_statements:
            agent.run(statement)

        # Next day - just ask
        agent.run("Do my morning routine")


# Run daily
morning_setup()
# Without context manager: the caller creates and destroys the computer
agent = MemoryComputerAgent("bob")
agent.computer = Computer()
try:
    # Teach preferences first ...
    for statement in (
        "My favorite news site is Hacker News",
        "I use DuckDuckGo for searching",
    ):
        agent.run(statement)

    # ... then run a task that relies on them
    agent.run("Open my favorite news site")
finally:
    # Always release the computer, even if a task raised.
    agent.computer.destroy()
from mem0 import Memory

# List every memory stored for one user.
memory = Memory()
response = memory.get_all(user_id="GigabrainAgent")

# The search example reads its hits from a {"results": [...]} envelope;
# accept the same shape here (and a bare list) instead of iterating the
# response directly, which would walk dict keys rather than memories.
memories = response.get("results", []) if isinstance(response, dict) else response

print(f"📚 Total memories: {len(memories)}")
# A separate loop name keeps the ``memory`` store from being overwritten.
for entry in memories:
    print(f" • {entry['memory']}")
from mem0 import Memory

# Look up at most five memories matching a query for one user.
memory = Memory()
search_options = {
    "query": "browser preferences",
    "user_id": "GigabrainAgent",
    "limit": 5,
}
results = memory.search(**search_options)

# Hits are read from the "results" entry of the response.
for hit in results['results']:
    print(f"Found: {hit['memory']}")
from mem0 import Memory

# Remove every memory stored for this user, then confirm on stdout.
Memory().delete_all(user_id="GigabrainAgent")
print("All memories cleared")
# Different users maintain separate memories: each gets an agent keyed by
# their own user id.
users = ["alice", "bob", "charlie"]
for user in users:
    browser_task = f"Open browser for {user}"
    with MemoryComputerAgent(user) as agent:
        agent.run(browser_task)
# Work context: preferences are stored under the "work_gigabrain" user id
with MemoryComputerAgent("work_gigabrain") as work:
    for statement in (
        "I use Chrome for work",
        "Our code is on GitHub Enterprise",
    ):
        work.run(statement)
    work.run("Open work browser")

# Personal context: a different user id keeps these memories separate
with MemoryComputerAgent("personal_gigabrain") as personal:
    for statement in (
        "I use Firefox for personal browsing",
        "My code is on regular GitHub",
    ):
        personal.run(statement)
    personal.run("Open personal browser")
def safe_run(task, user_id="GigabrainAgent"):
    """Execute with error handling.

    Runs ``task`` in a fresh agent session. Returns ``{"success": True}``
    when the session finishes, or ``{"success": False, "error": <message>}``
    after printing the error when anything in the session raises.
    """
    try:
        with MemoryComputerAgent(user_id) as agent:
            agent.run(task)
    except Exception as err:
        print(f"❌ Error: {err}")
        return {"success": False, "error": str(err)}
    else:
        return {"success": True}


# Usage
result = safe_run("Open browser")
if result["success"]:
    print("✅ Task completed")
def setup_workspace(user_id="GigabrainAgent"):
    """Set up complete workspace.

    Runs a fixed sequence of tasks in one agent session, announcing each
    task before it starts and pausing two seconds after each one.
    """
    workspace_steps = (
        "Open VS Code",
        "Open terminal",
        "Navigate to ~/projects",
        "Start development server",
        "Open browser at localhost:3000",
    )
    pause_seconds = 2  # Pause between tasks

    with MemoryComputerAgent(user_id) as agent:
        for step in workspace_steps:
            print(f"⚡ {step}")
            agent.run(step)
            time.sleep(pause_seconds)


setup_workspace()
import os
from datetime import datetime
class ProductionMemoryAgent(MemoryComputerAgent):
    """Production agent with logging.

    Memories are stored under the user id ``prod_<user_id>``. The start and
    outcome of every task are appended to ``logs/<user_id>_<YYYYMMDD>.log``,
    where the date is taken when the agent is constructed.
    """

    def __init__(self, user_id="GigabrainAgent"):
        """Initialise the base agent and choose the log file path."""
        super().__init__(f"prod_{user_id}")
        self.log_file = f"logs/{user_id}_{datetime.now().strftime('%Y%m%d')}.log"

    def _log(self, message):
        """Append ``message`` to the log file, stamped with the current time."""
        # Opening in append mode does not create missing directories.
        log_dir = os.path.dirname(self.log_file)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        timestamp = datetime.now().strftime("%H:%M:%S")
        # Explicit UTF-8 so task text with non-ASCII characters can be written
        # regardless of the platform's default encoding.
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(f"[{timestamp}] {message}\n")

    def run(self, task):
        """Run with logging.

        Logs the task, executes it, then logs completion with the time at
        which it actually finished. A failing task is logged and the
        exception is re-raised unchanged.
        """
        self._log(f"Task: {task}")
        try:
            super().run(task)
        except Exception as exc:
            self._log(f"Failed: {exc}")
            raise
        self._log("Completed")


# Usage
with ProductionMemoryAgent("alice") as agent:
    agent.run("Check email")