Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.venv
venv
__pycache__/
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.14
27 changes: 26 additions & 1 deletion README.MD
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ MacPilot is a state-of-the-art macOS UI automation framework that combines nativ

### 🧠 Core Intelligence
- **GPT Integration** - Natural language instruction processing
- **Local MLX-LM** - Run instructions entirely on-device with Apple Silicon
- **Vision Framework** - Advanced UI element detection
- **State Awareness** - Real-time system state tracking
- **Pattern Recognition** - Learned UI interaction patterns
Expand Down Expand Up @@ -131,6 +132,30 @@ async def handle_login(username: str, password: str):
]
```

## 🍏 Local Apple Silicon LLMs

You can run MacPilot's planning brain entirely on-device with [MLX-LM](https://github.com/ml-explore/mlx-lm) on Apple Silicon.

1. `pip install "mlx-lm @ git+https://github.com/ml-explore/mlx-lm.git@main"`
2. Switch the provider:
```bash
export LLM_PROVIDER=mlx
export MLX_MODEL=mlx-community/Meta-Llama-3-8B-Instruct-4bit
export MLX_MAX_OUTPUT_TOKENS=768 # optional (default: 512)
export MLX_TEMPERATURE=0.1 # optional (default: 0.2)
```
3. Or via YAML config:
```yaml
llm_provider: mlx
mlx_model: mlx-community/Meta-Llama-3-8B-Instruct-4bit
mlx_max_output_tokens: 768
mlx_temperature: 0.1
```

MacPilot relies on the bleeding-edge tool-call handling in `mlx-lm`, so step 1 installs it directly from the `main` branch of the `mlx-lm` GitHub repository to track the latest commit.

When `llm_provider` is `mlx`, MacPilot loads the configured model once and keeps all inference local; otherwise it uses the configured OpenAI-compatible endpoint.

## 📋 Todo & Roadmap

### High Priority
Expand All @@ -140,7 +165,7 @@ async def handle_login(username: str, password: str):
- [ ] Visual workflow builder

- [ ] Core Features
- [ ] Local LLM support
- [x] Local LLM support
- [ ] Improved error recovery
- [ ] Performance optimizations

Expand Down
3 changes: 1 addition & 2 deletions automation_framework/actors/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABC
from typing import Dict, Any, ClassVar
from core.metaclasses.actor_meta import ActorStackMeta
from ..core.metaclasses.actor_meta import ActorStackMeta


class ActorStack(ABC, metaclass=ActorStackMeta):
Expand All @@ -23,4 +23,3 @@ def execute_action(self, action: str, **kwargs) -> bool:
def validate_state(self) -> bool:
"""Validate actor stack is in valid state."""
raise NotImplementedError

16 changes: 9 additions & 7 deletions automation_framework/actors/chrome/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import json
from pathlib import Path
from ..base import ActorStack
from models.pydantic_models import ValidationResult
from ...models.pydantic_models import ValidationResult

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -278,14 +278,14 @@ async def get_tab_info(self) -> List[Dict[str, str]]:
result = await self._run_apple_script(script)
return json.loads(result) if result else []

async def validate_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
async def validate_action_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
"""Validate Chrome-specific state."""
failures = []
warnings = []

try:
# Basic state check
if not await self.validate_browser_state():
if not self.validate_state():
failures.append("Browser not in valid state")

# Check expected state if provided
Expand All @@ -297,12 +297,14 @@ async def validate_state(self, current_state: Dict[str, Any], expected_state: Di

return ValidationResult(
success=len(failures) == 0,
failures=failures,
warnings=warnings
goal_achieved=len(failures) == 0,
issues=failures,
recommendations=warnings,
)
except Exception as e:
return ValidationResult(
success=False,
failures=[f"Validation error: {str(e)}"],
warnings=[]
goal_achieved=False,
issues=[f"Validation error: {str(e)}"],
recommendations=[],
)
14 changes: 8 additions & 6 deletions automation_framework/actors/finder/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import subprocess
import json

from models.pydantic_models import ValidationResult
from ...models.pydantic_models import ValidationResult

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -228,7 +228,7 @@ async def _get_selection(self) -> List[Dict[str, Any]]:
result = await self._run_apple_script(script)
return json.loads(result) if result else []

async def validate_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
async def validate_action_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
"""Validate Finder-specific state."""
failures = []
warnings = []
Expand All @@ -247,12 +247,14 @@ async def validate_state(self, current_state: Dict[str, Any], expected_state: Di

return ValidationResult(
success=len(failures) == 0,
failures=failures,
warnings=warnings
goal_achieved=len(failures) == 0,
issues=failures,
recommendations=warnings,
)
except Exception as e:
return ValidationResult(
success=False,
failures=[f"Validation error: {str(e)}"],
warnings=[]
goal_achieved=False,
issues=[f"Validation error: {str(e)}"],
recommendations=[],
)
25 changes: 14 additions & 11 deletions automation_framework/actors/generic/mouse_keyboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
)
import AppKit
from ..base import ActorStack
from models.pydantic_models import ValidationResult
from ...models.pydantic_models import ValidationResult

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -108,7 +108,11 @@ async def execute_action(self, action: str, **kwargs) -> Any:
logger.error(f"Error executing action {action}: {e}")
return False

async def validate_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
def validate_state(self) -> bool:
"""Basic runtime readiness check used by the execution engine."""
return self._event_source is not None

async def validate_action_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
"""Validate the actor's state."""
failures = []
warnings = []
Expand All @@ -120,9 +124,8 @@ async def validate_state(self, current_state: Dict[str, Any], expected_state: Di
return ValidationResult(
success=False,
goal_achieved=False,
failures=failures,
warnings=warnings,
details={"event_source": None}
issues=failures,
recommendations=warnings,
)

# Get current states
Expand Down Expand Up @@ -160,19 +163,17 @@ async def validate_state(self, current_state: Dict[str, Any], expected_state: Di
return ValidationResult(
success=success,
goal_achieved=goal_achieved,
failures=failures,
warnings=warnings,
details=details
issues=failures,
recommendations=warnings,
)

except Exception as e:
logger.error(f"Validation error: {e}")
return ValidationResult(
success=False,
goal_achieved=False,
failures=[f"Validation error: {str(e)}"],
warnings=warnings,
details={}
issues=[f"Validation error: {str(e)}"],
recommendations=warnings,
)

async def _click(self, x: int, y: int) -> bool:
Expand Down Expand Up @@ -375,6 +376,8 @@ async def _find_window(self, name: str) -> Optional[Dict[str, Any]]:
async def _get_screen_bounds(self) -> Tuple[float, float, float, float]:
"""Get main screen bounds."""
screen = NSScreen.mainScreen()
if screen is None:
return (0.0, 0.0, 0.0, 0.0)

frame = screen.frame()
return (frame.origin.x, frame.origin.y, frame.size.width, frame.size.height)
Expand Down
15 changes: 15 additions & 0 deletions automation_framework/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
class AutomationConfig:
"""Configuration settings for automation framework."""
openai_api_key: str = "asdasd"
openai_base_url: str = "http://0.0.0.0:8080/v1"
llm_provider: str = "openai"
max_retries: int = 3
timeout: int = 30
screenshot_dir: Path = Path("/var/lib/automation/screenshots")
Expand All @@ -27,13 +29,18 @@ class AutomationConfig:
models_dir: Optional[Path] = Path("./models")
openai_key: str = "asdasd"
BASE_URL: str = "http://0.0.0.0:8080/v1"
mlx_model: str = "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
mlx_max_output_tokens: int = 512
mlx_temperature: float = 0.2

@classmethod
def from_env(cls) -> "AutomationConfig":
"""Create config from environment variables."""
try:
return cls(
openai_api_key=os.getenv("OPENAI_API_KEY", "asdas"),
openai_base_url=os.getenv("OPENAI_BASE_URL", "http://0.0.0.0:8080/v1"),
llm_provider=os.getenv("LLM_PROVIDER", "openai"),
max_retries=int(os.getenv("MAX_RETRIES", "3")),
timeout=int(os.getenv("TIMEOUT", "30")),
screenshot_dir=Path(os.getenv("SCREENSHOT_DIR", "/var/lib/automation/screenshots")),
Expand All @@ -42,6 +49,9 @@ def from_env(cls) -> "AutomationConfig":
max_concurrent_actions=int(os.getenv("MAX_CONCURRENT_ACTIONS", "5")),
database_url=os.getenv("DATABASE_URL", "sqlite:///./automation.db"),
models_dir=Path(os.getenv("MODELS_DIR", "./models")),
mlx_model=os.getenv("MLX_MODEL", "mlx-community/Meta-Llama-3-8B-Instruct-4bit"),
mlx_max_output_tokens=int(os.getenv("MLX_MAX_OUTPUT_TOKENS", "512")),
mlx_temperature=float(os.getenv("MLX_TEMPERATURE", "0.2")),
)
except (ValueError, TypeError) as e:
logger.error(f"Invalid environment variable: {e}")
Expand All @@ -59,6 +69,8 @@ def from_yaml(cls, path: Path) -> "AutomationConfig":

return cls(
openai_api_key=data.get("openai_api_key"),
openai_base_url=data.get("openai_base_url", "http://0.0.0.0:8080/v1"),
llm_provider=data.get("llm_provider", "openai"),
max_retries=data.get("max_retries", 3),
timeout=data.get("timeout", 30),
screenshot_dir=Path(data.get("screenshot_dir", "/var/lib/automation/screenshots")),
Expand All @@ -67,6 +79,9 @@ def from_yaml(cls, path: Path) -> "AutomationConfig":
max_concurrent_actions=data.get("max_concurrent_actions", 5),
database_url=data.get("database_url", "sqlite:///./automation.db"),
models_dir=Path(data.get("models_dir", "./models")),
mlx_model=data.get("mlx_model", "mlx-community/Meta-Llama-3-8B-Instruct-4bit"),
mlx_max_output_tokens=data.get("mlx_max_output_tokens", 512),
mlx_temperature=data.get("mlx_temperature", 0.2),
)
except (ValueError, TypeError, AttributeError) as e:
logger.error(f"Invalid config file format: {e}")
Expand Down
7 changes: 6 additions & 1 deletion automation_framework/core/metaclasses/actor_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,13 @@ def __new__(mcs, name, bases, namespace):
if name == 'ActorStack':
return cls

# Register concrete actor stack
# Register concrete actor stack by both class name and actor logical name.
# Runtime code resolves actors by logical name ("chrome", "finder", etc.),
# while some tests assert class-name registration.
mcs._registry[name] = cls
actor_name = getattr(cls, "name", None)
if isinstance(actor_name, str) and actor_name:
mcs._registry[actor_name] = cls

# Validate required methods
required_methods = {'get_capabilities', 'execute_action', 'validate_state'}
Expand Down
20 changes: 10 additions & 10 deletions automation_framework/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@
from rich.console import Console
from rich.logging import RichHandler

from services.state.manager import StateManager
from core.metaclasses.actor_meta import ActorStackMeta
from actors.generic.mouse_keyboard import GenericActorStack
from actors.chrome.browser import ChromeActorStack
from actors.finder.filesystem import FinderActorStack
from services.instruction.processor import InstructionProcessor
from services.orchestration.orchestrator import Orchestrator
from core.config import AutomationConfig
from services.macos_ui_service.macos_ui_service import MacOSUIService
from .services.state.manager import StateManager
from .core.metaclasses.actor_meta import ActorStackMeta
from .actors.generic.mouse_keyboard import GenericActorStack
from .actors.chrome.browser import ChromeActorStack
from .actors.finder.filesystem import FinderActorStack
from .services.instruction.processor import InstructionProcessor
from .services.orchestration.orchestrator import Orchestrator
from .core.config import AutomationConfig
from .services.macos_ui_service.macos_ui_service import MacOSUIService
import rich
from utils.decorators import log_execution
from .utils.decorators import log_execution

# Setup logging
logging.basicConfig(
Expand Down
Loading