adeelahmad · Teagan42 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 .venv
 venv
+__pycache__/
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.14
diff --git a/README.MD b/README.MD
@@ -34,6 +34,7 @@ MacPilot is a state-of-the-art macOS UI automation framework that combines nativ
 
 ### 🧠 Core Intelligence
 - **GPT Integration** - Natural language instruction processing
+- **Local MLX-LM** - Process instructions entirely on-device on Apple Silicon
 - **Vision Framework** - Advanced UI element detection
 - **State Awareness** - Real-time system state tracking
 - **Pattern Recognition** - Learned UI interaction patterns
@@ -131,6 +132,30 @@ async def handle_login(username: str, password: str):
     ]
 ```
 
+## 🍏 Local Apple Silicon LLMs
+
+You can run MacPilot's planning brain entirely on-device with [MLX-LM](https://github.com/ml-explore/mlx-lm) on Apple Silicon.
+
+1. `pip install "mlx-lm @ git+https://github.com/ml-explore/mlx-lm.git@main"`
+2. Switch the provider:
+   ```bash
+   export LLM_PROVIDER=mlx
+   export MLX_MODEL=mlx-community/Meta-Llama-3-8B-Instruct-4bit
+   export MLX_MAX_OUTPUT_TOKENS=768    # optional
+   export MLX_TEMPERATURE=0.1          # optional
+   ```
+3. Or via YAML config:
+   ```yaml
+   llm_provider: mlx
+   mlx_model: mlx-community/Meta-Llama-3-8B-Instruct-4bit
+   mlx_max_output_tokens: 768
+   mlx_temperature: 0.1
+   ```
+
+MacPilot relies on the bleeding-edge `mlx-lm` tool-call handling, so we install it directly from the `mlx-lm` GitHub repository to stay on the latest commit.
+
+When `llm_provider` is `mlx`, MacPilot loads the configured model once and keeps all inference local; otherwise it uses the configured OpenAI-compatible endpoint.
+
 ## 📋 Todo & Roadmap
 
 ### High Priority
@@ -140,7 +165,7 @@ async def handle_login(username: str, password: str):
   - [ ] Visual workflow builder
 
 - [ ] Core Features
-  - [ ] Local LLM support
+  - [x] Local LLM support
   - [ ] Improved error recovery
   - [ ] Performance optimizations
 

diff --git a/automation_framework/actors/base.py b/automation_framework/actors/base.py
@@ -1,6 +1,6 @@
 from abc import ABC
 from typing import Dict, Any, ClassVar
-from core.metaclasses.actor_meta import ActorStackMeta
+from ..core.metaclasses.actor_meta import ActorStackMeta
 
 
 class ActorStack(ABC, metaclass=ActorStackMeta):
@@ -23,4 +23,3 @@ def execute_action(self, action: str, **kwargs) -> bool:
     def validate_state(self) -> bool:
         """Validate actor stack is in valid state."""
         raise NotImplementedError
-
diff --git a/automation_framework/actors/chrome/browser.py b/automation_framework/actors/chrome/browser.py
@@ -6,7 +6,7 @@
 import json
 from pathlib import Path
 from ..base import ActorStack
-from models.pydantic_models import ValidationResult
+from ...models.pydantic_models import ValidationResult
 
 logger = logging.getLogger(__name__)
 
@@ -278,14 +278,14 @@ async def get_tab_info(self) -> List[Dict[str, str]]:
         result = await self._run_apple_script(script)
         return json.loads(result) if result else []
 
-    async def validate_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
+    async def validate_action_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
         """Validate Chrome-specific state."""
         failures = []
         warnings = []
 
         try:
             # Basic state check
-            if not await self.validate_browser_state():
+            if not self.validate_state():
                 failures.append("Browser not in valid state")
 
             # Check expected state if provided
@@ -297,12 +297,14 @@ async def validate_state(self, current_state: Dict[str, Any], expected_state: Di
 
             return ValidationResult(
                 success=len(failures) == 0,
-                failures=failures,
-                warnings=warnings
+                goal_achieved=len(failures) == 0,
+                issues=failures,
+                recommendations=warnings,
             )
         except Exception as e:
             return ValidationResult(
                 success=False,
-                failures=[f"Validation error: {str(e)}"],
-                warnings=[]
+                goal_achieved=False,
+                issues=[f"Validation error: {str(e)}"],
+                recommendations=[],
             )
diff --git a/automation_framework/actors/finder/filesystem.py b/automation_framework/actors/finder/filesystem.py
@@ -7,7 +7,7 @@
 import subprocess
 import json
 
-from models.pydantic_models import ValidationResult
+from ...models.pydantic_models import ValidationResult
 
 logger = logging.getLogger(__name__)
 
@@ -228,7 +228,7 @@ async def _get_selection(self) -> List[Dict[str, Any]]:
         result = await self._run_apple_script(script)
         return json.loads(result) if result else []
 
-    async def validate_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
+    async def validate_action_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
         """Validate Finder-specific state."""
         failures = []
         warnings = []
@@ -247,12 +247,14 @@ async def validate_state(self, current_state: Dict[str, Any], expected_state: Di
 
             return ValidationResult(
                 success=len(failures) == 0,
-                failures=failures,
-                warnings=warnings
+                goal_achieved=len(failures) == 0,
+                issues=failures,
+                recommendations=warnings,
             )
         except Exception as e:
             return ValidationResult(
                 success=False,
-                failures=[f"Validation error: {str(e)}"],
-                warnings=[]
+                goal_achieved=False,
+                issues=[f"Validation error: {str(e)}"],
+                recommendations=[],
             )
diff --git a/automation_framework/actors/generic/mouse_keyboard.py b/automation_framework/actors/generic/mouse_keyboard.py
@@ -10,7 +10,7 @@
 )
 import AppKit
 from ..base import ActorStack
-from models.pydantic_models import ValidationResult
+from ...models.pydantic_models import ValidationResult
 
 logger = logging.getLogger(__name__)
 
@@ -108,7 +108,11 @@ async def execute_action(self, action: str, **kwargs) -> Any:
             logger.error(f"Error executing action {action}: {e}")
             return False
 
-    async def validate_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
+    def validate_state(self) -> bool:
+        """Basic runtime readiness check used by the execution engine."""
+        return self._event_source is not None
+
+    async def validate_action_state(self, current_state: Dict[str, Any], expected_state: Dict[str, Any]) -> ValidationResult:
         """Validate the actor's state."""
         failures = []
         warnings = []
@@ -120,9 +124,8 @@ async def validate_state(self, current_state: Dict[str, Any], expected_state: Di
                 return ValidationResult(
                     success=False,
                     goal_achieved=False,
-                    failures=failures,
-                    warnings=warnings,
-                    details={"event_source": None}
+                    issues=failures,
+                    recommendations=warnings,
                 )
 
             # Get current states
@@ -160,19 +163,17 @@ async def validate_state(self, current_state: Dict[str, Any], expected_state: Di
             return ValidationResult(
                 success=success,
                 goal_achieved=goal_achieved,
-                failures=failures,
-                warnings=warnings,
-                details=details
+                issues=failures,
+                recommendations=warnings,
             )
 
         except Exception as e:
             logger.error(f"Validation error: {e}")
             return ValidationResult(
                 success=False,
                 goal_achieved=False,
-                failures=[f"Validation error: {str(e)}"],
-                warnings=warnings,
-                details={}
+                issues=[f"Validation error: {str(e)}"],
+                recommendations=warnings,
             )
 
     async def _click(self, x: int, y: int) -> bool:
@@ -375,6 +376,8 @@ async def _find_window(self, name: str) -> Optional[Dict[str, Any]]:
     async def _get_screen_bounds(self) -> Tuple[float, float, float, float]:
         """Get main screen bounds."""
         screen = NSScreen.mainScreen()
+        if screen is None:
+            return (0.0, 0.0, 0.0, 0.0)
 
         frame = screen.frame()
         return (frame.origin.x, frame.origin.y, frame.size.width, frame.size.height)

diff --git a/automation_framework/core/config.py b/automation_framework/core/config.py
@@ -17,6 +17,8 @@
 class AutomationConfig:
     """Configuration settings for automation framework."""
     openai_api_key: str = "asdasd"
+    openai_base_url: str = "http://0.0.0.0:8080/v1"
+    llm_provider: str = "openai"
     max_retries: int = 3
     timeout: int = 30
     screenshot_dir: Path = Path("/var/lib/automation/screenshots")
@@ -27,13 +29,18 @@ class AutomationConfig:
     models_dir: Optional[Path] = Path("./models")
     openai_key: str = "asdasd"
     BASE_URL: str = "http://0.0.0.0:8080/v1"
+    mlx_model: str = "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
+    mlx_max_output_tokens: int = 512
+    mlx_temperature: float = 0.2
 
     @classmethod
     def from_env(cls) -> "AutomationConfig":
         """Create config from environment variables."""
         try:
             return cls(
                 openai_api_key=os.getenv("OPENAI_API_KEY", "asdas"),
+                openai_base_url=os.getenv("OPENAI_BASE_URL", "http://0.0.0.0:8080/v1"),
+                llm_provider=os.getenv("LLM_PROVIDER", "openai"),
                 max_retries=int(os.getenv("MAX_RETRIES", "3")),
                 timeout=int(os.getenv("TIMEOUT", "30")),
                 screenshot_dir=Path(os.getenv("SCREENSHOT_DIR", "/var/lib/automation/screenshots")),
@@ -42,6 +49,9 @@ def from_env(cls) -> "AutomationConfig":
                 max_concurrent_actions=int(os.getenv("MAX_CONCURRENT_ACTIONS", "5")),
                 database_url=os.getenv("DATABASE_URL", "sqlite:///./automation.db"),
                 models_dir=Path(os.getenv("MODELS_DIR", "./models")),
+                mlx_model=os.getenv("MLX_MODEL", "mlx-community/Meta-Llama-3-8B-Instruct-4bit"),
+                mlx_max_output_tokens=int(os.getenv("MLX_MAX_OUTPUT_TOKENS", "512")),
+                mlx_temperature=float(os.getenv("MLX_TEMPERATURE", "0.2")),
             )
         except (ValueError, TypeError) as e:
             logger.error(f"Invalid environment variable: {e}")
@@ -59,6 +69,8 @@ def from_yaml(cls, path: Path) -> "AutomationConfig":
 
             return cls(
                 openai_api_key=data.get("openai_api_key"),
+                openai_base_url=data.get("openai_base_url", "http://0.0.0.0:8080/v1"),
+                llm_provider=data.get("llm_provider", "openai"),
                 max_retries=data.get("max_retries", 3),
                 timeout=data.get("timeout", 30),
                 screenshot_dir=Path(data.get("screenshot_dir", "/var/lib/automation/screenshots")),
@@ -67,6 +79,9 @@ def from_yaml(cls, path: Path) -> "AutomationConfig":
                 max_concurrent_actions=data.get("max_concurrent_actions", 5),
                 database_url=data.get("database_url", "sqlite:///./automation.db"),
                 models_dir=Path(data.get("models_dir", "./models")),
+                mlx_model=data.get("mlx_model", "mlx-community/Meta-Llama-3-8B-Instruct-4bit"),
+                mlx_max_output_tokens=data.get("mlx_max_output_tokens", 512),
+                mlx_temperature=data.get("mlx_temperature", 0.2),
             )
         except (ValueError, TypeError, AttributeError) as e:
             logger.error(f"Invalid config file format: {e}")

diff --git a/automation_framework/core/metaclasses/actor_meta.py b/automation_framework/core/metaclasses/actor_meta.py
@@ -20,8 +20,13 @@ def __new__(mcs, name, bases, namespace):
         if name == 'ActorStack':
             return cls
 
-        # Register concrete actor stack
+        # Register concrete actor stack by both class name and actor logical name.
+        # Runtime code resolves actors by logical name ("chrome", "finder", etc.),
+        # while some tests assert class-name registration.
         mcs._registry[name] = cls
+        actor_name = getattr(cls, "name", None)
+        if isinstance(actor_name, str) and actor_name:
+            mcs._registry[actor_name] = cls
 
         # Validate required methods
         required_methods = {'get_capabilities', 'execute_action', 'validate_state'}

diff --git a/automation_framework/main.py b/automation_framework/main.py
@@ -7,17 +7,17 @@
 from rich.console import Console
 from rich.logging import RichHandler
 
-from services.state.manager import StateManager
-from core.metaclasses.actor_meta import ActorStackMeta
-from actors.generic.mouse_keyboard import GenericActorStack
-from actors.chrome.browser import ChromeActorStack
-from actors.finder.filesystem import FinderActorStack
-from services.instruction.processor import InstructionProcessor
-from services.orchestration.orchestrator import Orchestrator
-from core.config import AutomationConfig
-from services.macos_ui_service.macos_ui_service import MacOSUIService
+from .services.state.manager import StateManager
+from .core.metaclasses.actor_meta import ActorStackMeta
+from .actors.generic.mouse_keyboard import GenericActorStack
+from .actors.chrome.browser import ChromeActorStack
+from .actors.finder.filesystem import FinderActorStack
+from .services.instruction.processor import InstructionProcessor
+from .services.orchestration.orchestrator import Orchestrator
+from .core.config import AutomationConfig
+from .services.macos_ui_service.macos_ui_service import MacOSUIService
 import rich
-from utils.decorators import log_execution
+from .utils.decorators import log_execution
 
 # Setup logging
 logging.basicConfig(