add Lua sandbox with timer system (milestones 1-5 complete)

2026-01-18 14:28:44 +01:00
parent 2c36ac005d
commit a4ecb0f132
36 changed files with 10884 additions and 0 deletions
--- a/sandbox-test/CMakeLists.txt
+++ b/sandbox-test/CMakeLists.txt
@@ -0,0 +1,54 @@
+cmake_minimum_required(VERSION 3.22.1)
+project(sandbox-test)
+
+set(CMAKE_CXX_STANDARD 23)  # default C++ standard for the targets defined below
+set(CMAKE_CXX_STANDARD_REQUIRED ON)  # do not silently fall back to an older standard
+
+# Find dependencies via vcpkg: Lua (consumed through LUA_INCLUDE_DIR / LUA_LIBRARIES) and nlohmann_json (imported target)
+find_package(Lua REQUIRED)
+find_package(nlohmann_json CONFIG REQUIRED)
+
+# Static library holding the sandbox sources that the tests exercise
+add_library(mosis-sandbox STATIC
+    "${CMAKE_CURRENT_SOURCE_DIR}/../src/main/cpp/sandbox/lua_sandbox.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../src/main/cpp/sandbox/permission_gate.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../src/main/cpp/sandbox/audit_log.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../src/main/cpp/sandbox/rate_limiter.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../src/main/cpp/sandbox/path_sandbox.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../src/main/cpp/sandbox/timer_manager.cpp"
+)
+target_include_directories(mosis-sandbox PUBLIC
+    "${CMAKE_CURRENT_SOURCE_DIR}/../src/main/cpp/sandbox"
+    ${LUA_INCLUDE_DIR}
+)
+target_link_libraries(mosis-sandbox PUBLIC
+    ${LUA_LIBRARIES}
+)
+
+# Test runner executable: main.cpp plus the test harness
+add_executable(sandbox-test src/main.cpp src/test_harness.cpp)
+
+# Libraries: the sandbox under test and the JSON library
+target_link_libraries(sandbox-test
+    PRIVATE
+        mosis-sandbox
+        nlohmann_json::nlohmann_json
+)
+
+target_include_directories(sandbox-test
+    PRIVATE
+        src
+        ../src/main/cpp/sandbox
+)
+
+# Copy test scripts next to the built executable (post-build step of sandbox-test)
+add_custom_command(TARGET sandbox-test POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_directory
+        "${CMAKE_CURRENT_SOURCE_DIR}/scripts" "$<TARGET_FILE_DIR:sandbox-test>/scripts"
+    VERBATIM  # platform-independent argument escaping, so paths with spaces are passed intact
+)
+
+# Windows-specific: define _CRT_SECURE_NO_WARNINGS for the test executable when WIN32 is set
+target_compile_definitions(sandbox-test PRIVATE
+    $<$<BOOL:${WIN32}>:_CRT_SECURE_NO_WARNINGS>
+)
--- a/sandbox-test/README.md
+++ b/sandbox-test/README.md
@@ -0,0 +1,132 @@
+# Sandbox Security Tests
+
+Automated tests for the Mosis Lua sandbox security implementation.
+
+## Prerequisites
+
+- CMake 3.22.1 or newer (the minimum declared in `CMakeLists.txt`)
+- vcpkg with packages: `lua`, `nlohmann-json`
+- MSVC or compatible C++23 compiler
+
+## Build
+
+```bat
+REM From the sandbox-test directory (adjust the path to your own checkout)
+cd D:\Dev\Mosis\MosisService\sandbox-test
+
+REM Configure with vcpkg
+cmake -B build -DCMAKE_TOOLCHAIN_FILE=%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake
+
+REM Build
+cmake --build build --config Debug
+```
+
+## Run Tests
+
+### Run All Tests (Uber Command)
+
+```bash
+# Windows
+.\run_tests.bat
+
+# Or directly
+.\build\Debug\sandbox-test.exe
+```
+
+### Run Specific Test
+
+```bash
+.\build\Debug\sandbox-test.exe --test DangerousGlobals
+.\build\Debug\sandbox-test.exe --test Memory
+.\build\Debug\sandbox-test.exe --test CPU
+```
+
+### Custom Output File
+
+```bash
+.\build\Debug\sandbox-test.exe --output my_results.json
+```
+
+## Test List
+
+| Test Name | Description | Script |
+|-----------|-------------|--------|
+| `DangerousGlobalsRemoved` | Verifies os, io, debug, etc. are nil | `test_globals_removed.lua` |
+| `BytecodeRejected` | Verifies binary Lua chunks are rejected | (C++ only) |
+| `MemoryLimitEnforced` | Verifies memory allocation limit works | `test_memory_limit.lua` |
+| `CPULimitEnforced` | Verifies instruction count limit works | `test_cpu_limit.lua` |
+| `MetatableProtected` | Verifies _G and string metatable are frozen | `test_metatable_protected.lua` |
+| `SafeOperationsWork` | Verifies normal Lua operations still work | `test_safe_operations.lua` |
+| `StringDumpRemoved` | Verifies string.dump is nil | `test_string_dump_removed.lua` |
+| `MemoryTracking` | Verifies memory usage is tracked | (C++ only) |
+| `InstructionCounting` | Verifies instruction count is tracked | (C++ only) |
+| `MultipleLoads` | Verifies multiple scripts can be loaded | (C++ only) |
+| `ErrorRecovery` | Verifies sandbox recovers from errors | (C++ only) |
+
+## Output Format
+
+Tests produce a JSON report at `test_results.json`:
+
+```json
+{
+  "name": "Lua Sandbox Security Tests",
+  "timestamp": "2024-01-15T10:30:00Z",
+  "summary": {
+    "passed": 11,
+    "failed": 0,
+    "total": 11
+  },
+  "tests": [
+    {
+      "name": "DangerousGlobalsRemoved",
+      "status": "passed",
+      "duration_ms": 5
+    }
+  ]
+}
+```
+
+## Exit Codes
+
+- `0` - All tests passed
+- `1` - One or more tests failed
+
+## Adding New Tests
+
+1. Create Lua script in `scripts/` directory
+2. Add C++ test function in `main.cpp`:
+   ```cpp
+   bool Test_MyNewTest(std::string& error_msg) {
+       LuaSandbox sandbox(TestContext());
+       // ... test logic
+       return true;
+   }
+   ```
+3. Register in `main()`:
+   ```cpp
+   harness.AddTest("MyNewTest", Test_MyNewTest);
+   ```
+
+## Debugging Failed Tests
+
+1. Run specific test: `--test TestName`
+2. Check Lua script in `scripts/` for expected behavior
+3. Check `test_results.json` for error details
+4. Add print statements to Lua scripts (output goes to console)
+
+## CI Integration
+
+```bash
+# In CI script
+cd sandbox-test
+cmake -B build -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake
+cmake --build build --config Release
+./build/Release/sandbox-test.exe --output ci_results.json
+
+# Check exit code
+if [ $? -ne 0 ]; then
+    echo "Sandbox tests failed!"
+    cat ci_results.json
+    exit 1
+fi
+```
--- a/sandbox-test/run_tests.bat
+++ b/sandbox-test/run_tests.bat
@@ -0,0 +1,59 @@
+@echo off
+REM Work from this script's directory so the relative build\ paths resolve from any caller location
+setlocal & pushd "%~dp0"
+echo ========================================
+echo    MOSIS SANDBOX TEST RUNNER
+echo ========================================
+echo.
+
+REM Build first when the Debug test executable is missing
+if not exist "build\Debug\sandbox-test.exe" (
+    echo Build not found. Building...
+    echo.
+
+    REM Check VCPKG_ROOT - the toolchain file is located through it
+    if "%VCPKG_ROOT%"=="" (
+        echo ERROR: VCPKG_ROOT environment variable not set
+        popd & exit /b 1
+    )
+
+    REM Configure - the toolchain argument is quoted so a VCPKG_ROOT with spaces stays one argument
+    echo Configuring CMake...
+    cmake -B build "-DCMAKE_TOOLCHAIN_FILE=%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake"
+    if errorlevel 1 (
+        echo ERROR: CMake configure failed
+        popd & exit /b 1
+    )
+
+    REM Build
+    echo Building...
+    cmake --build build --config Debug
+    if errorlevel 1 (
+        echo ERROR: Build failed
+        popd & exit /b 1
+    )
+    echo.
+)
+
+REM Run tests from the executable's directory, forwarding all arguments
+echo Running tests...
+echo.
+
+pushd build\Debug
+sandbox-test.exe %*
+set TEST_RESULT=%errorlevel%
+
+popd
+
+echo.
+if %TEST_RESULT% equ 0 (
+    echo ========================================
+    echo    ALL TESTS PASSED
+    echo ========================================
+) else (
+    echo ========================================
+    echo    SOME TESTS FAILED
+    echo ========================================
+)
+
+popd & exit /b %TEST_RESULT%
--- a/sandbox-test/run_tests.ps1
+++ b/sandbox-test/run_tests.ps1
@@ -0,0 +1,67 @@
+#!/usr/bin/env pwsh
+# Sandbox test runner for PowerShell: builds the Debug configuration if needed, then runs sandbox-test.exe
+
+Write-Host "========================================"
+Write-Host "    MOSIS SANDBOX TEST RUNNER"
+Write-Host "========================================"
+Write-Host ""
+
+$ErrorActionPreference = "Stop"
+$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path  # directory containing this script
+Push-Location $scriptDir  # relative build/ paths below are resolved from here
+
+try {
+    # Build first when the Debug test executable is missing
+    if (-not (Test-Path "build/Debug/sandbox-test.exe")) {
+        Write-Host "Build not found. Building..."
+        Write-Host ""
+
+        # Check VCPKG_ROOT (needed to locate the vcpkg toolchain file)
+        # NOTE(review): with ErrorActionPreference = "Stop", Write-Error presumably terminates before `exit 1` runs - confirm
+        if (-not $env:VCPKG_ROOT) {
+            Write-Error "VCPKG_ROOT environment variable not set"
+            exit 1
+        }
+
+        # Configure
+        Write-Host "Configuring CMake..."
+        cmake -B build "-DCMAKE_TOOLCHAIN_FILE=$env:VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake"
+        if ($LASTEXITCODE -ne 0) {
+            Write-Error "CMake configure failed"
+            exit 1
+        }
+
+        # Build
+        Write-Host "Building..."
+        cmake --build build --config Debug
+        if ($LASTEXITCODE -ne 0) {
+            Write-Error "Build failed"
+            exit 1
+        }
+        Write-Host ""
+    }
+
+    # Run tests from the executable's directory, forwarding this script's arguments
+    Write-Host "Running tests..."
+    Write-Host ""
+
+    Push-Location "build/Debug"
+    & ./sandbox-test.exe @args
+    $testResult = $LASTEXITCODE  # captured before Pop-Location so later commands cannot overwrite it
+    Pop-Location
+
+    Write-Host ""
+    if ($testResult -eq 0) {
+        Write-Host "========================================"
+        Write-Host "    ALL TESTS PASSED"
+        Write-Host "========================================"
+    } else {
+        Write-Host "========================================"
+        Write-Host "    SOME TESTS FAILED"
+        Write-Host "========================================"
+    }
+
+    exit $testResult  # propagate the test executable's exit code
+}
+finally {
+    Pop-Location  # undo the Push-Location $scriptDir above
+}
--- a/sandbox-test/scripts/scripts/test_module.lua
+++ b/sandbox-test/scripts/scripts/test_module.lua
@@ -0,0 +1,11 @@
+-- Minimal module loaded by the SafeRequire tests
+local M = {
+    value = 42,
+    name = "test_module",
+}
+
+-- Returns the sum of its two arguments
+M.add = function(a, b)
+    return a + b
+end
+return M
--- a/sandbox-test/scripts/test_bytecode_rejected.lua
+++ b/sandbox-test/scripts/test_bytecode_rejected.lua
@@ -0,0 +1,5 @@
+-- Smoke test: a plain-text chunk loads and runs.
+-- The bytecode rejection check itself is done from the C++ side,
+-- which attempts to load a bytecode string directly.
+
+print("PASS: Text loading works")
--- a/sandbox-test/scripts/test_cpu_limit.lua
+++ b/sandbox-test/scripts/test_cpu_limit.lua
@@ -0,0 +1,12 @@
+-- Spins forever on purpose; the instruction limit hook
+-- is expected to stop the script.
+
+local iterations = 0
+
+repeat
+    -- Busy work only: the instruction hook should interrupt this loop
+    iterations = iterations + 1
+until false
+
+-- Unreachable unless the loop above somehow terminates
+error("FAIL: CPU limit not enforced - loop completed")
--- a/sandbox-test/scripts/test_globals_removed.lua
+++ b/sandbox-test/scripts/test_globals_removed.lua
@@ -0,0 +1,26 @@
+-- Verifies that every dangerous global resolves to nil.
+-- The script finishes without error only when the sandbox is configured correctly.
+-- 'require' is deliberately absent from the list: the sandbox provides a safe
+-- version of it when app_path is configured.
+
+local forbidden_names = {
+    "os", "io", "debug", "package", "ffi", "jit",
+    "dofile", "loadfile", "load", "loadstring",
+    "rawget", "rawset", "rawequal", "rawlen",
+    "collectgarbage", "newproxy"
+}
+
+local still_present = {}
+
+for _, global_name in ipairs(forbidden_names) do
+    local kind = type(_G[global_name])
+    if kind ~= "nil" then
+        still_present[#still_present + 1] = global_name .. " (is " .. kind .. ")"
+    end
+end
+
+if #still_present ~= 0 then
+    error("FAIL: These globals should be nil: " .. table.concat(still_present, ", "))
+end
+
+print("PASS: All dangerous globals removed")
--- a/sandbox-test/scripts/test_memory_limit.lua
+++ b/sandbox-test/scripts/test_memory_limit.lua
@@ -0,0 +1,20 @@
+-- Deliberately allocates without bound to hit the sandbox memory limit.
+-- Run with a 512KB limit, it must fail long before finishing.
+
+local blocks = {}
+local n = 0
+
+repeat
+    n = n + 1
+    -- Keep a reference to every 100KB string so none can be collected
+    blocks[n] = string.rep("x", 100000)
+
+    -- Safety net: surviving more than 100 iterations under a 512KB limit
+    -- means the limit is not being enforced
+    if n > 100 then
+        error("FAIL: Should have hit memory limit by now (allocated ~10MB)")
+    end
+until false
+
+-- Should never reach here
+error("FAIL: Memory limit not enforced")
--- a/sandbox-test/scripts/test_metatable_protected.lua
+++ b/sandbox-test/scripts/test_metatable_protected.lua
@@ -0,0 +1,33 @@
+-- Checks that metatables and the global table cannot be manipulated
+
+-- Test 1: getmetatable("") must yield the protection value, not the real metatable
+local string_mt = getmetatable("")
+if string_mt ~= "string" then
+    error("FAIL: string metatable should return 'string', got " .. tostring(string_mt))
+end
+
+-- Test 2: creating a new global must raise an error
+local function add_global()
+    _G.my_new_global = "test"
+end
+if pcall(add_global) then
+    error("FAIL: Should not be able to add new globals")
+end
+
+-- Test 3: overwriting an existing global must raise an error
+local function clear_print()
+    _G.print = nil
+end
+if pcall(clear_print) then
+    error("FAIL: Should not be able to modify print")
+end
+
+-- Test 4: replacing the math table must raise an error
+local function replace_math()
+    _G.math = {}
+end
+if pcall(replace_math) then
+    error("FAIL: Should not be able to replace math")
+end
+
+print("PASS: Metatables protected")
--- a/sandbox-test/scripts/test_safe_operations.lua
+++ b/sandbox-test/scripts/test_safe_operations.lua
@@ -0,0 +1,158 @@
+-- Test that safe/normal Lua operations still work correctly
+
+-- Raises "FAIL: <msg>" unless cond is truthy
+local function check(cond, msg)
+    if cond then return end
+    error("FAIL: " .. msg)
+end
+
+-- ============================================
+-- MATH OPERATIONS
+-- ============================================
+local mixed = math.sin(1.5) + math.floor(3.7)
+check(type(mixed) == "number", "Math operations failed")
+
+check(math.abs(-5) == 5, "math.abs failed")
+check(math.max(1, 2, 3) == 3, "math.max failed")
+check(math.min(1, 2, 3) == 1, "math.min failed")
+check(math.floor(3.9) == 3, "math.floor failed")
+check(math.ceil(3.1) == 4, "math.ceil failed")
+
+-- ============================================
+-- STRING OPERATIONS
+-- ============================================
+local formatted = string.format("hello %d", 42)
+check(formatted == "hello 42", "string.format failed")
+
+local shouted = string.upper("test")
+check(shouted == "TEST", "string.upper failed")
+
+local lowered = string.lower("TEST")
+check(lowered == "test", "string.lower failed")
+
+local middle = string.sub("hello", 2, 4)
+check(middle == "ell", "string.sub failed")
+
+local char_count = string.len("hello")
+check(char_count == 5, "string.len failed")
+
+local repeated = string.rep("ab", 3)
+check(repeated == "ababab", "string.rep failed")
+
+local reversed = string.reverse("hello")
+check(reversed == "olleh", "string.reverse failed")
+
+-- ============================================
+-- TABLE OPERATIONS
+-- ============================================
+local list = {1, 2, 3}
+table.insert(list, 4)
+check(#list == 4, "table.insert failed")
+check(list[4] == 4, "table.insert value failed")
+
+local popped = table.remove(list)
+check(popped == 4, "table.remove failed")
+check(#list == 3, "table.remove length failed")
+
+local unsorted = {3, 1, 2}
+table.sort(unsorted)
+check(unsorted[1] == 1 and unsorted[2] == 2 and unsorted[3] == 3, "table.sort failed")
+
+local joined = table.concat({"a", "b", "c"}, ",")
+check(joined == "a,b,c", "table.concat failed")
+
+-- ============================================
+-- ITERATION
+-- ============================================
+local ipairs_steps = 0
+for _ in ipairs({1, 2, 3, 4}) do
+    ipairs_steps = ipairs_steps + 1
+end
+check(ipairs_steps == 4, "ipairs iteration failed")
+
+local pairs_steps = 0
+for _ in pairs({a=1, b=2, c=3}) do
+    pairs_steps = pairs_steps + 1
+end
+check(pairs_steps == 3, "pairs iteration failed")
+
+-- next must hand back some key/value pair of a non-empty table
+local sample = {a=1, b=2}
+local first_key, first_value = next(sample)
+check(first_key ~= nil and first_value ~= nil, "next function failed")
+
+-- ============================================
+-- ERROR HANDLING
+-- ============================================
+-- pcall must report a raised error as (false, message)
+local raised_ok, raised_msg = pcall(error, "test error")
+check(not raised_ok, "pcall should return false for error")
+check(raised_msg:find("test error"), "Error message should contain 'test error'")
+
+-- pcall must pass through the return value of a successful call
+local returned_ok, returned_value = pcall(function() return 42 end)
+check(returned_ok and returned_value == 42, "pcall should return success value")
+
+-- xpcall with a custom message handler:
+-- the handler's return value replaces the error message
+local function tag_error(e)
+    return "caught: " .. tostring(e)
+end
+local handled_ok, handled_msg = xpcall(function()
+    error("xpcall test")
+end, tag_error)
+check(not handled_ok, "xpcall should return false for error")
+check(handled_msg:find("caught"), "xpcall error handler should run")
+
+-- ============================================
+-- TYPE CHECKS
+-- ============================================
+-- value / expected type name pairs (nil is checked separately: it cannot sit in a table)
+local type_cases = {{{}, "table"}, {"", "string"}, {123, "number"}, {true, "boolean"}, {function() end, "function"}}
+for _, case in ipairs(type_cases) do
+    check(type(case[1]) == case[2], "type " .. case[2] .. " failed")
+end
+check(type(nil) == "nil", "type nil failed")
+
+-- ============================================
+-- CONVERSION
+-- ============================================
+check(tonumber("42") == 42, "tonumber string failed")
+check(tonumber("3.14") == 3.14, "tonumber float failed")
+check(tonumber("abc") == nil, "tonumber invalid failed")
+check(tonumber(42) == 42, "tonumber number failed")
+-- tostring on numbers, booleans and tables
+check(tostring(42) == "42", "tostring number failed")
+check(tostring(true) == "true", "tostring boolean failed")
+check(type(tostring({})) == "string", "tostring table failed")
+
+-- ============================================
+-- SELECT
+-- ============================================
+local second, third = select(2, 1, 2, 3)
+check(second == 2 and third == 3, "select failed")
+check(select("#", 1, 2, 3, 4) == 4, "select # failed")
+
+-- ============================================
+-- ASSERT
+-- ============================================
+-- assert with a truthy condition must not raise
+local accepted_ok = pcall(assert, true, "should not fail")
+check(accepted_ok, "assert true failed")
+
+-- assert with a false condition must raise,
+-- and the raised message must be the one supplied
+local rejected_ok, rejected_msg = pcall(assert, false, "intentional fail")
+
+check(not rejected_ok, "assert false should fail")
+check(rejected_msg:find("intentional fail"), "assert message wrong")
+
+-- ============================================
+-- UTF8 (if available)
+-- ============================================
+if utf8 then
+    -- five ASCII characters count as five
+    check(utf8.len("hello") == 5, "utf8.len failed")
+end
+
+print("PASS: All safe operations work correctly")
--- a/sandbox-test/scripts/test_string_dump_removed.lua
+++ b/sandbox-test/scripts/test_string_dump_removed.lua
@@ -0,0 +1,18 @@
+-- Verifies that string.dump has been removed.
+-- string.dump turns functions into bytecode,
+-- which could be used to bypass sandbox restrictions
+
+if string.dump ~= nil then
+    error("FAIL: string.dump should be nil but exists")
+end
+
+-- The string table itself must survive:
+-- spot-check two ordinary functions
+local must_exist = { "upper", "format" }
+for _, fname in ipairs(must_exist) do
+    if string[fname] == nil then
+        error("FAIL: string." .. fname .. " should exist")
+    end
+end
+
+print("PASS: string.dump removed, other string functions intact")
--- a/sandbox-test/src/main.cpp
+++ b/sandbox-test/src/main.cpp
@@ -0,0 +1,984 @@
+#include "test_harness.h"
+#include "lua_sandbox.h"
+#include "permission_gate.h"
+#include <lua.hpp>
+#include <iostream>
+#include "audit_log.h"
+#include "rate_limiter.h"
+#include "path_sandbox.h"
+#include "timer_manager.h"
+#include <filesystem>
+#include <fstream>
+#include <sstream>
+#include <thread>
+#include <chrono>
+
+// Get path to scripts directory (relative, so it resolves against the current working directory)
+std::string GetScriptsDir() {
+    // Scripts are copied to build directory by CMake
+    return "scripts";
+}
+
+// Builds the context shared by the tests: a non-system app "test.app" rooted at "." with no permissions
+SandboxContext TestContext() {
+    SandboxContext ctx{};
+    ctx.app_id = "test.app";
+    ctx.app_path = ".";
+    ctx.permissions = {};
+    ctx.is_system_app = false;
+    return ctx;
+}
+
+// Returns the whole contents of the file at `path`,
+// or an empty string when the file cannot be opened
+std::string ReadFile(const std::string& path) {
+    std::ifstream input(path);
+    std::stringstream contents;
+    if (input) contents << input.rdbuf();
+    return contents.str();
+}
+
+// Helper to set up a _test table in the real _G for timer tests.
+// This allows test scripts to store state without triggering the proxy's __newindex
+void SetupTestTable(lua_State* L) {
+    lua_rawgeti(L, LUA_REGISTRYINDEX, LUA_RIDX_GLOBALS);
+    if (lua_getmetatable(L, -1)) {
+        lua_getfield(L, -1, "__index");
+        if (lua_istable(L, -1)) {  // real _G reached through the proxy's __index
+            lua_newtable(L);  // Create _test table
+            lua_setfield(L, -2, "_test");
+            lua_pop(L, 3);  // pop real _G, metatable, proxy
+            return;
+        }
+        lua_pop(L, 2);  // pop __index, metatable
+    }
+    // No table-valued __index: store on the globals table itself with a raw set, so a __newindex is never invoked
+    lua_pushliteral(L, "_test");
+    lua_newtable(L);
+    lua_rawset(L, -3);  // globals["_test"] = {} ; pops key and value
+    lua_pop(L, 1);  // pop _G
+}
+
+//=============================================================================
+// TEST DEFINITIONS
+//=============================================================================
+
+bool Test_DangerousGlobalsRemoved(std::string& error_msg) {  // runs test_globals_removed.lua in a default sandbox
+    LuaSandbox sandbox(TestContext());
+    std::string script = ReadFile(GetScriptsDir() + "/test_globals_removed.lua");
+    EXPECT_FALSE(script.empty());  // ReadFile returns "" when the script cannot be opened
+    EXPECT_TRUE(sandbox.LoadString(script, "test_globals_removed.lua"));
+    return true;
+}
+
+bool Test_BytecodeRejected(std::string& error_msg) {  // a binary chunk must be refused by LoadString
+    LuaSandbox sandbox(TestContext());
+
+    // Lua 5.4 bytecode signature; it has an embedded NUL, so the length is passed explicitly
+    static const char kHeader[] = "\x1bLua\x54\x00\x19\x93\r\n\x1a\n";
+    std::string bytecode(kHeader, sizeof(kHeader) - 1);  // all 12 bytes, not just the 5 before "\x00"
+    EXPECT_FALSE(sandbox.LoadString(bytecode, "bytecode_test"));
+
+    // Error should mention binary/bytecode
+    std::string err = sandbox.GetLastError();
+    bool mentions_binary = (err.find("binary") != std::string::npos ||
+                           err.find("attempt to load") != std::string::npos ||
+                           err.find("text") != std::string::npos);
+    EXPECT_TRUE(mentions_binary);
+
+    return true;
+}
+
+bool Test_MemoryLimitEnforced(std::string& error_msg) {  // test_memory_limit.lua must be stopped by the memory limit
+    SandboxLimits limits;
+    limits.memory_bytes = 512 * 1024;  // 512 KB - very small
+
+    LuaSandbox sandbox(TestContext(), limits);
+    std::string script = ReadFile(GetScriptsDir() + "/test_memory_limit.lua");
+    EXPECT_FALSE(script.empty());
+
+    // Should fail due to memory exhaustion...
+    EXPECT_FALSE(sandbox.LoadString(script, "test_memory_limit.lua"));
+    // ...and not through the script's own "FAIL: ..." error, which it raises only when the limit was NOT enforced
+    EXPECT_TRUE(sandbox.GetLastError().find("FAIL") == std::string::npos);
+    return true;
+}
+
+bool Test_CPULimitEnforced(std::string& error_msg) {  // test_cpu_limit.lua (an infinite loop) must be aborted
+    SandboxLimits limits;
+    limits.instructions_per_call = 10000;  // Very low
+
+    LuaSandbox sandbox(TestContext(), limits);
+
+    std::string script = ReadFile(GetScriptsDir() + "/test_cpu_limit.lua");
+    EXPECT_FALSE(script.empty());
+
+    // Should fail due to instruction limit
+    EXPECT_FALSE(sandbox.LoadString(script, "test_cpu_limit.lua"));
+
+    // Error should mention instructions, which tells this failure apart from any other error
+    std::string err = sandbox.GetLastError();
+    EXPECT_CONTAINS(err, "instruction");
+
+    return true;
+}
+
+bool Test_MetatableProtected(std::string& error_msg) {  // runs test_metatable_protected.lua in a default sandbox
+    LuaSandbox sandbox(TestContext());
+    std::string script = ReadFile(GetScriptsDir() + "/test_metatable_protected.lua");
+    EXPECT_FALSE(script.empty());
+    if (!sandbox.LoadString(script, "test_metatable_protected.lua")) {
+        error_msg = "Script failed: " + sandbox.GetLastError();  // report the sandbox's error text to the caller
+        return false;
+    }
+    return true;
+}
+
+bool Test_SafeOperationsWork(std::string& error_msg) {  // runs test_safe_operations.lua in a default sandbox
+    LuaSandbox sandbox(TestContext());
+    std::string script = ReadFile(GetScriptsDir() + "/test_safe_operations.lua");
+    EXPECT_FALSE(script.empty());  // ReadFile returns "" when the script cannot be opened
+    EXPECT_TRUE(sandbox.LoadString(script, "test_safe_operations.lua"));
+    return true;
+}
+
+bool Test_StringDumpRemoved(std::string& error_msg) {  // runs test_string_dump_removed.lua in a default sandbox
+    LuaSandbox sandbox(TestContext());
+    std::string script = ReadFile(GetScriptsDir() + "/test_string_dump_removed.lua");
+    EXPECT_FALSE(script.empty());  // ReadFile returns "" when the script cannot be opened
+    EXPECT_TRUE(sandbox.LoadString(script, "test_string_dump_removed.lua"));
+    return true;
+}
+
+bool Test_MemoryTracking(std::string& error_msg) {  // GetMemoryUsed() must grow after an allocating script
+    LuaSandbox sandbox(TestContext());
+
+    // Initially should have some baseline memory
+    size_t initial = sandbox.GetMemoryUsed();
+    EXPECT_TRUE(initial > 0);
+
+    // Allocate some data; the script must actually run, otherwise the comparison below proves nothing
+    EXPECT_TRUE(sandbox.LoadString("local t = {}; for i=1,1000 do t[i] = string.rep('x', 100) end", "alloc"));
+
+    // Memory should have increased
+    size_t after = sandbox.GetMemoryUsed();
+    EXPECT_TRUE(after > initial);
+
+    return true;
+}
+
+bool Test_InstructionCounting(std::string& error_msg) {  // GetInstructionsUsed() must be non-zero after running code
+    SandboxLimits limits;
+    limits.instructions_per_call = 1000000;  // 1M instructions
+
+    LuaSandbox sandbox(TestContext(), limits);
+
+    // Run some code; it must complete, otherwise the counter check below proves nothing
+    EXPECT_TRUE(sandbox.LoadString("for i=1,10000 do local x = i * 2 end", "counting"));
+
+    // Should have used some instructions
+    EXPECT_TRUE(sandbox.GetInstructionsUsed() > 0);
+
+    return true;
+}
+
+bool Test_MultipleLoads(std::string& error_msg) {  // one sandbox instance accepts several LoadString calls
+    LuaSandbox sandbox(TestContext());
+
+    // Should be able to load multiple scripts, each under its own chunk name
+    EXPECT_TRUE(sandbox.LoadString("local a = 1", "script1"));
+    EXPECT_TRUE(sandbox.LoadString("local b = 2", "script2"));
+    EXPECT_TRUE(sandbox.LoadString("local c = 3", "script3"));
+
+    return true;
+}
+
+bool Test_ErrorRecovery(std::string& error_msg) {  // a failed script must not poison the sandbox
+    LuaSandbox sandbox(TestContext());
+
+    // Script with error: LoadString is expected to report failure
+    EXPECT_FALSE(sandbox.LoadString("error('test error')", "error_script"));
+
+    // Should still be able to run more code after error
+    EXPECT_TRUE(sandbox.LoadString("local x = 1", "after_error"));
+
+    return true;
+}
+
+//=============================================================================
+// PERMISSION SYSTEM TESTS (Milestone 2)
+//=============================================================================
+
+bool Test_NormalPermissionAutoGranted(std::string& error_msg) {  // declared normal permissions need no runtime grant
+    SandboxContext ctx = TestContext();
+    ctx.permissions = {"internet", "vibrate"};  // Declare normal permissions
+
+    mosis::PermissionGate gate(ctx);
+
+    // Normal permissions should be auto-granted when declared
+    EXPECT_TRUE(gate.HasPermission("internet"));
+    EXPECT_TRUE(gate.HasPermission("vibrate"));
+
+    return true;
+}
+
+bool Test_DangerousPermissionRequiresGrant(std::string& error_msg) {  // grant/revoke cycle for a dangerous permission
+    SandboxContext ctx = TestContext();
+    ctx.permissions = {"camera"};  // Declare dangerous permission
+
+    mosis::PermissionGate gate(ctx);
+
+    // Not granted yet (regular app): declaring alone is not enough
+    EXPECT_FALSE(gate.HasPermission("camera"));
+
+    // Grant at runtime
+    gate.GrantPermission("camera");
+
+    // Now should have it
+    EXPECT_TRUE(gate.HasPermission("camera"));
+
+    // Revoke: the permission must be gone again
+    gate.RevokePermission("camera");
+    EXPECT_FALSE(gate.HasPermission("camera"));
+
+    return true;
+}
+
+bool Test_SignaturePermissionSystemOnly(std::string& error_msg) {  // "system.settings" depends on is_system_app
+    // Non-system app: declaring the permission must not be enough
+    SandboxContext ctx = TestContext();
+    ctx.permissions = {"system.settings"};
+    ctx.is_system_app = false;
+
+    mosis::PermissionGate gate(ctx);
+    EXPECT_FALSE(gate.HasPermission("system.settings"));
+
+    // System app: same declaration, now expected to be granted
+    SandboxContext sys_ctx = TestContext();
+    sys_ctx.permissions = {"system.settings"};
+    sys_ctx.is_system_app = true;
+
+    mosis::PermissionGate sys_gate(sys_ctx);
+    EXPECT_TRUE(sys_gate.HasPermission("system.settings"));
+
+    return true;
+}
+
+bool Test_UserGestureTracking(std::string& error_msg) {  // gesture recency window; this test sleeps for 100ms
+    SandboxContext ctx = TestContext();
+    mosis::PermissionGate gate(ctx);
+
+    // No recent gesture (nothing has been recorded yet)
+    EXPECT_FALSE(gate.HasRecentUserGesture(5000));
+
+    // Record gesture
+    gate.RecordUserGesture();
+
+    // Should have recent gesture
+    EXPECT_TRUE(gate.HasRecentUserGesture(5000));
+
+    // Wait for gesture to expire (use short window)
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    EXPECT_FALSE(gate.HasRecentUserGesture(50));  // 50ms window, we waited 100ms
+
+    return true;
+}
+
+bool Test_UndeclaredPermissionDenied(std::string& error_msg) {  // nothing declared => nothing granted
+    SandboxContext ctx = TestContext();
+    ctx.permissions = {};  // No permissions declared
+
+    mosis::PermissionGate gate(ctx);
+
+    // Even normal permissions need to be declared
+    EXPECT_FALSE(gate.HasPermission("internet"));
+
+    // Dangerous permissions also denied
+    EXPECT_FALSE(gate.HasPermission("camera"));
+
+    return true;
+}
+
+bool Test_SystemAppGetsDangerousAuto(std::string& error_msg) {  // counterpart of Test_DangerousPermissionRequiresGrant
+    // System apps get dangerous permissions automatically (no runtime grant needed)
+    SandboxContext ctx = TestContext();
+    ctx.permissions = {"camera", "microphone"};
+    ctx.is_system_app = true;
+
+    mosis::PermissionGate gate(ctx);
+
+    // System app should have dangerous perms without explicit grant
+    EXPECT_TRUE(gate.HasPermission("camera"));
+    EXPECT_TRUE(gate.HasPermission("microphone"));
+
+    return true;
+}
+
+bool Test_PermissionCategoryCheck(std::string& error_msg) {  // static name -> category lookup, no gate instance needed
+    // Check that permission categories are correct (one sample per category)
+    EXPECT_TRUE(mosis::PermissionGate::GetCategory("internet") == mosis::PermissionCategory::Normal);
+    EXPECT_TRUE(mosis::PermissionGate::GetCategory("camera") == mosis::PermissionCategory::Dangerous);
+    EXPECT_TRUE(mosis::PermissionGate::GetCategory("system.settings") == mosis::PermissionCategory::Signature);
+
+    // Unknown permissions default to Dangerous
+    EXPECT_TRUE(mosis::PermissionGate::GetCategory("unknown.perm") == mosis::PermissionCategory::Dangerous);
+
+    return true;
+}
+
+//=============================================================================
+// AUDIT LOG TESTS (Milestone 3)
+//=============================================================================
+
+bool Test_AuditLogBasic(std::string& error_msg) {  // log three events, then query them back three ways
+    mosis::AuditLog log(1000);
+
+    log.Log(mosis::AuditEvent::AppStart, "test.app", "App started");
+    log.Log(mosis::AuditEvent::PermissionCheck, "test.app", "camera", true);
+    log.Log(mosis::AuditEvent::PermissionDenied, "test.app", "microphone", false);
+
+    auto entries = log.GetEntries(10);  // asks for up to 10, only 3 exist
+    EXPECT_TRUE(entries.size() == 3);
+
+    auto app_entries = log.GetEntriesForApp("test.app", 10);  // all three were logged for "test.app"
+    EXPECT_TRUE(app_entries.size() == 3);
+
+    // Check event filtering: exactly one PermissionDenied event was logged
+    auto denied_entries = log.GetEntriesByEvent(mosis::AuditEvent::PermissionDenied, 10);
+    EXPECT_TRUE(denied_entries.size() == 1);
+
+    return true;
+}
+
+bool Test_AuditLogRingBuffer(std::string& error_msg) {  // overflow the log and check what is kept
+    mosis::AuditLog log(100);  // Small buffer
+
+    // Log more than capacity (details are "0" .. "199")
+    for (int i = 0; i < 200; i++) {
+        log.Log(mosis::AuditEvent::Custom, "test.app", std::to_string(i));
+    }
+
+    // Should only have latest 100 stored
+    auto entries = log.GetEntries(200);
+    EXPECT_TRUE(entries.size() == 100);
+
+    // Total logged should be 200 (the counter is expected to include overwritten entries)
+    EXPECT_TRUE(log.GetTotalEntries() == 200);
+
+    // Most recent should be "199", i.e. GetEntries is expected to return newest first
+    EXPECT_TRUE(entries[0].details == "199");
+
+    return true;
+}
+
+bool Test_AuditLogThreadSafe(std::string& error_msg) {  // concurrent Log() calls must not lose entries
+    mosis::AuditLog log(10000);
+
+    // Spawn multiple threads logging concurrently (4 threads x 1000 entries, one app id per thread)
+    std::vector<std::thread> threads;
+    for (int t = 0; t < 4; t++) {
+        threads.emplace_back([&log, t]() {
+            for (int i = 0; i < 1000; i++) {
+                log.Log(mosis::AuditEvent::Custom, "app" + std::to_string(t), std::to_string(i));
+            }
+        });
+    }
+
+    for (auto& thread : threads) {
+        thread.join();  // all writers must finish before the total is read
+    }
+
+    // Should have logged 4000 entries
+    EXPECT_TRUE(log.GetTotalEntries() == 4000);
+
+    return true;
+}
+
+//=============================================================================
+// RATE LIMITER TESTS (Milestone 3)
+//=============================================================================
+
+bool Test_RateLimiterBasic(std::string& error_msg) {  // first Check on a default-constructed limiter
+    mosis::RateLimiter limiter;
+
+    // Should succeed initially (has tokens)
+    EXPECT_TRUE(limiter.Check("test.app", "network.request"));
+
+    return true;
+}
+
+bool Test_RateLimiterExhaustion(std::string& error_msg) {  // a bucket without refill runs dry
+    mosis::RateLimiter limiter;
+    limiter.SetLimit("test.op", {0.0, 5.0});  // 5 tokens, no refill (presumably {refill rate, capacity} - confirm in rate_limiter.h)
+
+    // Use all tokens
+    for (int i = 0; i < 5; i++) {
+        EXPECT_TRUE(limiter.Check("test.app", "test.op"));
+    }
+
+    // Should be denied now
+    EXPECT_FALSE(limiter.Check("test.app", "test.op"));
+
+    return true;
+}
+
+bool Test_RateLimiterRefill(std::string& error_msg) {  // a drained bucket refills over time; this test sleeps for 5ms
+    mosis::RateLimiter limiter;
+    limiter.SetLimit("test.op", {1000.0, 1.0});  // 1000/sec, max 1 token
+
+    // Use the token
+    EXPECT_TRUE(limiter.Check("test.app", "test.op"));
+    EXPECT_FALSE(limiter.Check("test.app", "test.op"));
+
+    // Wait a bit for refill (5ms = ~5 tokens' worth at 1000/sec, but max is 1)
+    std::this_thread::sleep_for(std::chrono::milliseconds(5));
+
+    // Should have token again
+    EXPECT_TRUE(limiter.Check("test.app", "test.op"));
+
+    return true;
+}
+
+bool Test_RateLimiterAppIsolation(std::string& error_msg) {  // one app draining an operation must not affect another app
+    mosis::RateLimiter limiter;
+    limiter.SetLimit("test.op", {0.0, 1.0});  // 1 token, no refill
+
+    // App 1 uses its token
+    EXPECT_TRUE(limiter.Check("app1", "test.op"));
+    EXPECT_FALSE(limiter.Check("app1", "test.op"));
+
+    // App 2 should still have its token
+    EXPECT_TRUE(limiter.Check("app2", "test.op"));
+
+    return true;
+}
+
+// Verifies that ResetApp() makes an exhausted, non-refilling bucket usable
+// again for that app.
+// On failure the EXPECT_* macro fills error_msg and returns false.
+bool Test_RateLimiterReset(std::string& error_msg) {
+    mosis::RateLimiter limiter;
+    limiter.SetLimit("test.op", {0.0, 2.0});  // 2 tokens, no refill
+
+    // Use all tokens
+    EXPECT_TRUE(limiter.Check("test.app", "test.op"));
+    EXPECT_TRUE(limiter.Check("test.app", "test.op"));
+    EXPECT_FALSE(limiter.Check("test.app", "test.op"));
+
+    // Reset the app
+    limiter.ResetApp("test.app");
+
+    // Should have tokens again
+    EXPECT_TRUE(limiter.Check("test.app", "test.op"));
+
+    return true;
+}
+
+// Verifies that an operation with no SetLimit() entry is never throttled:
+// 100 consecutive checks must all succeed.
+// On failure the EXPECT_* macro fills error_msg and returns false.
+bool Test_RateLimiterNoConfig(std::string& error_msg) {
+    mosis::RateLimiter limiter;
+
+    // Operation with no config should always succeed
+    for (int i = 0; i < 100; i++) {
+        EXPECT_TRUE(limiter.Check("test.app", "unconfigured.operation"));
+    }
+
+    return true;
+}
+
+//=============================================================================
+// PATH SANDBOX TESTS (Milestone 4)
+//=============================================================================
+
+// Verifies traversal detection: ContainsTraversal() must flag ".." used as a
+// path component (with either slash style) but not ".." embedded in a file
+// name, and ValidatePath() must reject paths that contain such components.
+// On failure the EXPECT_* macro fills error_msg and returns false.
+bool Test_PathRejectsTraversal(std::string& error_msg) {
+    mosis::PathSandbox sandbox("D:/test/app");
+
+    // ".." as a whole component, in leading, middle and trailing positions
+    EXPECT_TRUE(mosis::PathSandbox::ContainsTraversal("../etc/passwd"));
+    EXPECT_TRUE(mosis::PathSandbox::ContainsTraversal("foo/../../../bar"));
+    EXPECT_TRUE(mosis::PathSandbox::ContainsTraversal("..\\windows\\system32"));
+    EXPECT_TRUE(mosis::PathSandbox::ContainsTraversal("data/.."));
+    EXPECT_TRUE(mosis::PathSandbox::ContainsTraversal(".."));
+
+    // Should not match ".." in filenames
+    EXPECT_FALSE(mosis::PathSandbox::ContainsTraversal("file..txt"));
+    EXPECT_FALSE(mosis::PathSandbox::ContainsTraversal("test...name"));
+
+    // The instance-level validator must refuse the same kind of input
+    std::string canonical;
+    EXPECT_FALSE(sandbox.ValidatePath("../etc/passwd", canonical));
+    EXPECT_FALSE(sandbox.ValidatePath("data/../../../etc/passwd", canonical));
+
+    return true;
+}
+
+// Verifies absolute-path detection: IsAbsolutePath() must recognise POSIX
+// roots, Windows drive letters and UNC-style prefixes, must not flag relative
+// paths, and ValidatePath() must reject absolute input.
+// On failure the EXPECT_* macro fills error_msg and returns false.
+bool Test_PathRejectsAbsolute(std::string& error_msg) {
+    // Absolute forms: POSIX root, drive letters, UNC with both slash styles
+    EXPECT_TRUE(mosis::PathSandbox::IsAbsolutePath("/etc/passwd"));
+    EXPECT_TRUE(mosis::PathSandbox::IsAbsolutePath("C:\\Windows\\System32"));
+    EXPECT_TRUE(mosis::PathSandbox::IsAbsolutePath("D:/test/file.txt"));
+    EXPECT_TRUE(mosis::PathSandbox::IsAbsolutePath("\\\\server\\share"));
+    EXPECT_TRUE(mosis::PathSandbox::IsAbsolutePath("//server/share"));
+
+    // Relative forms must not be flagged
+    EXPECT_FALSE(mosis::PathSandbox::IsAbsolutePath("scripts/utils.lua"));
+    EXPECT_FALSE(mosis::PathSandbox::IsAbsolutePath("./data/file.txt"));
+    EXPECT_FALSE(mosis::PathSandbox::IsAbsolutePath("data/config.json"));
+
+    // The instance-level validator must refuse absolute paths
+    mosis::PathSandbox sandbox("D:/test/app");
+    std::string canonical;
+    EXPECT_FALSE(sandbox.ValidatePath("/etc/passwd", canonical));
+    EXPECT_FALSE(sandbox.ValidatePath("C:\\Windows\\System32\\file.dll", canonical));
+
+    return true;
+}
+
+// Verifies that ValidatePath() accepts plain relative file names, with and
+// without a leading "./", when the sandbox is rooted at GetScriptsDir().
+// NOTE(review): presumably both scripts must exist under the scripts
+// directory for validation to succeed - confirm against PathSandbox.
+// NOTE(review): the canonical output string is not inspected here.
+bool Test_PathAcceptsValid(std::string& error_msg) {
+    mosis::PathSandbox sandbox(GetScriptsDir());
+
+    std::string canonical;
+    EXPECT_TRUE(sandbox.ValidatePath("test_globals_removed.lua", canonical));
+    EXPECT_TRUE(sandbox.ValidatePath("./test_memory_limit.lua", canonical));
+
+    return true;
+}
+
+// Verifies IsValidModuleName(): dot-separated identifier segments are
+// accepted; empty names, leading/trailing/double dots, path separators and
+// other special characters are rejected.
+// On failure the EXPECT_* macro fills error_msg and returns false.
+bool Test_ModuleNameValidation(std::string& error_msg) {
+    // Valid names
+    EXPECT_TRUE(mosis::PathSandbox::IsValidModuleName("utils"));
+    EXPECT_TRUE(mosis::PathSandbox::IsValidModuleName("my_module"));
+    EXPECT_TRUE(mosis::PathSandbox::IsValidModuleName("ui.button"));
+    EXPECT_TRUE(mosis::PathSandbox::IsValidModuleName("a.b.c"));
+    EXPECT_TRUE(mosis::PathSandbox::IsValidModuleName("Module123"));
+
+    // Invalid names
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName(""));
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName(".utils"));
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName("utils."));
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName("ui..button"));
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName("../evil"));
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName("/etc/passwd"));
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName("foo;bar"));
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName("foo/bar"));
+    EXPECT_FALSE(mosis::PathSandbox::IsValidModuleName("foo\\bar"));
+
+    return true;
+}
+
+// Verifies ModuleToPath(): the result is prefixed with "scripts/", dots in the
+// module name become directory separators, and ".lua" is appended.
+// EXPECT_TRUE with == is used because EXPECT_EQ formats its operands with
+// std::to_string, which does not accept strings.
+bool Test_ModuleToPath(std::string& error_msg) {
+    EXPECT_TRUE(mosis::PathSandbox::ModuleToPath("utils") == "scripts/utils.lua");
+    EXPECT_TRUE(mosis::PathSandbox::ModuleToPath("ui.button") == "scripts/ui/button.lua");
+    EXPECT_TRUE(mosis::PathSandbox::ModuleToPath("a.b.c") == "scripts/a/b/c.lua");
+
+    return true;
+}
+
+// Checks that a sandboxed chunk can require('test_module') and read the
+// value 42 from the returned table.
+bool Test_SafeRequireLoads(std::string& error_msg) {
+    // The app root is the scripts directory ("scripts"). ModuleToPath prepends
+    // another "scripts/", so the module file is expected at
+    // scripts/scripts/test_module.lua. LuaSandbox registers the safe require
+    // on its own once app_path is set.
+    SandboxContext context = TestContext();
+    context.app_path = GetScriptsDir();
+    LuaSandbox lua(context);
+
+    // Chunk that loads the module and validates its exported value.
+    const std::string chunk =
+        "local m = require('test_module')\n"
+        "if m.value ~= 42 then\n"
+        "    error('module value mismatch')\n"
+        "end\n"
+        "return true\n";
+
+    if (lua.LoadString(chunk, "require_test")) {
+        return true;
+    }
+
+    error_msg = "Failed to load module: " + lua.GetLastError();
+    return false;
+}
+
+// Requires the same module twice inside one chunk and checks that both calls
+// yield the same value, i.e. the second require is served from a cache.
+bool Test_SafeRequireCaches(std::string& error_msg) {
+    // LuaSandbox registers the safe require on its own once app_path is set.
+    SandboxContext context = TestContext();
+    context.app_path = GetScriptsDir();
+    LuaSandbox lua(context);
+
+    const std::string chunk =
+        "local m1 = require('test_module')\n"
+        "local m2 = require('test_module')\n"
+        "if m1 ~= m2 then\n"
+        "    error('modules should be same (cached)')\n"
+        "end\n"
+        "return true\n";
+
+    if (lua.LoadString(chunk, "cache_test")) {
+        return true;
+    }
+
+    error_msg = "Cache test failed: " + lua.GetLastError();
+    return false;
+}
+
+// Verifies that chunks calling require() with a traversal, absolute,
+// special-character or empty module name fail to load.
+// NOTE(review): any LoadString failure satisfies these checks; the error text
+// is not inspected, so a missing require would also pass - confirm intent.
+bool Test_SafeRequireRejectsInvalid(std::string& error_msg) {
+    // Safe require is auto-registered by LuaSandbox when app_path is set
+    SandboxContext ctx = TestContext();
+    ctx.app_path = GetScriptsDir();
+
+    LuaSandbox sandbox(ctx);
+
+    // Should reject path traversal in module name
+    EXPECT_FALSE(sandbox.LoadString("require('../evil')", "evil_require"));
+
+    // Should reject absolute paths
+    EXPECT_FALSE(sandbox.LoadString("require('/etc/passwd')", "abs_require"));
+
+    // Should reject special characters
+    EXPECT_FALSE(sandbox.LoadString("require('foo;bar')", "special_require"));
+
+    // Should reject empty
+    EXPECT_FALSE(sandbox.LoadString("require('')", "empty_require"));
+
+    return true;
+}
+
+//=============================================================================
+// TIMER MANAGER TESTS (Milestone 5)
+//=============================================================================
+
+// Arms a 50 ms setTimeout from Lua, waits 100 ms, pumps the timer manager once
+// and checks that the callback flipped _test.fired to true.
+bool Test_SetTimeoutFires(std::string& error_msg) {
+    SandboxContext context = TestContext();
+    LuaSandbox lua(context);
+    // Declared after the sandbox on purpose: locals are destroyed in reverse
+    // order, so the manager goes away before the Lua state does.
+    mosis::TimerManager timers;
+
+    // Records a failure message (prefix + last sandbox error) and yields false.
+    auto fail = [&](const char* prefix) {
+        error_msg = prefix + lua.GetLastError();
+        return false;
+    };
+
+    // _test is a plain table for test state (bypasses proxy __newindex).
+    SetupTestTable(lua.GetState());
+    mosis::RegisterTimerAPI(lua.GetState(), &timers, context.app_id);
+
+    // The callback only touches the _test table.
+    const std::string arm_chunk =
+        "_test.fired = false\n"
+        "setTimeout(function() _test.fired = true end, 50)\n";
+    if (!lua.LoadString(arm_chunk, "timeout_test")) {
+        return fail("Failed to set timeout: ");
+    }
+
+    // Let the deadline pass, then give the manager a chance to dispatch.
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    timers.ProcessTimers();
+
+    if (!lua.LoadString("assert(_test.fired == true, 'callback did not fire')", "check")) {
+        return fail("Timeout callback did not fire: ");
+    }
+
+    return true;
+}
+
+// Arms a 30 ms setInterval from Lua, pumps the timer manager five times at
+// 40 ms spacing and checks that the callback ran at least three times.
+bool Test_SetIntervalFires(std::string& error_msg) {
+    SandboxContext context = TestContext();
+    LuaSandbox lua(context);
+    // Declared after the sandbox on purpose: locals are destroyed in reverse
+    // order, so the manager goes away before the Lua state does.
+    mosis::TimerManager timers;
+
+    // _test is the table the callback counts into.
+    SetupTestTable(lua.GetState());
+    mosis::RegisterTimerAPI(lua.GetState(), &timers, context.app_id);
+
+    const std::string arm_chunk =
+        "_test.count = 0\n"
+        "setInterval(function() _test.count = _test.count + 1 end, 30)\n";
+    if (!lua.LoadString(arm_chunk, "interval_test")) {
+        error_msg = "Failed to set interval: " + lua.GetLastError();
+        return false;
+    }
+
+    // Five wait-then-dispatch rounds.
+    int rounds_left = 5;
+    while (rounds_left-- > 0) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(40));
+        timers.ProcessTimers();
+    }
+
+    // The interval is expected to have run several times by now.
+    const bool enough = lua.LoadString(
+        "assert(_test.count >= 3, 'interval fired only ' .. _test.count .. ' times')", "check");
+    if (enough) {
+        return true;
+    }
+
+    error_msg = "Interval did not fire enough times: " + lua.GetLastError();
+    return false;
+}
+
+// Arms a 100 ms setTimeout, cancels it immediately with clearTimeout, waits
+// past the deadline and checks that the callback never ran.
+bool Test_ClearTimeoutCancels(std::string& error_msg) {
+    SandboxContext context = TestContext();
+    LuaSandbox lua(context);
+    // Declared after the sandbox on purpose: locals are destroyed in reverse
+    // order, so the manager goes away before the Lua state does.
+    mosis::TimerManager timers;
+
+    // Records a failure message (prefix + last sandbox error) and yields false.
+    auto fail = [&](const char* prefix) {
+        error_msg = prefix + lua.GetLastError();
+        return false;
+    };
+
+    // _test is the table the callback would write into.
+    SetupTestTable(lua.GetState());
+    mosis::RegisterTimerAPI(lua.GetState(), &timers, context.app_id);
+
+    const std::string arm_and_cancel =
+        "_test.fired = false\n"
+        "local id = setTimeout(function() _test.fired = true end, 100)\n"
+        "clearTimeout(id)\n";
+    if (!lua.LoadString(arm_and_cancel, "clear_test")) {
+        return fail("Failed to clear timeout: ");
+    }
+
+    // Go beyond the original deadline before dispatching.
+    std::this_thread::sleep_for(std::chrono::milliseconds(150));
+    timers.ProcessTimers();
+
+    // A cancelled timer must leave the flag untouched.
+    if (!lua.LoadString("assert(_test.fired == false, 'callback should not have fired')", "check")) {
+        return fail("Cancelled timeout still fired: ");
+    }
+
+    return true;
+}
+
+// Arms a 30 ms setInterval, lets it run once, cancels it with clearInterval
+// and verifies that the callback count no longer changes afterwards.
+//
+// The count is snapshotted in the same chunk that cancels the interval, so the
+// final comparison is independent of how often the interval fired before the
+// cancel. The original "<= 2" bound is kept as an additional sanity check.
+// Returns true on success; on failure error_msg describes what went wrong.
+bool Test_ClearIntervalCancels(std::string& error_msg) {
+    SandboxContext ctx = TestContext();
+    LuaSandbox sandbox(ctx);
+    // Manager must be declared AFTER sandbox so it's destroyed BEFORE sandbox
+    mosis::TimerManager manager;
+
+    // Setup _test table for storing state
+    SetupTestTable(sandbox.GetState());
+
+    mosis::RegisterTimerAPI(sandbox.GetState(), &manager, ctx.app_id);
+
+    // Store both count and interval ID in _test table so they persist across LoadString calls
+    std::string script =
+        "_test.count = 0\n"
+        "_test.id = setInterval(function() _test.count = _test.count + 1 end, 30)\n";
+
+    if (!sandbox.LoadString(script, "interval_setup")) {
+        error_msg = "Failed to set interval: " + sandbox.GetLastError();
+        return false;
+    }
+
+    // Let it fire once
+    std::this_thread::sleep_for(std::chrono::milliseconds(40));
+    manager.ProcessTimers();
+
+    // Cancel it and remember how often it had fired at that moment. A failing
+    // cancel chunk must fail the test instead of being silently ignored.
+    std::string cancel_script =
+        "clearInterval(_test.id)\n"
+        "_test.count_at_cancel = _test.count\n";
+
+    if (!sandbox.LoadString(cancel_script, "cancel")) {
+        error_msg = "Failed to clear interval: " + sandbox.GetLastError();
+        return false;
+    }
+
+    // Wait and process more
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    manager.ProcessTimers();
+
+    // Nothing may have fired since the cancel, and the total should still be
+    // one (or maybe two due to timing).
+    std::string check_script =
+        "assert(_test.count == _test.count_at_cancel, "
+        "'interval fired after cancel: ' .. _test.count .. ' (was ' .. _test.count_at_cancel .. ')')\n"
+        "assert(_test.count <= 2, 'interval fired too many times: ' .. _test.count)\n";
+
+    if (!sandbox.LoadString(check_script, "check")) {
+        error_msg = "Interval kept firing after cancel: " + sandbox.GetLastError();
+        return false;
+    }
+
+    return true;
+}
+
+// Tries to create 150 long-running timers from Lua and verifies that the
+// manager holds at most 100 of them (MAX_TIMERS_PER_APP per the original
+// comment) and that at least one timer was actually created.
+//
+// The chunk keeps its counter in a local instead of a global, so it does not
+// depend on the sandbox's handling of global writes, and a chunk that fails
+// to run is reported rather than letting the test pass with zero timers.
+// Returns true on success; on failure error_msg describes what went wrong.
+bool Test_TimerLimitEnforced(std::string& error_msg) {
+    SandboxContext ctx = TestContext();
+    LuaSandbox sandbox(ctx);
+    // Manager must be declared AFTER sandbox so it's destroyed BEFORE sandbox
+    mosis::TimerManager manager;
+
+    mosis::RegisterTimerAPI(sandbox.GetState(), &manager, ctx.app_id);
+
+    // Try to create more than MAX_TIMERS_PER_APP (100) timers. pcall absorbs
+    // any error raised once the limit is reached.
+    std::string script =
+        "local created = 0\n"
+        "for i = 1, 150 do\n"
+        "    local ok, err = pcall(function()\n"
+        "        setTimeout(function() end, 1000000)\n"
+        "    end)\n"
+        "    if ok then created = created + 1 end\n"
+        "end\n";
+
+    if (!sandbox.LoadString(script, "limit_test")) {
+        error_msg = "Failed to run timer limit script: " + sandbox.GetLastError();
+        return false;
+    }
+
+    size_t count = manager.GetTimerCount(ctx.app_id);
+
+    // Zero timers means the limit was never exercised.
+    if (count == 0) {
+        error_msg = "Timer limit test created no timers";
+        return false;
+    }
+
+    // Should be capped at MAX_TIMERS_PER_APP
+    if (count > 100) {
+        error_msg = "Timer limit not enforced: " + std::to_string(count) + " timers created";
+        return false;
+    }
+
+    return true;
+}
+
+// Creates ten long-running timers from Lua, then verifies that
+// ClearAppTimers() removes every timer registered for the app.
+//
+// A chunk that fails to run is reported with the sandbox's error text instead
+// of surfacing only as a failed "before == 10" expectation.
+// Returns true on success; on failure error_msg describes what went wrong.
+bool Test_ClearAppTimersCleanup(std::string& error_msg) {
+    SandboxContext ctx = TestContext();
+    LuaSandbox sandbox(ctx);
+    // Manager must be declared AFTER sandbox so it's destroyed BEFORE sandbox
+    mosis::TimerManager manager;
+
+    mosis::RegisterTimerAPI(sandbox.GetState(), &manager, ctx.app_id);
+
+    std::string script =
+        "for i = 1, 10 do\n"
+        "    setTimeout(function() end, 1000000)\n"
+        "end\n";
+
+    if (!sandbox.LoadString(script, "cleanup_test")) {
+        error_msg = "Failed to create timers: " + sandbox.GetLastError();
+        return false;
+    }
+
+    size_t before = manager.GetTimerCount(ctx.app_id);
+    EXPECT_TRUE(before == 10);
+
+    // Clear all timers for app (simulating app stop)
+    manager.ClearAppTimers(ctx.app_id);
+
+    size_t after = manager.GetTimerCount(ctx.app_id);
+    EXPECT_TRUE(after == 0);
+
+    return true;
+}
+
+// Requests a 1 ms interval and verifies that it does not fire as often as a
+// real 1 ms interval would, i.e. that a minimum interval is applied.
+//
+// A failed setup chunk is reported explicitly. Without that check a failed
+// setInterval call would leave the count at 0 and the test would pass
+// without exercising anything.
+// Returns true on success; on failure error_msg describes what went wrong.
+bool Test_MinIntervalEnforced(std::string& error_msg) {
+    SandboxContext ctx = TestContext();
+    LuaSandbox sandbox(ctx);
+    // Manager must be declared AFTER sandbox so it's destroyed BEFORE sandbox
+    mosis::TimerManager manager;
+
+    // Setup _test table for storing state
+    SetupTestTable(sandbox.GetState());
+
+    mosis::RegisterTimerAPI(sandbox.GetState(), &manager, ctx.app_id);
+
+    // Try to set interval less than minimum (10ms)
+    std::string script =
+        "_test.count = 0\n"
+        "setInterval(function() _test.count = _test.count + 1 end, 1)\n";  // 1ms, should be clamped to 10ms
+
+    if (!sandbox.LoadString(script, "min_interval_test")) {
+        error_msg = "Failed to set interval: " + sandbox.GetLastError();
+        return false;
+    }
+
+    // With 1ms interval, in 50ms we'd get 50 callbacks
+    // With 10ms minimum, we should get ~5
+    std::this_thread::sleep_for(std::chrono::milliseconds(55));
+    for (int i = 0; i < 10; i++) {
+        manager.ProcessTimers();
+    }
+
+    if (!sandbox.LoadString("assert(_test.count <= 10, 'interval fired too often: ' .. _test.count)", "check")) {
+        error_msg = "Minimum interval not enforced: " + sandbox.GetLastError();
+        return false;
+    }
+
+    return true;
+}
+
+//=============================================================================
+// MAIN
+//=============================================================================
+
+// Entry point of the sandbox test runner.
+//
+// Command line:
+//   --test <name>    run only tests whose name contains <name>
+//   --output <file>  path of the JSON report (default "test_results.json")
+//   --help           print usage and exit with 0
+// Unrecognized options, and --test/--output given without a value, are
+// reported on stderr and otherwise ignored.
+//
+// Returns 0 when every executed test passed, 1 when at least one test failed
+// or the scripts directory is missing.
+int main(int argc, char* argv[]) {
+    std::string filter;
+    std::string output_file = "test_results.json";
+
+    // Parse args
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+        if (arg == "--test" && i + 1 < argc) {
+            filter = argv[++i];
+        } else if (arg == "--output" && i + 1 < argc) {
+            output_file = argv[++i];
+        } else if (arg == "--help") {
+            std::cout << "Usage: sandbox-test [options]\n";
+            std::cout << "Options:\n";
+            std::cout << "  --test <name>    Run only tests containing <name>\n";
+            std::cout << "  --output <file>  Write JSON report to <file>\n";
+            std::cout << "  --help           Show this help\n";
+            return 0;
+        } else {
+            // Reached for misspelled options and for --test/--output that are
+            // missing their value; make that visible instead of dropping it.
+            std::cerr << "WARNING: ignoring unrecognized or incomplete argument: " << arg << "\n";
+        }
+    }
+
+    std::cout << "========================================\n";
+    std::cout << "    LUA SANDBOX SECURITY TESTS\n";
+    std::cout << "========================================\n\n";
+
+    // Check scripts directory exists
+    if (!std::filesystem::exists(GetScriptsDir())) {
+        std::cerr << "ERROR: Scripts directory not found: " << GetScriptsDir() << "\n";
+        std::cerr << "Make sure to run from the build directory.\n";
+        return 1;
+    }
+
+    // Register tests
+    TestHarness harness;
+
+    // Milestone 1: Core Sandbox
+    harness.AddTest("DangerousGlobalsRemoved", Test_DangerousGlobalsRemoved);
+    harness.AddTest("BytecodeRejected", Test_BytecodeRejected);
+    harness.AddTest("MemoryLimitEnforced", Test_MemoryLimitEnforced);
+    harness.AddTest("CPULimitEnforced", Test_CPULimitEnforced);
+    harness.AddTest("MetatableProtected", Test_MetatableProtected);
+    harness.AddTest("SafeOperationsWork", Test_SafeOperationsWork);
+    harness.AddTest("StringDumpRemoved", Test_StringDumpRemoved);
+    harness.AddTest("MemoryTracking", Test_MemoryTracking);
+    harness.AddTest("InstructionCounting", Test_InstructionCounting);
+    harness.AddTest("MultipleLoads", Test_MultipleLoads);
+    harness.AddTest("ErrorRecovery", Test_ErrorRecovery);
+
+    // Milestone 2: Permission System
+    harness.AddTest("NormalPermissionAutoGranted", Test_NormalPermissionAutoGranted);
+    harness.AddTest("DangerousPermissionRequiresGrant", Test_DangerousPermissionRequiresGrant);
+    harness.AddTest("SignaturePermissionSystemOnly", Test_SignaturePermissionSystemOnly);
+    harness.AddTest("UserGestureTracking", Test_UserGestureTracking);
+    harness.AddTest("UndeclaredPermissionDenied", Test_UndeclaredPermissionDenied);
+    harness.AddTest("SystemAppGetsDangerousAuto", Test_SystemAppGetsDangerousAuto);
+    harness.AddTest("PermissionCategoryCheck", Test_PermissionCategoryCheck);
+
+    // Milestone 3: Audit Logging & Rate Limiting
+    harness.AddTest("AuditLogBasic", Test_AuditLogBasic);
+    harness.AddTest("AuditLogRingBuffer", Test_AuditLogRingBuffer);
+    harness.AddTest("AuditLogThreadSafe", Test_AuditLogThreadSafe);
+    harness.AddTest("RateLimiterBasic", Test_RateLimiterBasic);
+    harness.AddTest("RateLimiterExhaustion", Test_RateLimiterExhaustion);
+    harness.AddTest("RateLimiterRefill", Test_RateLimiterRefill);
+    harness.AddTest("RateLimiterAppIsolation", Test_RateLimiterAppIsolation);
+    harness.AddTest("RateLimiterReset", Test_RateLimiterReset);
+    harness.AddTest("RateLimiterNoConfig", Test_RateLimiterNoConfig);
+
+    // Milestone 4: Safe Path & Require
+    harness.AddTest("PathRejectsTraversal", Test_PathRejectsTraversal);
+    harness.AddTest("PathRejectsAbsolute", Test_PathRejectsAbsolute);
+    harness.AddTest("PathAcceptsValid", Test_PathAcceptsValid);
+    harness.AddTest("ModuleNameValidation", Test_ModuleNameValidation);
+    harness.AddTest("ModuleToPath", Test_ModuleToPath);
+    harness.AddTest("SafeRequireLoads", Test_SafeRequireLoads);
+    harness.AddTest("SafeRequireCaches", Test_SafeRequireCaches);
+    harness.AddTest("SafeRequireRejectsInvalid", Test_SafeRequireRejectsInvalid);
+
+    // Milestone 5: Timer & Callback System
+    harness.AddTest("SetTimeoutFires", Test_SetTimeoutFires);
+    harness.AddTest("SetIntervalFires", Test_SetIntervalFires);
+    harness.AddTest("ClearTimeoutCancels", Test_ClearTimeoutCancels);
+    harness.AddTest("ClearIntervalCancels", Test_ClearIntervalCancels);
+    harness.AddTest("TimerLimitEnforced", Test_TimerLimitEnforced);
+    harness.AddTest("ClearAppTimersCleanup", Test_ClearAppTimersCleanup);
+    harness.AddTest("MinIntervalEnforced", Test_MinIntervalEnforced);
+
+    // Run tests
+    auto results = harness.Run(filter);
+
+    // An empty run still exits with 0 (unchanged), but say so: a mistyped
+    // filter would otherwise look like a fully green run.
+    if (results.empty()) {
+        std::cerr << "WARNING: no tests were run (filter: '" << filter << "')\n";
+    }
+
+    // Output
+    harness.PrintResults(results);
+    harness.WriteJsonReport(results, output_file);
+
+    std::cout << "\nJSON report written to: " << output_file << "\n";
+
+    // Return non-zero if any tests failed
+    int failed = 0;
+    for (const auto& r : results) {
+        if (!r.passed) failed++;
+    }
+
+    return failed > 0 ? 1 : 0;
+}
--- a/sandbox-test/src/test_harness.cpp
+++ b/sandbox-test/src/test_harness.cpp
@@ -0,0 +1,128 @@
+#include "test_harness.h"
+#include <nlohmann/json.hpp>
+#include <fstream>
+#include <iomanip>
+#include <ctime>
+
+// Registers a test under the given name; tests run in registration order.
+void TestHarness::AddTest(const std::string& name, std::function<bool(std::string&)> func) {
+    m_tests.emplace_back(TestCase{name, func});
+}
+
+// Runs every registered test whose name contains `filter` (all tests when the
+// filter is empty), printing progress to stdout. Exceptions thrown by a test
+// are caught and recorded as failures. Returns one TestResult per executed
+// test, in registration order.
+std::vector<TestResult> TestHarness::Run(const std::string& filter) {
+    using Clock = std::chrono::steady_clock;
+
+    std::vector<TestResult> outcomes;
+    const bool run_everything = filter.empty();
+
+    for (const auto& tc : m_tests) {
+        // Substring match against the test name
+        const bool selected = run_everything || tc.name.find(filter) != std::string::npos;
+        if (!selected) {
+            continue;
+        }
+
+        TestResult outcome;
+        outcome.name = tc.name;
+
+        std::cout << "Running: " << tc.name << "... " << std::flush;
+
+        const auto started = Clock::now();
+
+        try {
+            std::string message;
+            outcome.passed = tc.func(message);
+            outcome.error_message = message;
+        } catch (const std::exception& ex) {
+            outcome.passed = false;
+            outcome.error_message = std::string("Exception: ") + ex.what();
+        } catch (...) {
+            outcome.passed = false;
+            outcome.error_message = "Unknown exception";
+        }
+
+        const auto elapsed = Clock::now() - started;
+        outcome.duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
+
+        if (!outcome.passed) {
+            std::cout << "FAILED\n";
+            std::cout << "  Error: " << outcome.error_message << "\n";
+        } else {
+            std::cout << "PASSED (" << outcome.duration_ms << "ms)\n";
+        }
+
+        outcomes.push_back(outcome);
+    }
+
+    return outcomes;
+}
+
+// Writes the test results as a JSON document (2-space indent) to `path`.
+//
+// The report holds a fixed suite name, a UTC timestamp in
+// "YYYY-MM-DDTHH:MM:SSZ" form, pass/fail/total counts and one entry per test;
+// an "error" field is added only for failed tests with a non-empty message.
+// If the file cannot be opened or written, a message is printed to stderr
+// and the function returns without throwing.
+void TestHarness::WriteJsonReport(const std::vector<TestResult>& results, const std::string& path) {
+    nlohmann::json report;
+
+    // Get timestamp. std::gmtime may return a null pointer, so format with
+    // std::strftime only when the conversion succeeded; otherwise the
+    // timestamp is left empty.
+    const std::time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
+    char stamp[32] = "";
+    if (const std::tm* utc = std::gmtime(&now)) {
+        std::strftime(stamp, sizeof(stamp), "%Y-%m-%dT%H:%M:%SZ", utc);
+    }
+
+    report["name"] = "Lua Sandbox Security Tests";
+    report["timestamp"] = std::string(stamp);
+
+    int passed = 0, failed = 0;
+    for (const auto& r : results) {
+        if (r.passed) passed++;
+        else failed++;
+    }
+
+    report["summary"]["passed"] = passed;
+    report["summary"]["failed"] = failed;
+    report["summary"]["total"] = static_cast<int>(results.size());
+
+    nlohmann::json tests = nlohmann::json::array();
+    for (const auto& r : results) {
+        nlohmann::json t;
+        t["name"] = r.name;
+        t["status"] = r.passed ? "passed" : "failed";
+        t["duration_ms"] = r.duration_ms;
+        if (!r.passed && !r.error_message.empty()) {
+            t["error"] = r.error_message;
+        }
+        tests.push_back(t);
+    }
+    report["tests"] = tests;
+
+    std::ofstream f(path);
+    if (!f) {
+        std::cerr << "ERROR: could not open JSON report for writing: " << path << "\n";
+        return;
+    }
+
+    f << report.dump(2);
+    if (!f) {
+        std::cerr << "ERROR: failed while writing JSON report: " << path << "\n";
+    }
+}
+
+// Prints a summary to stdout: totals, the name and message of every failed
+// test, and a final verdict line.
+void TestHarness::PrintResults(const std::vector<TestResult>& results) {
+    const char* const rule = "========================================\n";
+
+    std::cout << "\n";
+    std::cout << rule;
+    std::cout << "           TEST RESULTS\n";
+    std::cout << rule << "\n";
+
+    // Count failures; everything else passed.
+    int n_failed = 0;
+    for (const auto& entry : results) {
+        if (!entry.passed) {
+            ++n_failed;
+        }
+    }
+    const int n_passed = static_cast<int>(results.size()) - n_failed;
+
+    std::cout << "Total:  " << results.size() << "\n";
+    std::cout << "Passed: " << n_passed << "\n";
+    std::cout << "Failed: " << n_failed << "\n\n";
+
+    if (n_failed == 0) {
+        std::cout << "ALL TESTS PASSED!\n";
+    } else {
+        std::cout << "FAILED TESTS:\n";
+        for (const auto& entry : results) {
+            if (entry.passed) {
+                continue;
+            }
+            std::cout << "  - " << entry.name << "\n";
+            std::cout << "    " << entry.error_message << "\n";
+        }
+        std::cout << "\n";
+        std::cout << "SOME TESTS FAILED!\n";
+    }
+
+    std::cout << rule;
+}
--- a/sandbox-test/src/test_harness.h
+++ b/sandbox-test/src/test_harness.h
@@ -0,0 +1,85 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <functional>
+#include <chrono>
+#include <iostream>
+
+// Outcome of one executed test case.
+// The scalar members have default initializers so that a default-constructed
+// TestResult never carries indeterminate values.
+struct TestResult {
+    std::string name;            // Name the test was registered under
+    bool passed = false;         // True when the test function returned true
+    std::string error_message;   // Failure description; empty when none was given
+    int64_t duration_ms = 0;     // Run time of the test in milliseconds
+};
+
+// Test case definition: a display name paired with the callable that runs it.
+struct TestCase {
+    std::string name;  // Name used for filtering and reporting
+    std::function<bool(std::string&)> func;  // Returns true if passed, error in string
+};
+
+// Test runner: collects named test functions, executes them and reports the
+// results to the console and to a JSON file.
+class TestHarness {
+public:
+    // Registers a test. `func` returns true on success; on failure it returns
+    // false and may describe the problem in its string argument.
+    void AddTest(const std::string& name, std::function<bool(std::string&)> func);
+
+    // Run all tests or filter by name
+    // (a test is selected when its name contains `filter`; empty selects all)
+    std::vector<TestResult> Run(const std::string& filter = "");
+
+    // Output results as JSON
+    void WriteJsonReport(const std::vector<TestResult>& results, const std::string& path);
+
+    // Print results to console
+    void PrintResults(const std::vector<TestResult>& results);
+
+private:
+    std::vector<TestCase> m_tests;  // Registered tests, in registration order
+};
+
+// Assertion macros
+//
+// All EXPECT_* macros must be used inside a function that returns bool and
+// has a std::string named `error_msg` in scope: on failure they store
+// "<file>:<line>: <description>" in error_msg and return false.
+
+// Fails the enclosing test when `cond` evaluates to false. The failure
+// message contains the source text of `cond`.
+#define EXPECT_TRUE(cond) \
+    do { \
+        if (!(cond)) { \
+            error_msg = std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
+                        ": EXPECT_TRUE(" #cond ") failed"; \
+            return false; \
+        } \
+    } while(0)
+
+// Fails the enclosing test when `cond` evaluates to true. Stores
+// "<file>:<line>: EXPECT_FALSE(<cond text>) failed" in error_msg and returns
+// false from the enclosing function.
+#define EXPECT_FALSE(cond) \
+    do { \
+        if (cond) { \
+            error_msg = std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
+                        ": EXPECT_FALSE(" #cond ") failed"; \
+            return false; \
+        } \
+    } while(0)
+
+// Fails the enclosing test when `a` and `b` compare unequal. Stores
+// "<file>:<line>: EXPECT_EQ failed: <a> != <b>" in error_msg and returns
+// false from the enclosing function.
+// Each operand is evaluated exactly once, so arguments with side effects are
+// safe and the message shows the values that were actually compared.
+// Both operands must be accepted by std::to_string (arithmetic types).
+#define EXPECT_EQ(a, b) \
+    do { \
+        const auto& expect_eq_lhs_ = (a); \
+        const auto& expect_eq_rhs_ = (b); \
+        if (expect_eq_lhs_ != expect_eq_rhs_) { \
+            error_msg = std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
+                        ": EXPECT_EQ failed: " + std::to_string(expect_eq_lhs_) + \
+                        " != " + std::to_string(expect_eq_rhs_); \
+            return false; \
+        } \
+    } while(0)
+
+// Fails the enclosing test when `a` and `b` compare equal. Stores
+// "<file>:<line>: EXPECT_NE failed: values are equal" in error_msg and
+// returns false from the enclosing function. The values themselves are not
+// included in the message.
+#define EXPECT_NE(a, b) \
+    do { \
+        if ((a) == (b)) { \
+            error_msg = std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
+                        ": EXPECT_NE failed: values are equal"; \
+            return false; \
+        } \
+    } while(0)
+
+// Fails the enclosing test when `haystack` (a std::string) does not contain
+// `needle`. Stores "<file>:<line>: EXPECT_CONTAINS failed: '<haystack>' does
+// not contain '<needle>'" in error_msg and returns false from the enclosing
+// function.
+// Each argument is evaluated exactly once, so expressions with side effects
+// or expensive calls are not repeated while building the message.
+#define EXPECT_CONTAINS(haystack, needle) \
+    do { \
+        const auto& expect_contains_haystack_ = (haystack); \
+        const auto& expect_contains_needle_ = (needle); \
+        if (expect_contains_haystack_.find(expect_contains_needle_) == std::string::npos) { \
+            error_msg = std::string(__FILE__) + ":" + std::to_string(__LINE__) + \
+                        ": EXPECT_CONTAINS failed: '" + expect_contains_haystack_ + \
+                        "' does not contain '" + expect_contains_needle_ + "'"; \
+            return false; \
+        } \
+    } while(0)
--- a/sandbox-test/test_results.json
+++ b/sandbox-test/test_results.json
@@ -0,0 +1,221 @@
+{
+  "name": "Lua Sandbox Security Tests",
+  "summary": {
+    "failed": 0,
+    "passed": 42,
+    "total": 42
+  },
+  "tests": [
+    {
+      "duration_ms": 0,
+      "name": "DangerousGlobalsRemoved",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "BytecodeRejected",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 2,
+      "name": "MemoryLimitEnforced",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "CPULimitEnforced",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "MetatableProtected",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "SafeOperationsWork",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "StringDumpRemoved",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "MemoryTracking",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "InstructionCounting",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "MultipleLoads",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "ErrorRecovery",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "NormalPermissionAutoGranted",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "DangerousPermissionRequiresGrant",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "SignaturePermissionSystemOnly",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 111,
+      "name": "UserGestureTracking",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "UndeclaredPermissionDenied",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "SystemAppGetsDangerousAuto",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "PermissionCategoryCheck",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "AuditLogBasic",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "AuditLogRingBuffer",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 14,
+      "name": "AuditLogThreadSafe",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "RateLimiterBasic",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "RateLimiterExhaustion",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 16,
+      "name": "RateLimiterRefill",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "RateLimiterAppIsolation",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "RateLimiterReset",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "RateLimiterNoConfig",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "PathRejectsTraversal",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "PathRejectsAbsolute",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "PathAcceptsValid",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "ModuleNameValidation",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "ModuleToPath",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "SafeRequireLoads",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "SafeRequireCaches",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "SafeRequireRejectsInvalid",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 107,
+      "name": "SetTimeoutFires",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 237,
+      "name": "SetIntervalFires",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 155,
+      "name": "ClearTimeoutCancels",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 158,
+      "name": "ClearIntervalCancels",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "TimerLimitEnforced",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 0,
+      "name": "ClearAppTimersCleanup",
+      "status": "passed"
+    },
+    {
+      "duration_ms": 62,
+      "name": "MinIntervalEnforced",
+      "status": "passed"
+    }
+  ],
+  "timestamp": "2026-01-18T13:19:38Z"
+}