init: restore source from @anthropic-ai/claude-code@2.1.88 sourcemap
This commit is contained in:
553
restored-src/node_modules/@ant/computer-use-mcp/src/deniedApps.ts
generated
vendored
Normal file
553
restored-src/node_modules/@ant/computer-use-mcp/src/deniedApps.ts
generated
vendored
Normal file
@@ -0,0 +1,553 @@
|
||||
/**
|
||||
* App category lookup for tiered CU permissions. Three categories land at a
|
||||
* restricted tier instead of `"full"`:
|
||||
*
|
||||
* - **browser** → `"read"` tier — visible in screenshots, NO interaction.
|
||||
* The model can read an already-open page but must use the Claude-in-Chrome
|
||||
* MCP for navigation/clicking/typing.
|
||||
* - **terminal** → `"click"` tier — visible + clickable, NO typing. The
|
||||
* model can click a Run button or scroll test output in an IDE, but can't
|
||||
* type into the integrated terminal. Use the Bash tool for shell work.
|
||||
* - **trading** → `"read"` tier — same restrictions as browsers, but no
|
||||
* CiC-MCP alternative exists. For platforms where a stray click can
|
||||
* execute a trade or send a message to a counterparty.
|
||||
*
|
||||
* Uncategorized apps default to `"full"`. See `getDefaultTierForApp`.
|
||||
*
|
||||
* Identification is two-layered:
|
||||
* 1. Bundle ID match (macOS-only; `InstalledApp.bundleId` is a
|
||||
* CFBundleIdentifier and meaningless on Windows). Fast, exact, the
|
||||
* primary mechanism while CU is darwin-gated.
|
||||
* 2. Display-name substring match (cross-platform fallback). Catches
|
||||
* unresolved requests ("Chrome" when Chrome isn't installed) AND will
|
||||
* be the primary mechanism on Windows/Linux where there's no bundle ID.
|
||||
* Windows-relevant names (PowerShell, cmd, Windows Terminal) are
|
||||
* included now so they activate the moment the darwin gate lifts.
|
||||
*
|
||||
* Keep this file **import-free** (like sentinelApps.ts) — the renderer may
|
||||
* import it via a package.json subpath export, and pulling in
|
||||
* `@modelcontextprotocol/sdk` (a devDep) through the index → mcpServer chain
|
||||
* would fail module resolution in Next.js. The `CuAppPermTier` type is
|
||||
* duplicated as a string literal below rather than imported.
|
||||
*/
|
||||
|
||||
/** App categories that are granted at a restricted tier rather than `"full"`. */
export type DeniedCategory =
  | "browser"
  | "terminal"
  | "trading";
|
||||
|
||||
/**
 * Translate a denied-app category into the permission tier it is pinned to.
 *
 * The return type is spelled out as a string-literal union because this file
 * must stay import-free; the authoritative definition is `CuAppPermTier` in
 * types.ts and the two have to be kept in sync by hand.
 *
 * The mapping is many-to-one: `"browser"` and `"trading"` both yield
 * `"read"`. Any UI copy that differs per category (for example the
 * browser-only "use CiC" hint) therefore has to branch on the category
 * itself, not on the tier returned here.
 *
 * @param category Category of the app, or `null` when it is uncategorized.
 * @returns `"read"` for browsers and trading apps, `"click"` for terminals,
 *   `"full"` for everything else (including `null`).
 */
export function categoryToTier(
  category: DeniedCategory | null,
): "read" | "click" | "full" {
  switch (category) {
    case "browser":
    case "trading":
      return "read";
    case "terminal":
      return "click";
    default:
      return "full";
  }
}
|
||||
|
||||
// ─── Bundle-ID deny sets (macOS) ─────────────────────────────────────────
|
||||
|
||||
/** macOS bundle identifiers of web browsers (category `"browser"`). */
const BROWSER_BUNDLE_IDS: ReadonlySet<string> = new Set<string>([
  // Safari (Apple)
  "com.apple.Safari",
  "com.apple.SafariTechnologyPreview",

  // Chrome release channels (Google)
  "com.google.Chrome",
  "com.google.Chrome.beta",
  "com.google.Chrome.dev",
  "com.google.Chrome.canary",

  // Edge release channels (Microsoft)
  "com.microsoft.edgemac",
  "com.microsoft.edgemac.Beta",
  "com.microsoft.edgemac.Dev",
  "com.microsoft.edgemac.Canary",

  // Firefox variants (Mozilla)
  "org.mozilla.firefox",
  "org.mozilla.firefoxdeveloperedition",
  "org.mozilla.nightly",

  // Other Chromium-based browsers
  "org.chromium.Chromium",
  "com.brave.Browser",
  "com.brave.Browser.beta",
  "com.brave.Browser.nightly",
  "com.operasoftware.Opera",
  "com.operasoftware.OperaGX",
  "com.operasoftware.OperaDeveloper",
  "com.vivaldi.Vivaldi",

  // The Browser Company
  "company.thebrowser.Browser", // Arc
  "company.thebrowser.dia", // Dia (agentic)

  // Privacy-focused browsers
  "org.torproject.torbrowser",
  "com.duckduckgo.macos.browser",
  "ru.yandex.desktop.yandex-browser",

  // Agentic / AI browsers: newer entrants with LLM integrations
  "ai.perplexity.comet",
  "com.sigmaos.sigmaos.macos", // SigmaOS

  // Miscellaneous WebKit-based
  "com.kagi.kagimacOS", // Orion
]);
|
||||
|
||||
/**
 * macOS bundle identifiers of terminals, IDEs that embed a terminal, and
 * script-execution surfaces (category `"terminal"`).
 *
 * This is a superset of `SHELL_ACCESS_BUNDLE_IDS` in sentinelApps.ts. Apps
 * listed here still reach the approval dialog, but at tier `"click"`, and
 * the sentinel warning is shown next to the tier badge.
 */
const TERMINAL_BUNDLE_IDS: ReadonlySet<string> = new Set<string>([
  // Stand-alone terminal emulators
  "com.apple.Terminal",
  "com.googlecode.iterm2",
  "dev.warp.Warp-Stable",
  "dev.warp.Warp-Beta",
  "com.github.wez.wezterm",
  "org.alacritty",
  "io.alacritty", // ID used before v0.11.0 (renamed 2022-07); kept for legacy installs
  "net.kovidgoyal.kitty",
  "co.zeit.hyper",
  "com.mitchellh.ghostty",
  "org.tabby",
  "com.termius-dmg.mac", // Termius

  // IDEs with an integrated terminal. Screenshot + click cannot tell
  // "typing in the editor" apart from "typing in the integrated terminal",
  // so the whole app is restricted.

  // VS Code family
  "com.microsoft.VSCode",
  "com.microsoft.VSCodeInsiders",
  "com.vscodium", // VSCodium
  "com.todesktop.230313mzl4w4u92", // Cursor
  "com.exafunction.windsurf", // Windsurf / Codeium
  "dev.zed.Zed",
  "dev.zed.Zed-Preview",

  // JetBrains family (every product ships an integrated terminal)
  "com.jetbrains.intellij",
  "com.jetbrains.intellij.ce",
  "com.jetbrains.pycharm",
  "com.jetbrains.pycharm.ce",
  "com.jetbrains.WebStorm",
  "com.jetbrains.CLion",
  "com.jetbrains.goland",
  "com.jetbrains.rubymine",
  "com.jetbrains.PhpStorm",
  "com.jetbrains.datagrip",
  "com.jetbrains.rider",
  "com.jetbrains.AppCode",
  "com.jetbrains.rustrover",
  "com.jetbrains.fleet",
  "com.google.android.studio", // Android Studio (JetBrains-based)

  // Other IDEs and editors
  // GitKraken has an integrated terminal panel. Listing it here also stops
  // the "kraken" trading substring from miscategorizing it: bundle ID wins.
  "com.axosoft.gitkraken",
  "com.sublimetext.4",
  "com.sublimetext.3",
  "org.vim.MacVim",
  "com.neovim.neovim",
  "org.gnu.Emacs",
  // Xcode used to be carved out at tier "full" for Interface Builder and the
  // simulator. That was reversed: both only need plain clicks, which tier
  // "click" still allows, while keyboard input to the integrated terminal
  // stays blocked.
  "com.apple.dt.Xcode",
  "org.eclipse.platform.ide",
  "org.netbeans.ide",
  "com.microsoft.visual-studio", // Visual Studio for Mac

  // AppleScript / automation runners carry the same threat as a terminal:
  // type(script) followed by key("cmd+r") executes arbitrary code. Added
  // after #28011 removed the osascript MCP server, which left CU as the only
  // tool-call route to AppleScript.
  "com.apple.ScriptEditor2",
  "com.apple.Automator",
  "com.apple.shortcuts",
]);
|
||||
|
||||
/**
 * macOS bundle identifiers of trading and crypto platforms (category
 * `"trading"`, tier `"read"`): the agent may look at balances and prices but
 * cannot click into an order, a transfer, or an IB chat.
 *
 * IDs are added from Homebrew cask `uninstall.quit` stanzas as they get
 * verified, so this set is intentionally sparse and the display-name
 * substring list is the primary check. Bloomberg Terminal is absent because,
 * per Bloomberg's FAQ, it has no native macOS build (web / Citrix only).
 *
 * Budgeting and accounting apps (Quicken, YNAB, QuickBooks, ...) are
 * deliberately NOT included and stay at tier `"full"`. The brokerage/crypto
 * risk model (one stray click can execute a trade) does not apply to them;
 * the Cowork system prompt carries the soft instruction never to execute
 * trades or move money on the user's behalf.
 */
const TRADING_BUNDLE_IDS: ReadonlySet<string> = new Set<string>([
  // Every ID below was verified via Homebrew quit/zap stanzas, mdls, or
  // electron-builder source.

  // Brokerage / charting
  "com.webull.desktop.v1", // Webull (direct download, Qt)
  "com.webull.trade.mac.v1", // Webull (Mac App Store)
  "com.tastytrade.desktop",
  "com.tradingview.tradingviewapp.desktop",
  "com.fidelity.activetrader", // Fidelity Trader+ (new)
  "com.fmr.activetrader", // Fidelity Active Trader Pro (legacy)
  // Interactive Brokers TWS ships in an install4j wrapper. The Homebrew quit
  // stanza is authoritative for this exact value, but install4j IDs can
  // drift between major versions; the "trader workstation" name substring
  // is the fallback.
  "com.install4j.5889-6375-8446-2021",

  // Crypto
  "com.binance.BinanceDesktop",
  "com.electron.exodus",
  // Electrum is built with PyInstaller and bundle_identifier=None, which
  // defaults to org.pythonmac.unspecified.<AppName>. Confirmed in the
  // spesmilo/electrum source and the Homebrew zap stanza; the
  // "org.electrum.electrum" value listed by IntuneBrew belongs to a fork.
  "org.pythonmac.unspecified.Electrum",
  "com.ledger.live",
  "io.trezor.TrezorSuite",

  // Matched by name substring only (no native macOS app): Schwab, E*TRADE,
  // TradeStation, Robinhood, NinjaTrader, Coinbase, Kraken, Bloomberg.
  // thinkorswim's install4j ID drifts per install, so substring matching is
  // the safer option for it as well.
]);
|
||||
|
||||
// ─── Policy-deny (not a tier — cannot be granted at all) ─────────────────
|
||||
//
|
||||
// Streaming / ebook / music apps and a handful of publisher apps. These
|
||||
// are auto-denied before the approval dialog — no tier can be granted.
|
||||
// Rationale is copyright / content-control (the agent has no legitimate
|
||||
// need to screenshot Netflix or click Play on Spotify).
|
||||
//
|
||||
// Sourced from the ACP CU-apps blocklist xlsx ("Full block" tab). See
|
||||
// /tmp/extract_cu_blocklist.py for the extraction script.
|
||||
|
||||
const POLICY_DENIED_BUNDLE_IDS: ReadonlySet<string> = new Set<string>([
  // Every ID below was verified via Homebrew quit/zap stanzas, mdls on
  // /System/Applications, or IntuneBrew.

  // Apple built-in media apps
  "com.apple.TV",
  "com.apple.Music",
  "com.apple.iBooksX",
  "com.apple.podcasts",

  // Music / podcasts
  "com.spotify.client",
  "com.amazon.music",
  "com.tidal.desktop",
  "com.deezer.deezer-desktop",
  "com.pandora.desktop",
  "com.electron.pocket-casts", // Pocket Casts, direct-download Electron wrapper
  "au.com.shiftyjelly.PocketCasts", // Pocket Casts, Mac App Store

  // Video
  "tv.plex.desktop",
  "tv.plex.htpc",
  "tv.plex.plexamp",
  "com.amazon.aiv.AIVApp", // Prime Video (iOS-on-Apple-Silicon)

  // Ebooks
  "net.kovidgoyal.calibre",
  "com.amazon.Kindle", // legacy desktop app, discontinued
  "com.amazon.Lassen", // current Mac App Store build (iOS-on-Mac)
  "com.kobo.desktop.Kobo",

  // No bundle ID exists for these because there is no native macOS app and
  // their iOS apps do not opt into iPad-on-Mac: Netflix, Disney+, Hulu,
  // HBO Max, Peacock, Paramount+, YouTube, Crunchyroll, Tubi, Vudu, Audible,
  // Reddit, NYTimes. They can only be caught by display name, and only where
  // the name is distinctive enough to appear in the substring list.
]);
|
||||
|
||||
/**
 * Lowercase fragments matched (via `.includes()`) against the lowercased
 * display name to detect policy-denied apps when no bundle ID matches.
 */
const POLICY_DENIED_NAME_SUBSTRINGS: ReadonlyArray<string> = [
  // Video streaming
  "netflix",
  "disney+",
  "hulu",
  "prime video",
  "apple tv",
  "peacock",
  "paramount+",
  // "plex" is intentionally absent: it is too generic and would match
  // "Perplexity". Plex is covered by the tv.plex.* bundle IDs on macOS.
  "tubi",
  "crunchyroll",
  "vudu",

  // E-readers / audiobooks
  "kindle",
  "apple books",
  "kobo",
  "play books",
  "calibre",
  "libby",
  "readium",
  "audible",
  "libro.fm",
  "speechify",

  // Music
  "spotify",
  "apple music",
  "amazon music",
  "youtube music",
  "tidal",
  "deezer",
  "pandora",
  "pocket casts",

  // Publisher / social apps (same blocklist tab)
  "naver",
  "reddit",
  "sony music",
  "vegas pro",
  "pitchfork",
  "economist",
  "nytimes",

  // Left out because the name is too generic for substring matching and a
  // bundle ID is needed instead: HBO Max / Max, YouTube (non-Music), Nook,
  // Sony Catalyst, Wired.
];
|
||||
|
||||
/**
 * Decide whether an app is denied by built-in policy.
 *
 * This differs from `userDeniedBundleIds` (a per-user Settings list): the
 * policy list is compiled into the build. `buildAccessRequest` removes such
 * apps before the approval dialog, attaches "blocked by policy" guidance, and
 * the agent is told not to retry.
 *
 * @param bundleId Resolved macOS bundle ID, or `undefined` if unresolved.
 * @param displayName App name as requested/displayed; compared lowercased.
 * @returns `true` if the bundle ID is in the policy set or the display name
 *   contains any policy substring.
 */
export function isPolicyDenied(
  bundleId: string | undefined,
  displayName: string,
): boolean {
  if (bundleId) {
    if (POLICY_DENIED_BUNDLE_IDS.has(bundleId)) {
      return true;
    }
  }
  const haystack = displayName.toLowerCase();
  return POLICY_DENIED_NAME_SUBSTRINGS.some((fragment) =>
    haystack.includes(fragment),
  );
}
|
||||
|
||||
/**
 * Exact bundle-ID lookup against the three category sets.
 *
 * @param bundleId macOS bundle identifier to classify.
 * @returns The matching category (browser, then terminal, then trading is
 *   the check order), or `null` if the ID is in none of the sets.
 */
export function getDeniedCategory(bundleId: string): DeniedCategory | null {
  const lookupOrder: ReadonlyArray<
    readonly [ReadonlySet<string>, DeniedCategory]
  > = [
    [BROWSER_BUNDLE_IDS, "browser"],
    [TERMINAL_BUNDLE_IDS, "terminal"],
    [TRADING_BUNDLE_IDS, "trading"],
  ];
  for (const [ids, category] of lookupOrder) {
    if (ids.has(bundleId)) {
      return category;
    }
  }
  return null;
}
|
||||
|
||||
// ─── Display-name fallback (cross-platform) ──────────────────────────────
|
||||
|
||||
/**
 * Lowercase fragments looked for inside the requested display name
 * (`name.toLowerCase().includes(fragment)`). The name lists exist to catch:
 *   - unresolved requests (app not installed, Spotlight miss), and
 *   - future Windows/Linux support, where a bundle ID is meaningless.
 *
 * Order inside a list has no effect on the result: any hit in a list yields
 * that list's category. Entries are simply grouped for readability.
 */
const BROWSER_NAME_SUBSTRINGS: ReadonlyArray<string> = [
  "safari",
  "chrome",
  "firefox",
  "microsoft edge",
  "brave",
  "opera",
  "vivaldi",
  "chromium",
  // Arc and Dia: the canonical display names are just "Arc" / "Dia", which
  // are too short for substring matching (they would hit "Arcade" or
  // "Diagram"). On macOS they are covered by bundle ID. The "... browser"
  // entries here only catch natural-language phrasings such as
  // "the arc browser", NOT the canonical short name.
  "arc browser",
  "tor browser",
  "duckduckgo",
  "yandex",
  "orion browser",

  // Agentic / AI browsers
  // "comet" is Perplexity's browser. The bare word risks false positives,
  // but it is kept for now because "comet" is rare in app names.
  "comet",
  "sigmaos",
  "dia browser",
];
|
||||
|
||||
/**
 * Lowercase display-name fragments for terminals, shells, IDEs with an
 * integrated terminal, and script runners (category `"terminal"`).
 */
const TERMINAL_NAME_SUBSTRINGS: ReadonlyArray<string> = [
  // Terminal emulators (macOS / cross-platform)
  "terminal", // hits "Terminal" and "Windows Terminal"; iTerm needs its own entry
  "iterm",
  "wezterm",
  "alacritty",
  "kitty",
  "ghostty",
  "tabby",
  "termius",

  // AppleScript runners (same rationale as in the bundle-ID set).
  // "shortcuts" is left out: many apps have "shortcuts" in their name, so it
  // is matched by bundle ID only.
  "script editor",
  "automator",

  // "warp" and "hyper" are also left out as too generic: they would
  // false-positive on names like "Warpaint" or "Hyperion". On macOS they are
  // covered by bundle ID (dev.warp.Warp-Stable, co.zeit.hyper); exe-name
  // matching can be added when Windows CU ships.

  // Windows shells (take effect once the darwin gate lifts)
  "powershell",
  "cmd.exe",
  "command prompt",
  "git bash",
  "conemu",
  "cmder",

  // IDEs: VS Code family
  "visual studio code",
  "visual studio", // Visual Studio for Mac and for Windows
  "vscode",
  "vs code",
  "vscodium",
  "cursor", // Cursor IDE; the word is generic but the IDE is the only common app
  "windsurf",
  // Zed is left out: its display name is just "Zed", too short to match
  // safely. Covered by bundle ID (dev.zed.Zed) on macOS.

  // IDEs: JetBrains family
  "intellij",
  "pycharm",
  "webstorm",
  "clion",
  "goland",
  "rubymine",
  "phpstorm",
  "datagrip",
  "rider",
  "appcode",
  "rustrover",
  "fleet",
  "android studio",

  // Other IDEs and editors
  "sublime text",
  "macvim",
  "neovim",
  "emacs",
  "xcode",
  "eclipse",
  "netbeans",
];
|
||||
|
||||
/**
 * Lowercase display-name fragments for trading and crypto platforms
 * (category `"trading"`).
 */
const TRADING_NAME_SUBSTRINGS: ReadonlyArray<string> = [
  // Brokerage apps, taken from the "Read Only" tab of the ACP CU-apps
  // blocklist xlsx. Only proper nouns are listed, which makes substring
  // matching safe; generic names (IG, Delta, HTX) are skipped until a bundle
  // ID has been verified for them.
  "bloomberg",
  "ameritrade",
  "thinkorswim",
  "schwab",
  "fidelity",
  "e*trade",
  "interactive brokers",
  "trader workstation", // Interactive Brokers TWS
  "tradestation",
  "webull",
  "robinhood",
  "tastytrade",
  "ninjatrader",
  "tradingview",
  "moomoo",
  "tradezero",
  "prorealtime",
  "plus500",
  "saxotrader",
  "oanda",
  "metatrader",
  "forex.com",
  "avaoptions",
  "ctrader",
  "jforex",
  "iq option",
  "olymp trade",
  "binomo",
  "pocket option",
  "raceoption",
  "expertoption",
  "quotex",
  "naga",
  "morgan stanley",
  "ubs neo",
  "eikon", // Thomson Reuters / LSEG Workspace

  // Crypto: exchanges, wallets, portfolio trackers
  "coinbase",
  "kraken",
  "binance",
  "okx",
  "bybit",
  // "gate.io" is left out: the ".io" suffix is common in app names (e.g.
  // "Draw.io"), so it needs bundle-ID matching once verified.
  "phemex",
  "stormgain",
  "crypto.com",
  // "exodus" is left out: it is a common noun and would match unrelated
  // apps and games. Needs bundle-ID matching once verified.
  "electrum",
  "ledger live",
  "trezor",
  "guarda",
  "atomic wallet",
  "bitpay",
  "bisq",
  "koinly",
  "cointracker",
  "blockfi",
  "stripe cli",

  // Crypto games / metaverse (same trade-execution risk model)
  "decentraland",
  "axie infinity",
  "gods unchained",
];
|
||||
|
||||
/**
 * Classify an app purely from its display name.
 *
 * Used when bundle-ID resolution produced nothing (`resolved === undefined`)
 * or when the bundle ID was not in any deny set. Matching is
 * case-insensitive and by substring, so `"Google Chrome"`, `"chrome"` and
 * `"Chrome Canary"` all hit the `"chrome"` entry.
 *
 * @param name Display name to classify.
 * @returns Category of the first list containing a matching fragment, or
 *   `null` if no list matches.
 */
export function getDeniedCategoryByDisplayName(
  name: string,
): DeniedCategory | null {
  const haystack = name.toLowerCase();
  // List order matters. Trading goes first: it holds only proper nouns and
  // is the most specific. "Bloomberg Terminal" contains "terminal" and would
  // be classified as a terminal if that list were consulted earlier.
  const listsInPriorityOrder: ReadonlyArray<
    readonly [DeniedCategory, readonly string[]]
  > = [
    ["trading", TRADING_NAME_SUBSTRINGS],
    ["browser", BROWSER_NAME_SUBSTRINGS],
    ["terminal", TERMINAL_NAME_SUBSTRINGS],
  ];
  for (const [category, fragments] of listsInPriorityOrder) {
    if (fragments.some((fragment) => haystack.includes(fragment))) {
      return category;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Two-layer category lookup: exact bundle-ID match first, display-name
 * substring match as the fallback. Tool-call handlers should call this
 * rather than the individual lookups.
 *
 * @param bundleId Resolved bundle ID. May be `undefined` for an unresolved
 *   request (the model asked for an app that is not installed or that
 *   Spotlight did not find); then only the display-name check runs.
 * @param displayName App name used for the fallback match.
 * @returns The app's category, or `null` if neither layer matches.
 */
export function getDeniedCategoryForApp(
  bundleId: string | undefined,
  displayName: string,
): DeniedCategory | null {
  const categoryById = bundleId ? getDeniedCategory(bundleId) : null;
  return categoryById ?? getDeniedCategoryByDisplayName(displayName);
}
|
||||
|
||||
/**
 * Tier proposed for an app at grant time: `getDeniedCategoryForApp` followed
 * by `categoryToTier`. Browsers and trading apps get `"read"`, terminals and
 * IDEs get `"click"`, everything else gets `"full"`.
 *
 * `buildAccessRequest` calls this to fill `ResolvedAppRequest.proposedTier`
 * before the approval dialog is shown.
 *
 * @param bundleId Resolved bundle ID, or `undefined` if unresolved.
 * @param displayName App name used for the display-name fallback.
 * @returns The default permission tier for the app.
 */
export function getDefaultTierForApp(
  bundleId: string | undefined,
  displayName: string,
): "read" | "click" | "full" {
  const category = getDeniedCategoryForApp(bundleId, displayName);
  return categoryToTier(category);
}
|
||||
|
||||
/**
 * Handle on the otherwise module-private lookup tables.
 * NOTE(review): presumably consumed only by this module's unit tests; confirm
 * before relying on it elsewhere.
 */
export const _test = {
  // Bundle-ID sets
  BROWSER_BUNDLE_IDS,
  TERMINAL_BUNDLE_IDS,
  TRADING_BUNDLE_IDS,
  POLICY_DENIED_BUNDLE_IDS,
  // Display-name substring lists
  BROWSER_NAME_SUBSTRINGS,
  TERMINAL_NAME_SUBSTRINGS,
  TRADING_NAME_SUBSTRINGS,
  POLICY_DENIED_NAME_SUBSTRINGS,
};
|
||||
108
restored-src/node_modules/@ant/computer-use-mcp/src/imageResize.ts
generated
vendored
Normal file
108
restored-src/node_modules/@ant/computer-use-mcp/src/imageResize.ts
generated
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
/**
|
||||
* Port of the API's image transcoder target-size algorithm. Pre-sizing
|
||||
* screenshots to this function's output means the API's early-return fires
|
||||
* (tokens ≤ max) and the image is NOT resized server-side — so the model
|
||||
* sees exactly the dimensions in `ScreenshotResult.width/height` and
|
||||
* `scaleCoord` stays coherent.
|
||||
*
|
||||
* Rust reference: api/api/image_transcoder/rust_transcoder/src/utils/resize.rs
|
||||
* Sibling TS port: apps/claude-browser-use/src/utils/imageResize.ts (identical
|
||||
* algorithm, lives in the Chrome extension tree — not a shared package).
|
||||
*
|
||||
* See COORDINATES.md for why this matters for click accuracy.
|
||||
*/
|
||||
|
||||
/** Tunables for the target-size computation in `targetImageSize`. */
export interface ResizeParams {
  /** Pixels per token along one axis; an axis of `px` pixels costs about `px / pxPerToken` tokens, rounded up. */
  pxPerToken: number;
  /** Upper bound, in pixels, on either edge of the output image. */
  maxTargetPx: number;
  /** Upper bound on (tokens across) × (tokens down) for the output image. */
  maxTargetTokens: number;
}
|
||||
|
||||
/**
 * Production values, kept identical to `resize.rs:160-164` and to Chrome's
 * `CDPService.ts:638-642`. The vision encoder works on 28px tiles, and the
 * number 1568 serves twice: as the long-edge cap in pixels (56 tiles) and
 * as the token budget.
 */
export const API_RESIZE_PARAMS: ResizeParams = {
  pxPerToken: 28, // tile edge, in pixels
  maxTargetPx: 1568, // long-edge cap: 56 tiles
  maxTargetTokens: 1568, // token budget
};
|
||||
|
||||
/**
 * Tokens needed to cover `px` pixels along one axis.
 *
 * Equals `ceil(px / pxPerToken)` for integer `px`; it is written as
 * `floor((px - 1) / pxPerToken) + 1` to mirror the integer ceil-div used in
 * resize.rs:74-76.
 *
 * @param px Edge length in pixels.
 * @param pxPerToken Pixels covered by one token along an axis.
 * @returns Token count for that edge.
 */
export function nTokensForPx(px: number, pxPerToken: number): number {
  return Math.floor((px - 1) / pxPerToken) + 1;
}

/** Token cost of a `width` × `height` image: tokens across times tokens down. */
function nTokensForImg(
  width: number,
  height: number,
  pxPerToken: number,
): number {
  return nTokensForPx(width, pxPerToken) * nTokensForPx(height, pxPerToken);
}

/**
 * Binary-search along the width dimension for the largest image that:
 *   - preserves the input aspect ratio,
 *   - has its long edge ≤ `maxTargetPx`, and
 *   - has ceil(w/pxPerToken) × ceil(h/pxPerToken) ≤ `maxTargetTokens`.
 *
 * If the input already satisfies all three it is returned unchanged.
 *
 * Why the long-edge cap alone is not enough: on displays squarer than 16:9,
 * 1568×1014 (MBP 16" aspect ratio) is 56×37 = 2072 tokens, which is over
 * budget, so the server resizes it to 1372×887. The model then clicks in
 * 1372-space while `scaleCoord` assumed 1568-space, giving ~14% coordinate
 * error.
 *
 * For integer dimensions the search takes the same steps as the reference in
 * resize.rs:91-155. In addition, this version is guaranteed to terminate:
 *   - fractional widths are handled by starting the upper bound at
 *     `Math.ceil(width)`, so both bounds stay integers;
 *   - the loop runs while the bounds are more than one apart instead of
 *     waiting for them to be exactly one apart, so a 1-pixel-wide input that
 *     fails the check (possible only with degenerate params) yields a
 *     1-pixel-wide result rather than spinning;
 *   - non-finite dimensions are rejected up front.
 *
 * @param width Source width in pixels.
 * @param height Source height in pixels.
 * @param params Pixel/token limits to satisfy.
 * @returns `[width, height]` of the target size.
 * @throws RangeError If `width` or `height` is NaN or infinite.
 */
export function targetImageSize(
  width: number,
  height: number,
  params: ResizeParams,
): [number, number] {
  const { pxPerToken, maxTargetPx, maxTargetTokens } = params;

  // NaN / Infinity can never converge in the search below.
  if (!Number.isFinite(width) || !Number.isFinite(height)) {
    throw new RangeError(
      `targetImageSize: dimensions must be finite numbers, got ${width}x${height}`,
    );
  }

  if (
    width <= maxTargetPx &&
    height <= maxTargetPx &&
    nTokensForImg(width, height, pxPerToken) <= maxTargetTokens
  ) {
    return [width, height];
  }

  // Normalize to landscape for the search; transpose the result back.
  if (height > width) {
    const [w, h] = targetImageSize(height, width, params);
    return [h, w];
  }

  const aspectRatio = width / height;

  // Loop invariant: lowerBoundWidth is treated as valid and upperBoundWidth
  // as invalid. Both are integers, so the gap shrinks every iteration
  // (~12 iterations for a 4000px image).
  let upperBoundWidth = Math.ceil(width);
  let lowerBoundWidth = 1;

  while (lowerBoundWidth + 1 < upperBoundWidth) {
    const middleWidth = Math.floor((lowerBoundWidth + upperBoundWidth) / 2);
    const middleHeight = Math.max(Math.round(middleWidth / aspectRatio), 1);

    if (
      middleWidth <= maxTargetPx &&
      nTokensForImg(middleWidth, middleHeight, pxPerToken) <= maxTargetTokens
    ) {
      lowerBoundWidth = middleWidth;
    } else {
      upperBoundWidth = middleWidth;
    }
  }

  // Height follows from the aspect ratio and is clamped to at least 1px.
  return [
    lowerBoundWidth,
    Math.max(Math.round(lowerBoundWidth / aspectRatio), 1),
  ];
}
|
||||
69
restored-src/node_modules/@ant/computer-use-mcp/src/index.ts
generated
vendored
Normal file
69
restored-src/node_modules/@ant/computer-use-mcp/src/index.ts
generated
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
export type {
|
||||
ComputerExecutor,
|
||||
DisplayGeometry,
|
||||
FrontmostApp,
|
||||
InstalledApp,
|
||||
ResolvePrepareCaptureResult,
|
||||
RunningApp,
|
||||
ScreenshotResult,
|
||||
} from "./executor.js";
|
||||
|
||||
export type {
|
||||
AppGrant,
|
||||
CuAppPermTier,
|
||||
ComputerUseHostAdapter,
|
||||
ComputerUseOverrides,
|
||||
ComputerUseSessionContext,
|
||||
CoordinateMode,
|
||||
CuGrantFlags,
|
||||
CuPermissionRequest,
|
||||
CuPermissionResponse,
|
||||
CuSubGates,
|
||||
CuTeachPermissionRequest,
|
||||
Logger,
|
||||
ResolvedAppRequest,
|
||||
ScreenshotDims,
|
||||
TeachStepRequest,
|
||||
TeachStepResult,
|
||||
} from "./types.js";
|
||||
|
||||
export { DEFAULT_GRANT_FLAGS } from "./types.js";
|
||||
|
||||
export {
|
||||
SENTINEL_BUNDLE_IDS,
|
||||
getSentinelCategory,
|
||||
} from "./sentinelApps.js";
|
||||
export type { SentinelCategory } from "./sentinelApps.js";
|
||||
|
||||
export {
|
||||
categoryToTier,
|
||||
getDefaultTierForApp,
|
||||
getDeniedCategory,
|
||||
getDeniedCategoryByDisplayName,
|
||||
getDeniedCategoryForApp,
|
||||
isPolicyDenied,
|
||||
} from "./deniedApps.js";
|
||||
export type { DeniedCategory } from "./deniedApps.js";
|
||||
|
||||
export { isSystemKeyCombo, normalizeKeySequence } from "./keyBlocklist.js";
|
||||
|
||||
export { ALL_SUB_GATES_OFF, ALL_SUB_GATES_ON } from "./subGates.js";
|
||||
|
||||
export { API_RESIZE_PARAMS, targetImageSize } from "./imageResize.js";
|
||||
export type { ResizeParams } from "./imageResize.js";
|
||||
|
||||
export { defersLockAcquire, handleToolCall } from "./toolCalls.js";
|
||||
export type {
|
||||
CuCallTelemetry,
|
||||
CuCallToolResult,
|
||||
CuErrorKind,
|
||||
} from "./toolCalls.js";
|
||||
|
||||
export { bindSessionContext, createComputerUseMcpServer } from "./mcpServer.js";
|
||||
export { buildComputerUseTools } from "./tools.js";
|
||||
|
||||
export {
|
||||
comparePixelAtLocation,
|
||||
validateClickTarget,
|
||||
} from "./pixelCompare.js";
|
||||
export type { CropRawPatchFn, PixelCompareResult } from "./pixelCompare.js";
|
||||
153
restored-src/node_modules/@ant/computer-use-mcp/src/keyBlocklist.ts
generated
vendored
Normal file
153
restored-src/node_modules/@ant/computer-use-mcp/src/keyBlocklist.ts
generated
vendored
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Key combos that cross app boundaries or terminate processes. Gated behind
|
||||
* the `systemKeyCombos` grant flag. When that flag is off, the `key` tool
|
||||
* rejects these and returns a tool error telling the model to request the
|
||||
* flag; all other combos work normally.
|
||||
*
|
||||
* Matching is canonicalized: every modifier alias the Rust executor accepts
|
||||
* collapses to one canonical name. Without this, `command+q` / `meta+q` /
|
||||
* `cmd+alt+escape` bypass the gate — see keyBlocklist.test.ts for the three
|
||||
* bypass forms and the Rust parity check that catches future alias drift.
|
||||
*/
|
||||
|
||||
/**
 * Every modifier alias enigo_wrap.rs accepts (two copies: :351-359, :564-572),
 * mapped to one canonical name per Key:: variant. Left/right variants
 * collapse, because the blocklist does not care which Ctrl was pressed.
 *
 * Canonical names are Rust's own variant names, lowercased, and blocklist
 * entries use ONLY these. "meta" reads oddly for Cmd+Q but it is honest:
 * Rust sends Key::Meta, which is Cmd on darwin and Win on win32.
 *
 * The table is built on a null prototype so that indexing it with an
 * arbitrary token only ever sees the aliases listed here. With an ordinary
 * object literal, `CANONICAL_MODIFIER["constructor"]` or
 * `CANONICAL_MODIFIER["__proto__"]` would return inherited values and the
 * token would be mistaken for a modifier.
 */
const CANONICAL_MODIFIER: Readonly<Record<string, string>> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    // Key::Meta: "meta"|"super"|"command"|"cmd"|"windows"|"win"
    meta: "meta",
    super: "meta",
    command: "meta",
    cmd: "meta",
    windows: "meta",
    win: "meta",
    // Key::Control + LControl + RControl
    ctrl: "ctrl",
    control: "ctrl",
    lctrl: "ctrl",
    lcontrol: "ctrl",
    rctrl: "ctrl",
    rcontrol: "ctrl",
    // Key::Shift + LShift + RShift
    shift: "shift",
    lshift: "shift",
    rshift: "shift",
    // Key::Alt and Key::Option are distinct Rust variants but the same
    // keycode on darwin (kVK_Option). Collapse them: cmd+alt+escape and
    // cmd+option+escape both open Force Quit.
    alt: "alt",
    option: "alt",
  },
);
|
||||
|
||||
/** Position in this list defines the sort rank of a canonical modifier: ctrl, then alt, then shift, then meta. */
const MODIFIER_ORDER = [
  "ctrl",
  "alt",
  "shift",
  "meta",
];
|
||||
|
||||
/**
 * Blocked shortcuts on macOS, stored in canonical form only: each modifier
 * is a CANONICAL_MODIFIER *value* (never an alias key), modifiers appear in
 * MODIFIER_ORDER, and the non-modifier key comes last. A self-consistency
 * test enforces this shape.
 */
const BLOCKED_DARWIN = new Set<string>([
  "meta+q", // Cmd+Q: quit the frontmost app
  "shift+meta+q", // Cmd+Shift+Q: log out
  "alt+meta+escape", // Cmd+Option+Esc: Force Quit dialog
  "meta+tab", // Cmd+Tab: app switcher
  "meta+space", // Cmd+Space: Spotlight
  "ctrl+meta+q", // Ctrl+Cmd+Q: lock screen
]);
|
||||
|
||||
/** Blocked shortcuts on Windows, written with canonical modifier names in sort order. */
const BLOCKED_WIN32 = new Set<string>([
  "ctrl+alt+delete", // Secure Attention Sequence
  "alt+f4", // close window
  "alt+tab", // window switcher
  "meta+l", // Win+L: lock
  "meta+d", // Win+D: show desktop
]);
|
||||
|
||||
/**
 * Split a "+"-separated key sequence into canonical modifiers and
 * non-modifier keys.
 *
 * The sequence is lowercased, split on "+", and each part is trimmed; empty
 * parts are dropped. Parts that are modifier aliases are mapped to their
 * canonical name, de-duplicated, and sorted by MODIFIER_ORDER; every other
 * part is returned in `keys` in its original order.
 *
 * Shared by normalizeKeySequence (which joins the result for display) and
 * isSystemKeyCombo (which checks mods + each key to catch the cmd+q+a suffix
 * bypass).
 *
 * @param seq Key sequence such as "Cmd + Shift + Q".
 * @returns `mods`: unique canonical modifiers in sort order; `keys`: the
 *   remaining tokens.
 */
function partitionKeys(seq: string): { mods: string[]; keys: string[] } {
  const tokens = seq
    .toLowerCase()
    .split("+")
    .map((part) => part.trim())
    .filter(Boolean);
  const mods: string[] = [];
  const keys: string[] = [];
  for (const token of tokens) {
    // Only the table's OWN entries count as modifiers. A bare
    // `CANONICAL_MODIFIER[token]` lookup also sees members inherited from
    // Object.prototype, so a token like "constructor" or "__proto__" would
    // be pushed into `mods` as a non-string value. That would corrupt the
    // modifier prefix and let e.g. "cmd+constructor+q" slip past the
    // "meta+q" blocklist entry.
    const canonical = Object.prototype.hasOwnProperty.call(
      CANONICAL_MODIFIER,
      token,
    )
      ? CANONICAL_MODIFIER[token]
      : undefined;
    if (canonical !== undefined) {
      mods.push(canonical);
    } else {
      keys.push(token);
    }
  }
  // Dedupe: "cmd+command+q" → "meta+q", not "meta+meta+q".
  const uniqueMods = [...new Set(mods)];
  uniqueMods.sort(
    (a, b) => MODIFIER_ORDER.indexOf(a) - MODIFIER_ORDER.indexOf(b),
  );
  return { mods: uniqueMods, keys };
}
|
||||
|
||||
/**
 * Canonicalize a key sequence, e.g. "Cmd + Shift + Q" → "shift+meta+q".
 *
 * The work is done by partitionKeys (lowercase, trim, alias → canonical,
 * dedupe and sort modifiers); this function joins the sorted modifiers and
 * then the non-modifier keys with "+".
 */
export function normalizeKeySequence(seq: string): string {
  const parts = partitionKeys(seq);
  return parts.mods.concat(parts.keys).join("+");
}
|
||||
|
||||
/**
 * Report whether sending `seq` would fire a blocked OS shortcut.
 *
 * The modifiers are paired with EACH non-modifier key individually rather
 * than matching the joined string once. `cmd+q+a` → Rust presses Cmd, then Q
 * (Cmd+Q fires here), then A. An exact match against "meta+q+a" would miss
 * that; testing "meta+q" and "meta+a" separately catches the Q.
 *
 * A modifiers-only sequence ("cmd+shift") is looked up as-is. No blocklist
 * entry is modifier-only, so that lookup is always false — which is what the
 * click-modifier case (`left_click(text="cmd")`) needs.
 *
 * @param seq Key sequence in any alias/case/spacing accepted by partitionKeys.
 * @param platform Selects the blocklist: BLOCKED_DARWIN or BLOCKED_WIN32.
 * @returns true when the modifiers plus any single key form a blocked entry.
 */
export function isSystemKeyCombo(
  seq: string,
  platform: "darwin" | "win32",
): boolean {
  const blocked = platform === "darwin" ? BLOCKED_DARWIN : BLOCKED_WIN32;
  const { mods, keys } = partitionKeys(seq);
  const modChord = mods.join("+");

  // No non-modifier key: check the chord itself so every call reaches a
  // .has(), even though it can never match today.
  if (keys.length === 0) {
    return blocked.has(modChord);
  }

  // Any single hit blocks the whole sequence.
  return keys.some((key) =>
    blocked.has(mods.length > 0 ? `${modChord}+${key}` : key),
  );
}
|
||||
|
||||
/**
 * Module internals re-exported under one name — presumably for the
 * self-consistency test that checks the blocklists against
 * CANONICAL_MODIFIER and MODIFIER_ORDER.
 */
export const _test = {
  CANONICAL_MODIFIER: CANONICAL_MODIFIER,
  BLOCKED_DARWIN: BLOCKED_DARWIN,
  BLOCKED_WIN32: BLOCKED_WIN32,
  MODIFIER_ORDER: MODIFIER_ORDER,
};
|
||||
313
restored-src/node_modules/@ant/computer-use-mcp/src/mcpServer.ts
generated
vendored
Normal file
313
restored-src/node_modules/@ant/computer-use-mcp/src/mcpServer.ts
generated
vendored
Normal file
@@ -0,0 +1,313 @@
|
||||
/**
|
||||
* MCP server factory + session-context binder.
|
||||
*
|
||||
* Two entry points:
|
||||
*
|
||||
* `bindSessionContext` — the wrapper closure. Takes a `ComputerUseSessionContext`
|
||||
* (getters + callbacks backed by host session state), returns a dispatcher.
|
||||
* Reusable by both the MCP CallTool handler here AND Cowork's
|
||||
* `InternalServerDefinition.handleToolCall` (which doesn't go through MCP).
|
||||
* This replaces the duplicated wrapper closures in apps/desktop/…/serverDef.ts
|
||||
* and the Claude Code CLI's CU host wrapper — both did the same thing: build `ComputerUseOverrides`
|
||||
* fresh from getters, call `handleToolCall`, stash screenshot, merge permissions.
|
||||
*
|
||||
* `createComputerUseMcpServer` — the Server object. When `context` is provided,
|
||||
* the CallTool handler is real (uses `bindSessionContext`). When not, it's the
|
||||
* legacy stub that returns a not-wired error. The tool-schema ListTools handler
|
||||
* is the same either way.
|
||||
*/
|
||||
|
||||
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
||||
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
|
||||
import {
|
||||
CallToolRequestSchema,
|
||||
ListToolsRequestSchema,
|
||||
} from "@modelcontextprotocol/sdk/types.js";
|
||||
|
||||
import type { ScreenshotResult } from "./executor.js";
|
||||
import type { CuCallToolResult } from "./toolCalls.js";
|
||||
import {
|
||||
defersLockAcquire,
|
||||
handleToolCall,
|
||||
resetMouseButtonHeld,
|
||||
} from "./toolCalls.js";
|
||||
import { buildComputerUseTools } from "./tools.js";
|
||||
import type {
|
||||
AppGrant,
|
||||
ComputerUseHostAdapter,
|
||||
ComputerUseOverrides,
|
||||
ComputerUseSessionContext,
|
||||
CoordinateMode,
|
||||
CuGrantFlags,
|
||||
CuPermissionResponse,
|
||||
} from "./types.js";
|
||||
import { DEFAULT_GRANT_FLAGS } from "./types.js";
|
||||
|
||||
/**
 * Fallback error text for a tool call rejected because another session holds
 * the computer-use lock. Used when the session context provides no
 * `formatLockHeldMessage`, or it yields null/undefined.
 */
const DEFAULT_LOCK_HELD_MESSAGE = [
  "Another Claude session is currently using the computer.",
  "Wait for that session to finish, or find a non-computer-use approach.",
].join(" ");
|
||||
|
||||
/**
 * Merge a permission response into the session's current grants.
 *
 * Apps: `existing` is kept as-is and in order; each entry of
 * `response.granted` is appended only if its bundleId has not been seen yet —
 * neither in `existing` nor earlier in the same response — so the result
 * holds at most one new grant per bundleId.
 *
 * Flags: defaults, then `existingFlags`, then only the response flags that
 * are exactly `true`. Truthy-only: a subsequent `request_access` that doesn't
 * request clipboard can't revoke an earlier clipboard grant — revocation
 * lives in a Settings page, not here.
 *
 * Same merge both hosts implemented independently today.
 *
 * @param existing Apps already allowed for the session (not mutated).
 * @param existingFlags Grant flags currently in effect.
 * @param response Result of the permission dialog.
 * @returns The merged app list and flags.
 */
function mergePermissionResponse(
  existing: readonly AppGrant[],
  existingFlags: CuGrantFlags,
  response: CuPermissionResponse,
): { apps: AppGrant[]; flags: CuGrantFlags } {
  const seen = new Set(existing.map((a) => a.bundleId));
  const apps = [...existing];
  for (const grant of response.granted) {
    if (seen.has(grant.bundleId)) continue;
    // Record the id so a repeat later in the same response is dropped too.
    seen.add(grant.bundleId);
    apps.push(grant);
  }

  const truthyFlags = Object.fromEntries(
    Object.entries(response.flags).filter(([, v]) => v === true),
  );
  const flags: CuGrantFlags = {
    ...DEFAULT_GRANT_FLAGS,
    ...existingFlags,
    ...truthyFlags,
  };
  return { apps, flags };
}
|
||||
|
||||
/**
 * Bind per-session state to a reusable tool dispatcher.
 *
 * Each call of the returned function runs: async lock gate → build
 * `ComputerUseOverrides` fresh from the `ctx` getters → `handleToolCall` →
 * stash the returned screenshot. The result is returned as-is, including the
 * piggybacked `screenshot` / `telemetry` fields.
 *
 * The last-screenshot blob is held in a closure cell here (not on `ctx`), so
 * hosts don't need to guarantee `ctx` object identity across calls — they
 * just need to hold onto the returned dispatcher. Cowork caches per
 * `InternalServerContext` in a WeakMap; the CLI host constructs once at
 * server creation.
 *
 * @param adapter Host adapter; supplies `logger` / `serverName` and is
 *   forwarded to `handleToolCall`.
 * @param coordinateMode Coordinate convention forwarded in the overrides.
 * @param ctx Session getters and callbacks backed by host session state.
 * @returns The dispatcher `(name, args) => Promise<CuCallToolResult>`.
 */
export function bindSessionContext(
  adapter: ComputerUseHostAdapter,
  coordinateMode: CoordinateMode,
  ctx: ComputerUseSessionContext,
): (name: string, args: unknown) => Promise<CuCallToolResult> {
  const { logger, serverName } = adapter;

  // Screenshot blob persists here across calls — NOT on `ctx`. Hosts hold
  // onto the returned dispatcher; that's the identity that matters.
  let lastScreenshot: ScreenshotResult | undefined;

  type PermissionRequest = Parameters<
    NonNullable<typeof ctx.onPermissionRequest>
  >[0];
  type TeachPermissionRequest = Parameters<
    NonNullable<typeof ctx.onTeachPermissionRequest>
  >[0];

  /** Error result for a call rejected because another session holds the lock. */
  const lockHeldResult = (text: string): CuCallToolResult => ({
    content: [{ type: "text", text }],
    isError: true,
    telemetry: { error_kind: "cu_lock_held" },
  });

  /** Ask the host for access, then publish the merged apps + flags. */
  const forwardPermissionRequest = async (
    req: PermissionRequest,
    signal: AbortSignal,
  ): Promise<CuPermissionResponse> => {
    const response = await ctx.onPermissionRequest!(req, signal);
    const merged = mergePermissionResponse(
      ctx.getAllowedApps(),
      ctx.getGrantFlags(),
      response,
    );
    logger.debug(
      `[${serverName}] permission result: granted=${response.granted.length} denied=${response.denied.length}`,
    );
    ctx.onAllowedAppsChanged?.(merged.apps, merged.flags);
    return response;
  };

  /** Teach-mode variant: merges apps only and leaves grant flags untouched. */
  const forwardTeachPermissionRequest = async (
    req: TeachPermissionRequest,
    signal: AbortSignal,
  ): Promise<CuPermissionResponse> => {
    const response = await ctx.onTeachPermissionRequest!(req, signal);
    logger.debug(
      `[${serverName}] teach permission result: granted=${response.granted.length} denied=${response.denied.length}`,
    );
    // Teach doesn't request grant flags — preserve existing.
    const merged = mergePermissionResponse(
      ctx.getAllowedApps(),
      ctx.getGrantFlags(),
      response,
    );
    ctx.onAllowedAppsChanged?.(merged.apps, {
      ...DEFAULT_GRANT_FLAGS,
      ...ctx.getGrantFlags(),
    });
    return response;
  };

  // Only expose a wrapper when the host supplied the matching callback.
  const wrapPermission = ctx.onPermissionRequest
    ? forwardPermissionRequest
    : undefined;
  const wrapTeachPermission = ctx.onTeachPermissionRequest
    ? forwardTeachPermissionRequest
    : undefined;

  return async (name, args) => {
    // ─── Async lock gate ─────────────────────────────────────────────────
    // Replaces the sync Gate-3 in `handleToolCall` — `checkCuLock: undefined`
    // is passed below so that gate no-ops. Hosts with cross-process locks
    // (O_EXCL file) await the real primitive here instead of pre-computing
    // and feeding a fake sync result.
    if (ctx.checkCuLock) {
      const lock = await ctx.checkCuLock();
      if (lock.holder !== undefined) {
        if (!lock.isSelf) {
          return lockHeldResult(
            ctx.formatLockHeldMessage?.(lock.holder) ??
              DEFAULT_LOCK_HELD_MESSAGE,
          );
        }
      } else if (!defersLockAcquire(name)) {
        await ctx.acquireCuLock?.();
        // Re-check: the awaits above yield the microtask queue, so another
        // session's check+acquire can interleave with ours. Hosts where
        // acquire is a no-op when already held (Cowork's CuLockManager) give
        // no signal that we lost — verify we're now the holder before
        // proceeding. The CLI's O_EXCL file lock would surface this as a
        // throw from acquire instead; the re-check is belt-and-suspenders
        // for that path too.
        const recheck = await ctx.checkCuLock();
        if (recheck.holder !== undefined && !recheck.isSelf) {
          return lockHeldResult(
            ctx.formatLockHeldMessage?.(recheck.holder) ??
              DEFAULT_LOCK_HELD_MESSAGE,
          );
        }
        // Fresh holder → any prior session's mouseButtonHeld is stale.
        // Mirrors what Gate-3 does on the acquire branch. Done after the
        // re-check so module state is only cleared when we actually won.
        resetMouseButtonHeld();
      }
    }

    // ─── Build overrides fresh ───────────────────────────────────────────
    // Blob-first; dims-fallback with base64:"" when the closure cell is
    // unset (cross-respawn). scaleCoord reads dims; pixelCompare sees "" →
    // isEmpty → skip.
    const dimsFallback = lastScreenshot
      ? undefined
      : ctx.getLastScreenshotDims?.();

    // Per-call AbortController for dialog dismissal. Aborted in `finally` —
    // if handleToolCall finishes (MCP timeout, throw) before the user
    // answers, the host's dialog handler sees the abort and tears down.
    const dialogAbort = new AbortController();

    const overrides: ComputerUseOverrides = {
      allowedApps: [...ctx.getAllowedApps()],
      grantFlags: ctx.getGrantFlags(),
      userDeniedBundleIds: ctx.getUserDeniedBundleIds(),
      coordinateMode,
      selectedDisplayId: ctx.getSelectedDisplayId(),
      displayPinnedByModel: ctx.getDisplayPinnedByModel?.(),
      displayResolvedForApps: ctx.getDisplayResolvedForApps?.(),
      lastScreenshot:
        lastScreenshot ??
        (dimsFallback ? { ...dimsFallback, base64: "" } : undefined),
      onPermissionRequest: wrapPermission
        ? (req) => wrapPermission(req, dialogAbort.signal)
        : undefined,
      onTeachPermissionRequest: wrapTeachPermission
        ? (req) => wrapTeachPermission(req, dialogAbort.signal)
        : undefined,
      onAppsHidden: ctx.onAppsHidden,
      getClipboardStash: ctx.getClipboardStash,
      onClipboardStashChanged: ctx.onClipboardStashChanged,
      onResolvedDisplayUpdated: ctx.onResolvedDisplayUpdated,
      onDisplayPinned: ctx.onDisplayPinned,
      onDisplayResolvedForApps: ctx.onDisplayResolvedForApps,
      onTeachModeActivated: ctx.onTeachModeActivated,
      onTeachStep: ctx.onTeachStep,
      onTeachWorking: ctx.onTeachWorking,
      getTeachModeActive: ctx.getTeachModeActive,
      // Undefined → handleToolCall's sync Gate-3 no-ops. The async gate
      // above already ran.
      checkCuLock: undefined,
      acquireCuLock: undefined,
      isAborted: ctx.isAborted,
    };

    logger.debug(
      `[${serverName}] tool=${name} allowedApps=${overrides.allowedApps.length} coordMode=${coordinateMode}`,
    );

    // ─── Dispatch ────────────────────────────────────────────────────────
    try {
      const result = await handleToolCall(adapter, name, args, overrides);

      const shot = result.screenshot;
      if (shot) {
        // Keep the full blob locally; hand the host only the dimensions.
        lastScreenshot = shot;
        const { base64: _blob, ...dims } = shot;
        logger.debug(`[${serverName}] screenshot dims: ${JSON.stringify(dims)}`);
        ctx.onScreenshotCaptured?.(dims);
      }

      return result;
    } finally {
      dialogAbort.abort();
    }
  };
}
|
||||
|
||||
/**
 * Build the MCP `Server` for computer use.
 *
 * ListTools always answers from the tool list built once here (empty while
 * `adapter.isDisabled()` is true). CallTool depends on `context`:
 *  - provided → calls go through `bindSessionContext`, and the piggybacked
 *    `screenshot` / `telemetry` fields are stripped before the result is
 *    returned over MCP;
 *  - omitted → a legacy stub that logs a warning and returns a "not wired"
 *    error result.
 *
 * @param adapter Host adapter (server name, logger, executor capabilities).
 * @param coordinateMode Coordinate convention for tool schemas and dispatch.
 * @param context Optional session context; enables the real CallTool handler.
 * @returns The configured server.
 */
export function createComputerUseMcpServer(
  adapter: ComputerUseHostAdapter,
  coordinateMode: CoordinateMode,
  context?: ComputerUseSessionContext,
): Server {
  const { serverName, logger } = adapter;

  const server = new Server(
    { name: serverName, version: "0.1.3" },
    { capabilities: { tools: {}, logging: {} } },
  );

  const tools = buildComputerUseTools(
    adapter.executor.capabilities,
    coordinateMode,
  );

  server.setRequestHandler(ListToolsRequestSchema, async () => ({
    tools: adapter.isDisabled() ? [] : tools,
  }));

  if (context) {
    const dispatch = bindSessionContext(adapter, coordinateMode, context);
    server.setRequestHandler(
      CallToolRequestSchema,
      async (request): Promise<CallToolResult> => {
        const full = await dispatch(
          request.params.name,
          request.params.arguments ?? {},
        );
        // Drop the host-only fields; the rest is the MCP-shaped result.
        const { screenshot: _screenshot, telemetry: _telemetry, ...mcpResult } =
          full;
        return mcpResult;
      },
    );
  } else {
    // Legacy: no context → stub handler. Reached only if something calls the
    // server over MCP transport WITHOUT going through a binder (a wiring
    // regression). Clear error instead of silent failure.
    server.setRequestHandler(
      CallToolRequestSchema,
      async (request): Promise<CallToolResult> => {
        logger.warn(
          `[${serverName}] tool call "${request.params.name}" reached the stub handler — no session context bound. Per-session state unavailable.`,
        );
        return {
          content: [
            {
              type: "text",
              text: "This computer-use server instance is not wired to a session. Per-session app permissions are not available on this code path.",
            },
          ],
          isError: true,
        };
      },
    );
  }

  return server;
}
|
||||
171
restored-src/node_modules/@ant/computer-use-mcp/src/pixelCompare.ts
generated
vendored
Normal file
171
restored-src/node_modules/@ant/computer-use-mcp/src/pixelCompare.ts
generated
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
/**
|
||||
* Staleness guard ported from the Vercept acquisition.
|
||||
*
|
||||
* Compares the model's last-seen screenshot against a fresh-right-now
|
||||
* screenshot at the click target, so the model never clicks pixels it hasn't
|
||||
* seen. If the 9×9 patch around the target differs, the click is aborted and
|
||||
* the model is told to re-screenshot. This is NOT a popup detector.
|
||||
*
|
||||
* Semantics preserved exactly:
|
||||
* - Skip on no `lastScreenshot` (cold start) — click proceeds.
|
||||
* - Skip on any internal error (crop throws, screenshot fails, etc.) —
|
||||
* click proceeds. Validation failure must never block the action.
|
||||
* - 9×9 exact byte equality on raw pixel bytes. No fuzzing, no tolerance.
|
||||
* - Compare in percentage coords so Retina scale doesn't matter.
|
||||
*
|
||||
* JPEG decode + crop is INJECTED via `ComputerUseHostAdapter.cropRawPatch`.
|
||||
* The original used `sharp` (LGPL, native `.node` addon); we inject Electron's
|
||||
* `nativeImage` (Chromium decoders, BSD, nothing to bundle) from the host, so
|
||||
* this package never imports it — the crop is a function parameter.
|
||||
*/
|
||||
|
||||
import type { ScreenshotResult } from "./executor.js";
|
||||
import type { Logger } from "./types.js";
|
||||
|
||||
/**
 * JPEG decode + crop, injected by the host (see
 * `ComputerUseHostAdapter.cropRawPatch`).
 *
 * Takes a base64-encoded JPEG and a pixel rect; returns the raw pixel bytes
 * of that rect, or `null` when no patch could be produced (e.g. decode
 * failure).
 */
export type CropRawPatchFn = (
  jpegBase64: string,
  rect: {
    x: number;
    y: number;
    width: number;
    height: number;
  },
) => Buffer | null;
|
||||
|
||||
/**
 * 9×9 is empirically the sweet spot — large enough to catch a tooltip
 * appearing, small enough to not false-positive on surrounding animation.
 */
|
||||
const DEFAULT_GRID_SIZE = 9;
|
||||
|
||||
/** Outcome of the click-target staleness check (`validateClickTarget`). */
export interface PixelCompareResult {
  /**
   * `true` → the click may proceed.
   * `false` → the patch changed; abort the click.
   */
  valid: boolean;

  /**
   * `true` → validation did not run (cold start, sub-gate off, or internal
   * error). The caller MUST treat this identically to `valid: true`.
   */
  skipped: boolean;

  /** Populated when `valid === false`. Returned to the model verbatim. */
  warning?: string;
}
|
||||
|
||||
/**
 * Compute the crop rect for a patch centered on (xPercent, yPercent).
 *
 * Dimensions come from ScreenshotResult.width/height (physical pixels). Both
 * screenshots have the same dimensions (same display, consecutive captures),
 * so the rect is the same for both.
 *
 * Percentages are clamped to [0, 100]; the rect is clipped to the image, so
 * it is smaller than `gridSize` near the right/bottom edges.
 *
 * @returns The rect, or `null` when the image has no area, a coordinate is
 *   NaN, or the clipped rect is empty.
 */
function computeCropRect(
  imgW: number,
  imgH: number,
  xPercent: number,
  yPercent: number,
  gridSize: number,
): { x: number; y: number; width: number; height: number } | null {
  if (!imgW || !imgH) return null;
  // NaN survives Math.max/Math.min and fails every `<= 0` test below, so
  // without this guard the function would return a rect made of NaN.
  if (Number.isNaN(xPercent) || Number.isNaN(yPercent)) return null;

  const clampedX = Math.max(0, Math.min(100, xPercent));
  const clampedY = Math.max(0, Math.min(100, yPercent));

  const centerX = Math.round((clampedX / 100.0) * imgW);
  const centerY = Math.round((clampedY / 100.0) * imgH);

  const halfGrid = Math.floor(gridSize / 2);
  const cropX = Math.max(0, centerX - halfGrid);
  const cropY = Math.max(0, centerY - halfGrid);
  const cropW = Math.min(gridSize, imgW - cropX);
  const cropH = Math.min(gridSize, imgH - cropY);
  if (cropW <= 0 || cropH <= 0) return null;

  return { x: cropX, y: cropY, width: cropW, height: cropH };
}

/**
 * Crop the same patch out of both screenshots and compare the raw bytes.
 *
 * @returns `true` when the bytes are identical, `false` when they differ,
 *   and `null` when no comparison could be made (no usable rect, or the
 *   injected crop returned `null` for either image).
 */
function comparePatches(
  crop: CropRawPatchFn,
  lastScreenshot: ScreenshotResult,
  freshScreenshot: ScreenshotResult,
  xPercent: number,
  yPercent: number,
  gridSize: number,
): boolean | null {
  // Both screenshots are of the same display — use the fresh one's
  // dimensions (less likely to be stale than last's).
  const rect = computeCropRect(
    freshScreenshot.width,
    freshScreenshot.height,
    xPercent,
    yPercent,
    gridSize,
  );
  if (!rect) return null;

  const patch1 = crop(lastScreenshot.base64, rect);
  const patch2 = crop(freshScreenshot.base64, rect);
  if (!patch1 || !patch2) return null;

  // Direct buffer equality. nativeImage.toBitmap() gives BGRA where sharp's
  // .raw() gave RGB; that doesn't matter, both buffers share one format.
  return patch1.equals(patch2);
}

/**
 * Compare the same patch location between two screenshots.
 *
 * @returns `true` only when both patches were cropped and their raw pixel
 * bytes are identical. `false` on any difference AND when no comparison
 * could be made (no usable rect, crop returned `null`) — this boolean cannot
 * tell those two cases apart.
 */
export function comparePixelAtLocation(
  crop: CropRawPatchFn,
  lastScreenshot: ScreenshotResult,
  freshScreenshot: ScreenshotResult,
  xPercent: number,
  yPercent: number,
  gridSize: number = DEFAULT_GRID_SIZE,
): boolean {
  return (
    comparePatches(
      crop,
      lastScreenshot,
      freshScreenshot,
      xPercent,
      yPercent,
      gridSize,
    ) === true
  );
}

/**
 * Battle-tested click-target validation ported from the Vercept acquisition,
 * with the fresh-screenshot capture delegated to the caller (we don't have
 * a global `SystemActions.takeScreenshot()` — the executor is injected).
 *
 * Skip conditions (any of these → `{ valid: true, skipped: true }`):
 *   - `lastScreenshot` is undefined (cold start).
 *   - `takeFreshScreenshot()` throws or returns null.
 *   - No crop rect can be computed, or the injected crop function returns
 *     null for either image (decode failure, empty `base64`).
 *   - Any other exception.
 *
 * Only a completed comparison whose bytes differ yields `valid: false`.
 *
 * The caller decides whether to invoke this at all (sub-gate check lives
 * in toolCalls.ts, not here).
 *
 * @param crop Host-injected decode + crop function.
 * @param lastScreenshot The screenshot the model last saw, if any.
 * @param xPercent Target x as a percentage of the image width.
 * @param yPercent Target y as a percentage of the image height.
 * @param takeFreshScreenshot Captures the current screen; may resolve null.
 * @param logger Receives a debug line whenever validation is skipped on error.
 * @param gridSize Side length of the compared patch, in pixels.
 */
export async function validateClickTarget(
  crop: CropRawPatchFn,
  lastScreenshot: ScreenshotResult | undefined,
  xPercent: number,
  yPercent: number,
  takeFreshScreenshot: () => Promise<ScreenshotResult | null>,
  logger: Logger,
  gridSize: number = DEFAULT_GRID_SIZE,
): Promise<PixelCompareResult> {
  if (!lastScreenshot) {
    return { valid: true, skipped: true };
  }

  try {
    const fresh = await takeFreshScreenshot();
    if (!fresh) {
      return { valid: true, skipped: true };
    }

    const pixelsMatch = comparePatches(
      crop,
      lastScreenshot,
      fresh,
      xPercent,
      yPercent,
      gridSize,
    );

    if (pixelsMatch === null) {
      // Nothing was compared, so there is no evidence the screen changed.
      // Treat it like every other technical failure: skip, never block.
      logger.debug("[pixelCompare] could not crop comparison patch, skipping");
      return { valid: true, skipped: true };
    }
    if (pixelsMatch) {
      return { valid: true, skipped: false };
    }
    return {
      valid: false,
      skipped: false,
      warning:
        "Screen content at the target location changed since the last screenshot. Take a new screenshot before clicking.",
    };
  } catch (err) {
    // Skip validation on technical errors, execute action anyway.
    // Battle-tested: validation failure must never block the click.
    logger.debug("[pixelCompare] validation error, skipping", err);
    return { valid: true, skipped: true };
  }
}
|
||||
43
restored-src/node_modules/@ant/computer-use-mcp/src/sentinelApps.ts
generated
vendored
Normal file
43
restored-src/node_modules/@ant/computer-use-mcp/src/sentinelApps.ts
generated
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
/**
|
||||
* Bundle IDs that are escalations-in-disguise. The approval UI shows a warning
|
||||
* badge for these; they are NOT blocked. Power users may legitimately want the
|
||||
* model controlling a terminal.
|
||||
*
|
||||
* Imported by the renderer via the `./sentinelApps` subpath (package.json
|
||||
* `exports`), which keeps Next.js from reaching index.ts → mcpServer.ts →
|
||||
* @modelcontextprotocol/sdk (devDep, would fail module resolution). Keep
|
||||
* this file import-free so the subpath stays clean.
|
||||
*/
|
||||
|
||||
/** Bundle IDs of apps that can execute arbitrary shell commands. */
const SHELL_ACCESS_BUNDLE_IDS = new Set<string>([
  "com.apple.Terminal",
  "com.googlecode.iterm2",
  "com.microsoft.VSCode",
  "dev.warp.Warp-Stable",
  "com.github.wez.wezterm",
  "io.alacritty",
  "net.kovidgoyal.kitty",
  "com.jetbrains.intellij",
  "com.jetbrains.pycharm",
]);

/** Finder in the allowlist ≈ browse + open-any-file. */
const FILESYSTEM_ACCESS_BUNDLE_IDS = new Set<string>(["com.apple.finder"]);

/** System Settings / System Preferences. */
const SYSTEM_SETTINGS_BUNDLE_IDS = new Set<string>([
  "com.apple.systempreferences",
]);

export type SentinelCategory = "shell" | "filesystem" | "system_settings";

/**
 * Category → bundle-ID set, in lookup priority order. Both exports below are
 * derived from this table, so a new category only has to be added here.
 */
const SENTINEL_CATEGORY_TABLE: ReadonlyArray<
  readonly [SentinelCategory, ReadonlySet<string>]
> = [
  ["shell", SHELL_ACCESS_BUNDLE_IDS],
  ["filesystem", FILESYSTEM_ACCESS_BUNDLE_IDS],
  ["system_settings", SYSTEM_SETTINGS_BUNDLE_IDS],
];

/** Every sentinel bundle ID, across all categories. */
export const SENTINEL_BUNDLE_IDS: ReadonlySet<string> = new Set(
  SENTINEL_CATEGORY_TABLE.flatMap(([, ids]) => [...ids]),
);

/**
 * Look up which sentinel category a bundle ID belongs to.
 *
 * @param bundleId Bundle identifier; matched exactly (case-sensitive).
 * @returns The first matching category, or `null` for a non-sentinel app.
 */
export function getSentinelCategory(bundleId: string): SentinelCategory | null {
  for (const [category, ids] of SENTINEL_CATEGORY_TABLE) {
    if (ids.has(bundleId)) return category;
  }
  return null;
}
|
||||
3649
restored-src/node_modules/@ant/computer-use-mcp/src/toolCalls.ts
generated
vendored
Normal file
3649
restored-src/node_modules/@ant/computer-use-mcp/src/toolCalls.ts
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
706
restored-src/node_modules/@ant/computer-use-mcp/src/tools.ts
generated
vendored
Normal file
706
restored-src/node_modules/@ant/computer-use-mcp/src/tools.ts
generated
vendored
Normal file
@@ -0,0 +1,706 @@
|
||||
/**
|
||||
* MCP tool schemas for the computer-use server. Mirrors
|
||||
* claude-for-chrome-mcp/src/browserTools.ts in shape (plain `Tool`-shaped
|
||||
* object literals, no zod).
|
||||
*
|
||||
* Coordinate descriptions are baked in at tool-list build time from the
|
||||
* `chicago_coordinate_mode` gate. The model sees exactly ONE coordinate
|
||||
* convention in the param descriptions and never learns the other exists.
|
||||
* The host (`serverDef.ts`) reads the same frozen gate value for
|
||||
* `scaleCoord` — both must agree or clicks land in the wrong space.
|
||||
*/
|
||||
|
||||
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
||||
|
||||
import type { CoordinateMode } from "./types.js";
|
||||
|
||||
// Model-facing wording for the x / y coordinate parameters, one entry per
// coordinate mode. See packages/desktop/computer-use-mcp/COORDINATES.md before
// touching any of this text. Chrome's browserTools.ts:143 is the reference
// phrasing — "pixels from the left edge", no geometry, no number to do math
// with.
const COORD_DESC: Record<CoordinateMode, { x: string; y: string }> = (() => {
  const pixels = {
    x: "Horizontal pixel position read directly from the most recent screenshot image, measured from the left edge. The server handles all scaling.",
    y: "Vertical pixel position read directly from the most recent screenshot image, measured from the top edge. The server handles all scaling.",
  };
  const normalized_0_100 = {
    x: "Horizontal position as a percentage of screen width, 0.0–100.0 (0 = left edge, 100 = right edge).",
    y: "Vertical position as a percentage of screen height, 0.0–100.0 (0 = top edge, 100 = bottom edge).",
  };
  return { pixels, normalized_0_100 };
})();
|
||||
|
||||
/**
 * Sentence describing the frontmost-app gate: the call fails without acting
 * unless the frontmost application is in the session allowlist.
 */
const FRONTMOST_GATE_DESC = [
  "The frontmost application must be in the session allowlist at the time of this call,",
  "or this tool returns an error and does nothing.",
].join(" ");
|
||||
|
||||
/**
 * JSON-schema for one entry of the `actions` array in `computer_batch`,
 * `teach_step`, and `teach_batch`. All three dispatch through the same
 * `dispatchAction` path with the same validation — keep the `action` enum in
 * sync with `BATCHABLE_ACTIONS` in toolCalls.ts.
 */
const BATCH_ACTION_ITEM_SCHEMA = (() => {
  /** Schema for a two-number `(x, y)` array with the given description. */
  const xyPair = (description: string) => ({
    type: "array",
    items: { type: "number" },
    minItems: 2,
    maxItems: 2,
    description,
  });

  const actionNames = [
    "key",
    "type",
    "mouse_move",
    "left_click",
    "left_click_drag",
    "right_click",
    "middle_click",
    "double_click",
    "triple_click",
    "scroll",
    "hold_key",
    "screenshot",
    "cursor_position",
    "left_mouse_down",
    "left_mouse_up",
    "wait",
  ];

  return {
    type: "object",
    properties: {
      action: {
        type: "string",
        enum: actionNames,
        description: "The action to perform.",
      },
      coordinate: xyPair(
        "(x, y) for click/mouse_move/scroll/left_click_drag end point.",
      ),
      start_coordinate: xyPair(
        "(x, y) drag start — left_click_drag only. Omit to drag from current cursor.",
      ),
      text: {
        type: "string",
        description:
          "For type: the text. For key/hold_key: the chord string. For click/scroll: modifier keys to hold.",
      },
      scroll_direction: {
        type: "string",
        enum: ["up", "down", "left", "right"],
      },
      scroll_amount: { type: "integer", minimum: 0, maximum: 100 },
      duration: {
        type: "number",
        description: "Seconds (0–100). For hold_key/wait.",
      },
      repeat: {
        type: "integer",
        minimum: 1,
        maximum: 100,
        description: "For key: repeat count.",
      },
    },
    required: ["action"],
  };
})();
|
||||
|
||||
/**
|
||||
* Build the tool list. Parameterized by capabilities and coordinate mode so
|
||||
* descriptions are honest and unambiguous (plan §1 — "Unfiltered + honest").
|
||||
*
|
||||
* `coordinateMode` MUST match what the host passes to `scaleCoord` at tool-
|
||||
* -call time. Both should read the same frozen-at-load gate constant.
|
||||
*
|
||||
* `installedAppNames` — optional pre-sanitized list of app display names to
|
||||
* enumerate in the `request_access` description. The caller is responsible
|
||||
* for sanitization (length cap, character allowlist, sort, count cap) —
|
||||
* this function just splices the list into the description verbatim. Omit
|
||||
* to fall back to the generic "display names or bundle IDs" wording.
|
||||
*/
|
||||
export function buildComputerUseTools(
  caps: {
    screenshotFiltering: "native" | "none";
    platform: "darwin" | "win32";
    /** Include request_teach_access + teach_step. Read once at server construction. */
    teachMode?: boolean;
  },
  coordinateMode: CoordinateMode,
  installedAppNames?: string[],
): Tool[] {
  // Coordinate-convention wording for the selected mode; spliced into every
  // coordinate parameter description below.
  const coord = COORD_DESC[coordinateMode];

  // Shared hint suffix for BOTH request_access and request_teach_access —
  // they use the same resolveRequestedApps path, so the model should get
  // the same enumeration for both. Empty when no names were supplied.
  const installedAppsHint =
    installedAppNames && installedAppNames.length > 0
      ? ` Available applications on this machine: ${installedAppNames.join(", ")}.`
      : "";

  // `[x, y]` tuple — param shape for all click/move/scroll tools.
  const coordinateTuple = {
    type: "array",
    items: { type: "number" },
    minItems: 2,
    maxItems: 2,
    description: `(x, y): ${coord.x}`,
  };

  // Modifier hold during click. Shared across all 5 click variants.
  const clickModifierText = {
    type: "string",
    description:
      'Modifier keys to hold during the click (e.g. "shift", "ctrl+shift"). Supports the same syntax as the key tool.',
  };

  // The screenshot description states whether non-allowlisted apps are
  // filtered out of the image, according to the advertised capability.
  const screenshotDesc =
    caps.screenshotFiltering === "native"
      ? "Take a screenshot of the primary display. Applications not in the session allowlist are excluded at the compositor level — only granted apps and the desktop are visible."
      : "Take a screenshot of the primary display. On this platform, screenshots are NOT filtered — all open windows are visible. Input actions targeting apps not in the session allowlist are rejected.";

  // The five click variants differ only in name and description; they share
  // the same `coordinate` + optional modifier `text` input shape.
  const clickTool = (name: string, description: string): Tool => ({
    name,
    description,
    inputSchema: {
      type: "object" as const,
      properties: {
        coordinate: coordinateTuple,
        text: clickModifierText,
      },
      required: ["coordinate"],
    },
  });

  // Tools that take no arguments. A fresh schema object is built per tool so
  // no two tools share a mutable `inputSchema`.
  const noArgTool = (name: string, description: string): Tool => ({
    name,
    description,
    inputSchema: {
      type: "object" as const,
      properties: {},
      required: [],
    },
  });

  return [
    {
      name: "request_access",
      description:
        "Request user permission to control a set of applications for this session. Must be called before any other tool in this server. " +
        "The user sees a single dialog listing all requested apps and either allows the whole set or denies it. " +
        "Call this again mid-session to add more apps; previously granted apps remain granted. " +
        "Returns the granted apps, denied apps, and screenshot filtering capability.",
      inputSchema: {
        type: "object" as const,
        properties: {
          apps: {
            type: "array",
            items: { type: "string" },
            description:
              "Application display names (e.g. \"Slack\", \"Calendar\") or bundle identifiers (e.g. \"com.tinyspeck.slackmacgap\"). Display names are resolved case-insensitively against installed apps." +
              installedAppsHint,
          },
          reason: {
            type: "string",
            description:
              "One-sentence explanation shown to the user in the approval dialog. Explain the task, not the mechanism.",
          },
          clipboardRead: {
            type: "boolean",
            description:
              "Also request permission to read the user's clipboard (separate checkbox in the dialog).",
          },
          clipboardWrite: {
            type: "boolean",
            description:
              "Also request permission to write the user's clipboard. When granted, multi-line `type` calls use the clipboard fast path.",
          },
          systemKeyCombos: {
            type: "boolean",
            description:
              "Also request permission to send system-level key combos (quit app, switch app, lock screen). Without this, those specific combos are blocked.",
          },
        },
        required: ["apps", "reason"],
      },
    },

    {
      name: "screenshot",
      description:
        screenshotDesc +
        " Returns an error if the allowlist is empty. The returned image is what subsequent click coordinates are relative to.",
      inputSchema: {
        type: "object" as const,
        properties: {
          save_to_disk: {
            type: "boolean",
            description:
              "Save the image to disk so it can be attached to a message for the user. Returns the saved path in the tool result. Only set this when you intend to share the image — screenshots you're just looking at don't need saving.",
          },
        },
        required: [],
      },
    },

    {
      name: "zoom",
      description:
        "Take a higher-resolution screenshot of a specific region of the last full-screen screenshot. Use this liberally to inspect small text, button labels, or fine UI details that are hard to read in the downsampled full-screen image. " +
        "IMPORTANT: Coordinates in subsequent click calls always refer to the full-screen screenshot, never the zoomed image. This tool is read-only for inspecting detail.",
      inputSchema: {
        type: "object" as const,
        properties: {
          region: {
            type: "array",
            items: { type: "integer" },
            minItems: 4,
            maxItems: 4,
            description:
              "(x0, y0, x1, y1): Rectangle to zoom into, in the coordinate space of the most recent full-screen screenshot. x0,y0 = top-left, x1,y1 = bottom-right.",
          },
          save_to_disk: {
            type: "boolean",
            description:
              "Save the image to disk so it can be attached to a message for the user. Returns the saved path in the tool result. Only set this when you intend to share the image.",
          },
        },
        required: ["region"],
      },
    },

    clickTool(
      "left_click",
      `Left-click at the given coordinates. ${FRONTMOST_GATE_DESC}`,
    ),
    clickTool(
      "double_click",
      `Double-click at the given coordinates. Selects a word in most text editors. ${FRONTMOST_GATE_DESC}`,
    ),
    clickTool(
      "triple_click",
      `Triple-click at the given coordinates. Selects a line in most text editors. ${FRONTMOST_GATE_DESC}`,
    ),
    clickTool(
      "right_click",
      `Right-click at the given coordinates. Opens a context menu in most applications. ${FRONTMOST_GATE_DESC}`,
    ),
    clickTool(
      "middle_click",
      `Middle-click (scroll-wheel click) at the given coordinates. ${FRONTMOST_GATE_DESC}`,
    ),

    {
      name: "type",
      description: `Type text into whatever currently has keyboard focus. ${FRONTMOST_GATE_DESC} Newlines are supported. For keyboard shortcuts use \`key\` instead.`,
      inputSchema: {
        type: "object" as const,
        properties: {
          text: { type: "string", description: "Text to type." },
        },
        required: ["text"],
      },
    },

    {
      name: "key",
      description:
        `Press a key or key combination (e.g. "return", "escape", "cmd+a", "ctrl+shift+tab"). ${FRONTMOST_GATE_DESC} ` +
        "System-level combos (quit app, switch app, lock screen) require the `systemKeyCombos` grant — without it they return an error. All other combos work.",
      inputSchema: {
        type: "object" as const,
        properties: {
          text: {
            type: "string",
            description: 'Modifiers joined with "+", e.g. "cmd+shift+a".',
          },
          repeat: {
            type: "integer",
            minimum: 1,
            maximum: 100,
            description: "Number of times to repeat the key press. Default is 1.",
          },
        },
        required: ["text"],
      },
    },

    {
      name: "scroll",
      description: `Scroll at the given coordinates. ${FRONTMOST_GATE_DESC}`,
      inputSchema: {
        type: "object" as const,
        properties: {
          coordinate: coordinateTuple,
          scroll_direction: {
            type: "string",
            enum: ["up", "down", "left", "right"],
            description: "Direction to scroll.",
          },
          scroll_amount: {
            type: "integer",
            minimum: 0,
            maximum: 100,
            description: "Number of scroll ticks.",
          },
        },
        required: ["coordinate", "scroll_direction", "scroll_amount"],
      },
    },

    {
      name: "left_click_drag",
      description: `Press, move to target, and release. ${FRONTMOST_GATE_DESC}`,
      inputSchema: {
        type: "object" as const,
        properties: {
          // Same tuple shape as every other coordinate, with drag-specific wording.
          coordinate: {
            ...coordinateTuple,
            description: `(x, y) end point: ${coord.x}`,
          },
          start_coordinate: {
            ...coordinateTuple,
            description: `(x, y) start point. If omitted, drags from the current cursor position. ${coord.x}`,
          },
        },
        required: ["coordinate"],
      },
    },

    {
      name: "mouse_move",
      description: `Move the mouse cursor without clicking. Useful for triggering hover states. ${FRONTMOST_GATE_DESC}`,
      inputSchema: {
        type: "object" as const,
        properties: {
          coordinate: coordinateTuple,
        },
        required: ["coordinate"],
      },
    },

    {
      name: "open_application",
      description:
        "Bring an application to the front, launching it if necessary. The target application must already be in the session allowlist — call request_access first.",
      inputSchema: {
        type: "object" as const,
        properties: {
          app: {
            type: "string",
            description:
              "Display name (e.g. \"Slack\") or bundle identifier (e.g. \"com.tinyspeck.slackmacgap\").",
          },
        },
        required: ["app"],
      },
    },

    {
      name: "switch_display",
      description:
        "Switch which monitor subsequent screenshots capture. Use this when the " +
        "application you need is on a different monitor than the one shown. " +
        "The screenshot tool tells you which monitor it captured and lists " +
        "other attached monitors by name — pass one of those names here. " +
        "After switching, call screenshot to see the new monitor. " +
        'Pass "auto" to return to automatic monitor selection.',
      inputSchema: {
        type: "object" as const,
        properties: {
          display: {
            type: "string",
            description:
              'Monitor name from the screenshot note (e.g. "Built-in Retina Display", ' +
              '"LG UltraFine"), or "auto" to re-enable automatic selection.',
          },
        },
        required: ["display"],
      },
    },

    noArgTool(
      "list_granted_applications",
      "List the applications currently in the session allowlist, plus the active grant flags and coordinate mode. No side effects.",
    ),

    noArgTool(
      "read_clipboard",
      "Read the current clipboard contents as text. Requires the `clipboardRead` grant.",
    ),

    {
      name: "write_clipboard",
      description:
        "Write text to the clipboard. Requires the `clipboardWrite` grant.",
      inputSchema: {
        type: "object" as const,
        properties: {
          text: { type: "string" },
        },
        required: ["text"],
      },
    },

    {
      name: "wait",
      description: "Wait for a specified duration.",
      inputSchema: {
        type: "object" as const,
        properties: {
          duration: {
            type: "number",
            description: "Duration in seconds (0–100).",
          },
        },
        required: ["duration"],
      },
    },

    noArgTool(
      "cursor_position",
      "Get the current mouse cursor position. Returns image-pixel coordinates relative to the most recent screenshot, or logical points if no screenshot has been taken.",
    ),

    {
      name: "hold_key",
      description:
        `Press and hold a key or key combination for the specified duration, then release. ${FRONTMOST_GATE_DESC} ` +
        "System-level combos require the `systemKeyCombos` grant.",
      inputSchema: {
        type: "object" as const,
        properties: {
          text: {
            type: "string",
            description: 'Key or chord to hold, e.g. "space", "shift+down".',
          },
          duration: {
            type: "number",
            description: "Duration in seconds (0–100).",
          },
        },
        required: ["text", "duration"],
      },
    },

    noArgTool(
      "left_mouse_down",
      `Press the left mouse button at the current cursor position and leave it held. ${FRONTMOST_GATE_DESC} ` +
        "Use mouse_move first to position the cursor. Call left_mouse_up to release. Errors if the button is already held.",
    ),

    noArgTool(
      "left_mouse_up",
      `Release the left mouse button at the current cursor position. ${FRONTMOST_GATE_DESC} ` +
        "Pairs with left_mouse_down. Safe to call even if the button is not currently held.",
    ),

    {
      name: "computer_batch",
      description:
        "Execute a sequence of actions in ONE tool call. Each individual tool call requires a model→API round trip (seconds); " +
        "batching a predictable sequence eliminates all but one. Use this whenever you can predict the outcome of several actions ahead — " +
        "e.g. click a field, type into it, press Return. Actions execute sequentially and stop on the first error. " +
        `${FRONTMOST_GATE_DESC} The frontmost check runs before EACH action inside the batch — if an action opens a non-allowed app, the next action's gate fires and the batch stops there. ` +
        "Mid-batch screenshot actions are allowed for inspection but coordinates in subsequent clicks always refer to the PRE-BATCH full-screen screenshot.",
      inputSchema: {
        type: "object" as const,
        properties: {
          actions: {
            type: "array",
            minItems: 1,
            items: BATCH_ACTION_ITEM_SCHEMA,
            description:
              'List of actions. Example: [{"action":"left_click","coordinate":[100,200]},{"action":"type","text":"hello"},{"action":"key","text":"Return"}]',
          },
        },
        required: ["actions"],
      },
    },

    // Teach-mode tools are appended only when the capability flag is set.
    ...(caps.teachMode ? buildTeachTools(coord, installedAppsHint) : []),
  ];
}
|
||||
|
||||
/**
 * Builds the teach-mode tools: `request_teach_access`, `teach_step` and
 * `teach_batch`. Split out of `buildComputerUseTools` so the spread there
 * stays a single expression.
 *
 * @param coord Coordinate-mode phrasing. `coord.x` is spliced into the
 *   `anchor` description so it uses the same frozen coordinate-mode wording
 *   as click coordinates.
 * @param installedAppsHint Suffix appended verbatim to
 *   `request_teach_access.apps` so it gets the same enumeration as
 *   `request_access.apps` (same resolution path → same hint). May be empty.
 * @returns The three teach-mode tool definitions, in the order listed above.
 */
function buildTeachTools(
  coord: { x: string; y: string },
  installedAppsHint: string,
): Tool[] {
  // Shared between teach_step (top-level) and teach_batch (inside steps[]
  // items). Depends on coord, so it lives inside this factory.
  const teachStepProperties = {
    explanation: {
      type: "string",
      description:
        "Tooltip body text. Explain what the user is looking at and why it matters. " +
        "This is the ONLY place the user sees your words — be complete but concise.",
    },
    next_preview: {
      type: "string",
      description:
        "One line describing exactly what will happen when the user clicks Next. " +
        'Example: "Next: I\'ll click Create Bucket and type the name." ' +
        "Shown below the explanation in a smaller font.",
    },
    anchor: {
      type: "array",
      items: { type: "number" },
      minItems: 2,
      maxItems: 2,
      description:
        `(x, y) — where the tooltip arrow points. ${coord.x} ` +
        "Omit to center the tooltip with no arrow (for general-context steps).",
    },
    actions: {
      type: "array",
      // Empty allowed — "read this, click Next" steps.
      items: BATCH_ACTION_ITEM_SCHEMA,
      description:
        "Actions to execute when the user clicks Next. Same item schema as computer_batch.actions. " +
        "Empty array is valid for purely explanatory steps. Actions run sequentially and stop on first error.",
    },
  } as const;

  return [
    {
      name: "request_teach_access",
      description:
        "Request permission to guide the user through a task step-by-step with on-screen tooltips. " +
        "Use this INSTEAD OF request_access when the user wants to LEARN how to do something " +
        '(phrases like "teach me", "walk me through", "show me how", "help me learn"). ' +
        "On approval the main Claude window hides and a fullscreen tooltip overlay appears. " +
        "You then call teach_step repeatedly; each call shows one tooltip and waits for the user to click Next. " +
        "Same app-allowlist semantics as request_access, but no clipboard/system-key flags. " +
        "Teach mode ends automatically when your turn ends.",
      inputSchema: {
        type: "object" as const,
        properties: {
          apps: {
            type: "array",
            items: { type: "string" },
            description:
              'Application display names (e.g. "Slack", "Calendar") or bundle identifiers. Resolved case-insensitively against installed apps.' +
              installedAppsHint,
          },
          reason: {
            type: "string",
            description:
              'What you will be teaching. Shown in the approval dialog as "Claude wants to guide you through {reason}". Keep it short and task-focused.',
          },
        },
        required: ["apps", "reason"],
      },
    },

    {
      name: "teach_step",
      description:
        "Show one guided-tour tooltip and wait for the user to click Next. On Next, execute the actions, " +
        "take a fresh screenshot, and return both — you do NOT need a separate screenshot call between steps. " +
        "The returned image shows the state after your actions ran; anchor the next teach_step against it. " +
        "IMPORTANT — the user only sees the tooltip during teach mode. Put ALL narration in `explanation`. " +
        "Text you emit outside teach_step calls is NOT visible until teach mode ends. " +
        "Pack as many actions as possible into each step's `actions` array — the user waits through " +
        "the whole round trip between clicks, so one step that fills a form beats five steps that fill one field each. " +
        "Returns {exited:true} if the user clicks Exit — do not call teach_step again after that. " +
        "Take an initial screenshot before your FIRST teach_step to anchor it.",
      inputSchema: {
        type: "object" as const,
        properties: teachStepProperties,
        // `anchor` is deliberately optional: omitting it centers the tooltip.
        required: ["explanation", "next_preview", "actions"],
      },
    },

    {
      name: "teach_batch",
      description:
        "Queue multiple teach steps in one tool call. Parallels computer_batch: " +
        "N steps → one model↔API round trip instead of N. Each step still shows a tooltip " +
        "and waits for the user's Next click, but YOU aren't waiting for a round trip between steps. " +
        "You can call teach_batch multiple times in one tour — treat each batch as one predictable " +
        "SEGMENT (typically: all the steps on one page). The returned screenshot shows the state " +
        "after the batch's final actions; anchor the NEXT teach_batch against it. " +
        "WITHIN a batch, all anchors and click coordinates refer to the PRE-BATCH screenshot " +
        "(same invariant as computer_batch) — for steps 2+ in a batch, either omit anchor " +
        "(centered tooltip) or target elements you know won't have moved. " +
        "Good pattern: batch 5 tooltips on page A (last step navigates) → read returned screenshot → " +
        "batch 3 tooltips on page B → done. " +
        "Returns {exited:true, stepsCompleted:N} if the user clicks Exit — do NOT call again after that; " +
        "{stepsCompleted, stepFailed, ...} if an action errors mid-batch; " +
        "otherwise {stepsCompleted, results:[...]} plus a final screenshot. " +
        "Fall back to individual teach_step calls when you need to react to each intermediate screenshot.",
      inputSchema: {
        type: "object" as const,
        properties: {
          steps: {
            type: "array",
            minItems: 1,
            // Each item has the same shape as a standalone teach_step call.
            items: {
              type: "object",
              properties: teachStepProperties,
              required: ["explanation", "next_preview", "actions"],
            },
            description:
              "Ordered steps. Validated upfront — a typo in step 5 errors before any tooltip shows.",
          },
        },
        required: ["steps"],
      },
    },
  ];
}
|
||||
622
restored-src/node_modules/@ant/computer-use-mcp/src/types.ts
generated
vendored
Normal file
622
restored-src/node_modules/@ant/computer-use-mcp/src/types.ts
generated
vendored
Normal file
@@ -0,0 +1,622 @@
|
||||
import type {
|
||||
ComputerExecutor,
|
||||
InstalledApp,
|
||||
ScreenshotResult,
|
||||
} from "./executor.js";
|
||||
|
||||
/**
 * `ScreenshotResult` without the base64 blob. This is the shape hosts persist
 * so `scaleCoord` survives across respawns.
 */
export type ScreenshotDims = Omit<ScreenshotResult, "base64">;
|
||||
|
||||
/**
 * Leveled logger supplied by the host. Each method takes a message plus any
 * number of extra arguments.
 *
 * Shape mirrors claude-for-chrome-mcp/src/types.ts:1-7
 */
export interface Logger {
  info: (message: string, ...args: unknown[]) => void;
  error: (message: string, ...args: unknown[]) => void;
  warn: (message: string, ...args: unknown[]) => void;
  debug: (message: string, ...args: unknown[]) => void;
  silly: (message: string, ...args: unknown[]) => void;
}
|
||||
|
||||
/**
 * Per-app permission tier. Hardcoded by category at grant time — the
 * approval dialog displays the tier but the user cannot change it (for now).
 *
 * - `"read"` — visible in screenshots, NO interaction (no clicks, no typing).
 *   Browsers land here: the model can read a page that's already open, but
 *   must use the Claude-in-Chrome MCP for any navigation/clicking. Trading
 *   platforms land here too (no CiC alternative — the model asks the user).
 * - `"click"` — visible + plain left-click, scroll. NO typing/keys,
 *   NO right/middle-click, NO modifier-clicks, NO drag-drop (all text-
 *   injection vectors). Terminals/IDEs land here: the model can click a
 *   Run button or scroll test output, but `type("rm -rf /")` is blocked
 *   and so is right-click→Paste and dragging text onto the terminal.
 * - `"full"` — visible + click + type/key/paste. Everything else.
 *
 * Enforced in `runInputActionGates` via the frontmost-app check: keyboard
 * actions require `"full"`, mouse actions require `"click"` or higher.
 */
export type CuAppPermTier = "read" | "click" | "full";
|
||||
|
||||
/**
 * A single app the user has approved for the current session. Session-scoped
 * only — there is no "once" or "forever" scope (unlike Chrome's per-domain
 * three-way). CU has no natural "once" unit; one task = hundreds of clicks.
 * Mirrors how `chromeAllowedDomains` is a plain `string[]` with no per-item
 * scope.
 */
export interface AppGrant {
  bundleId: string;
  displayName: string;
  /** Epoch ms. For Settings-page display ("Granted 3m ago"). */
  grantedAt: number;
  /**
   * Permission tier for this app. Undefined → `"full"` (back-compat for
   * pre-tier grants persisted in session state).
   */
  tier?: CuAppPermTier;
}
|
||||
|
||||
/**
 * Session-level capability flags requested alongside apps. Orthogonal to the
 * app allowlist.
 */
export interface CuGrantFlags {
  clipboardRead: boolean;
  clipboardWrite: boolean;
  /**
   * When false, the `key` tool rejects combos in `keyBlocklist.ts`
   * (cmd+q, cmd+tab, cmd+space, cmd+shift+q, ctrl+alt+delete). All other
   * key sequences work regardless.
   */
  systemKeyCombos: boolean;
}
|
||||
|
||||
/** Baseline grant flags: every optional capability starts switched off. */
export const DEFAULT_GRANT_FLAGS: CuGrantFlags = {
  clipboardRead: false,
  clipboardWrite: false,
  systemKeyCombos: false,
};
|
||||
|
||||
/**
 * Coordinate convention the model uses for tool parameters.
 *
 * Host picks via GrowthBook JSON feature `chicago_coordinate_mode`, baked
 * into tool param descriptions at server-construction time. The model sees
 * ONE convention and never learns the other exists. `normalized_0_100`
 * sidesteps the Retina scaleFactor bug class entirely.
 */
export type CoordinateMode = "pixels" | "normalized_0_100";
|
||||
|
||||
/**
 * Independent kill switches for subtle/risky ported behaviors. Read from
 * GrowthBook by the host adapter, consulted in `toolCalls.ts`.
 */
export interface CuSubGates {
  /** 9×9 exact-byte staleness guard before click. */
  pixelValidation: boolean;
  /** Route `type("foo\nbar")` through clipboard instead of keystroke-by-keystroke. */
  clipboardPasteMultiline: boolean;
  /**
   * Ease-out-cubic mouse glide at 60fps, distance-proportional duration
   * (2000 px/sec, capped at 0.5s). Adds up to ~0.5s latency
   * per click. When off, cursor teleports instantly.
   */
  mouseAnimation: boolean;
  /**
   * Pre-action sequence: hide non-allowlisted apps, then defocus us (from the
   * Vercept acquisition). When off, the
   * frontmost gate fires in the normal case and the model gets stuck — this
   * is the A/B-test-the-old-broken-behavior switch.
   */
  hideBeforeAction: boolean;
  /**
   * Auto-resolve the target display before each screenshot when the
   * selected display has no allowed-app windows. When on, `handleScreenshot`
   * uses the atomic Swift path; off → sticks with `selectedDisplayId`.
   */
  autoTargetDisplay: boolean;
  /**
   * Stash+clear the clipboard while a tier-"click" app is frontmost.
   * Closes the gap where a click-tier terminal/IDE has a UI Paste button
   * that's plain-left-clickable — without this, the tier "click"
   * keyboard block can be routed around by clicking Paste. Restored when
   * a non-"click" app becomes frontmost, or at turn end.
   */
  clipboardGuard: boolean;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Permission request/response (mirror of BridgePermissionRequest, types.ts:77-94)
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/** One entry per app the model asked for, after name → bundle ID resolution. */
export interface ResolvedAppRequest {
  /** What the model asked for (e.g. "Slack", "com.tinyspeck.slackmacgap"). */
  requestedName: string;
  /** The resolved InstalledApp if found, else undefined (shown greyed in the UI). */
  resolved?: InstalledApp;
  /** Shell-access-equivalent bundle IDs get a UI warning. See sentinelApps.ts. */
  isSentinel: boolean;
  /** Already in the allowlist → skip the checkbox, return in `granted` immediately. */
  alreadyGranted: boolean;
  /**
   * Hardcoded tier for this app (browser→"read", trading platform→"read",
   * terminal→"click", else "full"). The dialog displays this read-only; the
   * renderer passes it through verbatim in the AppGrant.
   */
  proposedTier: CuAppPermTier;
}
|
||||
|
||||
/**
 * Payload for the renderer approval dialog. Rides through the existing
 * `ToolPermissionRequest.input: unknown` field
 * (packages/utils/desktop/bridge/common/claude.web.ts:1262) — no IPC schema
 * change needed.
 */
export interface CuPermissionRequest {
  requestId: string;
  /** Model-provided reason string. Shown prominently in the approval UI. */
  reason: string;
  apps: ResolvedAppRequest[];
  /** What the model asked for. User can toggle independently of apps. */
  requestedFlags: Partial<CuGrantFlags>;
  /**
   * For the "On Windows, Claude can see all apps..." footnote. Taken from
   * `executor.capabilities.screenshotFiltering` so the renderer doesn't
   * need to know about platforms.
   */
  screenshotFiltering: "native" | "none";
  /**
   * Present only when TCC permissions are NOT yet granted. When present,
   * the renderer shows a TCC toggle panel (two rows: Accessibility, Screen
   * Recording) INSTEAD OF the app list. Clicking a row's "Request" button
   * triggers the OS prompt; the store polls on window-focus and flips the
   * toggle when the grant is detected. macOS itself prompts the user to
   * restart after granting Screen Recording — we don't.
   */
  tccState?: {
    accessibility: boolean;
    screenRecording: boolean;
  };
  /**
   * Apps with windows on the CU display that aren't in the requested
   * allowlist. These will be hidden the first time Claude takes an action.
   * Computed at request_access time — may be slightly stale by the time the
   * user clicks Allow, but it's a preview, not a contract. Absent when
   * empty so the renderer can skip the section cleanly.
   */
  willHide?: Array<{ bundleId: string; displayName: string }>;
  /**
   * `chicagoAutoUnhide` app preference at request time. The renderer picks
   * between "...then restored when Claude is done" and "...will be hidden"
   * copy. Absent when `willHide` is absent (same condition).
   */
  autoUnhideEnabled?: boolean;
}
|
||||
|
||||
/**
 * What the renderer stuffs into `updatedInput._cuGrants` when the user clicks
 * "Allow for this session" (mirror of the `_allowAllSites` sentinel at
 * LocalAgentModeSessionManager.ts:2794).
 */
export interface CuPermissionResponse {
  granted: AppGrant[];
  /** Bundle IDs the user unchecked, or apps that weren't installed. */
  denied: Array<{ bundleId: string; reason: "user_denied" | "not_installed" }>;
  flags: CuGrantFlags;
  /**
   * Whether the user clicked Allow in THIS dialog. Only set by the
   * teach-mode handler — regular request_access doesn't need it (the
   * session manager's `result.behavior` gates the merge there). Needed
   * because when all requested apps are already granted (skipDialogGrants
   * non-empty, needDialog empty), Allow and Deny produce identical
   * `{granted:[], denied:[]}` payloads and the tool handler can't tell
   * them apart without this. Undefined → legacy/regular path, do not
   * gate on it.
   */
  userConsented?: boolean;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Host adapter (mirror of ClaudeForChromeContext, types.ts:33-62)
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
 * Process-lifetime singleton dependencies. Everything that does NOT vary per
 * tool call. Built once by `apps/desktop/src/main/nest-only/chicago/hostAdapter.ts`.
 * No Electron imports in this package — the host injects everything.
 */
export interface ComputerUseHostAdapter {
  serverName: string;
  logger: Logger;
  executor: ComputerExecutor;

  /**
   * TCC state check — Accessibility + Screen Recording on macOS. Pure check,
   * no dialog, no relaunch. When either is missing, `request_access` threads
   * the state through to the renderer which shows a toggle panel; all other
   * tools return a tool error.
   *
   * @returns `{ granted: true }`, or `{ granted: false }` together with the
   *   individual Accessibility / Screen Recording states.
   */
  ensureOsPermissions(): Promise<
    | { granted: true }
    | { granted: false; accessibility: boolean; screenRecording: boolean }
  >;

  /** The Settings-page kill switch (`chicagoEnabled` app preference). */
  isDisabled(): boolean;

  /**
   * The `chicagoAutoUnhide` app preference. Consumed by `buildAccessRequest`
   * to populate `CuPermissionRequest.autoUnhideEnabled` so the renderer's
   * "will be hidden" copy can say "then restored" only when true.
   */
  getAutoUnhideEnabled(): boolean;

  /**
   * Sub-gates re-read on every tool call so GrowthBook flips take effect
   * mid-session without restart.
   */
  getSubGates(): CuSubGates;

  /**
   * JPEG decode + crop + raw pixel bytes, for the PixelCompare staleness guard.
   * Injected so this package stays Electron-free. The host implements it via
   * `nativeImage.createFromBuffer(jpeg).crop(rect).toBitmap()` — Chromium's
   * decoders, BSD-licensed, no `.node` binary.
   *
   * Returns null on decode/crop failure — caller treats null as `skipped`,
   * click proceeds (validation failure must never block the action).
   *
   * @param jpegBase64 Base64-encoded JPEG to decode.
   * @param rect Region to crop out of the decoded image.
   */
  cropRawPatch(
    jpegBase64: string,
    rect: { x: number; y: number; width: number; height: number },
  ): Buffer | null;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
// Session context (getter/callback bag for bindSessionContext)
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
 * Per-session state binding for `bindSessionContext`. Hosts build this once
 * per session with getters that read fresh from their session store and
 * callbacks that write back. The returned dispatcher builds
 * `ComputerUseOverrides` from these getters on every call.
 *
 * Callbacks must be set at construction time — `bindSessionContext` reads
 * them once at bind, not per call.
 *
 * The lock hooks are **async** — `bindSessionContext` awaits them before
 * `handleToolCall`, then passes `checkCuLock: undefined` in overrides so the
 * sync Gate-3 in `handleToolCall` no-ops. Hosts with in-memory sync locks
 * (Cowork) wrap them trivially; hosts with cross-process locks (the CLI's
 * O_EXCL file) call the real async primitive directly.
 */
export interface ComputerUseSessionContext {
  // ── Read state fresh per call ──────────────────────────────────────

  getAllowedApps(): readonly AppGrant[];
  getGrantFlags(): CuGrantFlags;
  /** Per-user auto-deny list (Settings page). Empty array = none. */
  getUserDeniedBundleIds(): readonly string[];
  getSelectedDisplayId(): number | undefined;
  getDisplayPinnedByModel?(): boolean;
  getDisplayResolvedForApps?(): string | undefined;
  getTeachModeActive?(): boolean;
  /** Dims-only fallback when `lastScreenshot` is unset (cross-respawn).
   *  `bindSessionContext` reconstructs `{...dims, base64: ""}` so scaleCoord
   *  works and pixelCompare correctly skips. */
  getLastScreenshotDims?(): ScreenshotDims | undefined;

  // ── Write-back callbacks ───────────────────────────────────────────

  /** Shows the approval dialog. Host routes to its UI, awaits user. The
   *  signal is aborted if the tool call finishes before the user answers
   *  (MCP timeout, etc.) — hosts dismiss the dialog on abort. */
  onPermissionRequest?(
    req: CuPermissionRequest,
    signal: AbortSignal,
  ): Promise<CuPermissionResponse>;
  /** Teach-mode sibling of `onPermissionRequest`. */
  onTeachPermissionRequest?(
    req: CuTeachPermissionRequest,
    signal: AbortSignal,
  ): Promise<CuPermissionResponse>;
  /** Called by `bindSessionContext` after merging a permission response into
   *  the allowlist (dedupe on bundleId, truthy-only flag spread). Host
   *  persists for resume survival. */
  onAllowedAppsChanged?(apps: readonly AppGrant[], flags: CuGrantFlags): void;
  onAppsHidden?(bundleIds: string[]): void;
  /** Reads the session's clipboardGuard stash. undefined → no stash held. */
  getClipboardStash?(): string | undefined;
  /** Writes the clipboardGuard stash. undefined clears it. */
  onClipboardStashChanged?(stash: string | undefined): void;
  onResolvedDisplayUpdated?(displayId: number): void;
  onDisplayPinned?(displayId: number | undefined): void;
  onDisplayResolvedForApps?(sortedBundleIdsKey: string): void;
  /** Called after each screenshot. Host persists for respawn survival. */
  onScreenshotCaptured?(dims: ScreenshotDims): void;
  onTeachModeActivated?(): void;
  onTeachStep?(req: TeachStepRequest): Promise<TeachStepResult>;
  onTeachWorking?(): void;

  // ── Lock (async) ───────────────────────────────────────────────────

  /** At most one session uses CU at a time. Awaited by `bindSessionContext`
   *  before dispatch. Undefined → no lock gating (proceed). */
  checkCuLock?(): Promise<{ holder: string | undefined; isSelf: boolean }>;
  /** Take the lock. Called when `checkCuLock` returned `holder: undefined`
   *  on a non-deferring tool. Host emits enter-CU signals here. */
  acquireCuLock?(): Promise<void>;
  /** Host-specific lock-held error text. Default is the package's generic
   *  message. The CLI host includes the holder session-ID prefix. */
  formatLockHeldMessage?(holder: string): string;

  /** User-abort signal. Passed through to `ComputerUseOverrides.isAborted`
   *  for the mid-loop checks in handleComputerBatch / handleType. See that
   *  field for semantics. */
  isAborted?(): boolean;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
// Per-call overrides (mirror of PermissionOverrides, types.ts:97-102)
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
 * Built FRESH on every tool call by `bindSessionContext` from
 * `ComputerUseSessionContext` getters. This is what lets a singleton MCP
 * server carry per-session state — the state lives on the host's session
 * store, not the server.
 */
export interface ComputerUseOverrides {
  allowedApps: AppGrant[];
  grantFlags: CuGrantFlags;
  coordinateMode: CoordinateMode;

  /**
   * User-configured auto-deny list (Settings → Desktop app → Computer Use).
   * Bundle IDs here are stripped from request_access BEFORE the approval
   * dialog — they never reach the user for approval regardless of tier. The
   * response tells the agent to ask the user to remove the app from their
   * deny list in Settings if access is genuinely needed.
   *
   * Per-USER, persists across restarts (read from appPreferences per call,
   * not session state). Contrast with `allowedApps` which is per-session.
   * Empty array = no user-configured denies (the default).
   */
  userDeniedBundleIds: readonly string[];

  /**
   * Display CU operates on; read fresh per call. `scaleCoord` uses the
   * `originX/Y` snapshotted in `lastScreenshot`, so mid-session switches
   * only affect the NEXT screenshot/prepare call.
   */
  selectedDisplayId?: number;

  /**
   * The `request_access` tool handler calls this and awaits. The wrapper
   * closure in serverDef.ts (mirroring InternalMcpServerManager.ts:131-177)
   * routes through `handleToolPermission` → IPC → renderer ChicagoApproval.
   * When it resolves, the wrapper side-effectfully mutates
   * `InternalServerContext.cuAllowedApps` BEFORE returning here.
   *
   * Undefined when the session wasn't wired with a permission handler (e.g.
   * a future headless mode). `request_access` returns a tool error in that case.
   */
  onPermissionRequest?: (req: CuPermissionRequest) => Promise<CuPermissionResponse>;

  /**
   * For the pixel-validation staleness guard. The model's-last-screenshot,
   * stashed by serverDef.ts after each `screenshot` tool call. Undefined on
   * cold start → pixel validation skipped (click proceeds).
   */
  lastScreenshot?: ScreenshotResult;

  /**
   * Fired after every `prepareForAction` with the bundle IDs it just hid.
   * The wrapper closure in serverDef.ts accumulates these into
   * `Session.cuHiddenDuringTurn` via a write-through callback (same pattern
   * as `onCuPermissionUpdated`). At turn end (`sdkMessage.type === "result"`),
   * if the `chicagoAutoUnhide` setting is on, everything in the set is
   * unhidden. Set is cleared regardless of the setting so it doesn't leak
   * across turns.
   *
   * Undefined when the session wasn't wired with a tracker — unhide just
   * doesn't happen.
   */
  onAppsHidden?: (bundleIds: string[]) => void;

  /**
   * Reads the clipboardGuard stash from session state. `undefined` means no
   * stash is held — `syncClipboardStash` stashes on first entry to click-tier
   * and clears on restore. Sibling of the `cuHiddenDuringTurn` getter pattern
   * — state lives on the host's session, not module-level here.
   */
  getClipboardStash?: () => string | undefined;

  /**
   * Writes the clipboardGuard stash to session state. `undefined` clears.
   * Sibling of `onAppsHidden` — the wrapper closure writes through to
   * `Session.cuClipboardStash`. At turn end the host reads + clears it
   * directly and restores via Electron's `clipboard.writeText` (no nest-only
   * import surface).
   */
  onClipboardStashChanged?: (stash: string | undefined) => void;

  /**
   * Write the resolver's picked display back to session so teach overlay
   * positioning and subsequent non-resolver calls use the same display.
   * Fired by `handleScreenshot` in the atomic `autoTargetDisplay` path when
   * `resolvePrepareCapture`'s pick differs from `selectedDisplayId`.
   * Fire-and-forget.
   */
  onResolvedDisplayUpdated?: (displayId: number) => void;

  /**
   * Set when the model explicitly picked a display via `switch_display`.
   * When true, `handleScreenshot` passes `autoResolve: false` so the Swift
   * resolver honors `selectedDisplayId` directly (straight cuDisplayInfo
   * passthrough) instead of running the co-location/chase chain. The
   * resolver's Step 2 ("host + allowed co-located → host") otherwise
   * overrides any `selectedDisplayId` whenever an allowed app shares the
   * host's monitor.
   */
  displayPinnedByModel?: boolean;

  /**
   * Write the model's explicit display pick to session. `displayId:
   * undefined` clears both `selectedDisplayId` and the pin (back to auto).
   * Sibling of `onResolvedDisplayUpdated` but also sets the pin flag —
   * the two are semantically distinct (resolver-picked vs model-picked).
   */
  onDisplayPinned?: (displayId: number | undefined) => void;

  /**
   * Sorted comma-joined bundle-ID set the display was last auto-resolved
   * for. `handleScreenshot` compares this to the current allowed set and
   * only passes `autoResolve: true` when they differ — so the resolver
   * doesn't yank the display on every screenshot, only when the app set
   * has changed since the last resolve (or manual switch).
   */
  displayResolvedForApps?: string;

  /**
   * Records which app set the current display selection was made for. Fired
   * alongside `onResolvedDisplayUpdated` when the resolver picks, so the next
   * screenshot sees a matching set and skips auto-resolve.
   */
  onDisplayResolvedForApps?: (sortedBundleIdsKey: string) => void;

  /**
   * Global CU lock — at most one session actively uses CU at a time. Checked
   * in `handleToolCall` after kill-switch/TCC, before dispatch. Every CU tool
   * including `request_access` goes through it.
   *
   * - `holder === undefined` → lock is free, safe to acquire
   * - `isSelf === true` → this session already holds it (no-op, proceed)
   * - `holder !== undefined && !isSelf` → blocked, return tool error
   *
   * `undefined` callback → lock system not wired (e.g. CCD). Proceed without
   * gating — absence of the mechanism ≠ locked out.
   *
   * The host manages release (on session idle/stop/archive) — this package
   * never releases.
   */
  checkCuLock?: () => { holder: string | undefined; isSelf: boolean };

  /**
   * Take the lock for this session. `handleToolCall` calls this exactly once
   * per turn, on the FIRST CU tool call when `checkCuLock().holder` is
   * undefined. No-op if already held (defensive — the check should have
   * short-circuited). Host emits an event the overlay listens to.
   */
  acquireCuLock?: () => void;

  /**
   * User-abort signal. Checked mid-iteration inside `handleComputerBatch`
   * and `handleType`'s grapheme loop so an in-flight batch/type stops
   * promptly on overlay Stop instead of running to completion after the
   * host has already abandoned the tool result.
   *
   * Undefined → never aborts (e.g. unwired host). Live per-check read —
   * same lazy-getter pattern as `checkCuLock`.
   */
  isAborted?: () => boolean;

  // ── Teach mode ───────────────────────────────────────────────────────
  // Wired only when the host's teachModeEnabled gate is on. All five
  // undefined → `request_teach_access` / `teach_step` return tool errors
  // and teach mode is effectively off.

  /**
   * Sibling of `onPermissionRequest`. Same blocking-await-on-renderer-dialog
   * semantics, but routes to ComputerUseTeachApproval.tsx (which explains
   * the window-hides-during-guide behavior) instead of ComputerUseApproval.
   * The wrapper closure in serverDef.ts writes grants through to session state
   * via `onCuPermissionUpdated` exactly as `onPermissionRequest` does.
   */
  onTeachPermissionRequest?: (
    req: CuTeachPermissionRequest,
  ) => Promise<CuPermissionResponse>;

  /**
   * Called by `handleRequestTeachAccess` after the user approves and at least
   * one app was granted. Host sets `session.teachModeActive = true`, emits
   * `teachModeChanged` → teach controller hides the main window and shows the
   * fullscreen overlay. Cleared by the host on turn end (`transitionTo("idle")`)
   * alongside the CU lock release.
   */
  onTeachModeActivated?: () => void;

  /**
   * Read by `handleRequestAccess` and `handleRequestTeachAccess` to
   * short-circuit with a clear tool error when teach mode is active. The
   * main window is hidden during teach mode, so permission dialogs render
   * invisibly and handleToolPermission blocks forever on an invisible
   * prompt. Better to tell the model to exit teach mode first. Getter
   * (not a boolean field) because teach mode state lives on the session,
   * not on this per-call overrides object.
   */
  getTeachModeActive?: () => boolean;

  /**
   * Called by `handleTeachStep` with the scaled anchor + text. Host stores
   * the resolver, emits `teachStepRequested` → teach controller pushes the
   * payload to the overlay → user reads, clicks Next → IPC → host calls the
   * stored resolver → this promise resolves. `{action: "exit"}` when the user
   * clicks Exit (or the turn is interrupted) — `handleTeachStep` short-circuits
   * without executing actions.
   *
   * Same blocking-promise pattern as `onPermissionRequest`, but resolved by
   * the teach overlay's own preload (not the main renderer's tool-approval UI).
   */
  onTeachStep?: (req: TeachStepRequest) => Promise<TeachStepResult>;

  /**
   * Called immediately after `onTeachStep` resolves with "next", before
   * action dispatch begins. Host emits `teachStepWorking` → overlay flips to
   * the spinner state (Next button gone, Exit stays, "Working…" + rotating
   * notch). The next `onTeachStep` call replaces the spinner with the new
   * tooltip content.
   */
  onTeachWorking?: () => void;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
// Teach mode (guided-tour tooltips with Next-button action execution)
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
 * Payload the host pushes to the teach overlay BrowserWindow. Built by
 * `handleTeachStep` in toolCalls.ts from the model's `teach_step` args.
 *
 * `anchorLogical` here is POST-`scaleCoord` — **full-display** logical
 * macOS points (origin = monitor top-left, menu bar included, since
 * cuDisplayInfo returns CGDisplayBounds). The overlay window is positioned
 * at `workArea.{x,y}` (excludes menu bar/Dock), so `updateTeachStep` in
 * teach/window.ts subtracts the workArea offset before IPC so the HTML's
 * CSS coords match.
 */
export interface TeachStepRequest {
  explanation: string;
  nextPreview: string;
  /** Full-display logical points. Undefined → overlay centers the tooltip, hides the arrow. */
  anchorLogical?: { x: number; y: number };
}
|
||||
|
||||
/**
 * Outcome of a single teach step, discriminated on `action`:
 * - `"next"` — the user clicked Next on the overlay.
 * - `"exit"` — the user clicked Exit (or the turn was interrupted).
 */
export type TeachStepResult =
  | { action: "next" }
  | { action: "exit" };
|
||||
|
||||
/**
 * Payload for the renderer's ComputerUseTeachApproval dialog. Rides through
 * `ToolPermissionRequest.input: unknown` same as `CuPermissionRequest`.
 * Separate type (not a flag on `CuPermissionRequest`) so the two approval
 * components can narrow independently and the teach dialog is free to drop
 * fields it doesn't render (no grant-flag checkboxes in teach mode).
 */
export interface CuTeachPermissionRequest {
  requestId: string;
  /** Model-provided reason. Shown in the dialog headline ("guide you through {reason}"). */
  reason: string;
  apps: ResolvedAppRequest[];
  screenshotFiltering: "native" | "none";
  /** Present only when TCC is ungranted — same semantics as `CuPermissionRequest.tccState`. */
  tccState?: {
    accessibility: boolean;
    screenRecording: boolean;
  };
  willHide?: Array<{ bundleId: string; displayName: string }>;
  /** Same semantics as `CuPermissionRequest.autoUnhideEnabled`. */
  autoUnhideEnabled?: boolean;
}
|
||||
Reference in New Issue
Block a user