remorses · November 18, 2025 21:57
diff --git a/gemini-antigravity-browser-tools.yaml b/gemini-antigravity-browser-tools.yaml
 # JSON Schema (draft-07) root object titled BrowserSubagentTools.
 $schema: "http://json-schema.org/draft-07/schema#"
 title: BrowserSubagentTools
 type: object
 # Each key under `properties` names one tool; its value is an object schema
 # describing that tool's arguments.
 properties:
  # capture_browser_screenshot: screenshot of a page that is already open.
  # Required arguments: PageID and ScreenshotName. SaveScreenshot,
  # explanation and toolSummary are optional.
  # NOTE(review): the description mentions a "specific element by index",
  # but no index argument is declared here -- confirm whether one is missing.
  # NOTE(review): this tool spells the page key `PageID`, whereas
  # read_browser_page and several other tools spell it `PageId`.
  capture_browser_screenshot:
    type: object
    description: >-
      Capture a screenshot of the current viewport or specific element
      by index of a browser page that is already open in Jetski Browser.
      This can be used to understand the state of the page
      or to create a visual artifact for the user to review.
    properties:
      PageID:
        type: string
        description: >-
          page_id of the Browser page to capture a screenshot of.
      ScreenshotName:
        type: string
        description: >-
          Name of the screenshot to save.
          Should be all lowercase with underscores,
          describing what the screenshot contains.
          Maximum 3 words.
          Example: 'login_page_error'
      SaveScreenshot:
        type: boolean
        description: >-
          If true, saves the screenshot as an artifact.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required: [PageID, ScreenshotName]

  # list_browser_pages: enumerates open pages and their metadata.
  # Declares only the explanation/toolSummary arguments and has no
  # `required` list, so every argument is optional.
  list_browser_pages:
    type: object
    description: >-
      List all open pages in Jetski Browser and their metadata
      (page_id, url, title, viewport size, etc.).
    properties:
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.

  # read_browser_page: reads an already-open page identified by PageId
  # (the only required argument).
  # NOTE(review): the description points at a `read_url_content` tool that
  # is not defined in this schema -- presumably provided elsewhere.
  read_browser_page:
    type: object
    description: >-
      Read a page in the USER's visible browser.
      Note that the page must already be open in the browser
      and you must have a page_id for it.
      If you have a URL you would like to view and interact with,
      first call the open_browser_url tool.
      Prefer the read_url_content tool if you only need to read
      a public URL and not interact with it.
      You should also use this tool for getting window size information,
      this is part of the page's metadata which is returned.
    properties:
      PageId:
        type: string
        description: >-
          page_id of the Browser page to read
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required: [PageId]

  # browser_get_dom: returns the indexed, in-viewport DOM elements of a page.
  # PageId is the only required argument. The element indices it yields are
  # what browser_input, browser_click_element and browser_select_option
  # refer to in their own descriptions.
  browser_get_dom:
    type: object
    description: >-
      Get the DOM tree of an open page in the Jetski Browser.
      Returns only interactive elements and text within the current
      viewport, each with an index for interaction.
      If an element is not included, it may be outside the viewport
      or getting filtered for other reasons - refer to the screenshot
      to confirm.
      Then try read_browser_page and browser_scroll tools.
    properties:
      PageId:
        type: string
        description: >-
          page_id of the Browser page to get the DOM tree of
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required: [PageId]

  # capture_browser_console_logs: fetches console output of an open page.
  # PageId is the only required argument.
  capture_browser_console_logs:
    type: object
    description: >-
      Retrieve the console logs of a browser page that is already open
      in Jetski Browser.
      Note that this tool may struggle to retrieve console logs
      from the startup of the page.
      If appropriate, try reloading the page and running the tool again.
    properties:
      PageId:
        type: string
        description: >-
          page_id of the Browser page to capture console logs of.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required: [PageId]

  # browser_input: focuses an indexed DOM element and types text into it.
  # Required arguments: PageId, Index, Text. ClearText and PressEnter are
  # optional booleans whose descriptions state a default of false.
  browser_input:
    type: object
    description: >-
      Focus on an annotated DOM element in a browser page
      and input text into it.
      The index can be found by calling browser_get_dom.
    properties:
      PageId:
        type: string
        description: >-
          The page_id of the browser page to input text on.
      Index:
        type: integer
        description: >-
          Index of the annotated DOM element to input text into.
      Text:
        type: string
        description: >-
          The text to input into the element.
      ClearText:
        type: boolean
        description: >-
          Whether to clear existing text before inputting.
          Default is false.
      PressEnter:
        type: boolean
        description: >-
          Whether to press Enter after inputting the text.
          Default is false.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required: [PageId, Index, Text]

  # browser_move_mouse: moves the mouse to a pixel position on a page.
  # Required arguments: PageId, X, Y.
  # The coordinate names are quoted on purpose: a bare `Y` is a boolean
  # under the YAML 1.1 bool type, so an unquoted key or list entry can load
  # as `true` instead of the string "Y". `X` is quoted only for symmetry.
  browser_move_mouse:
    type: object
    description: Move the mouse to a specific position on the browser page.
    properties:
      PageId:
        type: string
        description: page_id of the Browser page to move the mouse cursor to.
      'X':
        type: integer
        description: x-coordinate of the pixel to move the mouse cursor to.
      'Y':
        type: integer
        description: y-coordinate of the pixel to move the mouse cursor to.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required:
      - PageId
      - 'X'
      - 'Y'

  # browser_select_option: picks an option in an indexed <select> element.
  # Required arguments: PageId, Index, Value.
  browser_select_option:
    type: object
    description: >-
      Select an option from a dropdown (select) element in a browser page.
      The index can be found by calling browser_get_dom.
    properties:
      PageId:
        type: string
        description: >-
          The page_id of the browser page containing the dropdown element.
      Index:
        type: integer
        description: >-
          Index of the annotated DOM select element
          to select an option from.
      Value:
        type: string
        description: >-
          The value or text of the option to select from the dropdown.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required: [PageId, Index, Value]

  # browser_scroll: scrolls either the whole page or one indexed element.
  # Required arguments: PageId, Direction, ElementIndex.
  # Direction is constrained with an `enum` matching the four options its
  # description lists, in the same way ClickType is constrained on the
  # click tools.
  # NOTE(review): ElementIndex is required even though, per the
  # ScrollByElementIndex description, it only matters when that flag is
  # true -- confirm whether callers are expected to pass a placeholder.
  browser_scroll:
    type: object
    description: >-
      A tool used to scroll on an element or the page in the browser.
      For vertical scroll, dy is automatically set to the height of the
      element/page.
      For horizontal scroll, dx is automatically set to the width of the
      element/page.
      Will output the number of pixels scrolled, indicating 0 pixels if
      no scrolling occurred.
      Use when elements you need are not visible in the current viewport.
    properties:
      PageId:
        type: string
        description: page_id of the Browser page to scroll.
      Direction:
        type: string
        enum:
          - left
          - right
          - up
          - down
        description: 'direction of the scroll. Options are left, right, up, down'
      ElementIndex:
        type: integer
        description: index of the element to scroll on
      ScrollByElementIndex:
        type: boolean
        description: >-
          if true, scroll by the element with the given index; the scroll is
          performed via executing a mouseWheel event on the pixel at the middle
          of the element. Otherwise scroll the entire page; in this case, if 0
          pixels are scrolled, the page is likely not scrollable and the tool
          call should be retried by scrolling a DOM element.
      ScrollToEnd:
        type: boolean
        description: >-
          if true, scroll in the direction to the end of the selected
          element/page. For example, if direction is down, would scroll to the
          bottom of the element/page.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used, and how it
          contributes to the goal. Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments, this
          takes precedence in case any other arguments say they should be
          specified first. Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web', 'editing
          file', 'viewing file', 'running command', 'semantic searching'.
    required:
      - PageId
      - Direction
      - ElementIndex

  # browser_click_element: clicks an indexed DOM element.
  # Required arguments: PageId, Index, Description. ClickType is optional.
  # The description names the text-entry tool by its schema key,
  # `browser_input`, so the cross-reference matches a tool defined here.
  # NOTE(review): the ClickType description allows the value to be "left
  # empty", but the enum does not include an empty string -- confirm which
  # of the two is intended.
  browser_click_element:
    type: object
    description: >-
      Click on an annotated DOM element in a browser page. The index can be
      found by calling browser_get_dom. Prefer the browser_input tool if you
      need to input text in an element. If the element is not clickable through
      this tool, you may need to click based on the pixel coordinates instead.
    properties:
      PageId:
        type: string
        description: The page_id of the browser page to click on.
      Index:
        type: integer
        description: Index of the annotated DOM element to click on.
      Description:
        type: string
        description: Natural language description of the element to click on.
      ClickType:
        type: string
        enum:
          - left
          - right
          - double
        description: >-
          Type of click to perform: 'left', 'right', or 'double'. If not
          specified or left empty, a left click will be performed.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used, and how it
          contributes to the goal. Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments, this
          takes precedence in case any other arguments say they should be
          specified first. Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web', 'editing
          file', 'viewing file', 'running command', 'semantic searching'.
    required:
      - PageId
      - Index
      - Description

  # browser_press_key: sends a key / key combination (Key) or types a
  # string character by character (Text) on a page.
  # PageId is the only required argument.
  # NOTE(review): both Key and Text say "only specify one of Key or Text",
  # but the schema itself does not enforce that -- a call with both, or
  # with neither, still validates.
  browser_press_key:
    type: object
    description: >-
      Simulate key press or text input in the current browser page.
      Use to interact with the keyboard without having to specify
      a specific DOM element.
      Also use for specific keyboard operations like shortcuts.
      IMPORTANT: Before calling this tool to input text into an element
      (like a search box), you MUST first ensure that element is focused
      and ready to accept input by clicking on it
      or using another interaction method.
    properties:
      PageId:
        type: string
        description: >-
          page_id of the Browser page to simulate a key press on
      Key:
        type: string
        description: >-
          Name of the key/key combination to simulate.
          Examples of keys are: "F1" - "F12", "Digit0"- "Digit9",
          "KeyA"- "KeyZ", "Backquote", "Minus", "Equal", "Backslash",
          "Backspace", "Tab", "Delete", "Escape", "ArrowDown", "End",
          "Enter", "Home", "Insert", "PageDown", "PageUp", "ArrowRight",
          "ArrowUp", etc.
          This tool also supports combinations with modifiers
          (e.g., Control+Enter).
          Examples of modifiers are: "Shift", "Control", "Alt", "Meta",
          "ShiftLeft", "ControlOrMeta".
          "ControlOrMeta" resolves to "Control" on Windows and Linux
          and to "Meta" on macOS.
          Only specify one of Key or Text -
          use Key for keyboard shortcuts and special keys.
      Text:
        type: string
        description: >-
          Text to type sequentially, character by character.
          Use this for typing regular text content like letters, numbers,
          and basic symbols.
          Each character will be typed individually in sequence.
          Only specify one of Key or Text -
          use Text for typing regular content, not for keyboard shortcuts
          or special keys like F1, Control+C, etc.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required: [PageId]

  # click_browser_pixel: clicks at a coordinate on an already-open page.
  # Required arguments: PageID, Y, X. ClickType is optional.
  # X and Y are expressed on a 0-999 grid, per their descriptions.
  # The coordinate names are quoted on purpose: a bare `Y` is a boolean
  # under the YAML 1.1 bool type, so an unquoted key or list entry can load
  # as `true` instead of the string "Y". `X` is quoted only for symmetry.
  # NOTE(review): this tool spells the page key `PageID`, whereas
  # browser_click_element and several other tools spell it `PageId`.
  click_browser_pixel:
    type: object
    description: >-
      Click at a specific pixel coordinate on a browser page that is already
      open in Jetski Browser. Only use this when encountering issues clicking
      DOM elements.
    properties:
      PageID:
        type: string
        description: The page_id of the browser page to click on.
      'Y':
        type: integer
        description: >-
          Y coordinate of the pixel to click (0-999). Coordinates are scaled to
          a 1000x1000 grid and mapped to screen dimensions when executing the
          tool call.
      'X':
        type: integer
        description: >-
          X coordinate of the pixel to click (0-999). Coordinates are scaled to
          a 1000x1000 grid and mapped to screen dimensions when executing the
          tool call.
      ClickType:
        type: string
        enum:
          - left
          - right
          - double
        description: >-
          Type of click to perform: 'left', 'right', or 'double'. If not
          specified or left empty, a left click will be performed.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used, and how it
          contributes to the goal. Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments, this
          takes precedence in case any other arguments say they should be
          specified first. Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web', 'editing
          file', 'viewing file', 'running command', 'semantic searching'.
    required:
      - PageID
      - 'Y'
      - 'X'

  # browser_resize_window: sets the window state and, for 'normal', its
  # size. Required arguments: PageID, WindowState. Height and Width are
  # optional and, per their descriptions, only used for 'normal'.
  # WindowState is constrained with an `enum` matching the four options its
  # description lists, in the same way ClickType is constrained on the
  # click tools.
  browser_resize_window:
    type: object
    description: >-
      Resize the browser window to the specified width and height. This tool
      can also be used to fullscreen or minimize a browser window. Do NOT try
      to call this tool in parallel yourself, or by spawning parallel subagents
      that are told to resize windows. Parallel resizing will not reliably work
      since the tabs could share a common window.
    properties:
      PageID:
        type: string
        description: page_id of the Browser page to resize.
      WindowState:
        type: string
        enum:
          - normal
          - minimized
          - maximized
          - fullscreen
        description: >-
          The window state to set. Options: 'normal' (resizable window with
          specified width/height), 'minimized' (window minimized to taskbar),
          'maximized' (window is full screen but shows taskbar), 'fullscreen'
          (window fills entire screen and hides taskbar). Width and Height are
          only used when WindowState is 'normal'. Generally you should prefer
          'maximized'. If the user asks to make the window smaller or a
          particular size, use 'normal'. When resetting the window size, prefer
          'maximized' instead of 'normal' with specific width/height values.
          'minimized' and 'fullscreen' are somewhat jarring, so you should only
          use these when the user explicitly asks for it.
      Height:
        type: integer
        description: >-
          The window contents height in display independent pixels. Only used
          when WindowState is 'normal'.
      Width:
        type: integer
        description: >-
          The window contents width in display independent pixels. Only used
          when WindowState is 'normal'.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used, and how it
          contributes to the goal. Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments, this
          takes precedence in case any other arguments say they should be
          specified first. Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web', 'editing
          file', 'viewing file', 'running command', 'semantic searching'.
    required:
      - PageID
      - WindowState
  # browser_drag_pixel_to_pixel: drags the mouse along a list of waypoints.
  # Per the Waypoints description the sequence is: click the first
  # waypoint, drag through the rest in order, release at the last one.
  # PageID and Waypoints are required, matching the other page-bound tools
  # in this schema, which all require their page id; each waypoint must
  # carry both coordinates.
  # The coordinate names are quoted on purpose: a bare `Y` is a boolean
  # under the YAML 1.1 bool type, so an unquoted key or list entry can load
  # as `true` instead of the string "Y". `X` is quoted only for symmetry.
  browser_drag_pixel_to_pixel:
    type: object
    description: >-
      Drag from one pixel coordinate to another in the browser. This simulates
      a click, drag, and release operation from the starting coordinates,
      through zero or more intermediate coordinates, and then to the ending
      coordinates.
    properties:
      PageID:
        type: string
        description: page_id of the Browser page to perform the drag operation on
      Waypoints:
        type: array
        items:
          type: object
          properties:
            'X':
              type: integer
              description: >-
                X coordinate for starting, continuing, or ending dragging
                (0-999). Coordinates are scaled to a 1000x1000 grid and mapped
                to screen dimensions when executing the tool call.
            'Y':
              type: integer
              description: >-
                Y coordinate for starting, continuing, or ending dragging
                (0-999). Coordinates are scaled to a 1000x1000 grid and mapped
                to screen dimensions when executing the tool call.
          required:
            - 'X'
            - 'Y'
        description: >-
          A series of pixel coordinates defining the drag path. When this tool
          call is executed, the first waypoint will be clicked, then the mouse
          will be dragged to each subsequent waypoint in the provided order,
          and finally the mouse will be released at the last waypoint.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used, and how it
          contributes to the goal. Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments, this
          takes precedence in case any other arguments say they should be
          specified first. Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web', 'editing
          file', 'viewing file', 'running command', 'semantic searching'.
    required:
      - PageID
      - Waypoints

  # wait_5_seconds: fixed five-second pause.
  # Declares only the explanation/toolSummary arguments and has no
  # `required` list, so every argument is optional.
  wait_5_seconds:
    type: object
    description: >-
      Wait for 5 seconds before continuing execution.
    properties:
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.

  # execute_browser_javascript: runs a JavaScript snippet in a page.
  # Required arguments: Title, PageId, JavaScriptSource,
  # JavaScriptDescription. SafeToAutoRun is an optional boolean; its
  # description tells the caller to set it only when the code is safe to
  # run without user approval.
  execute_browser_javascript:
    type: object
    description: >-
      Execute JavaScript on a page in Jetski Browser for navigation and
      interaction. The JavaScript runs in the page context and should be a
      valid expression or statement sequence. Does not modify page content.
    properties:
      Title:
        type: string
        description: >-
          An at most 20 character title describing the task in the imperative
          form. Will be displayed as the title of the tool in the step UI.
      PageId:
        type: string
        description: page_id of the Browser page to execute the JavaScript on
      JavaScriptSource:
        type: string
        description: >-
          JavaScript code to execute on the page. The code must be a valid
          expression or series of statements that can be evaluated directly
          (e.g., 'document.querySelector(".button").click()' or '(() => {
          window.scrollTo(0, 1000); return true; })()'). Avoid bare return
          statements outside of functions. The code should not depend on
          external variables, modify page content, or perform non-navigation
          actions.
      JavaScriptDescription:
        type: string
        description: Human-readable description of the JavaScript to execute
      SafeToAutoRun:
        type: boolean
        description: >-
          Set to true if you believe that this code is safe to run WITHOUT user
          approval. JavaScript is unsafe if it may have some destructive
          side-effects. Set to true only if you are extremely confident it is
          safe. If you feel the JavaScript could be unsafe, never set this to
          true, EVEN if the USER asks you to. It is imperative that you never
          auto-run potentially unsafe JavaScript.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used, and how it
          contributes to the goal. Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments, this
          takes precedence in case any other arguments say they should be
          specified first. Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web', 'editing
          file', 'viewing file', 'running command', 'semantic searching'.
    required:
      - Title
      - PageId
      - JavaScriptSource
      - JavaScriptDescription

  # open_browser_url: opens, navigates to, or reloads a URL.
  # Url is the only required argument; PageIdToReplace optionally names an
  # existing page to reuse instead of opening a new one.
  open_browser_url:
    type: object
    description: >-
      Open a URL in Jetski Browser to view the page contents of a URL
      in a rendered format.
      You can also use this tool to navigate to different URLs
      or reload the current page.
    properties:
      Url:
        type: string
        description: >-
          The URL to open in the user's browser.
      PageIdToReplace:
        type: string
        description: >-
          An existing page ID which will be replaced with this new URL.
          Use this to redirect a page if it's not needed anymore.
          Do not redirect pages you did not open.
          Leave blank if you want to open a new page.
      explanation:
        type: string
        description: >-
          One sentence explanation as to why this tool is being used,
          and how it contributes to the goal.
          Start with a gerund like 'analyzing'.
      toolSummary:
        type: string
        description: >-
          You must specify this argument first over all other arguments,
          this takes precedence in case any other arguments say they
          should be specified first.
          Brief 2-5 word summary of what this tool is doing.
          Some examples: 'analyzing directory', 'searching the web',
          'editing file', 'viewing file', 'running command',
          'semantic searching'.
    required: [Url]
No results found