All files / src/schemas validation-schemas.ts

100% Statements 26/26
100% Branches 13/13
100% Functions 3/3
100% Lines 26/26

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267      3x                             3x             3x             3x     5x 1x   4x               3x               3x             3x                   3x               3x               3x                   3x                   3x               3x                     3x                 3x           3x       3x                                                                                                                                                                                         13x 2x   11x                       13x 2x   11x                    
import { z } from 'zod';
import { validateJavaScriptCode, createStatelessSchema } from './helpers.js';
 
/**
 * Message reported when a snippet fails `validateJavaScriptCode`.
 *
 * The HTML entity is spelled out as `&quot;`: a bare `"` is an ordinary JS
 * quote, not an entity, so showing it as the offending token was misleading.
 */
const INVALID_JS_CODE_MESSAGE =
  'Invalid JavaScript: Contains HTML entities (&quot;), literal \\n outside strings, or HTML tags. Use proper JS syntax with real quotes and newlines.';

/**
 * A single JavaScript snippet: any string accepted by `validateJavaScriptCode`.
 * Defined once so the string form and the array form cannot drift apart.
 */
const JsSnippetSchema = z.string().refine(validateJavaScriptCode, {
  message: INVALID_JS_CODE_MESSAGE,
});

/**
 * JavaScript code supplied either as one snippet or as an array of snippets.
 * Every snippet (in either form) must pass `validateJavaScriptCode`.
 */
export const JsCodeSchema = z
  .union([JsSnippetSchema, z.array(JsSnippetSchema)])
  .describe('JavaScript code as string or array of strings');
 
/** Accepted forms of `scroll_by`: either a string or a number. */
const ScrollByValueSchema = z.union([z.string(), z.number()]);

/**
 * Virtual-scroll settings object.
 * Only `container_selector` is mandatory; the remaining keys may be omitted.
 */
export const VirtualScrollConfigSchema = z.object({
  container_selector: z.string(),
  scroll_count: z.number().optional(),
  scroll_by: ScrollByValueSchema.optional(),
  wait_after_scroll: z.number().optional(),
});
 
/** Filter names accepted by the `get_markdown` tool. */
const markdownFilters = ['raw', 'fit', 'bm25', 'llm'] as const;

/**
 * Plain object shape for `get_markdown` arguments, before the stateless
 * wrapper and the cross-field `query` rule are applied.
 *
 * Defaults: `filter` falls back to 'fit' and `cache` to the string '0'.
 */
const GetMarkdownBaseSchema = z.object({
  url: z.string().url(),
  filter: z.enum(markdownFilters).optional().default('fit'),
  query: z.string().optional(),
  cache: z.string().optional().default('0'),
});
 
/**
 * Arguments for the `get_markdown` tool.
 *
 * Wraps `GetMarkdownBaseSchema` with `createStatelessSchema` and then adds a
 * cross-field rule: the 'bm25' and 'llm' filters need a non-empty `query`.
 * A violation is reported on the `query` path.
 */
export const GetMarkdownSchema = createStatelessSchema(GetMarkdownBaseSchema, 'get_markdown').refine(
  (data) => {
    const filterNeedsQuery = data.filter === 'bm25' || data.filter === 'llm';
    // Valid when the filter has no query requirement, or a query was given.
    return !filterNeedsQuery || Boolean(data.query);
  },
  {
    message: 'Query parameter is required when using bm25 or llm filter',
    path: ['query'],
  },
);
 
/** Object shape for `execute_js`: a target URL plus the script(s) to run. */
const ExecuteJsBaseSchema = z.object({
  url: z.string().url(),
  scripts: JsCodeSchema,
});

/** Arguments for the `execute_js` tool, wrapped by `createStatelessSchema`. */
export const ExecuteJsSchema = createStatelessSchema(ExecuteJsBaseSchema, 'execute_js');
 
/** Object shape for `get_html`: only a valid URL is accepted. */
const GetHtmlBaseSchema = z.object({
  url: z.string().url(),
});

/** Arguments for the `get_html` tool, wrapped by `createStatelessSchema`. */
export const GetHtmlSchema = createStatelessSchema(GetHtmlBaseSchema, 'get_html');
 
/**
 * Object shape for `capture_screenshot`.
 * `output_path` is intentionally absent: MCP needs the base64 data.
 */
const CaptureScreenshotBaseSchema = z.object({
  url: z.string().url(),
  screenshot_wait_for: z.number().optional(),
  save_to_directory: z.string().optional().describe('Local directory to save screenshot file'),
});

/** Arguments for the `capture_screenshot` tool, wrapped by `createStatelessSchema`. */
export const CaptureScreenshotSchema = createStatelessSchema(
  CaptureScreenshotBaseSchema,
  'capture_screenshot',
);
 
/**
 * Object shape for `generate_pdf`.
 * `url` is the only supported key; `output_path` is not exposed because MCP
 * needs the base64 data.
 */
const GeneratePdfBaseSchema = z.object({
  url: z.string().url(),
});

/** Arguments for the `generate_pdf` tool, wrapped by `createStatelessSchema`. */
export const GeneratePdfSchema = createStatelessSchema(GeneratePdfBaseSchema, 'generate_pdf');
 
/** Object shape for `extract_with_llm`: both `url` and `query` are required. */
const ExtractWithLlmBaseSchema = z.object({
  url: z.string().url(),
  query: z.string(),
});

/** Arguments for the `extract_with_llm` tool, wrapped by `createStatelessSchema`. */
export const ExtractWithLlmSchema = createStatelessSchema(
  ExtractWithLlmBaseSchema,
  'extract_with_llm',
);
 
/**
 * Object shape for `batch_crawl`: a list of valid URLs plus optional
 * numeric/boolean tuning flags.
 */
const BatchCrawlBaseSchema = z.object({
  urls: z.array(z.string().url()),
  max_concurrent: z.number().optional(),
  remove_images: z.boolean().optional(),
  bypass_cache: z.boolean().optional(),
});

/** Arguments for the `batch_crawl` tool, wrapped by `createStatelessSchema`. */
export const BatchCrawlSchema = createStatelessSchema(BatchCrawlBaseSchema, 'batch_crawl');
 
/** Object shape for `smart_crawl`: a required URL and three optional settings. */
const SmartCrawlBaseSchema = z.object({
  url: z.string().url(),
  max_depth: z.number().optional(),
  follow_links: z.boolean().optional(),
  bypass_cache: z.boolean().optional(),
});

/** Arguments for the `smart_crawl` tool, wrapped by `createStatelessSchema`. */
export const SmartCrawlSchema = createStatelessSchema(SmartCrawlBaseSchema, 'smart_crawl');
 
/**
 * Object shape for `extract_links`.
 * `categorize` defaults to `true` when the caller omits it.
 */
const ExtractLinksBaseSchema = z.object({
  url: z.string().url(),
  categorize: z.boolean().optional().default(true),
});

/** Arguments for the `extract_links` tool, wrapped by `createStatelessSchema`. */
export const ExtractLinksSchema = createStatelessSchema(ExtractLinksBaseSchema, 'extract_links');
 
/**
 * Object shape for `crawl_recursive`: a required URL, optional numeric
 * limits, and optional include/exclude pattern strings.
 */
const CrawlRecursiveBaseSchema = z.object({
  url: z.string().url(),
  max_depth: z.number().optional(),
  max_pages: z.number().optional(),
  include_pattern: z.string().optional(),
  exclude_pattern: z.string().optional(),
});

/** Arguments for the `crawl_recursive` tool, wrapped by `createStatelessSchema`. */
export const CrawlRecursiveSchema = createStatelessSchema(
  CrawlRecursiveBaseSchema,
  'crawl_recursive',
);
 
/** Object shape for `parse_sitemap`: a required URL and an optional filter pattern string. */
const ParseSitemapBaseSchema = z.object({
  url: z.string().url(),
  filter_pattern: z.string().optional(),
});

/** Arguments for the `parse_sitemap` tool, wrapped by `createStatelessSchema`. */
export const ParseSitemapSchema = createStatelessSchema(ParseSitemapBaseSchema, 'parse_sitemap');
 
// Session management tools don't need stateless schema
/** Browser engine names accepted when creating a session. */
const sessionBrowserTypes = ['chromium', 'firefox', 'webkit'] as const;

/**
 * Arguments for creating a session. Every key is optional; `initial_url`,
 * when present, must be a valid URL.
 */
export const CreateSessionSchema = z.object({
  session_id: z.string().optional(),
  initial_url: z.string().url().optional(),
  browser_type: z.enum(sessionBrowserTypes).optional(),
});
 
/** Arguments for clearing a session: the `session_id` string is required. */
export const ClearSessionSchema = z.object({ session_id: z.string() });
 
/** One cookie entry; `path` is the only optional key. */
const CrawlCookieSchema = z.object({
  name: z.string(),
  value: z.string(),
  domain: z.string(),
  path: z.string().optional(),
});

/** Reported when `js_only` is set without a `session_id`. */
const JS_ONLY_NEEDS_SESSION_MESSAGE = [
  "Error: js_only requires session_id (it's for continuing existing sessions).",
  'For first call with js_code, use: {js_code: [...], screenshot: true}',
  'For multi-step: First {js_code: [...], session_id: "x"}, then {js_only: true, session_id: "x"}',
].join('\n');

/** Reported when `js_code` is given as an empty array. */
const EMPTY_JS_CODE_MESSAGE =
  'Error: js_code array cannot be empty. Either provide JavaScript code to execute or remove the js_code parameter entirely.';

/**
 * Arguments for the full `crawl` tool.
 *
 * Only `url` is required. Two cross-field rules are enforced after the
 * object shape validates:
 *   1. `js_only` may only be used together with a `session_id`.
 *   2. `js_code`, when given as an array, must contain at least one entry.
 */
export const CrawlSchema = z
  .object({
    url: z.string().url(),

    // --- Browser configuration ---
    browser_type: z.enum(['chromium', 'firefox', 'webkit']).optional(),
    viewport_width: z.number().optional(),
    viewport_height: z.number().optional(),
    user_agent: z.string().optional(),
    proxy_server: z.string().optional(),
    proxy_username: z.string().optional(),
    proxy_password: z.string().optional(),
    cookies: z.array(CrawlCookieSchema).optional(),
    headers: z.record(z.string()).optional(),
    extra_args: z.array(z.string()).optional(),

    // --- Content filtering ---
    word_count_threshold: z.number().optional(),
    excluded_tags: z.array(z.string()).optional(),
    excluded_selector: z.string().optional(),
    remove_overlay_elements: z.boolean().optional(),
    only_text: z.boolean().optional(),
    remove_forms: z.boolean().optional(),
    keep_data_attributes: z.boolean().optional(),

    // --- JavaScript execution ---
    js_code: JsCodeSchema.optional(),
    js_only: z.boolean().optional(),
    wait_for: z.string().optional(),
    wait_for_timeout: z.number().optional(),

    // --- Page navigation & timing ---
    wait_until: z.enum(['domcontentloaded', 'networkidle', 'load']).optional(),
    page_timeout: z.number().optional(),
    wait_for_images: z.boolean().optional(),
    ignore_body_visibility: z.boolean().optional(),

    // --- Dynamic content ---
    delay_before_scroll: z.number().optional(),
    scroll_delay: z.number().optional(),
    scan_full_page: z.boolean().optional(),
    virtual_scroll_config: VirtualScrollConfigSchema.optional(),

    // --- Content processing ---
    process_iframes: z.boolean().optional(),
    exclude_external_links: z.boolean().optional(),

    // --- Media handling ---
    screenshot: z.boolean().optional(),
    screenshot_wait_for: z.number().optional(),
    screenshot_directory: z
      .string()
      .optional()
      .describe('Local directory to save screenshot file when screenshot=true'),
    pdf: z.boolean().optional(),
    capture_mhtml: z.boolean().optional(),
    image_description_min_word_threshold: z.number().optional(),
    image_score_threshold: z.number().optional(),
    exclude_external_images: z.boolean().optional(),

    // --- Link filtering ---
    exclude_social_media_links: z.boolean().optional(),
    exclude_domains: z.array(z.string()).optional(),

    // --- Page interaction ---
    simulate_user: z.boolean().optional(),
    override_navigator: z.boolean().optional(),
    magic: z.boolean().optional(),

    // --- Session and cache ---
    session_id: z.string().optional(),
    cache_mode: z.enum(['ENABLED', 'BYPASS', 'DISABLED']).optional(),

    // --- Performance options ---
    timeout: z.number().optional(),
    verbose: z.boolean().optional(),

    // --- Debug ---
    log_console: z.boolean().optional(),
  })
  // js_only is meant for follow-up calls in an existing session, not for the
  // first call; using it without a session causes server errors.
  .refine((data) => !data.js_only || Boolean(data.session_id), {
    message: JS_ONLY_NEEDS_SESSION_MESSAGE,
  })
  // An explicitly empty js_code array is rejected; a string or an omitted
  // js_code passes this rule.
  .refine((data) => !(Array.isArray(data.js_code) && data.js_code.length === 0), {
    message: EMPTY_JS_CODE_MESSAGE,
  });
 
// Type-only re-export of zod's `z`, so modules importing these schemas can
// reference `z` in type positions. `export type` is erased at compile time,
// so no runtime binding is exported here.
export type { z };