Press n or j to go to the next uncovered block, b, p or k for the previous block.
import { BaseHandler } from './base-handler.js';
import { JSExecuteEndpointOptions, JSExecuteEndpointResponse, CrawlResultItem } from '../types.js';

/** Buckets used by the categorized output of `extractLinks`, in display order. */
type LinkCategory = 'internal' | 'external' | 'social' | 'documents' | 'images' | 'scripts';

const SOCIAL_DOMAINS = ['facebook.com', 'twitter.com', 'linkedin.com', 'instagram.com', 'youtube.com'];
const DOC_EXTENSIONS = ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'];
const IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp'];
const SCRIPT_EXTENSIONS = ['.js', '.css'];

/**
 * Resolves `href` against `baseUrl` and returns its lower-cased hostname and pathname.
 * If the link cannot be parsed, the hostname is empty and the pathname is the raw
 * link with any query string / fragment stripped.
 */
function parseLinkTarget(href: string, baseUrl: string): { hostname: string; pathname: string } {
  try {
    const parsed = new URL(href, baseUrl);
    return { hostname: parsed.hostname.toLowerCase(), pathname: parsed.pathname.toLowerCase() };
  } catch {
    const [pathOnly = href] = href.split(/[?#]/, 1);
    return { hostname: '', pathname: pathOnly.toLowerCase() };
  }
}

/** True when `pathname` ends with one of the given (lower-case) extensions. */
function hasExtension(pathname: string, extensions: readonly string[]): boolean {
  return extensions.some((ext) => pathname.endsWith(ext));
}

/**
 * Picks the output bucket for a link.
 *
 * Matching is done on the parsed hostname and pathname rather than on the raw
 * string, so query strings / fragments do not hide a file extension and a social
 * domain only matches as the host itself or one of its subdomains.
 *
 * @param href - The link to classify.
 * @param baseUrl - URL of the crawled page, used to resolve relative links.
 * @param origin - Whether the link came from the internal or the external list;
 *   only external links are checked against the social domains, and `origin` is
 *   the bucket used when nothing more specific matches.
 */
function classifyLink(href: string, baseUrl: string, origin: 'internal' | 'external'): LinkCategory {
  const { hostname, pathname } = parseLinkTarget(href, baseUrl);

  if (
    origin === 'external' &&
    SOCIAL_DOMAINS.some((domain) => hostname === domain || hostname.endsWith(`.${domain}`))
  ) {
    return 'social';
  }
  if (hasExtension(pathname, DOC_EXTENSIONS)) {
    return 'documents';
  }
  if (hasExtension(pathname, IMAGE_EXTENSIONS)) {
    return 'images';
  }
  if (hasExtension(pathname, SCRIPT_EXTENSIONS)) {
    return 'scripts';
  }
  return origin;
}

export class UtilityHandlers extends BaseHandler {
  /**
   * Runs the supplied JavaScript through `this.service.executeJS` and renders the
   * per-script results (plus the page markdown, when the response carries any) as a
   * single text content item.
   *
   * @param options - Execution options; `options.scripts` must be provided.
   * @returns An object with a `content` array holding one `text` entry.
   * @throws Whatever `this.formatError(error, 'execute JavaScript')` produces, for a
   *   missing `scripts` value as well as for failures of the service call.
   */
  async executeJS(options: JSExecuteEndpointOptions) {
    try {
      // Check if scripts is provided (covers undefined, null and the empty string)
      if (!options.scripts) {
        throw new Error(
          'scripts is required. Please provide JavaScript code to execute. Use "return" statements to get values back.',
        );
      }

      const result: JSExecuteEndpointResponse = await this.service.executeJS(options);

      // Extract JavaScript execution results
      const jsResults = result.js_execution_result?.results || [];

      // Ensure scripts is always an array so results can be paired with scripts by index
      const scripts = Array.isArray(options.scripts) ? options.scripts : [options.scripts];

      // Format results for display
      let formattedResults = '';
      if (jsResults.length > 0) {
        formattedResults = jsResults
          .map((res: unknown, idx: number) => {
            const script = scripts[idx] || 'Script ' + (idx + 1);

            let resultStr = '';
            if (res && typeof res === 'object' && 'success' in res) {
              // A status object rather than a returned value (e.g. a script with no return)
              const statusObj = res as { success: unknown; error?: unknown };
              resultStr = statusObj.success
                ? 'Executed successfully (no return value)'
                : `Error: ${statusObj.error || 'Unknown error'}`;
            } else {
              // An actual return value
              resultStr = JSON.stringify(res, null, 2);
            }

            return `Script: ${script}\nReturned: ${resultStr}`;
          })
          .join('\n\n');
      } else {
        formattedResults = 'No results returned';
      }

      // Handle markdown content - can be string or object
      let markdownContent = '';
      if (result.markdown) {
        if (typeof result.markdown === 'string') {
          markdownContent = result.markdown;
        } else if (typeof result.markdown === 'object' && result.markdown.raw_markdown) {
          // Use raw_markdown from the object structure
          markdownContent = result.markdown.raw_markdown;
        }
      }

      return {
        content: [
          {
            type: 'text',
            text: `JavaScript executed on: ${options.url}\n\nResults:\n${formattedResults}${markdownContent ? `\n\nPage Content After Execution:\n${markdownContent}` : ''}`,
          },
        ],
      };
    } catch (error) {
      throw this.formatError(error, 'execute JavaScript');
    }
  }

  /**
   * Crawls `options.url` via a POST to `/crawl` (cache bypassed) and reports the
   * links found on the page.
   *
   * Links supplied in the crawl result are preferred. When there are none, the
   * method first checks whether the target looks like a JSON endpoint (and returns
   * an explanatory note if so), then falls back to scanning the markdown for
   * `href="..."` attributes.
   *
   * @param options.url - Page to crawl.
   * @param options.categorize - When truthy, links are grouped into internal,
   *   external, social, documents, images and scripts (first 10 shown per group);
   *   otherwise a flat list of all links is returned.
   * @returns An object with a `content` array holding one `text` entry.
   * @throws Whatever `this.formatError(error, 'extract links')` produces.
   */
  async extractLinks(options: { url: string; categorize?: boolean }) {
    try {
      // Use crawl endpoint instead of md to get full link data
      const response = await this.axiosClient.post('/crawl', {
        urls: [options.url],
        crawler_config: {
          cache_mode: 'bypass',
        },
      });

      const results = response.data.results || [response.data];
      const result: CrawlResultItem = results[0] || {};

      // Tolerate a missing links object as well as one that lacks either array
      const apiInternal = result.links?.internal ?? [];
      const apiExternal = result.links?.external ?? [];
      const hasApiLinks = apiInternal.length > 0 || apiExternal.length > 0;

      // Links recovered from the markdown when the crawl result has none
      const manuallyExtractedInternal: string[] = [];
      const manuallyExtractedExternal: string[] = [];

      if (!hasApiLinks) {
        const markdownContent = result.markdown?.raw_markdown || result.markdown?.fit_markdown || '';
        const htmlContent = result.html || '';

        // Heuristics for "this is a JSON endpoint, not an HTML page"
        const looksLikeJson =
          // URL pattern
          options.url.includes('/api/') ||
          options.url.includes('/api.') ||
          // Content type (often shown in markdown conversion)
          markdownContent.includes('application/json') ||
          // JSON structure patterns
          (markdownContent.startsWith('{') && markdownContent.endsWith('}')) ||
          (markdownContent.startsWith('[') && markdownContent.endsWith(']')) ||
          // HTML carrying JSON indicators
          htmlContent.includes('application/json') ||
          // Common JSON keys
          markdownContent.includes('"links"') ||
          markdownContent.includes('"url"') ||
          markdownContent.includes('"data"');

        if (looksLikeJson) {
          return {
            content: [
              {
                type: 'text',
                text: `Note: ${options.url} appears to return JSON data rather than HTML. The extract_links tool is designed for HTML pages with <a> tags. To extract URLs from JSON, you would need to parse the JSON structure directly.`,
              },
            ],
          };
        }

        // No links from the server: look for href attributes in the markdown instead
        if (markdownContent && markdownContent.includes('href=')) {
          const hrefPattern = /href=["']([^"']+)["']/g;
          const foundLinks: string[] = [];
          let match;
          while ((match = hrefPattern.exec(markdownContent)) !== null) {
            const href = match[1];
            if (href) {
              foundLinks.push(href);
            }
          }

          if (foundLinks.length > 0) {
            // Split by comparing each resolved link's host with the crawled page's host
            const currentDomain = new URL(options.url).hostname;
            foundLinks.forEach((link) => {
              try {
                const linkUrl = new URL(link, options.url);
                if (linkUrl.hostname === currentDomain) {
                  manuallyExtractedInternal.push(linkUrl.href);
                } else {
                  manuallyExtractedExternal.push(linkUrl.href);
                }
              } catch {
                // Could not be resolved as a URL: keep the raw link as internal
                manuallyExtractedInternal.push(link);
              }
            });
          }
        }
      }

      // API-provided links win; otherwise use whatever was extracted manually (possibly nothing)
      const internalUrls: string[] = hasApiLinks
        ? apiInternal.map((link) => (typeof link === 'string' ? link : link.href))
        : manuallyExtractedInternal;
      const externalUrls: string[] = hasApiLinks
        ? apiExternal.map((link) => (typeof link === 'string' ? link : link.href))
        : manuallyExtractedExternal;

      // NOTE(review): an omitted `categorize` takes this plain-list path. The earlier
      // comment claimed the option "defaults to true" — presumably that default is
      // applied by the caller before this method runs; confirm.
      if (!options.categorize) {
        const allUrls = [...internalUrls, ...externalUrls];
        return {
          content: [
            {
              type: 'text',
              text: `All links from ${options.url}:\n${allUrls.join('\n')}`,
            },
          ],
        };
      }

      // Key order here is the order of the sections in the output
      const categorized: Record<LinkCategory, string[]> = {
        internal: [],
        external: [],
        social: [],
        documents: [],
        images: [],
        scripts: [],
      };

      internalUrls.forEach((href) => {
        categorized[classifyLink(href, options.url, 'internal')].push(href);
      });
      externalUrls.forEach((href) => {
        categorized[classifyLink(href, options.url, 'external')].push(href);
      });

      return {
        content: [
          {
            type: 'text',
            text: `Link analysis for ${options.url}:\n\n${Object.entries(categorized)
              .map(
                ([category, links]: [string, string[]]) =>
                  `${category} (${links.length}):\n${links.slice(0, 10).join('\n')}${links.length > 10 ? '\n...' : ''}`,
              )
              .join('\n\n')}`,
          },
        ],
      };
    } catch (error) {
      throw this.formatError(error, 'extract links');
    }
  }
}