All files / src/handlers utility-handlers.ts

93.06% Statements 94/101
81.81% Branches 81/99
100% Functions 16/16
92.39% Lines 85/92

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254          9x   9x 2x         7x     7x   7x     7x 7x 4x   5x   5x 5x   2x 2x         3x   5x       3x       7x 7x 4x 4x             7x                 2x         13x   13x             13x 13x     13x 13x 13x     13x   9x 9x     9x                                                   9x   3x 3x   3x 9x   3x 3x   3x   3x 9x 9x 9x 6x   3x                       13x 13x   13x   8x 6x 9x   3x 3x     13x   13x 4x                     9x                   9x 9x 9x 9x     9x 85x 1x 69x 1x 22x 1x   10x         9x 36x 1x 43x 1x 31x 1x 9x 1x   4x         9x 9x             54x                                              
import { BaseHandler } from './base-handler.js';
import { JSExecuteEndpointOptions, JSExecuteEndpointResponse, CrawlResultItem } from '../types.js';
 
/**
 * Handlers for the utility tools: executing JavaScript on a page and extracting
 * the links found on a page.
 *
 * Each handler resolves to an object holding a single `text` content item. Any
 * failure is passed through `this.formatError` and rethrown.
 */
export class UtilityHandlers extends BaseHandler {
  /**
   * Executes the supplied script(s) via `this.service.executeJS` and renders one
   * "Script / Returned" section per result, followed by the page markdown when
   * the response contains any.
   *
   * @param options - Execution options; `scripts` must be provided (a single
   *   script or an array of scripts).
   * @returns A text content item summarising the execution results.
   * @throws The value produced by `this.formatError` when `scripts` is missing
   *   or the service call fails.
   */
  async executeJS(options: JSExecuteEndpointOptions) {
    try {
      // A falsy check already covers null, undefined and the empty string.
      if (!options.scripts) {
        throw new Error(
          'scripts is required. Please provide JavaScript code to execute. Use "return" statements to get values back.',
        );
      }

      const result: JSExecuteEndpointResponse = await this.service.executeJS(options);

      const jsResults = result.js_execution_result?.results ?? [];
      // Normalise to an array so results can be paired with their script by index.
      const scripts = Array.isArray(options.scripts) ? options.scripts : [options.scripts];

      const formattedResults =
        jsResults.length > 0
          ? jsResults
              .map((res: unknown, idx: number) => {
                const script = scripts[idx] || 'Script ' + (idx + 1);
                let resultStr = '';
                if (res && typeof res === 'object' && 'success' in res) {
                  // Status object: the script ran but produced no return value, or it failed.
                  const statusObj = res as { success: unknown; error?: unknown };
                  resultStr = statusObj.success
                    ? 'Executed successfully (no return value)'
                    : `Error: ${statusObj.error || 'Unknown error'}`;
                } else {
                  // Anything else is the script's actual return value.
                  resultStr = JSON.stringify(res, null, 2);
                }
                return `Script: ${script}\nReturned: ${resultStr}`;
              })
              .join('\n\n')
          : 'No results returned';

      // The markdown field may be a plain string or an object carrying raw_markdown.
      let markdownContent = '';
      if (result.markdown) {
        if (typeof result.markdown === 'string') {
          markdownContent = result.markdown;
        } else if (typeof result.markdown === 'object' && result.markdown.raw_markdown) {
          markdownContent = result.markdown.raw_markdown;
        }
      }

      const pageSection = markdownContent ? `\n\nPage Content After Execution:\n${markdownContent}` : '';

      return {
        content: [
          {
            type: 'text',
            text: `JavaScript executed on: ${options.url}\n\nResults:\n${formattedResults}${pageSection}`,
          },
        ],
      };
    } catch (error) {
      throw this.formatError(error, 'execute JavaScript');
    }
  }

  /**
   * Crawls `options.url` (cache bypassed) and reports the links found on it.
   *
   * Links reported by the crawl response are preferred. When the response has
   * none, the handler first checks whether the target looks like a JSON endpoint
   * (and returns an explanatory note if so), then falls back to scanning the
   * markdown for `href="..."` attributes.
   *
   * @param options - `url` is the page to crawl. When `categorize` is truthy the
   *   links are grouped into internal / external / social / documents / images /
   *   scripts (at most 10 shown per group); otherwise a flat list is returned.
   * @returns A text content item with the link list, the grouped analysis, or
   *   the JSON-endpoint note.
   * @throws The value produced by `this.formatError` when the crawl request or
   *   link processing fails.
   */
  async extractLinks(options: { url: string; categorize?: boolean }) {
    type LinkCategory = 'internal' | 'external' | 'social' | 'documents' | 'images' | 'scripts';

    try {
      // Use the crawl endpoint rather than md so the response carries full link data.
      const response = await this.axiosClient.post('/crawl', {
        urls: [options.url],
        crawler_config: {
          cache_mode: 'bypass',
        },
      });

      const results = response.data.results || [response.data];
      const result: CrawlResultItem = results[0] || {};

      // Only treat the API link data as usable when it actually contains links.
      const apiLinks =
        result.links && (result.links.internal.length > 0 || result.links.external.length > 0)
          ? result.links
          : undefined;

      let internalUrls: string[] = [];
      let externalUrls: string[] = [];

      if (apiLinks) {
        internalUrls = apiLinks.internal.map((link) => (typeof link === 'string' ? link : link.href));
        externalUrls = apiLinks.external.map((link) => (typeof link === 'string' ? link : link.href));
      } else {
        const markdownContent = result.markdown?.raw_markdown || result.markdown?.fit_markdown || '';
        const htmlContent = result.html || '';

        // Heuristics suggesting the target served JSON rather than an HTML page.
        if (
          // URL pattern
          options.url.includes('/api/') ||
          options.url.includes('/api.') ||
          // Content type (often shown in the markdown conversion)
          markdownContent.includes('application/json') ||
          // JSON structure patterns
          (markdownContent.startsWith('{') && markdownContent.endsWith('}')) ||
          (markdownContent.startsWith('[') && markdownContent.endsWith(']')) ||
          // JSON indicators in the HTML
          htmlContent.includes('application/json') ||
          // Common JSON keys
          markdownContent.includes('"links"') ||
          markdownContent.includes('"url"') ||
          markdownContent.includes('"data"')
        ) {
          return {
            content: [
              {
                type: 'text',
                text: `Note: ${options.url} appears to return JSON data rather than HTML. The extract_links tool is designed for HTML pages with <a> tags. To extract URLs from JSON, you would need to parse the JSON structure directly.`,
              },
            ],
          };
        }

        // The server supplied no links: scan the markdown for href attributes instead.
        if (markdownContent.includes('href=')) {
          const hrefPattern = /href=["']([^"']+)["']/g;
          const foundLinks: string[] = [];
          let match;
          while ((match = hrefPattern.exec(markdownContent)) !== null) {
            foundLinks.push(match[1]);
          }

          if (foundLinks.length > 0) {
            const currentDomain = new URL(options.url).hostname;

            for (const link of foundLinks) {
              try {
                // Resolve against the page URL so relative hrefs become absolute.
                const linkUrl = new URL(link, options.url);
                if (linkUrl.hostname === currentDomain) {
                  internalUrls.push(linkUrl.href);
                } else {
                  externalUrls.push(linkUrl.href);
                }
              } catch {
                // Unparseable href: keep the raw value and treat it as internal.
                internalUrls.push(link);
              }
            }
          }
        }
      }

      // Flat list requested (categorize omitted or false).
      if (!options.categorize) {
        const allUrls = [...internalUrls, ...externalUrls];
        return {
          content: [
            {
              type: 'text',
              text: `All links from ${options.url}:\n${allUrls.join('\n')}`,
            },
          ],
        };
      }

      const categorized: Record<LinkCategory, string[]> = {
        internal: [],
        external: [],
        social: [],
        documents: [],
        images: [],
        scripts: [],
      };

      const socialDomains = ['facebook.com', 'twitter.com', 'linkedin.com', 'instagram.com', 'youtube.com'];
      const docExtensions = ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'];
      const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp'];
      const scriptExtensions = ['.js', '.css'];

      const parseHref = (href: string): URL | undefined => {
        try {
          return new URL(href, options.url);
        } catch {
          return undefined;
        }
      };

      const endsWithAny = (value: string, suffixes: readonly string[]): boolean =>
        suffixes.some((suffix) => value.endsWith(suffix));

      // Picks the bucket for one link; `origin` is the bucket used when no
      // more specific category applies.
      const classify = (href: string, origin: 'internal' | 'external'): LinkCategory => {
        const parsed = parseHref(href);

        if (origin === 'external') {
          // Match on the hostname so a social domain that merely appears in a
          // path or query string does not count. Fall back to a substring
          // check only when the href cannot be parsed.
          const host = parsed?.hostname;
          const isSocial =
            host !== undefined
              ? socialDomains.some((domain) => host === domain || host.endsWith(`.${domain}`))
              : socialDomains.some((domain) => href.includes(domain));
          if (isSocial) {
            return 'social';
          }
        }

        // Compare extensions against the path only, so query strings and
        // fragments neither hide nor fake a file extension.
        const path = (parsed ? parsed.pathname : href).toLowerCase();
        if (endsWithAny(path, docExtensions)) {
          return 'documents';
        }
        if (endsWithAny(path, imageExtensions)) {
          return 'images';
        }
        if (endsWithAny(path, scriptExtensions)) {
          return 'scripts';
        }
        return origin;
      };

      for (const href of internalUrls) {
        categorized[classify(href, 'internal')].push(href);
      }
      for (const href of externalUrls) {
        categorized[classify(href, 'external')].push(href);
      }

      return {
        content: [
          {
            type: 'text',
            text: `Link analysis for ${options.url}:\n\n${Object.entries(categorized)
              .map(
                ([category, links]: [string, string[]]) =>
                  `${category} (${links.length}):\n${links.slice(0, 10).join('\n')}${links.length > 10 ? '\n...' : ''}`,
              )
              .join('\n\n')}`,
          },
        ],
      };
    } catch (error) {
      throw this.formatError(error, 'extract links');
    }
  }
}