import { load } from 'cheerio';

/**
 * Parses an HTML document and summarizes it for content analysis.
 *
 * @param html - Raw HTML markup; it is parsed with cheerio's `load`.
 * @returns The trimmed page title, the score computed by
 *   `calculateContentScore`, and the link list produced by
 *   `extractInternalLinks`.
 */
export function analyzeContent(html: string): {
  title: string;
  score: number;
  internalLinks: string[];
} {
  const $ = load(html);

  return {
    // `.text()` joins the text of every matched element, so only the first
    // <title> is read; otherwise <title> elements nested in inline SVGs
    // would be appended to the page title.
    title: $('title').first().text().trim(),
    score: calculateContentScore($),
    internalLinks: extractInternalLinks($)
  };
}

/**
 * Scores a parsed page from 0 to 100 using four capped, weighted signals:
 * body text length (50 points at 2000 characters), h1-h3 headings (20 points
 * at 6), images with alt text (15 points at 4) and links with an href
 * (15 points at 8).
 *
 * @param $ - Cheerio handle for the parsed document.
 * @returns The rounded sum of the four component scores.
 */
function calculateContentScore($: cheerio.CheerioAPI): number {
  const contentLength = $('body').text().length;
  const headings = $('h1, h2, h3').length;
  const links = $('a[href]').length;

  // Only count images with alt text. `img[alt]` alone also matches alt=""
  // and whitespace-only values, which carry no text, so those are skipped.
  let images = 0;
  for (const el of $('img[alt]').toArray()) {
    const alt = $(el).attr('alt');
    if (alt && alt.trim() !== '') {
      images += 1;
    }
  }

  // Weighted scoring system: each signal earns a share of its weight
  // proportional to count / target, capped at the full weight.
  const weighted = (count: number, target: number, weight: number): number =>
    Math.min(weight, (count / target) * weight);

  const lengthScore = weighted(contentLength, 2000, 50);
  const headingScore = weighted(headings, 6, 20);
  const imageScore = weighted(images, 4, 15);
  const linkScore = weighted(links, 8, 15);

  return Math.round(lengthScore + headingScore + imageScore + linkScore);
}

/**
 * Collects the distinct `href` values of the document's anchors, skipping
 * values that do not navigate to a page: same-page fragments (`#...`) and
 * `mailto:`, `tel:`, `sms:` and `javascript:` pseudo-links.
 *
 * NOTE(review): absolute URLs pointing at other hosts are still returned;
 * this function has no site origin to compare against.
 *
 * @param $ - Cheerio handle for the parsed document.
 * @returns Unique hrefs (whitespace-trimmed) in first-seen document order.
 */
function extractInternalLinks($: cheerio.CheerioAPI): string[] {
  // Schemes are matched case-insensitively, so "MAILTO:" is skipped too.
  const nonNavigable = /^(?:#|mailto:|tel:|sms:|javascript:)/i;
  const links = new Set<string>();

  $('a[href]').each((_, el) => {
    const raw = $(el).attr('href');
    // Trim first so " #top" is recognized as a fragment and " /about "
    // de-duplicates with "/about".
    const href = raw ? raw.trim() : '';
    if (href !== '' && !nonNavigable.test(href)) {
      links.add(href);
    }
  });

  return Array.from(links);
}