import { XMLParser } from 'fast-xml-parser';
import { fetchUrl } from './urlFetcher';
import { analyzeContent } from './contentAnalyzer';
import { RateLimiter } from '../utils/rateLimiter';
import type { SitemapNode, AnalysisResult } from './types';

// Module-wide limiter shared by every page fetch: analyzeUrl awaits
// rateLimiter.wait() before each fetchUrl call.
// NOTE(review): the meaning of the constructor argument (5) is defined by
// RateLimiter in ../utils/rateLimiter — presumably a requests-per-interval or
// concurrency cap; confirm there before tuning it.
const rateLimiter = new RateLimiter(5);

/**
 * Crawls every URL listed for `domain` and summarises the results.
 *
 * The start URLs come from `getSitemapUrls`; each one is analysed concurrently
 * via `analyzeUrl` at depth 0, sharing a single visited set and a single
 * running-totals object across the whole crawl.
 *
 * @param domain - Host name used to locate the sitemap and passed through to
 *   `analyzeUrl` for each start URL.
 * @param onProgress - Optional callback invoked after each start URL finishes,
 *   with the number finished so far and the total number of start URLs
 *   (reported as 1 when the list is empty).
 * @returns The non-null root nodes plus aggregate stats. Any unexpected
 *   failure is logged and yields an empty result rather than a rejection.
 */
export async function analyzeSitemap(
  domain: string,
  onProgress?: (current: number, total: number) => void
): Promise<AnalysisResult> {
  const seen = new Set<string>();
  const totals = { totalPages: 0, totalScore: 0, maxDepth: 0 };

  try {
    const startUrls = await getSitemapUrls(domain);
    const expected = startUrls.length || 1;
    let finished = 0;

    // Analyse one start URL; a failure here is logged and mapped to null so a
    // single bad page cannot reject the surrounding Promise.all.
    const analyzeRoot = async (pageUrl: string): Promise<SitemapNode | null> => {
      try {
        const root = await analyzeUrl(pageUrl, domain, seen, 0, totals);
        finished += 1;
        onProgress?.(finished, expected);
        return root;
      } catch (error) {
        console.error(`Error analyzing ${pageUrl}:`, error instanceof Error ? error.message : 'Unknown error');
        return null;
      }
    };

    const outcomes = await Promise.all(startUrls.map((pageUrl) => analyzeRoot(pageUrl)));

    // Keep only the pages that produced a node.
    const roots: SitemapNode[] = [];
    for (const outcome of outcomes) {
      if (outcome !== null) {
        roots.push(outcome);
      }
    }

    const { totalPages, totalScore, maxDepth } = totals;
    const averageScore = totalPages > 0 ? totalScore / totalPages : 0;

    return {
      nodes: roots,
      stats: { totalPages, averageScore, maxDepth }
    };
  } catch (error) {
    console.error('Sitemap analysis error:', error instanceof Error ? error.message : 'Unknown error');
    return { nodes: [], stats: { totalPages: 0, averageScore: 0, maxDepth: 0 } };
  }
}

/**
 * Extracts the text of a sitemap `<url>` entry's `<loc>` element.
 *
 * Accepts both shapes the parser can produce: a plain string, or an object
 * carrying the text under `#text` (which happens when `<loc>` has attributes,
 * because attributes are not ignored). Anything else yields `null`.
 */
function readSitemapLoc(entry: unknown): string | null {
  if (typeof entry !== 'object' || entry === null) return null;

  const loc = (entry as { loc?: unknown }).loc;
  const text =
    typeof loc === 'object' && loc !== null
      ? (loc as Record<string, unknown>)['#text']
      : loc;

  if (typeof text !== 'string') return null;
  const trimmed = text.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Returns the page URLs listed in `https://<domain>/sitemap.xml`.
 *
 * Falls back to the site's homepage (`https://<domain>`) when the sitemap
 * cannot be fetched or parsed, and also when it parses but contains no usable
 * `<urlset><url><loc>` entries (e.g. a sitemap index or an HTML error page),
 * so the caller always has at least one URL to start from.
 *
 * @param domain - Host name of the site, without scheme.
 * @returns A non-empty list of URL strings.
 */
async function getSitemapUrls(domain: string): Promise<string[]> {
  const homepage = `https://${domain}`;

  try {
    const xml = await fetchUrl(`https://${domain}/sitemap.xml`);
    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: "@_"
    });

    const result = parser.parse(xml) as { urlset?: { url?: unknown } } | null | undefined;

    // A single <url> is parsed as an object, several as an array; normalise.
    const rawEntries = result?.urlset?.url;
    const entries: unknown[] =
      rawEntries == null ? [] : Array.isArray(rawEntries) ? rawEntries : [rawEntries];

    const urls = entries
      .map(readSitemapLoc)
      .filter((loc): loc is string => loc !== null);

    // An empty list would make the whole analysis silently cover nothing;
    // use the same homepage fallback as the fetch-failure path.
    return urls.length > 0 ? urls : [homepage];
  } catch (error) {
    console.error('Error fetching sitemap:', error instanceof Error ? error.message : 'Unknown error');
    return [homepage];
  }
}

/** Deepest link depth that is still fetched; start URLs are depth 0. */
const MAX_CRAWL_DEPTH = 2;

/** Maximum number of same-site links followed from a single page. */
const MAX_LINKS_PER_PAGE = 5;

/**
 * Reports whether `link` is an absolute http(s) URL whose host is `domain`
 * or one of its subdomains.
 *
 * The host is compared after parsing, rather than by substring search, so
 * URLs that merely mention the domain in a path, query string, or look-alike
 * host name (`example.com.evil.test`) are rejected. Links that cannot be
 * parsed as absolute URLs are rejected as well.
 */
function isSameSiteLink(link: string, domain: string): boolean {
  try {
    const siteHost = new URL(`https://${domain}`).hostname;
    const target = new URL(link);
    if (target.protocol !== 'http:' && target.protocol !== 'https:') return false;
    return target.hostname === siteHost || target.hostname.endsWith(`.${siteHost}`);
  } catch {
    return false;
  }
}

/**
 * Fetches and scores one page, then recursively analyses a limited number of
 * its same-site links.
 *
 * @param url - Page to fetch.
 * @param domain - Host name that links must belong to in order to be followed.
 * @param visited - URLs already claimed by this crawl; mutated to include `url`.
 * @param depth - Link distance from a start URL (start URLs are 0).
 * @param stats - Running totals for the crawl; mutated for each page that is
 *   fetched and analysed successfully.
 * @returns The node for this page with its successfully analysed children, or
 *   `null` when the URL was already visited, is deeper than the crawl limit,
 *   or fetching/analysing it failed (the failure is logged).
 */
async function analyzeUrl(
  url: string,
  domain: string,
  visited: Set<string>,
  depth: number,
  stats: { totalPages: number; totalScore: number; maxDepth: number }
): Promise<SitemapNode | null> {
  if (visited.has(url) || depth > MAX_CRAWL_DEPTH) return null;
  // Claim the URL before the first await so concurrent branches skip it.
  visited.add(url);

  try {
    await rateLimiter.wait();
    const html = await fetchUrl(url);
    const { title, score, internalLinks } = analyzeContent(html);

    stats.totalPages++;
    stats.totalScore += score;
    stats.maxDepth = Math.max(stats.maxDepth, depth);

    // Only follow a small subset of same-site links to avoid overwhelming
    // the target site; pages at the depth limit are leaves.
    const childPromises = depth < MAX_CRAWL_DEPTH
      ? internalLinks
          .filter(link => isSameSiteLink(link, domain))
          .slice(0, MAX_LINKS_PER_PAGE)
          .map(link => analyzeUrl(link, domain, visited, depth + 1, stats))
      : [];

    const children = (await Promise.all(childPromises))
      .filter((node): node is SitemapNode => node !== null);

    return {
      url,
      title: title || url,
      contentScore: score,
      children
    };
  } catch (error) {
    console.error(`Error analyzing ${url}:`, error instanceof Error ? error.message : 'Unknown error');
    return null;
  }
}