import { FAQItem } from '../components/blog/AccordionFAQ';
import { Source } from '../types/blog';

/**
 * Extracts source citations from the external links found in HTML content.
 *
 * An `<a>` element counts as a source when it has a non-empty `href`, carries
 * `target="_blank"`, and its content is neither empty nor an image. The two
 * attributes may appear in any order and may use single or double quotes.
 *
 * - Links whose URL contains `ncbi.nlm.nih.gov` get a fixed NCBI title
 *   (including the PMC id when the URL has one) and publication.
 * - For every other link the title is the link content before the first
 *   `<span`, and author / publication / date are read from an optional
 *   `<span class="source-details">` holding comma-separated values with a
 *   four-digit 20xx year.
 *
 * @param htmlContent - HTML markup to scan.
 * @returns One entry per qualifying link, in document order. Fields that
 *          could not be determined are `undefined`.
 */
export function extractSources(htmlContent: string): Source[] {
  const sources: Source[] = [];

  // Each attribute is checked by its own lookahead so that their relative
  // order inside the tag does not matter. Requiring whitespace directly before
  // the attribute name keeps look-alikes such as `data-href` from matching.
  // Groups: 1 = double-quoted href, 2 = single-quoted href, 3 = link content.
  const linkRegex =
    /<a(?=\s)(?=[^>]*?\starget\s*=\s*(?:"_blank"|'_blank'))(?=[^>]*?\shref\s*=\s*(?:"([^"]+)"|'([^']+)'))[^>]*>([\s\S]*?)<\/a>/gi;
  let match: RegExpExecArray | null;

  while ((match = linkRegex.exec(htmlContent)) !== null) {
    const url = match[1] ?? match[2] ?? '';
    const content = match[3] ?? '';

    // Skip if it's an image or empty content
    if (content.includes('<img') || !content.trim()) {
      continue;
    }

    let title = '';
    let author = '';
    let publication = '';
    let date = '';

    if (url.includes('ncbi.nlm.nih.gov')) {
      // NCBI link text is replaced by a descriptive title built from the PMC id.
      const pmcMatch = url.match(/PMC(\d+)/);
      title = pmcMatch
        ? `NCBI PubMed Central Article PMC${pmcMatch[1]}`
        : 'NCBI PubMed Central Article';
      publication = 'National Center for Biotechnology Information';
    } else {
      // The title is everything in the link before the first <span (if any).
      const spanIndex = content.indexOf('<span');
      title = (spanIndex === -1 ? content : content.slice(0, spanIndex)).trim();

      const detailsMatch = content.match(
        /<span[^>]*class="source-details"[^>]*>([\s\S]*?)<\/span>/i
      );

      if (detailsMatch && detailsMatch[1]) {
        const details = detailsMatch[1].trim();

        // Look for years like 2023
        const dateMatch = details.match(/\b(20\d{2})\b/);
        if (dateMatch) {
          date = dateMatch[1] ?? '';
        }

        // Author and publication are only guessed when a date anchors the
        // comma-separated list.
        if (date && details.includes(',')) {
          const nonDateParts = details
            .split(',')
            .map(part => part.trim())
            .filter(part => !part.includes(date));

          if (nonDateParts.length > 1) {
            // First part is likely the author, second the publication.
            author = nonDateParts[0] ?? '';
            publication = nonDateParts[1] ?? '';
          } else if (nonDateParts.length === 1) {
            // A single part is ambiguous; for simplicity assume it is the author.
            author = nonDateParts[0] ?? '';
          }
        }
      }
    }

    sources.push({
      title,
      url,
      author: author || undefined,
      publication: publication || undefined,
      date: date || undefined
    });
  }

  return sources;
}

/**
 * Extracts the FAQ section from HTML content and returns structured FAQ items.
 *
 * The FAQ section starts at the first `<h2>` whose own content contains "FAQ"
 * or "Frequently Asked Questions" (case-insensitive) and runs until the next
 * `<h2>` or the end of the content. Inside that section every `<h3>` is a
 * question and the markup up to the next heading is its answer. Both plain
 * headings and headings wrapped in WordPress `<!-- wp:heading -->` block
 * comments are handled.
 *
 * @param htmlContent - HTML markup of the post.
 * @returns The question/answer pairs in document order; empty when no FAQ
 *          section is found. Pairs with an empty question or answer are dropped.
 */
export function extractFAQs(htmlContent: string): FAQItem[] {
  const faqs: FAQItem[] = [];

  // `(?:(?!<\/h2>)[\s\S])*?` consumes heading text without ever crossing the
  // heading's own closing tag, so a match cannot bleed into other sections
  // when the whole document sits on a single line.
  // The section ends before the next <h2>, including its optional WordPress
  // opening comment, so that comment does not end up in the last answer.
  const faqSectionMatch = htmlContent.match(
    /(?:<!-- wp:heading -->\s*)?<h2[^>]*>(?:(?!<\/h2>)[\s\S])*?(?:FAQ|Frequently Asked Questions)(?:(?!<\/h2>)[\s\S])*?<\/h2>(?:\s*<!-- \/wp:heading -->)?([\s\S]*?)(?=(?:<!-- wp:heading -->\s*)?<h2|$)/i
  );

  if (!faqSectionMatch || !faqSectionMatch[1]) {
    return faqs;
  }

  const faqSection = faqSectionMatch[1];

  // Group 1 is the question (content of the <h3>), group 2 the answer: all
  // markup up to the next heading (with its optional WordPress comment) or the
  // end of the section.
  const questionRegex = /(?:<!-- wp:heading {"level":3} -->\s*)?<h3[^>]*>([\s\S]*?)<\/h3>(?:\s*<!-- \/wp:heading -->)?([\s\S]*?)(?=(?:<!-- wp:heading {"level":3} -->)?\s*<h3|(?:<!-- wp:heading -->)?\s*<h2|$)/gi;
  let match: RegExpExecArray | null;

  while ((match = questionRegex.exec(faqSection)) !== null) {
    const question = (match[1] ?? '').trim();
    const answer = (match[2] ?? '').trim();

    if (question && answer) {
      faqs.push({
        question,
        answer
      });
    }
  }

  return faqs;
}

/**
 * Removes the first FAQ section from HTML content.
 *
 * The section starts at the first `<h2>` whose own content contains "FAQ" or
 * "Frequently Asked Questions" (case-insensitive), optionally wrapped in
 * WordPress `<!-- wp:heading -->` comments, and extends up to the next `<h2>`
 * (or that heading's WordPress opening comment) or the end of the content.
 *
 * @param htmlContent - HTML markup of the post.
 * @returns The content without the FAQ section; unchanged when none is found.
 */
export function removeFAQSection(htmlContent: string): string {
  // `(?:(?!<\/h2>)[\s\S])*?` consumes heading text without crossing the
  // heading's own closing tag. A plain `.*` would, on single-line HTML, start
  // at an earlier <h2> and end at a later </h2>, deleting unrelated sections.
  const faqSectionRegex =
    /(?:<!-- wp:heading -->\s*)?<h2[^>]*>(?:(?!<\/h2>)[\s\S])*?(?:FAQ|Frequently Asked Questions)(?:(?!<\/h2>)[\s\S])*?<\/h2>(?:\s*<!-- \/wp:heading -->)?[\s\S]*?(?=(?:<!-- wp:heading -->)?\s*<h2|$)/i;

  return htmlContent.replace(faqSectionRegex, '');
}

/**
 * Appends a "Sources" section (an `<h2>` followed by a list of links) to the
 * HTML content.
 *
 * Each source becomes a list item linking to `source.url` with `source.title`
 * as link text. When author, publication, or date are present they are added
 * in a `<span class="source-details">`, separated by ", ".
 *
 * All values are inserted into the markup verbatim (no HTML escaping), so
 * callers must pass values that are already safe to embed.
 *
 * @param htmlContent - HTML markup to append to.
 * @param sources - Sources to list; when empty or missing the content is
 *                  returned unchanged.
 * @returns The content followed by the generated sources section.
 */
export function addSourcesSection(htmlContent: string, sources: Source[]): string {
  if (!sources || sources.length === 0) {
    return htmlContent;
  }

  const items = sources.map(source => {
    // Joining only the non-empty parts keeps the separators tight
    // ("Smith, Nature, 2021"). Putting each part on its own template line
    // would render as "Smith , Nature , 2021" once whitespace collapses.
    const details = [source.author, source.publication, source.date]
      .filter(Boolean)
      .join(', ');
    const detailsHTML = details
      ? ` <span class="source-details">${details}</span>`
      : '';

    return `  <li>
    <a href="${source.url}" target="_blank" rel="noopener noreferrer" class="source-link">${source.title}${detailsHTML}</a>
  </li>`;
  });

  const sourcesHTML = `
<h2>Sources</h2>
<ul class="sources-list">
${items.join('\n')}
</ul>
`;

  // Add the sources section at the end of the content
  return htmlContent + sourcesHTML;
}

/**
 * Processes blog HTML: extracts the FAQs and sources as structured data and
 * returns the content with the FAQ section and any existing "Sources" section
 * removed.
 *
 * FAQs and sources are both extracted from the original, unmodified content.
 *
 * @param htmlContent - HTML markup of the post.
 * @returns `processedContent` (the content without FAQ and Sources sections),
 *          the extracted `faqs`, and the extracted `sources`.
 */
export function processBlogContent(htmlContent: string): {
  processedContent: string;
  faqs: FAQItem[];
  sources: Source[];
} {
  const faqs = extractFAQs(htmlContent);
  const sources = extractSources(htmlContent);

  const contentWithoutFAQs = removeFAQSection(htmlContent);

  // Remove any existing sources section (h2 with "Sources" text) up to the
  // next <h2> or the end of the content. Like the FAQ removal, this also
  // covers the WordPress `<!-- wp:heading -->` wrappers, so no orphaned block
  // comment is left behind and the next heading keeps its own opening comment.
  const processedContent = contentWithoutFAQs.replace(
    /(?:<!-- wp:heading -->\s*)?<h2[^>]*>Sources<\/h2>(?:\s*<!-- \/wp:heading -->)?[\s\S]*?(?=(?:<!-- wp:heading -->\s*)?<h2|$)/i,
    ''
  );

  return {
    processedContent,
    faqs,
    sources
  };
}
