/**
 * Extract business (B2B) e-mail addresses from the current page.
 *
 * Two sources are scanned: the visible page text (`document.body.innerText`)
 * and the targets of `mailto:` links. Addresses hosted on well-known personal
 * mailbox providers are dropped.
 *
 * @returns {string[]} Unique addresses (compared case-insensitively, first-seen
 *   casing is kept); matches from the page text come before mailto targets.
 */
function extractEmails() {
  const text = document.body?.innerText || '';
  const emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;
  const textEmails = text.match(emailRegex) || [];

  // Percent-decoding can throw on malformed sequences; fall back to the raw text.
  const safeDecode = (value) => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  // Also check mailto: links. A single link may list several comma-separated
  // recipients, and each recipient may be percent-encoded (e.g. "%40" for "@"),
  // so split on the literal commas first and decode each part afterwards.
  const mailtoEmails = Array.from(document.querySelectorAll('a[href^="mailto:"]'))
    .flatMap((link) =>
      link.href
        .replace(/^mailto:/i, '')
        .split('?')[0]
        .split(',')
        .map((address) => safeDecode(address).trim())
    );

  // De-duplicate case-insensitively while keeping the first spelling seen.
  const uniqueByLowercase = new Map();
  for (const email of [...textEmails, ...mailtoEmails]) {
    const key = email.toLowerCase();
    if (!uniqueByLowercase.has(key)) {
      uniqueByLowercase.set(key, email);
    }
  }

  // Filter out personal email domains (B2B only)
  const personalDomains = new Set([
    'gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com',
    'aol.com', 'icloud.com', 'live.com', 'msn.com',
    'mail.ru', 'yandex.ru', 'protonmail.com', 'tutanota.com',
    'gmx.com', 'zoho.com', 'fastmail.com'
  ]);

  return [...uniqueByLowercase.values()].filter((email) => {
    // Entries without an "@" (e.g. an empty mailto target) have no domain and are dropped.
    const domain = email.split('@')[1]?.toLowerCase();
    return Boolean(domain) && !personalDomains.has(domain);
  });
}

/**
 * Extract phone numbers (international formats) from the current page.
 *
 * Numbers from `tel:` links are collected first, then the visible page text is
 * scanned line by line with a broad pattern. Lines mentioning a tax identifier
 * are skipped, and candidates are filtered by digit count and de-duplicated by
 * their digit sequence.
 *
 * @returns {string[]} Phone numbers as written on the page (or in the link),
 *   unique by digits, `tel:` link numbers first.
 */
function extractPhones() {
  const text = document.body?.innerText || '';

  // Percent-decoding can throw on malformed sequences; fall back to the raw text.
  const safeDecode = (value) => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  // Prefer tel: links (the most reliable source).
  // Parameters after ';' (e.g. ";ext=12") are cut off and percent-escapes are
  // decoded so that neither contributes stray digits such as the "20" in "%20".
  const telLinks = Array.from(document.querySelectorAll('a[href^="tel:"]'))
    .map((link) => safeDecode(link.href.replace(/^tel:/i, '').split(';')[0]).trim())
    .filter(Boolean);

  // Multiple phone formats (broad)
  const phoneRegex = /(\+?\d{1,3}[-.\s]?)?\(?\d{2,4}\)?[-.\s]?\d{2,4}[-.\s]?\d{2,4}[-.\s]?\d{0,4}/g;

  // Heuristic: skip numbers that appear on lines with tax identifiers (NIP/VAT/REGON/KRS/EDRPOU).
  // `\b` only understands ASCII word characters, so it never fires around the
  // Cyrillic keywords; Unicode-aware lookarounds give a real word boundary.
  const taxLineRe = /(?<![\p{L}\p{N}_])(?:nip|vat|regon|krs|tax id|tin|єдрпоу|едрпоу)(?![\p{L}\p{N}_])/iu;

  const candidates = new Set(telLinks);
  for (const line of text.split(/\r?\n/)) {
    const trimmedLine = line.trim();
    if (!trimmedLine) continue;
    if (taxLineRe.test(trimmedLine)) continue;
    for (const match of trimmedLine.match(phoneRegex) || []) {
      const raw = match.trim();
      if (raw) candidates.add(raw);
    }
  }

  const seenDigits = new Set();
  const results = [];

  for (const raw of candidates) {
    const digits = raw.replace(/\D/g, '');
    // basic length filter (avoid ZIP, IDs)
    if (digits.length < 9 || digits.length > 15) continue;

    // exclude likely NIP-like numbers: 10 digits without +/()/- and not starting with 0
    // (still allows PL phones like 0XXXXXXXXX)
    if (digits.length === 10 && !/[+\-()]/.test(raw) && !digits.startsWith('0')) {
      continue;
    }

    // The same number written with different separators is reported once.
    if (seenDigits.has(digits)) continue;
    seenDigits.add(digits);
    results.push(raw);
  }

  return results;
}

// Additional B2B validation utilities
/**
 * Guess the company name for the current page.
 *
 * Candidates are tried in order: the `og:site_name` meta tag, the
 * `application-name` meta tag, the first `<h1>`, and finally the leading
 * segment of the document title. Each candidate is whitespace-normalized
 * before it is judged, so a blank or padded element cannot win.
 *
 * @returns {string} The first candidate with 1-99 characters, or 'Unknown'.
 */
function extractCompanyName() {
  // '|' always separates title segments; a dash only does so when it is
  // surrounded by whitespace, which keeps hyphenated names ("Rolls-Royce") whole.
  const titleName = (document.title || '').split('|')[0].split(/\s+[-–—]\s+/)[0];

  // Try multiple sources
  const sources = [
    document.querySelector('meta[property="og:site_name"]')?.content,
    document.querySelector('meta[name="application-name"]')?.content,
    document.querySelector('h1')?.textContent,
    titleName
  ];

  for (const source of sources) {
    // Element text often carries newlines and indentation from the markup.
    const name = typeof source === 'string' ? source.replace(/\s+/g, ' ').trim() : '';
    if (name.length > 0 && name.length < 100) {
      return name;
    }
  }

  return 'Unknown';
}

/**
 * Guess the industry of the current page from keyword hits.
 *
 * The lower-cased page text, `keywords` meta tag and `description` meta tag are
 * joined into one string. For every industry the number of its keywords that
 * occur in that string (as plain substrings) is the score; the industry with
 * the highest score wins, and on a tie the one listed first wins.
 *
 * @returns {string} The winning industry key, or 'other' when nothing matched.
 */
function extractIndustryKeywords() {
  const vocabulary = {
    medical: ['clinic', 'hospital', 'doctor', 'medical', 'health', 'pharmacy', 'лікарня', 'клініка', 'аптека'],
    beauty: ['salon', 'spa', 'beauty', 'cosmetic', 'barbershop', 'салон', 'барбершоп'],
    restaurant: ['restaurant', 'cafe', 'bistro', 'menu', 'food', 'ресторан', 'кафе', 'меню'],
    hotel: ['hotel', 'resort', 'accommodation', 'booking', 'готель', 'бронювання'],
    construction: ['construction', 'builder', 'contractor', 'renovation', 'будівництво', 'ремонт'],
    retail: ['shop', 'store', 'retail', 'магазин', 'крамниця'],
    legal: ['law', 'lawyer', 'attorney', 'legal', 'адвокат', 'юрист']
  };

  // Lower-cased content of a <meta name="..."> tag, or '' when it is absent.
  const readMeta = (name) =>
    document.querySelector(`meta[name="${name}"]`)?.content?.toLowerCase() || '';

  const haystack = [
    document.body.innerText.toLowerCase(),
    readMeta('keywords'),
    readMeta('description')
  ].join(' ');

  let bestIndustry = null;
  let bestScore = 0;

  for (const industry of Object.keys(vocabulary)) {
    let hits = 0;
    for (const term of vocabulary[industry]) {
      if (haystack.includes(term)) {
        hits += 1;
      }
    }
    // Strictly greater: an earlier industry keeps the lead on equal scores.
    if (hits > bestScore) {
      bestScore = hits;
      bestIndustry = industry;
    }
  }

  return bestIndustry === null ? 'other' : bestIndustry;
}

/**
 * Extract postal-address candidates from the current page.
 *
 * Structured `itemprop="address"` elements are read first, then the visible
 * page text is scanned for a street keyword followed by 5-100 characters on
 * the same line. All candidates are whitespace-normalized.
 *
 * @returns {string[]} Unique candidates between 11 and 199 characters long,
 *   structured-data entries first.
 */
function extractAddresses() {
  // Collapse runs of whitespace (markup indentation, line breaks) into single spaces.
  const normalize = (value) => (value || '').replace(/\s+/g, ' ').trim();

  const addresses = [];

  // Schema.org structured data
  for (const el of document.querySelectorAll('[itemprop="address"]')) {
    addresses.push(normalize(el.textContent));
  }

  // Common address patterns.
  // The keyword must start a word, and the spelled-out keywords must also end
  // one, so fragments such as "be(st.)", "b(road)cast" or "(road)map" are not
  // mistaken for street markers. Unicode classes are needed because `\b` does
  // not recognize Cyrillic letters.
  const text = document.body?.innerText || '';
  const addressPattern = /(?<![\p{L}\p{N}])(?:(?:street|avenue|road|вулиця|проспект)(?![\p{L}\p{N}])|st\.|ave\.|rd\.|вул\.)[^\n]{5,100}/giu;
  for (const match of text.match(addressPattern) || []) {
    addresses.push(normalize(match));
  }

  return [...new Set(addresses)].filter(addr => addr.length > 10 && addr.length < 200);
}

/**
 * Find the first link to each supported social network on the current page.
 *
 * Links are classified by their parsed hostname (the network's domain or any
 * subdomain of it), not by a substring of the URL, so that e.g. dropbox.com or
 * a `?ref=facebook.com` query string is not mistaken for a profile link.
 *
 * @returns {{facebook: ?string, instagram: ?string, linkedin: ?string, twitter: ?string}}
 *   The first matching URL per network in document order, or null when none
 *   was found. `twitter` covers both twitter.com and x.com.
 */
function extractSocialLinks() {
  const networkDomains = {
    facebook: ['facebook.com'],
    instagram: ['instagram.com'],
    linkedin: ['linkedin.com'],
    twitter: ['twitter.com', 'x.com']
  };

  const social = {
    facebook: null,
    instagram: null,
    linkedin: null,
    twitter: null
  };

  // Hostname of a URL, or '' when the href cannot be parsed as an absolute URL.
  const hostOf = (href) => {
    try {
      return new URL(href).hostname.toLowerCase();
    } catch {
      return '';
    }
  };

  // True for the domain itself and for any of its subdomains ("www.", "uk.", ...).
  const isOnDomain = (host, domain) => host === domain || host.endsWith(`.${domain}`);

  for (const anchor of document.querySelectorAll('a[href]')) {
    const host = hostOf(anchor.href);
    if (!host) continue;

    for (const [network, domains] of Object.entries(networkDomains)) {
      // Only the first link per network is kept.
      if (social[network] === null && domains.some((domain) => isOnDomain(host, domain))) {
        social[network] = anchor.href;
      }
    }
  }

  return social;
}

/**
 * Listen for the 'scrape' command from the popup or background script.
 *
 * On 'scrape' the page is scanned, one 'scraped_contact' message is sent to the
 * extension per e-mail address found, and the sender receives a summary via
 * `sendResponse`. Messages with any other action are left untouched.
 *
 * The response is always sent synchronously, so the listener returns false:
 * returning true would keep the message channel open for an answer that, for
 * unrelated messages, never comes.
 */
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request?.action !== 'scrape') {
    return false; // Not ours: let other listeners answer.
  }

  try {
    const batchId = request.batch_id || null;

    // Run the page extractors
    const emails = extractEmails();
    const phones = extractPhones();
    const addresses = extractAddresses();
    const company = extractCompanyName();
    const industryHint = extractIndustryKeywords();
    const socialLinks = extractSocialLinks();

    // Get page metadata
    const metadata = {
      url: window.location.href,
      title: document.title,
      description: document.querySelector('meta[name="description"]')?.content || '',
      keywords: document.querySelector('meta[name="keywords"]')?.content || ''
    };

    // Send each contact individually via background script.
    // Every contact carries the same page-level phone, address and company;
    // only the e-mail differs.
    emails.forEach(email => {
      const contact = {
        email: email,
        phone: phones[0] || null,
        address: addresses[0] || null,
        company: company,
        name: null, // Can be extracted from LinkedIn profile
        url: window.location.href,
        source_url: window.location.href,
        industry_hint: industryHint,
        social_links: socialLinks,
        metadata: metadata
      };

      // Send to background script, which will forward to WebSocket.
      // The callback reads runtime.lastError so that a missing receiver is
      // reported as a console note instead of an unchecked error / unhandled
      // promise rejection.
      chrome.runtime.sendMessage(
        {
          action: 'scraped_contact',
          batch_id: batchId,
          contact: contact
        },
        () => {
          const lastError = chrome.runtime.lastError;
          if (lastError) {
            console.debug('scraped_contact not acknowledged:', lastError.message);
          }
        }
      );
    });

    sendResponse({
      success: true,
      contacts_found: emails.length,
      message: `Found ${emails.length} contact(s), sending to desktop app...`
    });
  } catch (error) {
    sendResponse({
      success: false,
      error: error?.message ?? String(error)
    });
  }

  return false; // Response already sent synchronously.
});

