import { _errorDataAppend } from '@naturalcycles/js-lib'
import { sentry } from '@src/srv/sentry.service'

/**
 * Determines where the current visit came from, based on document.referrer and the campaign
 * parameters present in document.location.
 *
 * Resolution order: paid origin first, then non-paid origin, and finally a "(direct)" fallback
 * when neither of them yields a result.
 *
 * Adapted from: https://github.com/Bounteous-Inc/utmz-replicator/blob/bc4027b2eab1993bce946f8416c4d4aa947e7dad/utmz-cookie.js
 */
export const getTrafficOrigin = (document: Document): TrafficOrigin => {
  const paidOrigin = getPaidOrigin(document)
  if (paidOrigin) return paidOrigin

  const nonPaidOrigin = getNonPaidOrigin(document)
  if (nonPaidOrigin) return nonPaidOrigin

  // Neither campaign parameters nor a usable external referrer: report the visit as direct
  return {
    utm_source: '(direct)',
    utm_medium: '(direct)',
    utm_campaign: '(not set)',
  }
}

/**
 * Paid traffic is defined as any traffic that has UTM parameters or Google Click Identifiers (gclid/dclid).
 *
 * Recognised parameters are `utm_source`, `utm_medium`, `utm_campaign`, `utm_term`, `utm_content`,
 * `gclid` and `dclid`. They are read from both `location.search` and `location.hash`, and parameter
 * names are matched case-insensitively. When a click identifier is present, `utm_source` is forced
 * to `google` and `utm_medium` to `cpc` (gclid) or `cpm` (dclid only).
 *
 * @returns the collected origin, or `undefined` when the visit is direct/internal traffic or when
 * no recognised parameter carries a non-empty value.
 */
const getPaidOrigin = (document: Document): TrafficOrigin | undefined => {
  if (isDirectTraffic(document)) return

  const origin: Partial<TrafficOrigin> = {}
  // By removing ? and #, we're standardizing the input, treating search and hash as plain strings
  const search = document.location.search.replace('?', '')
  const hash = document.location.hash.replace('#', '')
  const campaignParams = ['source', 'medium', 'campaign', 'term', 'content']

  // Parses UTM parameters and Google Click Identifiers (gclid/dclid) from the document.location
  const regex = new RegExp(`(utm_(${campaignParams.join('|')})|(d|g)clid)=.*?([^&#]*|$)`, 'gi')
  // We're adding the # back in here because the regex captures everything up to the next #, & or end of string
  const utmTags = `${search}#${hash}`.match(regex)
  if (!utmTags) return

  for (const utmTag of utmTags) {
    // Split on the FIRST "=" only, so that values which themselves contain "=" are kept intact
    const separatorIndex = utmTag.indexOf('=')
    // The regex is case-insensitive, so normalise the name to the lower-case TrafficOrigin key
    const key = utmTag.slice(0, separatorIndex).toLowerCase() as keyof TrafficOrigin
    const value = utmTag.slice(separatorIndex + 1)
    if (!value) continue

    if (key === 'utm_medium') {
      // The value comes straight from the URL and is not validated against UtmMedium
      origin[key] = value as UtmMedium
    } else if (key === 'utm_term') {
      let term = value
      try {
        term = decodeURIComponent(value)
      } catch {
        // Malformed percent-encoding (URIError): keep the raw value instead of failing the lookup
      }
      origin[key] = term
    } else {
      origin[key] = value
    }
  }

  // Every matched parameter was empty: report "no paid origin" so the caller can fall back
  if (Object.keys(origin).length === 0) return

  if (origin.gclid || origin.dclid) {
    origin.utm_source = 'google'
    origin.utm_medium = origin.gclid ? 'cpc' : 'cpm'
  }

  // NOTE(review): utm_source / utm_medium may still be missing when only other utm_* parameters
  // were supplied — same as before this change; confirm how consumers want that case handled
  return origin as TrafficOrigin
}

/**
 * Non-paid traffic is any traffic that arrives with an external referrer; it is only consulted
 * after the paid lookup found nothing.
 *
 * @returns an `organic` origin when the referring domain is a known search engine, a `referral`
 * origin for any other referring domain, or `undefined` for direct/internal traffic and for
 * referrers from which no domain can be extracted.
 */
const getNonPaidOrigin = (document: Document): TrafficOrigin | undefined => {
  if (isDirectTraffic(document)) return undefined

  const domain = getDomain(document.referrer)
  if (!domain) return undefined

  // Every referring domain that contains "google" is folded into the single "google" entry
  const source = domain.includes('google') ? 'google' : domain
  const engine = searchEngines[source]

  return engine
    ? { utm_source: engine.name, utm_medium: 'organic' }
    : { utm_source: source, utm_medium: 'referral' }
}

/**
 * Tells whether the visit should be treated as direct traffic.
 *
 * That is the case when the referrer is empty or cannot be parsed, when no domain can be
 * extracted from it, or when its domain equals the domain of the current page (internal navigation).
 */
const isDirectTraffic = (document: Document): boolean => {
  // No (parsable) referrer at all: direct traffic
  if (!toURL(document.referrer)) return true

  const ownDomain = getDomain(document.location.hostname)
  const referrerDomain = getDomain(document.referrer)

  // An unknown referring domain or one matching our own domain counts as internal traffic
  return !referrerDomain || referrerDomain === ownDomain
}

/**
 * Heuristically extracts the domain name from a URL or bare hostname, including second-level
 * TLDs like .co.uk. No public-suffix list is used, so the result is a best-effort guess.
 *
 * @param url a full URL or a bare hostname (a protocol is added by toURL when missing)
 * @returns the domain (e.g. `example.com`, `bbc.co.uk`, `example.info`), or `undefined` when the
 * input is empty, cannot be parsed, or its hostname has no recognisable domain (e.g. `localhost`,
 * IP addresses)
 */
export const getDomain = (url: string): string | undefined => {
  const uri = toURL(url)
  if (!uri) return

  const { hostname } = uri

  // Primary pattern: a label followed by a 2-3 character TLD, optionally with a 2-3 character
  // second-level part (.co.uk, .com.br, ...)
  const domain = hostname.match(/[^.]*\.[^.]{2,3}(?:\.[^.]{2,3})?$/)?.[0]
  if (domain) return domain

  // The primary pattern never matches TLDs longer than three characters (.info, .online, .shop),
  // which made referrals from such sites look like direct traffic. For web URLs, fall back to the
  // last two labels; the TLD must start with a letter so that IP addresses are still rejected.
  if (uri.protocol !== 'http:' && uri.protocol !== 'https:') return
  return hostname.match(/[^.]+\.[a-z][a-z\d-]{3,}$/i)?.[0]
}

/**
 * Parses a string into a URL object, tolerating input without a protocol.
 *
 * @param urlString a full URL or a bare hostname; an empty string yields `undefined`
 * @returns the parsed URL, or `undefined` when the input is empty or invalid. Invalid input is
 * reported to Sentry under the `traffic_origin_invalid_url` fingerprint and otherwise ignored.
 */
const toURL = (urlString: string): URL | undefined => {
  if (!urlString) return undefined

  // The URL constructor requires a protocol, so one is prepended when the input has none
  const candidate = urlString.includes('://') ? urlString : 'https://' + urlString

  try {
    return new URL(candidate)
  } catch (err) {
    // An invalid URL is not fatal here: report it and carry on without a result
    sentry.captureException(_errorDataAppend(err, { fingerprint: 'traffic_origin_invalid_url' }))
    return undefined
  }
}

/**
 * Medium of a traffic origin. Within this file the values are produced as follows:
 * - `organic`: the referring domain is listed in `searchEngines`
 * - `referral`: any other external referring domain
 * - `cpc` / `cpm`: a Google Click Identifier is present (`gclid` gives `cpc`, `dclid` alone gives `cpm`)
 * - `(direct)`: fallback when neither a paid nor a non-paid origin is found
 *
 * NOTE(review): `(invalid)` is not produced anywhere in this file — presumably used by consumers; confirm.
 * A `utm_medium` parsed from the URL is cast to this type without validation, so other strings
 * can occur at runtime.
 */
type UtmMedium = 'organic' | 'referral' | 'cpc' | 'cpm' | '(direct)' | '(invalid)'
/**
 * Describes where a visit came from, using the UTM campaign parameter names as keys.
 */
interface TrafficOrigin {
  /** Source of the visit, e.g. a search engine name, a referring domain, `google` or `(direct)` */
  utm_source: string
  /** Medium of the visit, see UtmMedium */
  utm_medium: UtmMedium
  /** Campaign name from the URL; `(not set)` for direct traffic */
  utm_campaign?: string
  /** Campaign term from the URL (URI-decoded when parsed) */
  utm_term?: string
  /** Campaign content from the URL */
  utm_content?: string
  /** `gclid` Google Click Identifier from the URL */
  gclid?: string
  /** `dclid` Google Click Identifier from the URL */
  dclid?: string
}

interface SearchEngine {
  /**
   * The query parameter name for the search term
   */
  searchParam: string
  /**
   * The name of the search engine
   */
  name: string
}

/**
 * Known search engines, keyed by referring domain. Referrers that resolve to one of these keys
 * are reported as `organic` traffic with the engine's `name` as `utm_source`. All Google domains
 * share the single `google` key.
 *
 * Names and parameter names must not carry surrounding whitespace: `name` is emitted verbatim as
 * `utm_source`.
 *
 * NOTE(review): entries are looked up by the value getDomain returns, which drops subdomains such
 * as `search.` — the keys `search.netscape.com`, `search.virgilio.it` and `online.onetcenter.org`
 * therefore look unreachable. Confirm before re-keying them.
 * NOTE(review): `searchParam` does not appear to be read in this file.
 */
const searchEngines: Record<string, SearchEngine> = {
  'daum.net': { searchParam: 'q', name: 'daum' },
  'eniro.se': { searchParam: 'search_word', name: 'eniro' },
  'naver.com': { searchParam: 'query', name: 'naver' },
  'yahoo.com': { searchParam: 'p', name: 'yahoo' },
  'msn.com': { searchParam: 'q', name: 'msn' },
  'bing.com': { searchParam: 'q', name: 'live' },
  'aol.com': { searchParam: 'q', name: 'aol' },
  'lycos.com': { searchParam: 'q', name: 'lycos' },
  'ask.com': { searchParam: 'q', name: 'ask' },
  'altavista.com': { searchParam: 'q', name: 'altavista' },
  'search.netscape.com': { searchParam: 'query', name: 'netscape' },
  'cnn.com': { searchParam: 'query', name: 'cnn' },
  'about.com': { searchParam: 'terms', name: 'about' },
  'mamma.com': { searchParam: 'query', name: 'mama' },
  'alltheweb.com': { searchParam: 'q', name: 'alltheweb' },
  'voila.fr': { searchParam: 'rdata', name: 'voila' },
  'search.virgilio.it': { searchParam: 'qs', name: 'virgilio' },
  'baidu.com': { searchParam: 'wd', name: 'baidu' },
  'alice.com': { searchParam: 'qs', name: 'alice' },
  'yandex.com': { searchParam: 'text', name: 'yandex' },
  'najdi.org.mk': { searchParam: 'q', name: 'najdi' },
  'seznam.cz': { searchParam: 'q', name: 'seznam' },
  'search.com': { searchParam: 'q', name: 'search' },
  'wp.pl': { searchParam: 'szukaj', name: 'wirtulana polska' },
  'online.onetcenter.org': { searchParam: 'qt', name: 'o*net' },
  'szukacz.pl': { searchParam: 'q', name: 'szukacz' },
  'yam.com': { searchParam: 'k', name: 'yam' },
  'pchome.com': { searchParam: 'q', name: 'pchome' },
  'kvasir.no': { searchParam: 'q', name: 'kvasir' },
  'sesam.no': { searchParam: 'q', name: 'sesam' },
  'ozu.es': { searchParam: 'q', name: 'ozu' },
  'terra.com': { searchParam: 'query', name: 'terra' },
  'mynet.com': { searchParam: 'q', name: 'mynet' },
  'ekolay.net': { searchParam: 'q', name: 'ekolay' },
  'rambler.ru': { searchParam: 'words', name: 'rambler' },
  google: { searchParam: 'q', name: 'google' },
}
