import { getAllUtms } from './queryParams'
import { toURL } from './url'

/**
 * Determines the traffic origin of the current visit.
 *
 * Resolution order: a paid origin (UTM parameters / click identifiers in the location) wins,
 * then a non-paid origin derived from `document.referrer`, and finally a fixed "direct" origin.
 * Adapted from: https://github.com/Bounteous-Inc/utmz-replicator/blob/bc4027b2eab1993bce946f8416c4d4aa947e7dad/utmz-cookie.js
 *
 * @returns The first origin found in the order above; never undefined.
 */
export function getTrafficOrigin(): TrafficOrigin {
  const paidOrigin = getPaidOrigin()
  if (paidOrigin) return paidOrigin

  const nonPaidOrigin = getNonPaidOrigin()
  if (nonPaidOrigin) return nonPaidOrigin

  // Neither lookup produced an origin, so report the visit as direct
  return {
    utm_source: '(direct)',
    utm_medium: '(direct)',
    utm_campaign: '(not set)',
  }
}

/**
 * Builds the origin for paid traffic, i.e. traffic whose location carries UTM parameters or
 * Google Click Identifiers (gclid/dclid).
 *
 * @returns The parsed origin, or undefined when the visit is direct or no such parameter is present.
 */
function getPaidOrigin(): TrafficOrigin | undefined {
  if (isDirectTraffic()) return undefined

  // Drop the leading ? and # so search and hash can be handled as plain strings
  const queryText = location.search.replace('?', '')
  const fragmentText = location.hash.replace('#', '')

  const tagEntries = parseSearchAndHash(queryText, fragmentText)
  if (tagEntries.length === 0) return undefined

  const paidOrigin = parseEntries(tagEntries)

  // Impact click ids override source/medium. This is only reached when at least one
  // UTM/click-id entry was parsed, because of the early return above.
  if (isImpactTraffic()) {
    paidOrigin.utm_source = 'Impact'
    paidOrigin.utm_medium = 'cpc'
  }

  // A Google click id takes precedence over everything set so far: gclid maps to cpc, dclid alone to cpm
  const viaGclid = Boolean(paidOrigin.gclid)
  const viaDclid = Boolean(paidOrigin.dclid)
  if (viaGclid || viaDclid) {
    paidOrigin.utm_source = 'google'
    paidOrigin.utm_medium = viaGclid ? 'cpc' : 'cpm'
  }

  return paidOrigin
}

/**
 * Folds `[key, value]` pairs into a TrafficOrigin object.
 *
 * Pairs with an empty key or value are skipped; when a key repeats, the last pair wins.
 * Only `utm_term` is URI-decoded; every other value is stored as received.
 *
 * @param entries Key/value pairs, typically produced by parseSearchAndHash.
 * @returns An origin holding only the keys that were present in `entries`.
 */
function parseEntries(entries: [keyof TrafficOrigin, UtmMedium][]): TrafficOrigin {
  const origin = {} as TrafficOrigin
  for (const [key, value] of entries) {
    if (!key || !value) continue

    if (key === 'utm_term') {
      // The value comes straight from the URL, so it may contain malformed percent-encoding
      // (e.g. a lone "%"). decodeURIComponent throws URIError on that; keep the raw text instead
      // of letting a bad link abort origin detection.
      let term: string = value
      try {
        term = decodeURIComponent(value)
      } catch {
        // keep the undecoded value
      }
      origin.utm_term = term
    } else {
      origin[key] = value
    }
  }

  return origin
}

/**
 * Extracts UTM parameters (utm_source/medium/campaign/term/content) and Google Click Identifiers
 * (gclid/dclid) from the location's search and hash strings.
 *
 * - A parameter name is only recognised at the start of the input or after a character that cannot be
 *   part of a name (anything other than a word character or "-"), so `notgclid=1` or `my_utm_source=x`
 *   are not mistaken for tracked parameters.
 * - Names are matched case-insensitively and returned lower-cased, so they line up with TrafficOrigin keys.
 * - A value runs up to the next `&`, `#` or end of string and may itself contain `=`.
 *
 * @param search The query string without its leading `?`.
 * @param hash The fragment without its leading `#`.
 * @returns `[name, rawValue]` pairs in order of appearance; values are not decoded and may be empty.
 */
function parseSearchAndHash(search: string, hash: string): [keyof TrafficOrigin, UtmMedium][] {
  const campaignParams = ['source', 'medium', 'campaign', 'term', 'content']
  // Built per call: a global regex keeps lastIndex state between exec() calls
  const regex = new RegExp(`(?:^|[^\\w-])(utm_(?:${campaignParams.join('|')})|[dg]clid)=([^&#]*)`, 'gi')

  // Re-join with "#" so the last search value cannot run into the first hash parameter,
  // and so that first hash parameter is preceded by a separator
  const input = `${search}#${hash}`

  const entries: [string, string][] = []
  let match: RegExpExecArray | null
  while ((match = regex.exec(input)) !== null) {
    const [, name = '', value = ''] = match
    entries.push([name.toLowerCase(), value])
  }

  return entries as [keyof TrafficOrigin, UtmMedium][]
}

/**
 * Reports whether the current query string carries an Impact click id,
 * i.e. whether `location.search` contains the text `irclickid=`.
 */
function isImpactTraffic(): boolean {
  const impactMarker = 'irclickid='
  return location.search.indexOf(impactMarker) !== -1
}

/**
 * Builds the origin for non-paid traffic: visits that arrive with a referrer but are not
 * attributed through UTM parameters or Google Click Identifiers (gclid/dclid).
 *
 * @returns An `organic` origin when the referrer is a known search engine, a `referral` origin for any
 * other referrer, or undefined when the visit is direct or the referrer domain cannot be determined.
 */
function getNonPaidOrigin(): TrafficOrigin | undefined {
  if (isDirectTraffic()) return undefined

  const referrerDomain = getDomain(document.referrer)
  if (!referrerDomain) return undefined

  // Shim for the many google domains: any referrer domain containing "google" shares one table key
  const lookupKey = referrerDomain.includes('google') ? 'google' : referrerDomain

  const knownEngine = searchEngines[lookupKey]

  return knownEngine
    ? { utm_source: knownEngine.name, utm_medium: 'organic' }
    : { utm_source: lookupKey, utm_medium: 'referral' }
}

/**
 * Reports whether the visit should be treated as direct (unpaid) traffic.
 *
 * A visit is direct when getAllUtms() reports no UTMs and the referrer is either missing/unusable
 * or belongs to the same domain as the current page (internal navigation).
 */
function isDirectTraffic(): boolean {
  // Any known UTM makes the visit attributable (the original note here says getAllUtms covers both
  // cookies and URL query params)
  const utms = getAllUtms()
  if (Object.keys(utms).length > 0) return false

  // getDomain() returns undefined both when the referrer cannot be parsed as a URL (e.g. it is empty)
  // and when its hostname has no recognisable domain, so one check covers "no usable referrer"
  const referringDomain = getDomain(document.referrer)
  if (!referringDomain) return true

  // getDomain() parses its argument as a URL, so give it the full href rather than the bare hostname:
  // a scheme-less hostname is not a valid absolute URL for the standard URL parser, which would leave
  // this undefined and make same-site referrers look external.
  // NOTE(review): toURL's handling of scheme-less input is not visible here; href is correct either way.
  const thisDomain = getDomain(location.href)

  // Same domain as the current page means internal traffic
  return referringDomain === thisDomain
}

/**
 * Extracts the domain name from a URL's hostname using a label-length heuristic.
 *
 * The pattern keeps the last two dot-separated labels when the final label is 2–3 characters long,
 * plus one more leading label when the last two labels are both 2–3 characters (so `bbc.co.uk` is
 * kept whole). Consequences of the heuristic: a host such as `www.cnn.com` is also returned in full,
 * and hostnames whose final label is longer than 3 characters yield undefined.
 *
 * @param url The URL to inspect; it is parsed with toURL.
 * @returns The matched domain, or undefined when the URL cannot be parsed or nothing matches.
 */
export function getDomain(url: string): string | undefined {
  const parsed = toURL(url)
  if (!parsed) return undefined

  const domainPattern = /[^.]*\.[^.]{2,3}(?:\.[^.]{2,3})?$/
  const found = domainPattern.exec(parsed.hostname)
  return found ? found[0] : undefined
}

/**
 * Traffic medium values. In this file: `organic` and `referral` are set for referrer-based origins,
 * `cpc`/`cpm` for Impact and Google click-id traffic, and `(direct)` for the direct fallback.
 * `(invalid)` is not produced anywhere in this file.
 */
type UtmMedium = 'organic' | 'referral' | 'cpc' | 'cpm' | '(direct)' | '(invalid)'
/**
 * Describes where a visit came from, using UTM-style field names.
 */
interface TrafficOrigin {
  /** Traffic source, e.g. a search engine name, a referring domain, `google`, `Impact` or `(direct)` */
  utm_source: string
  /** Traffic medium, see UtmMedium */
  utm_medium: UtmMedium
  /** Campaign name; the direct fallback uses `(not set)` */
  utm_campaign?: string
  /** Campaign term; URI-decoded when parsed from the location */
  utm_term?: string
  /** Campaign content, stored as found in the location */
  utm_content?: string
  /** Google Click Identifier; when present the origin is reported as google / cpc */
  gclid?: string
  /** Second Google click identifier; when present without gclid the origin is reported as google / cpm */
  dclid?: string
}

/**
 * One known search engine that referrer traffic can be attributed to.
 */
interface SearchEngine {
  /**
   * The query parameter name for the search term
   */
  searchParam: string
  /**
   * The name of the search engine
   */
  name: string
}

/**
 * Known search engines, keyed by the referrer domain as returned by getDomain (plus the special
 * `google` key that every referrer domain containing "google" is mapped to).
 * `name` is reported verbatim as `utm_source`, so values must not carry stray whitespace.
 *
 * NOTE(review): getDomain's pattern cannot return `search.netscape.com`, `search.virgilio.it` or
 * `online.onetcenter.org`, so those entries look unreachable through getNonPaidOrigin — confirm
 * before re-keying them.
 */
const searchEngines: { [key: string]: SearchEngine } = {
  'daum.net': {
    searchParam: 'q',
    name: 'daum',
  },
  'eniro.se': {
    searchParam: 'search_word',
    name: 'eniro',
  },
  'naver.com': {
    searchParam: 'query',
    name: 'naver',
  },
  'yahoo.com': {
    searchParam: 'p',
    name: 'yahoo',
  },
  'msn.com': {
    searchParam: 'q',
    name: 'msn',
  },
  'bing.com': {
    searchParam: 'q',
    name: 'live',
  },
  'aol.com': {
    searchParam: 'q',
    name: 'aol',
  },
  'lycos.com': {
    searchParam: 'q',
    name: 'lycos',
  },
  'ask.com': {
    searchParam: 'q',
    name: 'ask',
  },
  'altavista.com': {
    searchParam: 'q',
    name: 'altavista',
  },
  'search.netscape.com': {
    searchParam: 'query',
    name: 'netscape',
  },
  'cnn.com': {
    searchParam: 'query',
    name: 'cnn',
  },
  'about.com': {
    searchParam: 'terms',
    name: 'about',
  },
  'mamma.com': {
    searchParam: 'query',
    name: 'mama',
  },
  'alltheweb.com': {
    searchParam: 'q',
    name: 'alltheweb',
  },
  'voila.fr': {
    searchParam: 'rdata',
    name: 'voila',
  },
  'search.virgilio.it': {
    searchParam: 'qs',
    name: 'virgilio',
  },
  'baidu.com': {
    searchParam: 'wd',
    name: 'baidu',
  },
  'alice.com': {
    searchParam: 'qs',
    name: 'alice',
  },
  'yandex.com': {
    searchParam: 'text',
    name: 'yandex',
  },
  'najdi.org.mk': {
    searchParam: 'q',
    name: 'najdi',
  },
  'seznam.cz': {
    searchParam: 'q',
    name: 'seznam',
  },
  'search.com': {
    searchParam: 'q',
    name: 'search',
  },
  'wp.pl': {
    searchParam: 'szukaj',
    name: 'wirtulana polska',
  },
  'online.onetcenter.org': {
    searchParam: 'qt',
    name: 'o*net',
  },
  'szukacz.pl': {
    searchParam: 'q',
    name: 'szukacz',
  },
  'yam.com': {
    searchParam: 'k',
    name: 'yam',
  },
  'pchome.com': {
    searchParam: 'q',
    name: 'pchome',
  },
  'kvasir.no': {
    searchParam: 'q',
    name: 'kvasir',
  },
  'sesam.no': {
    searchParam: 'q',
    name: 'sesam',
  },
  'ozu.es': {
    searchParam: 'q',
    name: 'ozu',
  },
  'terra.com': {
    searchParam: 'query',
    name: 'terra',
  },
  'mynet.com': {
    searchParam: 'q',
    name: 'mynet',
  },
  'ekolay.net': {
    searchParam: 'q',
    name: 'ekolay',
  },
  'rambler.ru': {
    searchParam: 'words',
    name: 'rambler',
  },
  google: {
    searchParam: 'q',
    name: 'google',
  },
}
