import * as Sentry from '@sentry/react'
import { error } from 'loglevel'
import md5 from 'md5'
import Papa, { ParseError } from 'papaparse'

import { DETECT_ENCODING_LIMIT, HEADER_CHECK_LIMIT } from '../config'
import { ISettings } from '../types/settings.interface'
import { FlatFile } from '../utils/flat.file'
import { clearCache, readCache, writeCache } from '../utils/functions'
import { byteLength } from '../utils/misc'

/**
 * A usage optimized wrapper for quickly parsing and analyzing a file
 */
export class FileParser {
  /** Index of the row treated as the header row (or first row of interest) within the sample */
  public start = 0

  private $hasHeader?: boolean
  /** Definite row count; only set once the whole file has been read */
  private length?: number
  /** Cached result of getEstimatedLength() */
  private estimatedLength?: number
  private encoding?: string
  private $sample?: RowTuple[]
  private $data?: RowTuple[]
  /** Parse errors collected by the most recent Papa-based stream */
  private errors: ParseError[] = []
  private fillLevels?: IndexTuple<number>[]
  private readonly SAMPLE_SIZE: number

  constructor(private file: File | FlatFile | string, private settings: ISettings) {
    // always sample at least 1000 rows, more if the settings ask for a larger preload
    this.SAMPLE_SIZE = Math.max(1000, settings.preloadRowCount ?? 0)
  }

  /**
   * Returns the original file reference
   */
  public getFile() {
    return this.file
  }

  /**
   * Returns a tuple of the row count followed by a flag telling whether that
   * count is definite (`true`) or only an estimate (`false`)
   */
  public getLength(): SignedTuple {
    // compare by type instead of truthiness so a fully loaded empty file reports [0, true]
    if (typeof this.length === 'number') {
      return [this.length, true]
    }

    return [this.getEstimatedLength(), false]
  }

  /**
   * True once the definite row count is known
   */
  public get loaded(): boolean {
    return typeof this.length === 'number'
  }

  public get hasHeader(): boolean | undefined {
    return this.$hasHeader
  }

  /**
   * Store the header decision and mirror it into the cache keyed by the file hash.
   * A boolean is cached as `[start, 1 | 0]`; `undefined` clears the cached entry.
   * Nothing is cached when no hash can be computed.
   */
  public set hasHeader(hasHeader: boolean | undefined) {
    this.$hasHeader = hasHeader

    const hash = this.getHash()

    if (!hash) {
      return
    }

    if (typeof hasHeader === 'boolean') {
      writeCache(hash, 'header', [this.start, this.hasHeader ? 1 : 0])
    } else {
      clearCache(hash, 'header')
    }
  }

  /**
   * Index of the first data row: `start`, plus one when a header row is present
   */
  public get startIndex() {
    return this.start + (this.hasHeader ? 1 : 0)
  }

  public get fileName(): string {
    if (typeof this.file === 'string') {
      return 'string.csv'
    }
    return this.file.name
  }

  public get fileSize(): number {
    if (typeof this.file === 'string') {
      return this.file.length
    }
    return this.file.size
  }

  public get fileType(): string {
    if (typeof this.file === 'string') {
      return 'text/csv'
    }
    if ('type' in this.file) {
      return this.file.type
    }

    return 'application/octet-stream'
  }

  /**
   * Get a hash identifying this file based off the row at `start`.
   * Returns null when that row is missing or every cell in it is empty.
   */
  public getHash(): string | null {
    const [row] = this.sample?.[this.start] || []

    if (!row || row.every((v) => !v)) {
      return null
    }

    return md5(JSON.stringify(row))
  }

  /**
   * True once the header decision has been made either way
   */
  public get headersSet(): boolean {
    return typeof this.hasHeader === 'boolean'
  }

  /**
   * Forget the header decision (which also clears its cache entry) and the fill levels
   */
  public reset(): void {
    this.hasHeader = undefined
    this.fillLevels = undefined
  }

  /**
   * Get the encoding used for this import, detecting and memoizing it on first use
   */
  public async getEncoding(): Promise<string> {
    if (this.encoding) {
      return this.encoding
    }

    return (this.encoding = await this.detectEncoding())
  }

  public setEncoding(encoding: string): void {
    this.encoding = encoding
  }

  /**
   * Get a few values from any column - used for matching view
   *
   * @param colIndex index of the column to read
   * @param length   number of data rows to take, counted from `startIndex`
   * @returns tuples of the cell value and the index of the row it came from
   */
  public previewColumnData(colIndex: number, length: number = 3): IndexTuple[] {
    const rows = this.sample.slice(this.startIndex, this.startIndex + length)
    return rows.map(([row, i]) => [row[colIndex], i])
  }

  /**
   * Get unique (trimmed) values in column - and the count of each value.
   * Uses the full data when it has been loaded, otherwise the sample.
   *
   * @param colIndex index of the column to read
   * @param max      maximum number of distinct values tolerated
   * @returns `[value, count]` tuples in order of first appearance
   * @throws TooManyUniquesError as soon as more than `max` distinct values are seen
   */
  public getUniqueColumnValues(colIndex: number, max: number): CountTuple[] {
    const rows = this.$data ? this.data : this.sample
    // a Map keeps insertion order and gives constant-time lookups per row
    const counts = new Map<string, number>()

    rows.slice(this.startIndex).forEach(([row]) => {
      const srcValue = row[colIndex]?.trim()
      counts.set(srcValue, (counts.get(srcValue) ?? 0) + 1)

      if (counts.size > max) {
        throw new TooManyUniquesError('Too many unique values')
      }
    })

    return Array.from(counts.entries())
  }

  /**
   * Get the sampled rows. Falls back to the fully loaded data when no sample
   * is available, and to an empty array when neither has been loaded.
   */
  public get sample(): RowTuple[] {
    if (!this.$sample || !this.$sample.length) {
      if (this.$data) {
        return this.$data
      }
      return []
    }

    return this.$sample
  }

  /**
   * Get the first row of the sample, or an empty row when there is none
   */
  public getFirstRow(): RowTuple {
    if (this.sample.length) {
      return this.sample[0]
    }

    return [[], 0]
  }

  /**
   * Stream rows into the sample until the row index reaches SAMPLE_SIZE
   * (at least 1000). When the file ends before that, the sample is also
   * stored as the full data set and the definite length is recorded.
   */
  public async loadSampleData(): Promise<void> {
    const output: RowTuple[] = []
    let lastDataIndex = -1
    const isPartial = await this.streamFileUntil((_errors, [row, index]) => {
      output[index] = [row, index]
      if (row.length > 0 && row.some((x) => x?.trim()?.length > 0)) {
        lastDataIndex = index
      }
      return index >= this.SAMPLE_SIZE
    })

    // trim any empty rows at the end of the file
    output.length = lastDataIndex + 1

    // the stream was not cut short, no need to read again, sample is same as data
    if (!isPartial) {
      this.$data = output
      this.length = this.$data.length
    }
    this.$sample = output
  }

  /**
   * Read the entire file into memory
   *
   * Because this may be a *lot* of data, avoid using this until absolutely necessary.
   * Returns the already loaded rows when the file was read before.
   * @todo use async iterator here if possible
   */
  public async loadData(): Promise<RowTuple[]> {
    if (this.$data) {
      return this.$data
    }

    const output: RowTuple[] = []
    let lastDataIndex = -1
    await this.streamFileUntil((_errors, [row, index]) => {
      output.push([row, index])
      if (row.length > 0 && row.some((x) => x?.trim()?.length > 0)) {
        lastDataIndex = index
      }
      return false
    })

    // trim any empty rows at the end of the file
    output.length = lastDataIndex + 1

    this.length = output.length
    return (this.$data = output)
  }

  /**
   * The fully loaded rows
   *
   * @throws Error when the data has not been fully loaded yet
   */
  public get data(): RowTuple[] {
    if (!this.$data) {
      throw new Error('Cannot get data before it is fully loaded')
    }
    return this.$data
  }

  /**
   * Restore `start` / the header flag from cached header decisions of the
   * first HEADER_CHECK_LIMIT sample rows, falling back to detection when
   * nothing was cached.
   */
  public async reloadHeaderConfig(): Promise<void> {
    const detected = this.detectHeaderStart()

    // cached to be [original_row, has_header, current_row]
    const cached = this.sample
      .slice(0, HEADER_CHECK_LIMIT)
      .filter(([row]) => !row.every((v) => !v))
      .reduce((acc, [row, i]) => {
        const h = readCache(md5(JSON.stringify(row)), 'header')

        if (h) {
          if (
            Array.isArray(h) &&
            h.length === 2 &&
            h.every((n) => typeof n === 'number' && n >= 0)
          ) {
            return acc.concat([[...h, i]])
          }

          // a bare number is also accepted and treated as cached for row 0
          if (typeof h === 'number' && h >= 0) {
            return acc.concat([[0, h, i]])
          }
        }

        return acc
      }, [] as number[][])

    if (cached.length) {
      // best case: the row sits at the same index it was cached for
      const exactMatch = cached.find((h) => h[0] === h[2])
      if (exactMatch) {
        this.start = exactMatch[2]
        this.$hasHeader = Boolean(exactMatch[1])
        return
      }

      const headerMatch = cached.find((h) => h[1] === 1)
      if (headerMatch) {
        this.start = headerMatch[2] // current row

        // only if it's detected as header
        if (this.detectHasHeader()) {
          this.$hasHeader = true
        }
        return
      }

      // otherwise start at the first row that had any cached entry
      this.start = cached[0][2]
      return
    }

    this.start = detected

    if (this.settings.autoDetectHeaders && this.detectHasHeader()) {
      this.$hasHeader = true
    }
  }

  /**
   * Check the account or setting limits to make sure the file is allowed
   *
   * @returns ERROR_CODE.NO_ROWS when no row at or after `startIndex` was streamed,
   *          ERROR_CODE.MAX_ROWS when the `maxRecords` setting is exceeded,
   *          otherwise undefined
   */
  public async checkLimits(): Promise<ERROR_CODE | void> {
    let result: ERROR_CODE | undefined = ERROR_CODE.NO_ROWS

    const { maxRecords } = this.settings

    await this.streamFileUntil((_errors, [, i]) => {
      /**
       * Skip header on first iteration or if headersSet is defined
       */
      i = i - this.startIndex

      if (i >= 0) {
        result = undefined

        // no limit configured: one data row is all we needed to see
        if (!maxRecords) {
          return true
        }

        // allow one extra row while the header decision has not been made yet
        if (i >= maxRecords + (!this.headersSet ? 1 : 0)) {
          result = ERROR_CODE.MAX_ROWS

          return true
        }
      }

      return false
    })

    return result
  }

  /**
   * Guess the index of the header row from the first 100 sample rows: the
   * earlier of the first row with the most non-empty cells and the first row
   * whose non-empty cell count is at least 99% of the average.
   */
  public detectHeaderStart(): number {
    const data = this.sample.slice(0, 100).map(([row]) => row.filter((c) => c.length).length)

    if (!data.length) {
      return 0
    }

    const averageRowLength = data.reduce((p, size) => p + size, 0) / data.length
    const longestRowLength = Math.max(...data)

    return Math.min(
      Math.max(
        data.findIndex((size) => size === longestRowLength),
        0
      ),
      Math.max(
        data.findIndex((size) => size >= averageRowLength * 0.99),
        0
      )
    )
  }

  /**
   * Guess whether the row at `start` is a header by comparing each of its
   * cells against the values found below it in the same column.
   *
   * @param overwrite when true, store a boolean verdict through the `hasHeader` setter
   * @returns true/false for a verdict, or null when no prediction can be made
   */
  public detectHasHeader(overwrite = false): boolean | null {
    const [first, ...rest] = this.sample.slice(this.start)

    // Nothing to inspect, and don't make predictions on files with <= 20 rows
    if (!first || rest.length <= 20) {
      return null
    }

    const [firstRow] = first

    // per column: true = looks like a header, false = looks like data, null = undecided
    const detected = firstRow.map((col, i) => {
      col = col?.trim()
      const rows = rest
        .map(([row]) => row[i])
        .filter((v) => !!v)
        .map((v) => v?.trim())
        .filter((v) => !!v)

      if (!rows.length) {
        return null
      }

      // constant column: the first cell is a header only if it differs from that constant
      if (rows.every((v) => v === rows[0])) {
        return col !== rows[0]
      }

      const validators = [isNumeric, isAllUppercase, isEmail, isCurrency]
      const relevantValidator = validators.find((aValidator) => rows.every(aValidator))

      // every value below matches a pattern: a first cell that breaks it is a header
      if (relevantValidator) {
        return !relevantValidator(col)
      }

      return null
    })

    if (detected.every((v) => v === null)) {
      return null
    }

    // ties count in favour of a header
    const hasHeader =
      detected.filter((v) => v === true).length >= detected.filter((v) => v === false).length

    if (overwrite) {
      this.hasHeader = hasHeader
    }

    return hasHeader
  }

  /**
   * Get the header row of the file as `[header, columnIndex]` tuples.
   * Returns undefined when the file has no header, and an empty array when
   * the row at `start` is not available.
   */
  public getHeaders(): undefined | IndexTuple[] {
    if (!this.hasHeader) {
      return
    }

    if (!this.sample || !Array.isArray(this.sample[this.start])) {
      return []
    }

    return this.sample[this.start][0].map((header, index): IndexTuple => [header, index])
  }

  /**
   * Get the first `limit` rows of the sample
   */
  public getPreviewData(limit: number = 5): RowTuple[] {
    const sample = this.sample
    return sample.slice(0, limit)
  }

  /**
   * True once the full data set is in memory
   */
  public isReady(): boolean {
    return !!this.$data
  }

  /**
   * Get the sample fill levels of the columns: for each column of the first
   * sample row, the fraction of data rows with a truthy cell, as
   * `[fraction, columnIndex]`. Computed once and then reused until reset().
   *
   * @param index when given, return only that column's tuple (`[0, 0]` if unknown)
   */
  public getFillLevel(index: number): IndexTuple<number>
  public getFillLevel(): IndexTuple<number>[]
  public getFillLevel(index?: number): IndexTuple<number> | IndexTuple<number>[] {
    if (!this.fillLevels || !this.fillLevels.length) {
      const [firstRow] = this.getFirstRow()
      this.fillLevels = this.sample
        .slice(this.startIndex)
        // renumber rows from zero so `n` is the count of rows already averaged
        .map(([row], i) => [row, i] as Tuple<string[], number>)
        .reduce(
          (acc, [row, n]) => {
            // incremental mean: fold this row's 0/1 fill flag into the running average
            return acc.map(([agg, i]) => [(agg * n + (!row[i] ? 0 : 1)) / (n + 1), i])
          },
          firstRow.map((_v, i): IndexTuple<number> => [0, i])
        )
    }
    if (index !== undefined) {
      return this.fillLevels.find(([, i]) => i === index) ?? ([0, 0] as IndexTuple<number>)
    }
    return this.fillLevels
  }

  /**
   * Sample the file and get the ideal encoding.
   *
   * FlatFile inputs are always reported as utf-8. Otherwise jschardet is run
   * on the string itself, on the first DETECT_ENCODING_LIMIT sample rows, or
   * on the first 4 KiB of the file. A guess with confidence of 0.5 or less,
   * or a failed read, falls back to utf-8.
   */
  public async detectEncoding(): Promise<string> {
    const { file } = this
    if (file instanceof FlatFile) {
      return 'utf-8'
    }
    const jschardet = await import('jschardet')
    const transaction = Sentry.startTransaction({ name: 'detectEncoding' })

    let res: unknown
    try {
      res = await new Promise((resolve) => {
        if (typeof file === 'string') {
          resolve(jschardet.detect(file))
        } else if (this.sample.length) {
          resolve(
            jschardet.detect(
              [...this.sample].slice(0, DETECT_ENCODING_LIMIT).reduce((str, [t]) => {
                str += `${t.join(',')}\n`
                return str
              }, '')
            )
          )
        } else {
          const encodingReader = new window.FileReader()
          const section = file.slice(0, 1024 * 4)

          encodingReader.onload = (event) => {
            const array = new Uint8Array((event.target as FileReader).result as ArrayBuffer)
            resolve(jschardet.detect(String.fromCharCode(...array)))
          }
          // without this a failed read would leave the promise pending forever
          encodingReader.onerror = () => resolve(null)
          encodingReader.readAsArrayBuffer(section)
        }
      })
    } finally {
      // close the transaction even when detection throws
      transaction.finish()
    }

    const detectedEncoding =
      res && (res as EncodingGuess).confidence > 0.5 ? (res as EncodingGuess).encoding : null
    return detectedEncoding || 'utf-8'
  }

  /**
   * Sample up to 100 rows starting at `start` and guesstimate the number of
   * total rows in the file from the average row size.
   *
   * Returns 0 (without caching it) while there is nothing to measure, so the
   * estimate can still be computed once a sample has been loaded.
   */
  private getEstimatedLength(): number {
    if (typeof this.estimatedLength === 'number') {
      return this.estimatedLength
    }

    // approximate on-disk size of a row: cell bytes plus 2 per cell for separators/quoting
    const addRowBytes = (size: number, [row]: RowTuple): number =>
      size + row.reduce((s, val) => s + byteLength(val) + 2, 0)

    const sample = this.sample
    const hc = this.start
    const sampledRows = sample.slice(hc, 100 + hc)
    const sampledBytes = sampledRows.reduce(addRowBytes, 0)

    if (!sampledRows.length || sampledBytes <= 0) {
      return 0
    }

    // rows before `start` are not data, so take them out of the total
    const headerBytes = sample.slice(0, hc).reduce(addRowBytes, 0)
    const totalBytes = Math.max(this.fileSize - headerBytes, 0)

    // divide by the rows actually sampled; the sample may hold fewer than 100
    const averageRowBytes = sampledBytes / sampledRows.length
    return (this.estimatedLength = Math.floor(totalBytes / averageRowBytes))
  }

  /**
   * Optimized method for sampling file in memory efficient way
   *
   * @param rowCallback invoked with the parse errors of a row and the
   *                    `[cells, index]` tuple; return true to stop streaming
   * @returns true when the callback stopped the stream early, false when the
   *          input was read to the end; rejects when Papa reports a read error
   */
  private async streamFileUntil(
    rowCallback: (errors: ParseError[], row: RowTuple) => boolean
  ): Promise<boolean> {
    const { file } = this

    if (file instanceof FlatFile) {
      let aborted = false
      file.stepUntil((rowData, index) => {
        if (rowCallback([], [rowData, index])) {
          aborted = true
          return true
        }
        return false
      })
      return aborted
    }

    Papa.LocalChunkSize = (1024 * 1024).toString()
    this.errors = []

    const encoding = this.settings.encoding ? this.settings.encoding : await this.getEncoding()

    return new Promise((resolve, reject) => {
      let aborted = false
      let i = 0
      Papa.parse(file, {
        encoding: this.encoding ? this.encoding : encoding,
        delimiter: '',
        delimitersToGuess: [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP],
        step: ({ data, errors }, parser) => {
          if (errors.length) {
            error('PARSE ERRORS', errors)
            this.errors = this.errors.concat(errors)
          }
          // to deal with the papaparse legacy format - check both direct and array form
          const d = (typeof data[0] === 'string' ? data : data[0]) as [string]
          if (rowCallback(errors, [d, i++])) {
            aborted = true
            parser.abort()
          }
        },
        complete() {
          resolve(aborted)
        },
        // settle the promise on read failures instead of leaving callers waiting forever
        error(err) {
          reject(err)
        }
      })
    })
  }
}

/** A parsed row paired with its row index: `[cells, rowIndex]` */
export type RowTuple = Tuple<string[], number>

/** A value paired with a numeric index: `[value, index]` */
export type IndexTuple<T = string> = Tuple<T, number>

/** A value paired with how often it occurred: `[value, count]` */
export type CountTuple<T = string> = Tuple<T, number>

/** A value paired with a boolean flag: `[value, flag]` */
export type SignedTuple<T = number> = Tuple<T, boolean>

/** Generic two-element tuple; both slots default to string */
export type Tuple<L = string, R = string> = [L, R]

/**
 * Result codes returned by FileParser.checkLimits
 */
export enum ERROR_CODE {
  // no row at or after the parser's start index was streamed
  NO_ROWS,
  // the row count reached the `maxRecords` setting
  MAX_ROWS
}

/**
 * Thrown when a column holds more distinct values than the caller allows
 */
export class TooManyUniquesError extends Error {
  /**
   * @param m human readable error message
   */
  constructor(m: string) {
    super(m)

    // Set the prototype explicitly so `instanceof` keeps working on down-levelled builds.
    Object.setPrototypeOf(this, TooManyUniquesError.prototype)

    // Identify the error by its own name instead of the generic "Error"
    this.name = 'TooManyUniquesError'
  }
}

/**
 * Shape the encoding detection result is read as: the guessed encoding
 * name and the confidence score compared against 0.5 before it is trusted
 */
type EncodingGuess = {
  confidence: number
  encoding: string
}

/**
 * Check whether a value consists solely of digits and the punctuation
 * commonly found in numbers and dates: `,` `.` `/` `-` and spaces.
 *
 * @param word the cell value to test
 * @returns true when every character belongs to that set and the value is not empty
 */
function isNumeric(word: string) {
  // the hyphen sits last in the class so it is a literal; written as `.-/` it
  // formed a range that left `-` out and rejected negative numbers
  return /^[0-9,./ -]+$/.test(word)
}

/**
 * Check whether a value is a number with a leading or trailing unit symbol
 * (`$`, `€`, `£`, `#` or `%`).
 *
 * @param word the cell value to test
 * @returns true when the whole value is a symbol followed by a number, or a
 *          number followed by a symbol
 */
function isCurrency(word: string) {
  // the alternation is grouped so both anchors apply to both forms; ungrouped,
  // `^A|B$` accepted any text that merely started with A or ended with B
  return /^(?:[$€£#%][0-9,./ -]+|[0-9,./ -]+[$€£#%])$/.test(word)
}

/**
 * Check whether a value is made up only of the uppercase letters A-Z.
 *
 * @param word the cell value to test
 * @returns true for a non-empty run of A-Z, false for anything else
 */
function isAllUppercase(word: string) {
  const uppercaseOnly = /^[A-Z]+$/
  return uppercaseOnly.test(word)
}

/**
 * Check whether a value has the shape of an e-mail address: a dotted or
 * quoted local part, an `@`, and either a bracketed IPv4 literal or a
 * dotted host name ending in at least two letters.
 *
 * @param word the cell value to test
 * @returns true when the whole value matches that shape
 */
function isEmail(word: string) {
  const emailPattern = /^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/

  return emailPattern.test(word)
}
