import { fuzzy } from 'fast-fuzzy'
import _ from 'lodash'
import PSPDFKit, {
  Color,
  HighlightAnnotation,
  Instance,
  Rect,
  SearchResult,
  TextLine,
} from 'pspdfkit'

import { Maybe } from 'types'

import { findClosestMatchLev } from 'utils/levenshtein'
import { extractMarkedTextSimple } from 'utils/source'
import { findClosestMatchRegex } from 'utils/string'
import { Source } from 'utils/task'

import {
  HIGHLIGHT_DEFAULT_OPACITY,
  HIGHLIGHT_SELECTED_OPACITY,
} from './pdf-viewer'

/**
 * Canonicalizes text for matching: lower-cases it and then drops every
 * character that is not an ASCII letter (a-z) or digit (0-9). Whitespace and
 * punctuation are therefore removed as well.
 */
export const normalizeString = (input: string) => {
  const lowered = input.toLowerCase()
  return lowered.replace(/[^a-z0-9]/g, '')
}

/**
 * Returns the highlight annotations for a single source.
 *
 * On a cache miss (no entry for `selectedSource.id` in `cachedAnnotations`)
 * the text lines to highlight are resolved, preferring the source's
 * `highlightLineIndices` and falling back to text matching from
 * `selectedSource.page`. Precise bounding boxes for the first and last line
 * are then computed and the result is handed to `updateAnnotationStore`.
 * On a cache hit the stored lines and bounding boxes are reused as-is.
 *
 * @param params.isSelectedSource - selects the selected-source opacity for the created highlights.
 * @param params.selectedSource - the source whose marked text should be highlighted.
 * @param params.pdfkitInstance - viewer instance used to read text lines and search.
 * @param params.cachedAnnotations - previously resolved lines/bounding boxes keyed by source id.
 * @param params.updateAnnotationStore - callback invoked with freshly resolved lines on a cache miss.
 * @returns the page the highlight starts on and the annotations to add
 *   (an empty array when no text line could be resolved).
 */
export const getOrCreateAnnotations = async (params: {
  isSelectedSource: boolean
  selectedSource: Source
  pdfkitInstance: Instance
  cachedAnnotations: {
    [key: string]: {
      startingPage: number
      textLines: [TextLine[], Maybe<Rect>, Maybe<Rect>]
    }
  }
  updateAnnotationStore: (
    sourceId: string,
    annotation: {
      startingPage: number
      textLines: [TextLine[], Maybe<Rect>, Maybe<Rect>]
    }
  ) => void
}): Promise<{ startingPage: number; annotations: HighlightAnnotation[] }> => {
  const {
    isSelectedSource,
    selectedSource,
    cachedAnnotations,
    pdfkitInstance,
    updateAnnotationStore,
  } = params
  const cached = cachedAnnotations[selectedSource.id]
  let startingPage: number
  let textLinesWithBoundingBoxes: [TextLine[], Maybe<Rect>, Maybe<Rect>]

  if (_.isNil(cached)) {
    // Extract the marked text once; it is only needed on a cache miss.
    const extracted = extractMarkedTextSimple(selectedSource.text)
    // Remove final ellipsis from the text used for bounding-box searches
    const markText = extracted.replace(/…$/, '')

    let highlightLines = selectedSource.highlightLineIndices
      ? await getTextLinesFromHighlightLineIndices(
          selectedSource.highlightLineIndices,
          pdfkitInstance
        )
      : { startingPage: -1, textLines: [] }

    if (
      highlightLines.startingPage === -1 ||
      highlightLines.textLines.length === 0
    ) {
      // If highlight line indices were not provided or found no match, fall back to using the start page and text matching
      highlightLines = await getTextLinesFromTextAndStartingPage(
        extracted,
        selectedSource.page,
        pdfkitInstance
      )
    }

    startingPage = highlightLines.startingPage
    const resolvedLines = highlightLines.textLines

    if (resolvedLines.length > 1) {
      // Multiple lines: the first and last line get their own precise bounding boxes
      const { firstLineBoundingBox, lastLineBoundingBox } =
        await getFirstAndLastLineBoundingBoxes(
          resolvedLines[0],
          resolvedLines[1],
          resolvedLines[resolvedLines.length - 2],
          resolvedLines[resolvedLines.length - 1],
          markText,
          pdfkitInstance
        )
      textLinesWithBoundingBoxes = [
        resolvedLines,
        firstLineBoundingBox,
        lastLineBoundingBox,
      ]
    } else if (resolvedLines.length === 1) {
      // Single line: one precise bounding box serves as both first and last
      const boundingBox = await getPreciseBoundingBox(
        resolvedLines[0],
        markText,
        pdfkitInstance,
        true
      )
      textLinesWithBoundingBoxes = [resolvedLines, boundingBox, boundingBox]
    } else {
      // If no lines, we don't create any annotations
      textLinesWithBoundingBoxes = [[], null, null]
    }

    updateAnnotationStore(selectedSource.id, {
      startingPage,
      textLines: textLinesWithBoundingBoxes,
    })
  } else {
    // Use cached annotations
    startingPage = cached.startingPage
    textLinesWithBoundingBoxes = cached.textLines
  }

  const [textLines, firstLineBoundingBox, lastLineBoundingBox] =
    textLinesWithBoundingBoxes

  // Nothing resolved -> nothing to highlight
  if (textLines.length === 0) {
    return { startingPage, annotations: [] }
  }

  return {
    startingPage,
    annotations: createAnnotationObjectsForPdfSection(
      textLines,
      firstLineBoundingBox,
      lastLineBoundingBox,
      isSelectedSource
    ),
  }
}

/**
 * Builds the (non-selected) highlight annotations for all given sources,
 * merging overlapping line ranges that start on the same page.
 *
 * When `cachedDocumentAnnotations` is non-empty it is used verbatim and
 * `sources` is ignored. Otherwise, for every source the text lines of
 * `source.page` (plus the following page, if it exists) are matched against
 * the source's marked text, the resulting line ranges are merged per page,
 * and each page's merged sections are reported through
 * `updateDocumentAnnotationStore` (including pages where nothing matched,
 * which are reported with an empty list).
 *
 * @param sources - sources whose marked text should be highlighted.
 * @param pdfkitInstance - viewer instance used to read text lines and search.
 * @param cachedDocumentAnnotations - previously computed sections keyed by page.
 * @param updateDocumentAnnotationStore - callback receiving the sections computed for each page.
 * @returns all highlight annotations, created with `isSelectedSource = false`.
 */
export const getMergedAnnotations = async (
  sources: Source[],
  pdfkitInstance: Instance,
  cachedDocumentAnnotations: {
    [key: number]: [TextLine[], Maybe<Rect>, Maybe<Rect>][]
  },
  updateDocumentAnnotationStore: (
    page: number,
    annotations: [TextLine[], Maybe<Rect>, Maybe<Rect>][]
  ) => void
  // eslint-disable-next-line max-params
) => {
  const annotations: HighlightAnnotation[] = []

  // Shared by the cached and the freshly computed path
  const pushSectionAnnotations = (
    sections: [TextLine[], Maybe<Rect>, Maybe<Rect>][]
  ) => {
    for (const [lines, firstLineBoundingBox, lastLineBoundingBox] of sections) {
      annotations.push(
        ...createAnnotationObjectsForPdfSection(
          lines,
          firstLineBoundingBox,
          lastLineBoundingBox,
          false
        )
      )
    }
  }

  if (!_.isEmpty(cachedDocumentAnnotations)) {
    // fetch from cache instead
    for (const page of Object.keys(cachedDocumentAnnotations).map(Number)) {
      pushSectionAnnotations(cachedDocumentAnnotations[page])
    }
    return annotations
  }

  // calculate index ranges since nothing is cached
  const textLineIndicesByPage: {
    [key: number]: [number, number, Maybe<Rect>, Maybe<Rect>][]
  } = {}
  const allTextLinesByPage: { [key: number]: TextLine[] } = {}

  for (const source of sources) {
    const extracted = extractMarkedTextSimple(source.text)
    // Remove final ellipsis for bounding-box searches, as getOrCreateAnnotations does
    const markText = extracted.replace(/…$/, '')

    // Text lines of the start page (+ next page) are fetched once per page,
    // not once per source.
    let textLines = allTextLinesByPage[source.page]
    if (_.isNil(textLines)) {
      textLines = Array.from(
        await pdfkitInstance.textLinesForPageIndex(source.page)
      )
      // only add second page if within range
      if (source.page + 1 < pdfkitInstance.totalPageCount) {
        textLines = [
          ...textLines,
          ...Array.from(
            await pdfkitInstance.textLinesForPageIndex(source.page + 1)
          ),
        ]
      }
      allTextLinesByPage[source.page] = textLines
    }

    const indexRangeToAnnotate = getLineIndexRangeToAnnotate(
      extracted,
      textLines
    )
    // Register the page even without a match so it is still reported below
    if (_.isNil(textLineIndicesByPage[source.page])) {
      textLineIndicesByPage[source.page] = []
    }
    if (!indexRangeToAnnotate) {
      continue
    }

    const [startLine, endLine] = indexRangeToAnnotate
    // In terms of bounding boxes, there are two cases to consider:
    if (startLine === endLine) {
      // 1. There is only one line, in which case the first and last line have the same bounding box and precise text
      const boundingBox = await getPreciseBoundingBox(
        textLines[startLine],
        markText,
        pdfkitInstance,
        true
      )
      textLineIndicesByPage[source.page].push([
        startLine,
        startLine,
        boundingBox,
        boundingBox,
      ])
    } else {
      // 2. There is more than one line, in which case we treat the first and last line separately
      const { firstLineBoundingBox, lastLineBoundingBox } =
        await getFirstAndLastLineBoundingBoxes(
          textLines[startLine],
          textLines[startLine + 1],
          textLines[endLine - 1],
          textLines[endLine],
          markText,
          pdfkitInstance
        )
      textLineIndicesByPage[source.page].push([
        startLine,
        endLine,
        firstLineBoundingBox,
        lastLineBoundingBox,
      ])
    }
  }

  for (const page of Object.keys(textLineIndicesByPage).map(Number)) {
    const mergedAnnotationIntervals = getMergedAnnotationIntervals(
      textLineIndicesByPage[page]
    )
    // go through all merged intervals and collect their text lines
    const textLinesForPage: [TextLine[], Maybe<Rect>, Maybe<Rect>][] =
      mergedAnnotationIntervals.map((interval) => [
        allTextLinesByPage[page].slice(interval[0], interval[1] + 1),
        interval[2],
        interval[3],
      ])
    updateDocumentAnnotationStore(page, textLinesForPage)
    pushSectionAnnotations(textLinesForPage)
  }

  return annotations
}

/**
 * Locates `text` within the text lines of `startPage` and, when it exists,
 * the page right after it.
 *
 * @param text - the text to look for.
 * @param startPage - index of the first page to inspect.
 * @param document - viewer instance providing the text lines.
 * @returns the matching text lines and the page they start on. The page is
 *   the first matching line's `pageIndex`, or `startPage` when that is
 *   null/undefined or nothing matched.
 */
export const getTextLinesFromTextAndStartingPage = async (
  text: string,
  startPage: number,
  document: Instance
) => {
  // Candidate lines: the start page ...
  const candidateLines = Array.from(
    await document.textLinesForPageIndex(startPage)
  )
  // ... followed by the next page, but only if it is within range
  const nextPage = startPage + 1
  if (nextPage < document.totalPageCount) {
    const nextPageLines = await document.textLinesForPageIndex(nextPage)
    candidateLines.push(...Array.from(nextPageLines))
  }

  const matchedLines = getLinesToAnnotate(text, candidateLines)

  let startingPage = startPage
  if (matchedLines.length > 0) {
    startingPage = matchedLines[0].pageIndex ?? startPage
  }

  return { startingPage, textLines: matchedLines }
}

/**
 * Uses the highlight_line_indices stored in LanceDB to determine the relevant
 * text lines to annotate.
 *
 * @param highlightLineIndices - `[pageIndex, lineIndex]` pairs.
 * @param pdfkitInstance - viewer instance providing the text lines per page.
 * @returns the referenced text lines, grouped by ascending page index and in
 *   input order within a page, together with the lowest referenced page
 *   index. Line indices that do not exist on their page are skipped. For
 *   empty input `startingPage` is `-1` and `textLines` is empty.
 */
export const getTextLinesFromHighlightLineIndices = async (
  highlightLineIndices: number[][],
  pdfkitInstance: Instance
) => {
  // Group the requested line indices by page
  const lineIndicesByPage = new Map<number, number[]>()
  for (const [page, line] of highlightLineIndices) {
    const lineIndices = lineIndicesByPage.get(page)
    if (lineIndices === undefined) {
      lineIndicesByPage.set(page, [line])
    } else {
      lineIndices.push(line)
    }
  }

  if (lineIndicesByPage.size === 0) {
    // Math.min() over nothing would yield Infinity; report "no page" instead
    const noLines: TextLine[] = []
    return { startingPage: -1, textLines: noLines }
  }

  // Ascending page order keeps the highlight lines in reading order
  const pages = Array.from(lineIndicesByPage.keys()).sort((a, b) => a - b)

  const textLinesPerPage = await Promise.all(
    pages.map(async (page) => {
      const pageTextLines = Array.from(
        await pdfkitInstance.textLinesForPageIndex(page)
      )
      return (lineIndicesByPage.get(page) ?? [])
        .map((lineIndex) => pageTextLines[lineIndex])
        // Drop indices that point past the page's lines, which would
        // otherwise surface as `undefined` text lines downstream
        .filter((textLine): textLine is TextLine => textLine != null)
    })
  )

  return {
    startingPage: pages[0],
    textLines: textLinesPerPage.flat(),
  }
}

/**
 * Merges overlapping line-index intervals.
 *
 * Each interval is `[startLine, endLine, firstLineBoundingBox, lastLineBoundingBox]`
 * with inclusive line indices. Intervals are ordered by start line; an
 * interval whose start is not past the current end is folded into the current
 * one. A merged interval keeps the first-line bounding box of its earliest
 * member and takes the last-line bounding box of whichever member extends
 * furthest.
 *
 * Neither the input array nor its tuples are modified.
 *
 * @param intervals - intervals to merge, in any order.
 * @returns the merged intervals sorted by start line.
 */
const getMergedAnnotationIntervals = (
  intervals: [number, number, Maybe<Rect>, Maybe<Rect>][]
): [number, number, Maybe<Rect>, Maybe<Rect>][] => {
  const mergedIntervals: [number, number, Maybe<Rect>, Maybe<Rect>][] = []

  if (intervals.length === 0) return mergedIntervals

  // Sort a copy so the caller's array keeps its order
  const sorted = [...intervals].sort((a, b) => a[0] - b[0])

  // Work on fresh tuples so the caller's tuples are never written to
  let current: [number, number, Maybe<Rect>, Maybe<Rect>] = [
    sorted[0][0],
    sorted[0][1],
    sorted[0][2],
    sorted[0][3],
  ]

  for (const next of sorted.slice(1)) {
    if (next[0] <= current[1]) {
      // Overlap: only extend when the next interval reaches further, and then
      // adopt its last-line bounding box along with its end
      if (next[1] > current[1]) {
        current = [current[0], next[1], current[2], next[3]]
      }
    } else {
      mergedIntervals.push(current)
      current = [next[0], next[1], next[2], next[3]]
    }
  }

  mergedIntervals.push(current)

  return mergedIntervals
}

/**
 * Finds the inclusive range of line indices in `textLines` that contain
 * `text`.
 *
 * Both the lines and the snippet are normalized with `normalizeString` and
 * the lines are concatenated into one haystack. The snippet is located by
 * exact search first, then by `findClosestMatchRegex`, then by
 * `findClosestMatchLev`. The resulting character offsets are mapped back to
 * line indices through the cumulative normalized length of the lines.
 *
 * @param text - the snippet to locate.
 * @param textLines - the candidate lines, in reading order.
 * @returns `[startLine, endLine]` (both inclusive, both valid indices into
 *   `textLines`), or `null` when the snippet or the lines are empty after
 *   normalization or no match is found.
 */
const getLineIndexRangeToAnnotate = (
  text: string,
  textLines: TextLine[]
): Maybe<[number, number]> => {
  // lineEndOffsets[i] is the (exclusive) end offset of line i in the haystack
  const lineEndOffsets: number[] = []
  let textNoWhiteSpace = ''
  for (const line of textLines) {
    textNoWhiteSpace += normalizeString(line.contents)
    lineEndOffsets.push(textNoWhiteSpace.length)
  }

  // normalize snippet text
  const noWhiteSpace = normalizeString(text)

  // An empty snippet would trivially "match" at offset 0 and an empty page has
  // no line to point at, so there is nothing meaningful to annotate
  if (noWhiteSpace.length === 0 || textNoWhiteSpace.length === 0) {
    return null
  }

  // get the index inside of the textNoWhiteSpace
  let startIdx = textNoWhiteSpace.indexOf(noWhiteSpace)

  // if we can't highlight try a minimizing regex match
  if (startIdx === -1) {
    const splitText = text.split(' ')
    const firstWord = normalizeString(splitText[0])
    const lastWord = normalizeString(splitText[splitText.length - 1])
    const { index } = findClosestMatchRegex({
      haystack: textNoWhiteSpace,
      firstWord,
      lastWord,
      length: noWhiteSpace.length,
      threshold: 0.3,
    })
    startIdx = index
  }

  // if we still can't find it, we try to find the closest match
  if (startIdx === -1) {
    startIdx = findClosestMatchLev(textNoWhiteSpace, noWhiteSpace)
  }

  if (startIdx === -1) {
    console.warn('text not found in document', {
      snippet: noWhiteSpace,
      full: textNoWhiteSpace,
    })
    return null
  }

  const lastLineIndex = textLines.length - 1

  // The character at offset `startIdx` belongs to the first line whose end
  // offset is strictly greater than `startIdx`. `_.sortedLastIndex` gives that;
  // `_.sortedIndex` would pick the previous line whenever the match starts
  // exactly on a line boundary.
  const startLine = Math.min(
    _.sortedLastIndex(lineEndOffsets, startIdx),
    lastLineIndex
  )

  // The last matched character (offset end - 1) belongs to the first line
  // whose end offset is >= end. Clamp so the range stays ordered and in bounds
  // even when an approximate match runs past the available text.
  const endLine = Math.min(
    Math.max(
      _.sortedIndex(lineEndOffsets, startIdx + noWhiteSpace.length),
      startLine
    ),
    lastLineIndex
  )

  return [startLine, endLine]
}

/**
 * Returns the slice of `textLines` that `getLineIndexRangeToAnnotate` reports
 * as containing `text` (both range ends included), or an empty array when no
 * range is found.
 */
const getLinesToAnnotate = (
  text: string,
  textLines: TextLine[]
): TextLine[] => {
  const range = getLineIndexRangeToAnnotate(text, textLines)
  if (range) {
    const [firstIndex, lastIndex] = range
    // slice's end is exclusive, the range's end is inclusive
    return textLines.slice(firstIndex, lastIndex + 1)
  }
  return []
}

/**
 * Creates one highlight annotation per text line of a contiguous section.
 *
 * The first and last line use the supplied precise bounding boxes (falling
 * back to the full line when a box is null); lines in between are highlighted
 * in full.
 *
 * @param textLines - the lines of the section, in reading order.
 * @param firstLineBoundingBox - precise box for the first line, if known.
 * @param lastLineBoundingBox - precise box for the last line, if known.
 * @param isSelectedSource - forwarded to pick the highlight opacity.
 * @returns the annotations; empty for an empty section and exactly one
 *   annotation for a single-line section.
 */
const createAnnotationObjectsForPdfSection = (
  textLines: TextLine[],
  firstLineBoundingBox: Maybe<Rect>,
  lastLineBoundingBox: Maybe<Rect>,
  isSelectedSource: boolean
  // eslint-disable-next-line max-params
): HighlightAnnotation[] => {
  if (textLines.length === 0) {
    // No line to read a page index or bounding box from
    return []
  }

  if (textLines.length === 1) {
    // The only line is both first and last; emitting it twice would stack two
    // highlights on top of each other.
    return [
      createPdfTextAnnotationObjects(
        textLines[0],
        isSelectedSource,
        firstLineBoundingBox ?? lastLineBoundingBox
      ),
    ]
  }

  return [
    createPdfTextAnnotationObjects(
      textLines[0],
      isSelectedSource,
      firstLineBoundingBox
    ),
    ...textLines
      .slice(1, -1)
      .map((textLine) =>
        createPdfTextAnnotationObjects(textLine, isSelectedSource)
      ),
    createPdfTextAnnotationObjects(
      textLines[textLines.length - 1],
      isSelectedSource,
      lastLineBoundingBox
    ),
  ]
}

/**
 * Creates a highlight annotation for one text line.
 *
 * @param textLine - the line to highlight; supplies the page index and the default area.
 * @param isSelectedSource - when true the selected opacity is used, otherwise the default opacity.
 * @param boundingBox - area to highlight instead of the whole line; when
 *   null/undefined the line's own bounding box is used.
 * @returns a new `HighlightAnnotation` covering a single rect.
 */
export const createPdfTextAnnotationObjects = (
  textLine: TextLine,
  isSelectedSource = false,
  boundingBox: Maybe<Rect> = null
) => {
  // The same rect is used as the annotation's bounding box and its only rect
  const highlightArea = boundingBox ?? textLine.boundingBox

  let opacity = HIGHLIGHT_DEFAULT_OPACITY
  if (isSelectedSource) {
    opacity = HIGHLIGHT_SELECTED_OPACITY
  }

  return new PSPDFKit.Annotations.HighlightAnnotation({
    pageIndex: textLine.pageIndex,
    rects: PSPDFKit.Immutable.List([highlightArea]),
    boundingBox: highlightArea,
    opacity,
    blendMode: 'multiply',
    color: Color.fromHex('#BFDBFE'),
  })
}

/**
 * Computes precise bounding boxes for the first and last line of a
 * multi-line highlight.
 *
 * The part of `markText` on the first line is derived from the second line
 * (everything in `markText` before it); the part on the last line is derived
 * from the second-to-last line (everything in `markText` after it).
 *
 * @returns
 *   - `firstLineBoundingBox`: null when the first line, the second line, or
 *     the derived text is unavailable; a zero-sized rect when the derived
 *     text is empty; otherwise the result of `getPreciseBoundingBox`.
 *   - `lastLineBoundingBox`: null when the last line is missing or the derived
 *     text is unavailable or empty; otherwise the result of
 *     `getPreciseBoundingBox`.
 */
const getFirstAndLastLineBoundingBoxes = async (
  firstTextLine: Maybe<TextLine>,
  secondTextLine: Maybe<TextLine>,
  secondToLastTextLine: Maybe<TextLine>,
  lastTextLine: Maybe<TextLine>,
  markText: string,
  pspdfInstance: Instance
): Promise<{
  firstLineBoundingBox: Maybe<Rect>
  lastLineBoundingBox: Maybe<Rect>
  // eslint-disable-next-line max-params
}> => {
  let textOnFirstLine: Maybe<string> = null
  if (secondTextLine) {
    textOnFirstLine = getPreciseTextInTextLine(secondTextLine, markText, true)
  }

  let textOnLastLine: Maybe<string> = null
  if (secondToLastTextLine) {
    textOnLastLine = getPreciseTextInTextLine(
      secondToLastTextLine,
      markText,
      false
    )
  }

  // First line. Stays null when there is not enough information.
  let firstLineBoundingBox: Maybe<Rect> = null
  const canResolveFirstLine =
    Boolean(firstTextLine) &&
    textOnFirstLine !== null &&
    textOnFirstLine !== undefined
  if (canResolveFirstLine && firstTextLine && typeof textOnFirstLine === 'string') {
    if (textOnFirstLine.length === 0) {
      // Nothing of the marked text sits on this line -> empty bounding box,
      // since there is no need to highlight it
      firstLineBoundingBox = new PSPDFKit.Geometry.Rect({
        left: 0,
        top: 0,
        width: 0,
        height: 0,
      })
    } else {
      firstLineBoundingBox = await getPreciseBoundingBox(
        firstTextLine,
        textOnFirstLine,
        pspdfInstance,
        true
      )
    }
  }

  // Last line. Missing or empty text leaves the box null.
  let lastLineBoundingBox: Maybe<Rect> = null
  if (textOnLastLine && lastTextLine) {
    lastLineBoundingBox = await getPreciseBoundingBox(
      lastTextLine,
      textOnLastLine,
      pspdfInstance,
      false
    )
  }

  return { firstLineBoundingBox, lastLineBoundingBox }
}

/**
 * Derives the part of `markText` that lies on the first (or last) line of a
 * highlight by locating a neighbouring line inside `markText`.
 *
 * NOTE: with `isSearchForFirstLine` true, `textLine` should be the SECOND
 * line and the returned text is everything in `markText` before it. With
 * `isSearchForFirstLine` false, `textLine` should be the SECOND TO LAST line
 * and the returned text is everything in `markText` after it (trimmed).
 *
 * The neighbouring line is shortened one character at a time (from its end
 * for the first-line search, from its start for the last-line search) until
 * the remainder occurs in `markText`; fewer than 90% of its characters are
 * ever removed.
 *
 * @returns the derived text, or null when no remainder of the line occurs in `markText`.
 */
const getPreciseTextInTextLine = (
  textLine: TextLine,
  markText: string,
  isSearchForFirstLine: boolean
): Maybe<string> => {
  // PSPDFkit text may contain smart quotes, which will never match mark text
  /* eslint-disable */
  const smartQuoteReplacements: [string, string][] = [
    ['“', '"'],
    ['”', '"'],
    ['’', "'"],
  ]
  /* eslint-enable */
  let lineText = textLine.contents
  for (const [smartQuote, plainQuote] of smartQuoteReplacements) {
    lineText = lineText.split(smartQuote).join(plainQuote)
  }

  const maxCharsToDrop = Math.floor(lineText.length * 0.9)
  let dropped = 0
  while (dropped < maxCharsToDrop) {
    // First-line search: progressively shorter prefixes of the second line.
    // Last-line search: progressively shorter suffixes of the second to last line.
    const fragment = isSearchForFirstLine
      ? lineText.slice(0, lineText.length - dropped)
      : lineText.slice(dropped)

    const position = markText.indexOf(fragment)
    if (position >= 0) {
      if (isSearchForFirstLine) {
        return markText.slice(0, position)
      }
      return markText.slice(position + fragment.length).trim()
    }
    dropped += 1
  }
  return null
}

/**
 * Finds the bounding box of `preciseText` within `textLine`.
 *
 * The text is searched from the line's page through the following page index.
 * Of the search results, those with the highest fuzzy score against the
 * line's contents are kept, and their rects are filtered to the ones lying on
 * the same line as `textLine` (same top and height within a tolerance).
 * Among those, the rect with the right-most right edge is chosen for a first
 * line and the one with the left-most right edge otherwise; ties go to the
 * wider rect.
 *
 * @param textLine - the line that should contain the text.
 * @param preciseText - text within textLine to highlight.
 * @param pspdfInstance - viewer instance used for searching.
 * @param isFirstLine - selects which same-line rect is preferred (see above).
 * @returns the chosen rect, or the whole line's bounding box when
 *   `preciseText` is empty, the line has no page index, or no search result
 *   lies on the line.
 */
const getPreciseBoundingBox = async (
  textLine: TextLine,
  preciseText: string, // text within textLine to highlight
  pspdfInstance: Instance,
  isFirstLine: boolean
  // eslint-disable-next-line max-params
): Promise<Rect> => {
  if (preciseText === '' || textLine.pageIndex === null) {
    return textLine.boundingBox
  }

  const candidates = await pspdfInstance.search(preciseText, {
    startPageIndex: textLine.pageIndex,
    endPageIndex: textLine.pageIndex + 1,
  })
  const targetBoundingBox = textLine.boundingBox

  // Score a search result by how well its matched preview text fits the line
  const computeFuzzyScore = (candidate: SearchResult) => {
    if (
      candidate.locationInPreview === null ||
      candidate.lengthInPreview === null
    ) {
      return 0
    }
    return fuzzy(
      candidate.previewText.substring(
        candidate.locationInPreview,
        candidate.locationInPreview + candidate.lengthInPreview
      ),
      textLine.contents
    )
  }
  const scores = candidates.map((c) => ({
    candidate: c,
    score: computeFuzzyScore(c),
  }))
  const maxScore = scores.reduce((max, { score }) => Math.max(max, score), 0)
  // Keep only the best-scoring results (all of them when every score is 0)
  const matches = scores
    .filter(({ score }) => score === maxScore) // && maxScore > 0.9
    .map(({ candidate }) => candidate)

  const isOnTheSameLine = (bbox: Rect, epsilon: number) => {
    return (
      Math.abs(bbox.top - targetBoundingBox.top) <= epsilon &&
      Math.abs(bbox.height - targetBoundingBox.height) <= epsilon
    )
  }

  // Collect every rect of the best matches and keep those on the target line
  const unrolledMatchRects = matches.flatMap((match) =>
    match.rectsOnPage.toArray()
  )
  const matchesOnSameLineStrict = unrolledMatchRects.filter(
    (candidate) =>
      isOnTheSameLine(candidate, 0.1) && candidate.right - candidate.left > 10
  )
  // Try small epsilon, then use a larger epsilon if strict check returns nothing
  const matchesOnSameLine =
    matchesOnSameLineStrict.size > 0
      ? matchesOnSameLineStrict
      : unrolledMatchRects.filter((candidate) => isOnTheSameLine(candidate, 1))

  // Sort primarily by right edge, secondarily by width (descending)
  const sortedMatchesOnSameLine = matchesOnSameLine.sort((a, b) => {
    if (a.right === b.right) {
      const aWidth = a.right - a.left
      const bWidth = b.right - b.left
      return bWidth - aWidth
    }
    return isFirstLine ? b.right - a.right : a.right - b.right
  })

  // `first()` is undefined for an empty list; honour the declared return type
  // by falling back to the whole line.
  return sortedMatchesOnSameLine.first() ?? textLine.boundingBox
}
