import * as Sentry from '@sentry/browser'
import {
  convertInchesToTwip,
  convertMillimetersToTwip,
  AlignmentType,
  Document as WordDocument,
  SectionType,
  Paragraph,
  type ParagraphChild,
  TextRun,
  HeadingLevel,
  Table as WordTable,
  TableCell,
  TableRow,
  WidthType,
  ImageRun,
  ExternalHyperlink,
  LevelFormat,
  ShadingType,
  type IParagraphPropertiesOptions,
  type IRunPropertiesOptions,
  IBaseParagraphStyleOptions,
} from 'docx'
import type { FileChild } from 'docx/build/file/file-child'
import _, { isNil } from 'lodash'

import { fetchCourtCitation } from 'models/fetchers/usa-case-law-fetcher'

import { cdnUrls } from 'utils/server-data'

import {
  RESEARCH_ARTIFACT_PATH,
  getCaseDateYear,
} from 'components/research/research-helpers'

import { Source } from './task'

/**
 * Type guard for the run-level docx classes this exporter produces.
 *
 * @param value - Any value produced while flattening the HTML tree.
 * @returns `true` when `value` is a `TextRun`, `ImageRun` or
 *   `ExternalHyperlink` instance.
 */
function isParagraphChild(value: unknown): value is ParagraphChild {
  const runLevelClasses = [TextRun, ImageRun, ExternalHyperlink]
  return runLevelClasses.some((runClass) => value instanceof runClass)
}

/**
 * Type guard that strips `null` from a union; intended for `Array.filter`.
 * Only `null` is rejected — `undefined` and other falsy values pass.
 *
 * @param value - Candidate value.
 * @returns `false` for `null`, `true` for everything else.
 */
function isNotNull<T>(value: T | null): value is T {
  if (value === null) {
    return false
  }
  return true
}

/**
 * Looks up the CSS `display` value the browser assigns to a tag by briefly
 * attaching a freshly created element to `document.body` and reading its
 * computed style.
 *
 * The probe element is detached in a `finally` block so that a failure while
 * reading the style cannot leave it behind in the live document.
 *
 * @param tagName - Tag name to probe (e.g. `'DIV'`, `'SPAN'`).
 * @returns The computed `display` value, or `'none'` when the probe fails.
 */
function getElementDefaultDisplay(tagName: string): string {
  let cStyle = 'none'
  let probe: HTMLElement | null = null
  let attached = false
  try {
    const t = document.createElement(tagName)
    probe = t
    const gcs = 'getComputedStyle' in window
    document.body.appendChild(t)
    attached = true
    // @ts-expect-error eslint-disable-next-line
    cStyle = (gcs ? window.getComputedStyle(t, '') : t.currentStyle).display
  } catch (error) {
    // TODO: Ideally we just display these nodes as inline text, but at this point
    // they have already been transformed by the DOMParser, so we can't just wrap
    // the node in spans. Currently these will just fall to our unhandled in-line
    // child tags and be left out of the export.
    console.warn(`Failed to create element for tag “${tagName}”:`, error)
  } finally {
    if (attached && probe !== null) {
      try {
        document.body.removeChild(probe)
      } catch (error) {
        // Cleanup is best-effort; the style value (if any) is still returned.
        console.warn(`Failed to create element for tag “${tagName}”:`, error)
      }
    }
  }
  return cStyle
}

/**
 * Decides whether a DOM node should be converted as in-line content (a docx
 * ParagraphChild) rather than as a block.
 *
 * "Inline" HTML tags are roughly equivalent to docx ParagraphChild elements,
 * with some exceptions: `IMG` is always treated as in-line.
 *
 * @param node - DOM node to classify.
 * @returns `true` for `IMG`, text nodes, and elements whose default CSS
 *   display is `inline`; `false` otherwise.
 */
function shouldBeParagraphChild(node: Node): boolean {
  if (node.nodeName === 'IMG') {
    return true
  }
  if (node.nodeType === Node.ELEMENT_NODE) {
    return getElementDefaultDisplay(node.nodeName) === 'inline'
  }
  if (node.nodeType === Node.TEXT_NODE) {
    return true
  }
  console.warn('Unexpected node type:', node.nodeType)
  return false
}

/**
 * Converts an HTML `<img>` into a docx `ImageRun`.
 *
 * `data:` sources are embedded as-is. Sources hosted under one of `cdnUrls`
 * are re-encoded through `convertImageToPng`, whose resulting size replaces
 * the requested one. Any other source, and any failure along the way, yields
 * a red placeholder `TextRun` instead (failures are reported to Sentry).
 *
 * @param htmlImage - Image element to export.
 * @param width - Requested width, or `null` to derive it from the image.
 * @param height - Requested height, or `null` to derive it from the image.
 * @returns The image run, or the placeholder text run.
 */
const htmlImgToDocx = async (
  htmlImage: HTMLImageElement,
  width: number | null = null,
  height: number | null = null
): Promise<ImageRun | TextRun> => {
  const placeholder = new TextRun({
    text: '[Could not export image.]',
    color: 'dc2626',
  })
  try {
    const src = htmlImage.src
    const isInlineData = src.startsWith('data:')
    const isCdnHosted =
      !isInlineData &&
      _.some(cdnUrls, (cdnUrl) => !!cdnUrl && src.startsWith(cdnUrl))
    if (!isInlineData && !isCdnHosted) {
      return placeholder
    }

    let imgData
    let targetWidth = width
    let targetHeight = height
    if (isInlineData) {
      imgData = src
    } else {
      const png = await convertImageToPng(src, width, height)
      imgData = png.dataUrl
      targetWidth = png.width
      targetHeight = png.height
    }
    if (_.isNil(imgData)) {
      return placeholder
    }

    // Only load the image again when an explicit size is not fully known.
    const dimensions =
      targetWidth === null || targetHeight === null
        ? await getImageDimensions(htmlImage)
        : { width: targetWidth, height: targetHeight }
    return new ImageRun({
      data: imgData,
      transformation: { ...dimensions },
    })
  } catch (e) {
    Sentry.captureException(e)
    return placeholder
  }
}

/**
 * Parses a CSS length that is expressed purely in pixels.
 *
 * The whole value must be `<number>px` (surrounding whitespace allowed), so
 * fractional lengths keep their fraction and values that merely contain a
 * pixel length somewhere (e.g. `calc(100% - 20px)`) are rejected rather than
 * misread.
 *
 * @param styleSz - Inline style value such as `element.style.width`.
 * @returns The pixel count, or `null` when the value is empty or not a plain
 *   pixel length.
 */
function parseStylePx(styleSz: string): number | null {
  const match = /^\s*(\d+(?:\.\d+)?|\.\d+)px\s*$/i.exec(styleSz)
  const digits = match?.[1]
  if (digits === undefined) {
    return null
  }
  const pixels = Number.parseFloat(digits)
  return Number.isFinite(pixels) ? pixels : null
}

/**
 * Computes the size an image should be exported at.
 *
 * Uses the element's `width`/`height` when positive and falls back to its
 * natural size otherwise. If either bound is exceeded, both sides are scaled
 * down by the same factor (the smaller of the two ratios), so the aspect
 * ratio is preserved. Images are never scaled up.
 *
 * @param loadedImage - Image whose size properties are read.
 * @param maxWidth - Upper bound for the width, or `null` for no bound.
 * @param maxHeight - Upper bound for the height, or `null` for no bound.
 * @returns The (possibly fractional) target width and height.
 */
function computeImageDimensions(
  loadedImage: HTMLImageElement,
  maxWidth: number | null,
  maxHeight: number | null
): { width: number; height: number } {
  let sourceWidth = loadedImage.width
  if (!(sourceWidth > 0)) {
    sourceWidth = loadedImage.naturalWidth
  }
  let sourceHeight = loadedImage.height
  if (!(sourceHeight > 0)) {
    sourceHeight = loadedImage.naturalHeight
  }

  // Ratio needed to bring `size` under `limit`; 1.0 when no shrinking is needed.
  const shrinkFactor = (size: number, limit: number | null): number => {
    if (limit === null || limit === undefined) {
      return 1.0
    }
    return size > limit ? limit / size : 1.0
  }

  const scale = Math.min(
    shrinkFactor(sourceWidth, maxWidth),
    shrinkFactor(sourceHeight, maxHeight)
  )
  return { width: sourceWidth * scale, height: sourceHeight * scale }
}

/**
 * Loads the image behind an `<img>` element and works out its export size.
 *
 * Pixel bounds are taken from the element's inline style: `width` (falling
 * back to `max-width`) and `height` (falling back to `max-height`).
 *
 * @param htmlImage - Element whose `src` is loaded and whose style is read.
 * @returns The size computed by `computeImageDimensions` once the image has
 *   loaded; rejects if the image fails to load.
 */
async function getImageDimensions(
  htmlImage: HTMLImageElement
): Promise<{ width: number; height: number }> {
  const { style } = htmlImage
  const maxWidthPx = parseStylePx(style.width) ?? parseStylePx(style.maxWidth)
  const maxHeightPx =
    parseStylePx(style.height) ?? parseStylePx(style.maxHeight)

  return await new Promise<{ width: number; height: number }>(
    (resolve, reject) => {
      const probe = new Image()
      probe.onload = () => {
        resolve(computeImageDimensions(probe, maxWidthPx, maxHeightPx))
      }
      probe.onerror = () => {
        reject(new Error(`Failed to load image: ${htmlImage.src}`))
      }
      // Assign the source last so both handlers are in place before loading.
      probe.src = htmlImage.src
    }
  )
}

/**
 * Loads an image (with `crossOrigin = 'anonymous'`), draws it onto an
 * off-screen canvas at the size given by `computeImageDimensions`, and
 * returns the canvas content as a PNG data URL.
 *
 * Anything thrown while drawing or encoding inside the `onload` handler is
 * forwarded to `reject`. An exception escaping an event handler would
 * otherwise leave the promise pending forever and stall the export.
 *
 * @param imageSrc - URL of the image to convert.
 * @param maxWidth - Upper bound for the output width, or `null`.
 * @param maxHeight - Upper bound for the output height, or `null`.
 * @returns The PNG data URL together with the dimensions it was drawn at.
 *   Rejects when the image cannot be loaded, no 2D context is available, or
 *   drawing/encoding throws.
 */
async function convertImageToPng(
  imageSrc: string,
  maxWidth: number | null,
  maxHeight: number | null
): Promise<{ dataUrl: string; width: number; height: number }> {
  return await new Promise((resolve, reject) => {
    const img = new Image()

    img.onload = () => {
      try {
        // The canvas is never attached to the document, so it needs no cleanup.
        const canvas = document.createElement('canvas')
        const ctx = canvas.getContext('2d')
        if (_.isNil(ctx)) {
          reject(new Error('Could not get canvas context'))
          return
        }

        const targetDimensions = computeImageDimensions(
          img,
          maxWidth,
          maxHeight
        )

        canvas.width = targetDimensions.width
        canvas.height = targetDimensions.height

        ctx.drawImage(
          img,
          0,
          0,
          targetDimensions.width,
          targetDimensions.height
        )

        const dataUrl = canvas.toDataURL('image/png')
        resolve({ dataUrl, ...targetDimensions })
      } catch (error) {
        reject(error instanceof Error ? error : new Error(String(error)))
      }
    }

    img.onerror = () => {
      reject(new Error(`Image could not be loaded: ${imageSrc}`))
    }

    img.crossOrigin = 'anonymous'
    img.src = imageSrc
  })
}

/**
 * In-line tags whose only effect is to add run properties to everything
 * nested inside them, keyed by upper-case `nodeName`.
 */
const INLINE_TAG_RUN_OVERRIDES: ReadonlyMap<string, IRunPropertiesOptions> =
  new Map<string, IRunPropertiesOptions>([
    ['SPAN', { highlight: 'yellow' }],
    ['MARK', { highlight: 'yellow' }],
    ['B', { bold: true }],
    ['STRONG', { bold: true }],
    ['I', { italics: true }],
    ['EM', { italics: true }],
    ['CODE', { font: { name: 'Monospace' } }],
    ['SUP', { superScript: true }],
    ['DEL', { strike: true, color: 'EF4444' }],
    ['INS', { color: '0372AA' }],
  ])

/**
 * Recursively converts an in-line DOM node into docx paragraph children.
 *
 * Formatting tags contribute run properties that are merged on top of the
 * inherited `textProperties` and passed down to their children; text nodes
 * are the base case and become a `TextRun`.
 *
 * @param node - Element or text node to convert.
 * @param textProperties - Run properties inherited from ancestor nodes.
 * @returns The resulting runs, or `null` when the node is neither an element
 *   nor a text node, or has only whitespace text to contribute.
 */
const inlineHtmlNodeToTextRuns = async (
  node: Node,
  textProperties: IRunPropertiesOptions
): Promise<ParagraphChild[] | null> => {
  const isElement = node.nodeType === Node.ELEMENT_NODE
  const isText = node.nodeType === Node.TEXT_NODE
  if (!isElement && !isText) {
    console.error('Invalid node', node.nodeName)
    return null
  }

  // Emits the node's text content as one run; whitespace-only content is dropped.
  const textContentAsRun = (): ParagraphChild[] | null => {
    const content = node.textContent
    if (content === null || content.trim().length === 0) {
      return null
    }
    return [new TextRun(_.merge({}, textProperties, { text: content }))]
  }

  const { nodeName } = node

  // this is the base case
  if (nodeName === '#text') {
    return textContentAsRun()
  }

  const overrides = INLINE_TAG_RUN_OVERRIDES.get(nodeName)
  if (overrides !== undefined) {
    const nestedRuns = await Promise.all(
      Array.from(node.childNodes, (child) =>
        inlineHtmlNodeToTextRuns(child, _.merge({}, textProperties, overrides))
      )
    )
    return nestedRuns.filter(isNotNull).flat()
  }

  if (nodeName === 'BR') {
    // TODO: Check certain edge cases with newlines if they are reported
    // The BR handling below could also be accomplished by new Paragraph({}), and it used to be TextRun(_.merge({}, textProperties, { text: '\n' })
    // Unclear if we should handle the leading space that comes before the following line in some model output, or if that is a model problem
    return [new TextRun({ break: 1 })]
  }

  if (nodeName === 'IMG') {
    return [await htmlImgToDocx(node as HTMLImageElement)]
  }

  if (nodeName === 'A') {
    const anchor = node as HTMLAnchorElement
    const linkText = new TextRun(
      _.merge({}, textProperties, {
        text: anchor.textContent ?? anchor.innerText,
        style: 'Hyperlink',
      })
    )
    return [new ExternalHyperlink({ children: [linkText], link: anchor.href })]
  }

  console.warn(
    'Unhandled in-line child tag or invalid block tag beneath in-line',
    nodeName
  )
  // Unknown tags still contribute their text so content is not lost.
  return textContentAsRun()
}

/** Arguments threaded through every recursive `blockHtmlDocxFlatten` call. */
interface BlockHtmlDocxFlattenParams {
  /** DOM node converted at this step of the recursion. */
  node: Node
  /** Paragraph-level options accumulated from ancestor block elements. */
  paragraphProperties: IParagraphPropertiesOptions
  /** Run-level options accumulated from ancestors; handed to the in-line conversion. */
  textProperties: IRunPropertiesOptions
  /** Numbering level used by the next `UL`/`OL`; incremented for lists nested inside it. */
  listLevel: number
}

/**
 * Recursively flattens a block-level DOM node into docx elements.
 *
 * In-line nodes are delegated to `inlineHtmlNodeToTextRuns`. Block nodes
 * adjust the inherited paragraph/run properties according to their tag,
 * recurse into their children, and finally wrap adjacent in-line results into
 * paragraphs via `mergeTextRuns`. Tables are converted separately, cell by
 * cell, through `htmlNodeToDocx`.
 *
 * @param params - Node to convert plus the properties inherited so far.
 * @returns Paragraph children for in-line nodes, file children for block
 *   nodes, or `null` when the node is neither an element nor a text node or
 *   is a table without rows.
 */
const blockHtmlDocxFlatten = async (
  params: BlockHtmlDocxFlattenParams
): Promise<Array<FileChild | ParagraphChild> | null> => {
  const { node } = params
  let { paragraphProperties, textProperties, listLevel } = params
  if (node.nodeType !== Node.ELEMENT_NODE && node.nodeType !== Node.TEXT_NODE) {
    console.error('Invalid node', node.nodeName)
    return null
  }
  if (shouldBeParagraphChild(node)) {
    return await inlineHtmlNodeToTextRuns(node, textProperties)
  }

  // Tag defaults are merged first so that properties inherited from ancestors
  // win whenever both define the same key.
  const beneathInherited = (
    tagDefaults: IParagraphPropertiesOptions
  ): IParagraphPropertiesOptions =>
    _.merge({}, tagDefaults, paragraphProperties)

  // Lists override (rather than merge) the inherited numbering and spacing.
  // NOTE(review): the numbering configs in `createWordDocument` define levels
  // 0-2 only; lists nested deeper reference a level that is not configured —
  // confirm how Word renders those.
  const listProperties = (reference: string): IParagraphPropertiesOptions => ({
    ...paragraphProperties,
    numbering: {
      reference,
      level: listLevel,
    },
    spacing: {
      before: convertMillimetersToTwip(0.5),
      after: convertMillimetersToTwip(0.5),
    },
  })

  // ref: https://www.w3schools.com/html/html_blocks.asp
  switch (node.nodeName) {
    case 'BLOCKQUOTE':
      paragraphProperties = beneathInherited({
        indent: { left: convertInchesToTwip(0.5) },
      })
      break
    case 'PRE':
      textProperties = _.merge(
        {},
        {
          font: { name: 'Monospace' },
        },
        textProperties
      )
      break
    case 'H1':
      paragraphProperties = beneathInherited({
        heading: HeadingLevel.HEADING_1,
      })
      break
    case 'H2':
      paragraphProperties = beneathInherited({
        heading: HeadingLevel.HEADING_2,
      })
      break
    case 'H3':
      paragraphProperties = beneathInherited({
        heading: HeadingLevel.HEADING_3,
      })
      break
    case 'H4':
      paragraphProperties = beneathInherited({
        heading: HeadingLevel.HEADING_4,
      })
      break
    case 'H5':
      paragraphProperties = beneathInherited({
        heading: HeadingLevel.HEADING_5,
      })
      break
    case 'H6':
      paragraphProperties = beneathInherited({
        heading: HeadingLevel.HEADING_6,
      })
      break
    case 'UL':
      paragraphProperties = listProperties('default-bullets')
      listLevel++
      break
    case 'OL':
      paragraphProperties = listProperties('default-numbering')
      listLevel++
      break
    case 'HR':
      // <hr> tags don't have any children, so we emit an empty TextRun inside a
      // paragraph carrying the thematicBreak style in order to display a
      // horizontal rule. Unfortunately, the rule displays after an empty line,
      // but additional paragraph spacing styling can be applied to account for
      // this.
      return mergeTextRuns(
        [new TextRun({ text: '' })],
        beneathInherited({ thematicBreak: true })
      )
    case 'BODY':
    case 'DIV':
    case 'P':
    case 'LI':
      // no special formatting, just pass through inherited properties
      break
    case 'TABLE': {
      const docxRows = await Promise.all(
        Array.from((node as HTMLTableElement).rows).map(async (row) => {
          const docxCells = await Promise.all(
            Array.from(row.cells).map(async (htmlCell) => {
              return new TableCell({
                children: (
                  await Promise.all(
                    Array.from(htmlCell.childNodes).map((childNode) =>
                      htmlNodeToDocx(childNode)
                    )
                  )
                )
                  .filter(isNotNull)
                  .flat(),
                rowSpan: htmlCell.rowSpan,
                columnSpan: htmlCell.colSpan,
                margins: {
                  top: convertMillimetersToTwip(1),
                  bottom: convertMillimetersToTwip(1),
                  left: convertMillimetersToTwip(1),
                  right: convertMillimetersToTwip(1),
                },
              })
            })
          )
          return new TableRow({
            children: docxCells,
          })
        })
      )
      // A <table> without rows has nothing to export; skip it instead of
      // dereferencing a missing first row.
      const firstRow = docxRows[0]
      if (firstRow === undefined) {
        console.warn('Skipping table without rows')
        return null
      }
      const fullWidth = 10000 // just a number that is guaranteed to be larger than the page width
      // Column widths are derived from the first row's cell count.
      const cellCount = firstRow.CellCount
      return [
        new WordTable({
          columnWidths: new Array(cellCount).fill(fullWidth / cellCount),
          rows: docxRows,
          width: {
            size: 100,
            type: WidthType.AUTO,
          },
        }),
      ]
    }
    default:
      console.warn('Unhandled file child tag', node.nodeName)
  }

  const flattenedChildren = (
    await Promise.all(
      Array.from(node.childNodes).map(async (child) => {
        return await blockHtmlDocxFlatten({
          node: child,
          paragraphProperties,
          textProperties,
          listLevel,
        })
      })
    )
  )
    .filter(isNotNull)
    .flat()
  // at this point we know the current node is a Paragraph-type node, so we should merge below ParagraphChild-type nodes to apply parent paragraph styles
  return mergeTextRuns(flattenedChildren, paragraphProperties)
}

/**
 * Groups consecutive paragraph children into `Paragraph`s.
 *
 * Walks the list in order: run-level elements (per `isParagraphChild`) are
 * collected until a block-level element or the end of the list is reached,
 * at which point the collected runs are wrapped in a paragraph carrying
 * `paragraphProperties`. Block-level elements are passed through unchanged.
 *
 * @param docxElements - Mixed list of block-level and run-level elements.
 * @param paragraphProperties - Options applied to every paragraph created here.
 * @returns A list that contains block-level elements only.
 */
const mergeTextRuns = (
  docxElements: Array<FileChild | ParagraphChild>,
  paragraphProperties: IParagraphPropertiesOptions
): FileChild[] => {
  const paragraphs: FileChild[] = []
  let pendingRuns: ParagraphChild[] = []

  // Wraps whatever runs have been collected so far into one paragraph.
  const flushPendingRuns = (): void => {
    if (pendingRuns.length === 0) {
      return
    }
    paragraphs.push(
      new Paragraph(
        _.merge({}, paragraphProperties, { children: pendingRuns })
      )
    )
    pendingRuns = []
  }

  for (const element of docxElements) {
    if (isParagraphChild(element)) {
      pendingRuns.push(element)
      continue
    }
    flushPendingRuns()
    paragraphs.push(element)
  }
  flushPendingRuns()

  return paragraphs
}

/**
 * Entry point of the tree-flattening conversion: turns an HTML DOM tree into
 * a flat list of docx block elements.
 *
 * Styles are inherited downwards through the `paragraphProperties` and
 * `textProperties` that `blockHtmlDocxFlatten` accumulates while recursing.
 * Adjacent in-line nodes (e.g. `<span>`, `<em>`) are combined into a single
 * paragraph by `mergeTextRuns` as the tree collapses back toward the root.
 *
 * @param node - Root node to convert.
 * @param paragraphProperties - Optional paragraph options to start from.
 * @returns The block elements, or `null` when the root could not be converted.
 */
const htmlNodeToDocx = async (
  node: Node,
  paragraphProperties?: IParagraphPropertiesOptions
): Promise<FileChild[] | null> => {
  const initialParagraphProperties: IParagraphPropertiesOptions =
    paragraphProperties ?? {}
  const flattened = await blockHtmlDocxFlatten({
    node,
    paragraphProperties: initialParagraphProperties,
    textProperties: {},
    listLevel: 0,
  })
  // In-line results left over at the root still need a paragraph around them.
  return _.isNil(flattened) ? null : mergeTextRuns(flattened, {})
}

// ************
// "Source" handling Functions below for Document export
// ************

// Shading colour applied behind `<mark>` text in exported source excerpts (hex, no leading '#').
const BACKGROUND_BLUE_HEX_COLOR = 'EFF6FF'
// Text colour applied to `<mark>` text in exported source excerpts (hex, no leading '#').
const TEXT_BLUE_HEX_COLOR = '1D4ED8'

/**
 * Parses `sourceText` as HTML and converts the top-level nodes of the
 * resulting body into docx text runs.
 *
 * Only direct children are inspected: text nodes become plain runs, `<b>` /
 * `<strong>` become bold, `<i>` / `<em>` become italic, and `<mark>` becomes
 * blue text on a shaded background. Any other element contributes its
 * `innerText` as a plain run.
 *
 * @param sourceText - HTML string to convert.
 * @returns Promise resolving to the runs, in document order.
 */
async function processSourceTextAsHTML(
  sourceText: string
): Promise<ParagraphChild[]> {
  const { body } = new DOMParser().parseFromString(
    `<body>${sourceText}</body>`,
    'text/html'
  )

  const runs: ParagraphChild[] = []
  Array.from(body.childNodes).forEach((child) => {
    if (child.nodeType === Node.TEXT_NODE && child.textContent) {
      // Direct text content
      runs.push(new TextRun(child.textContent))
      return
    }
    if (child.nodeType !== Node.ELEMENT_NODE) {
      return
    }

    const element = child as HTMLElement
    const tag = element.tagName
    if (tag === 'B' || tag === 'STRONG') {
      runs.push(new TextRun({ text: element.innerText, bold: true }))
    } else if (tag === 'I' || tag === 'EM') {
      runs.push(new TextRun({ text: element.innerText, italics: true }))
    } else if (tag === 'MARK') {
      runs.push(
        new TextRun({
          text: element.innerText,
          color: TEXT_BLUE_HEX_COLOR,
          shading: {
            type: ShadingType.PERCENT_95,
            color: BACKGROUND_BLUE_HEX_COLOR,
          },
        })
      )
    } else {
      // For elements without special handling, use their innerText
      runs.push(new TextRun(element.innerText))
    }
  })
  return runs
}

/**
 * Truncates a source's text to at most `maxWords` space-separated words,
 * appending an ellipsis (…) when anything was cut. Trailing periods, commas
 * and ellipses are stripped from the kept part so the result does not end in
 * doubled punctuation.
 *
 * Words are delimited by single space characters only; other whitespace is
 * left untouched inside words.
 *
 * @param s - Object whose `text` property is truncated.
 * @param maxWords - Maximum number of words to keep (defaults to 50).
 * @returns The truncated text, the end-trimmed original when it is short
 *   enough, or `''` when `s.text` is null/undefined.
 */
const truncateText = (s: Source, maxWords = 50): string => {
  const text = s.text
  if (text === null || text === undefined) {
    return ''
  }
  const trimmed = text.trimEnd()
  const limit = Math.max(0, Math.floor(maxWords))
  const words = trimmed.split(' ')
  if (words.length <= limit) {
    return trimmed
  }
  const kept = words
    .slice(0, limit)
    .join(' ')
    .replace(/[.…,]+$/, '')
  return `${kept}…`
}

/**
 * Removes opening and closing `<mark>` tags from a text string (HTML or
 * markdown), leaving the text between them in place.
 *
 * Matching is case-insensitive and also covers opening tags that carry
 * attributes (e.g. `<mark class="hit">`). Tags that merely start with
 * "mark" (such as `<marker>`) are left alone.
 *
 * @param text The text string to remove the tags from.
 * @returns The text without `<mark>` tags; empty or non-string input is
 *   returned unchanged.
 */
export function removeMarkTag(text: string): string {
  if (typeof text !== 'string' || text.length === 0) {
    return text
  }
  // `<mark>`, `</mark>`, or `<mark ...attributes>`
  const matchMarkTags = /<\/?mark(?:\s[^>]*)?>/gi
  return text.replace(matchMarkTags, '')
}

/**
 * Converts one Source into a numbered docx Paragraph.
 *
 * - Case 1: the source has a document URL. The paragraph is a hyperlink
 *   labelled with the document name. For research-artifact URLs with exactly
 *   three metadata entries a court citation is looked up and appended; if
 *   that lookup fails the error is reported to Sentry and the hyperlink is
 *   emitted without a citation rather than failing the whole export.
 * - Case 2: no document URL. The truncated source text is parsed as HTML and
 *   followed by "(document name, Page n)".
 * - Case 3: no document URL and the parsed text is not a short list of plain
 *   text runs. The raw truncated text (with `<mark>` tags stripped) is used.
 *
 * @param s - Source object.
 * @param reference - Numbering reference for the paragraph; defaults to
 *   `'references-numbering-0'`.
 * @returns Promise resolving to a Paragraph object.
 */
const convertOneSourceToParagraph = async (
  s: Source,
  reference?: string
): Promise<Paragraph> => {
  // NOTE(review): the original comment and error text said 4 while the check
  // enforced 3; the code's limit is kept — confirm the intended value.
  const maxSourceTextRuns = 3
  const truncatedText = truncateText(s)
  let children: ParagraphChild[]
  if (!_.isEmpty(s.documentUrl)) {
    // Case 1: Source has a document URL
    let documentUrl = s.documentUrl
    let citation = ''
    if (s.documentUrl.startsWith(RESEARCH_ARTIFACT_PATH)) {
      documentUrl = `${window.location.origin}${s.documentUrl}`
      if (s.metadata?.length === 3) {
        try {
          // Metadata is expected to be [Court Name, Date, Reporter]
          const { courtCitation } = await fetchCourtCitation(s.metadata[0])
          const year = getCaseDateYear(s.metadata[1])
          const reporter = s.metadata[2]
          if (!isNil(courtCitation) && year && reporter) {
            citation = `${reporter} (${courtCitation}${
              courtCitation ? ' ' : ''
            }${year})`
          }
        } catch (e) {
          // The citation is a nice-to-have; keep the reference without it.
          Sentry.captureException(e)
        }
      }
    }
    children = [
      new ExternalHyperlink({
        children: [
          new TextRun({
            text: `${s.documentName}`,
            style: 'Hyperlink',
          }),
        ],
        link: documentUrl,
      }),
      ...(citation ? [new TextRun({ text: `, ${citation}` })] : []),
    ]
  } else {
    try {
      // Case 2: Source has no document URL, Process the source text as HTML
      const sourceTextRuns = await processSourceTextAsHTML(truncatedText)

      // Require plain TextRun instances only, and no more than the limit.
      if (
        !sourceTextRuns.every((child) => child instanceof TextRun) ||
        sourceTextRuns.length > maxSourceTextRuns
      ) {
        throw new Error(
          `Children must be an array of TextRun instances with length no greater than ${maxSourceTextRuns}`
        )
      }
      children = [
        ...sourceTextRuns,
        new TextRun({
          text: ` (${s.documentName}, Page ${(s.page + 1).toString()})`,
        }),
      ]
    } catch (e) {
      // Case 3: Source has no document URL, and the Source text is badly behaved HTML.  Just use the raw string, after stripping out <mark> elements.
      children = [
        new TextRun({
          text: `${removeMarkTag(truncatedText)}  (${s.documentName}, Page ${(
            s.page + 1
          ).toString()})`,
        }),
      ]
    }
  }
  return new Paragraph({
    children: children,
    numbering: {
      reference: reference || 'references-numbering-0',
      level: 0,
    },
  })
}

/**
 * Builds the "References" part of the document: a spacer paragraph, a
 * level-2 "References" heading, and one numbered paragraph per source.
 *
 * @param sources - Array of source objects or null.
 * @param options - (Optional) Paragraph options spread over the heading's
 *   defaults, so they can override its text, level or spacing.
 * @param reference - (Optional) Numbering reference forwarded to every
 *   source paragraph.
 * @returns Promise with the FileChild list, or null when there are no sources.
 */
const sourcesToDocxParagraphs = async (
  sources: Source[] | null = null,
  options?: IParagraphPropertiesOptions,
  reference?: string
): Promise<FileChild[] | null> => {
  if (_.isNil(sources) || sources.length === 0) {
    return null
  }

  const pendingParagraphs = sources.map((source) =>
    convertOneSourceToParagraph(source, reference)
  )
  const sourceParagraphs: Paragraph[] = await Promise.all(pendingParagraphs)

  const spacer = new Paragraph('\n')
  const heading = new Paragraph({
    text: 'References',
    heading: HeadingLevel.HEADING_2,
    spacing: {
      before: convertMillimetersToTwip(2),
      after: convertMillimetersToTwip(1),
    },
    ...options,
  })
  return [spacer, heading, ...sourceParagraphs]
}

// ************
// end of "source" handling functions
// ************

/**
 * Creates a WordDocument from FileChild objects, setting default styles,
 * numbering definitions and a single continuous section.
 *
 * Three kinds of numbering are registered: `default-numbering` (ordered
 * lists, levels 0-2), `default-bullets` (bullet lists, levels 0-2) and one
 * `references-numbering-<i>` per sources section so each reference list can
 * be numbered on its own.
 *
 * Heading 4-6 spacing is registered under the `heading4`-`heading6` default
 * style keys with a `paragraph` wrapper, matching the shape used for
 * headings 1-3.
 *
 * @param sections - Array of FileChild objects placed in the document body.
 * @param author - [optional] Author name, written as creator and last modifier.
 * @param numberOfSourcesSections - [optional] How many
 *   `references-numbering-<i>` definitions to register (defaults to 1).
 * @param defaultStyleOverrides - [optional] Styles deep-merged over the
 *   built-in defaults.
 * @returns WordDocument object.
 */
const createWordDocument = (
  sections: FileChild[],
  author: string | null = null,
  numberOfSourcesSections: number = 1,
  defaultStyleOverrides: Record<string, IBaseParagraphStyleOptions> = {}
  // eslint-disable-next-line max-params
) => {
  const mm = convertMillimetersToTwip

  // Paragraph spacing, given in millimetres.
  const spacing = (beforeMm: number, afterMm: number) => ({
    spacing: { before: mm(beforeMm), after: mm(afterMm) },
  })
  // Headings 1-3: explicit size (docx run sizes are in half-points) and colour.
  const majorHeading = (pointSize: number) => ({
    run: { size: pointSize * 2, color: '374151' },
    paragraph: spacing(1, 1),
  })
  // Headings 4-6: spacing only.
  const minorHeading = () => ({ paragraph: spacing(1, 0.75) })
  // List indentation: `leftMm` from the margin with a 5 mm hanging indent.
  const hangingIndent = (leftMm: number) => ({
    indent: { left: mm(leftMm), hanging: mm(5) },
  })

  const referencesNumbering = Array.from(
    { length: numberOfSourcesSections },
    (_unused, i) => `references-numbering-${i}`
  )

  return new WordDocument({
    creator: author || undefined, // will be set as Un-named in the document metadata if undefined
    lastModifiedBy: author || undefined,
    styles: {
      default: _.merge(
        {},
        {
          document: {
            run: {
              size: 10 * 2,
              color: '000000',
            },
          },
          heading1: majorHeading(16),
          heading2: majorHeading(13),
          heading3: majorHeading(12),
          heading4: minorHeading(),
          heading5: minorHeading(),
          heading6: minorHeading(),
        },
        defaultStyleOverrides
      ),
    },
    numbering: {
      config: [
        {
          reference: 'default-numbering',
          levels: [
            {
              level: 0,
              format: LevelFormat.DECIMAL,
              text: '%1.',
              alignment: AlignmentType.START,
              style: { paragraph: hangingIndent(5) },
            },
            {
              level: 1,
              format: LevelFormat.LOWER_LETTER,
              text: '%2.',
              alignment: AlignmentType.START,
              style: { paragraph: hangingIndent(10) },
            },
            {
              level: 2,
              format: LevelFormat.LOWER_ROMAN,
              text: '%3.',
              alignment: AlignmentType.START,
              style: { paragraph: hangingIndent(15) },
            },
          ],
        },
        {
          reference: 'default-bullets',
          levels: [
            {
              level: 0,
              format: LevelFormat.BULLET,
              text: '\u2022',
              alignment: AlignmentType.LEFT,
              style: { paragraph: hangingIndent(5) },
            },
            {
              level: 1,
              format: LevelFormat.BULLET,
              text: '\u25E6',
              alignment: AlignmentType.LEFT,
              style: { paragraph: hangingIndent(10) },
            },
            {
              level: 2,
              format: LevelFormat.BULLET,
              text: '\u25AA',
              alignment: AlignmentType.LEFT,
              style: { paragraph: hangingIndent(15) },
            },
          ],
        },
        ...referencesNumbering.map((reference) => ({
          reference,
          levels: [
            {
              level: 0,
              format: LevelFormat.DECIMAL,
              text: '%1.',
              alignment: AlignmentType.START,
              style: {
                paragraph: {
                  ...hangingIndent(5),
                  ...spacing(1, 1),
                },
              },
            },
          ],
        })),
      ],
    },
    sections: [
      {
        properties: { type: SectionType.CONTINUOUS },
        children: sections,
      },
    ],
  })
}

/** Kinds of content a `WordSection` can carry. */
export enum WordSectionType {
  /** Non-HTML text; this is what `getDocxContentType` reports for content not starting with `<`. */
  MARKDOWN = 'markdown',
  /** An HTML string, converted through the HTML-to-docx tree flattening. */
  HTML = 'html',
  /** An array of `Source` objects, rendered as a "References" list. */
  SOURCES = 'sources',
}

/** One piece of content handed to `sectionsToDocx`. */
export type WordSection = {
  /** An HTML/markdown string, or the sources array when `type` is `SOURCES`. */
  content: string | Source[]
  /** Tells `sectionsToDocx` how to interpret `content`. */
  type: WordSectionType
  /** Optional paragraph options forwarded to the section's conversion. */
  options?: IParagraphPropertiesOptions
}

/** Arguments accepted by `sectionsToDocx`. */
interface SectionsToDocxParams {
  /** Document title; shown in the leading heading when `addTitleToSections` is on. */
  title: string
  /** Content sections, converted in order. */
  sections: WordSection[]
  /** Whether to prepend a level-1 heading built from `title` (defaults to `true`). */
  addTitleToSections?: boolean
  /** Author recorded in the document metadata (defaults to `'Harvey'`). */
  author?: string | null
  /** Styles merged over the document's built-in default styles. */
  defaultStyleOverrides?: Record<string, IBaseParagraphStyleOptions>
}

/**
 * Converts content sections into a WordDocument, mixing HTML and sources.
 *
 * HTML sections get a `<br/>` inserted between adjacent paragraphs and are
 * converted through `htmlNodeToDocx`. Sources sections become "References"
 * lists; when there is more than one, each list gets its own numbering
 * reference so it numbers independently. Sections of any other type
 * (currently markdown) are not converted — a warning is logged and the
 * section is skipped.
 *
 * @param title - Document title.
 * @param sections - Array of section objects with content and type.
 * @param addTitleToSections - Whether to add a title to the document.
 * @param author - [optional] Author name, defaults to Harvey
 * @param defaultStyleOverrides - [optional] Styles merged over the defaults.
 * @returns Promise with WordDocument.
 */
export const sectionsToDocx = async ({
  title,
  sections,
  addTitleToSections = true,
  author = 'Harvey',
  defaultStyleOverrides,
}: SectionsToDocxParams): Promise<WordDocument> => {
  const formattedSections: FileChild[] = []

  const numberOfSourcesSections = sections.filter(
    (section) => section.type === WordSectionType.SOURCES
  ).length
  // Index of the next sources section; selects its numbering reference.
  let sourcesSectionIndex = 0
  for (const section of sections) {
    const { type, options } = section
    switch (type) {
      case WordSectionType.HTML: {
        const content = section.content as string
        const cleanedHtml = content.replace(/<\/p>\s*<p>/g, '</p><br/><p>')
        const htmlAnswer = new DOMParser().parseFromString(
          cleanedHtml,
          'text/html'
        )
        formattedSections.push(
          ...((await htmlNodeToDocx(htmlAnswer.body, options)) ?? [])
        )
        break
      }
      case WordSectionType.SOURCES: {
        const sources = section.content as Source[]
        formattedSections.push(
          ...((await sourcesToDocxParagraphs(
            sources,
            options,
            numberOfSourcesSections > 1
              ? `references-numbering-${sourcesSectionIndex}`
              : undefined
          )) ?? [])
        )
        sourcesSectionIndex++
        break
      }
      default:
        // Make dropped content visible instead of omitting it silently.
        console.warn('Unsupported section type, section skipped:', type)
    }
  }

  return createWordDocument(
    [
      ...(addTitleToSections
        ? [
            new Paragraph({
              text: `Harvey – ${title}`,
              heading: HeadingLevel.HEADING_1,
              spacing: {
                before: convertMillimetersToTwip(1),
                after: convertMillimetersToTwip(3),
              },
            }),
          ]
        : []),
      ...formattedSections,
    ],
    author,
    numberOfSourcesSections,
    defaultStyleOverrides
  )
}

/**
 * Arguments accepted by `htmlToDocx`.
 *
 * `injectTitle` and `sources` are optional: `htmlToDocx` already defaults
 * both to `null` when they are omitted.
 */
type HtmlToDocxParams = {
  /** HTML content to convert. */
  html: string
  /** Title for the leading heading; `null` or omitted means no heading. */
  injectTitle?: string | null
  /** Sources rendered as a "References" list; `null` or omitted means none. */
  sources?: Source[] | null
  /** Author recorded in the document metadata. */
  author?: string | null
}

/**
 * Converts an HTML string plus optional sources into a WordDocument.
 *
 * The body is laid out as: optional level-1 title heading, the converted
 * HTML, then the "References" list built from `sources`.
 *
 * @param html - HTML content.
 * @param injectTitle - Optional document title; no heading when `null`.
 * @param sources - Optional sources array.
 * @param author - Optional author name, defaults to Harvey
 * @returns Promise with WordDocument.
 */
export const htmlToDocx = async ({
  html,
  injectTitle = null,
  sources = null,
  author = 'Harvey',
}: HtmlToDocxParams): Promise<WordDocument> => {
  // insert <br> between adjacent paragraphs
  const spacedHtml = html.replace(/<\/p>\s*<p>/g, '</p><br/><p>')
  const parsed = new DOMParser().parseFromString(spacedHtml, 'text/html')

  const answer: FileChild[] = (await htmlNodeToDocx(parsed.body)) ?? []
  const references: FileChild[] = (await sourcesToDocxParagraphs(sources)) ?? []

  const documentChildren: FileChild[] = []
  if (injectTitle !== null) {
    documentChildren.push(
      new Paragraph({
        text: `Harvey – ${injectTitle}`,
        heading: HeadingLevel.HEADING_1,
        spacing: {
          before: convertMillimetersToTwip(1),
          after: convertMillimetersToTwip(3),
        },
      })
    )
  }
  documentChildren.push(...answer, ...references)

  return createWordDocument(documentChildren, author)
}

/**
 * Guesses whether a content string is HTML or markdown.
 *
 * Content whose first non-whitespace character is `<` is treated as HTML;
 * everything else is treated as markdown. Leading whitespace is ignored so
 * indented or newline-prefixed HTML is still recognised.
 *
 * @param content - Content string to classify.
 * @returns `WordSectionType.HTML` or `WordSectionType.MARKDOWN`.
 */
export const getDocxContentType = (content: string): WordSectionType => {
  if (content.trimStart().startsWith('<')) {
    return WordSectionType.HTML
  }
  return WordSectionType.MARKDOWN
}
