import levenshtein from 'fast-levenshtein'

import { validateTriviaListItems } from './validate-list'

/**
 * A trivia entry: a question/answer pair together with a numeric identifier.
 */
type Row = {
  /** Answer text as entered; it is normalised before any comparison in this module. */
  answer: string
  /** Numeric identifier of the row. */
  id: number
  /** Question text as entered; it is normalised before any comparison in this module. */
  question: string
}

/** A row that has not been given an `id` yet (a `Row` without its `id` field). */
type InputRow = Omit<Row, 'id'>

/**
 * Reduces `text` to a canonical form suitable for loose comparison.
 *
 * Steps, in order: strip Unicode punctuation and symbol characters, collapse
 * each run of whitespace into a single space, trim both ends, and lower-case.
 *
 * @param text - Raw text to normalise.
 * @returns The normalised text; an empty string when nothing but punctuation,
 *   symbols, or whitespace was supplied.
 */
export function normalizeString(text: string): string {
  // \p{P} = punctuation, \p{S} = symbols; whole runs are deleted outright
  const withoutPunctuation = text.replace(/[\p{P}\p{S}]+/gu, '')

  // Any whitespace run (spaces, tabs, newlines) becomes exactly one space
  const singleSpaced = withoutPunctuation.replace(/\s+/g, ' ')

  return singleSpaced.trim().toLowerCase()
}

/**
 * Builds the comparison keys for a row.
 *
 * Despite the name, the "hashes" are not digests: each one is simply the
 * `normalizeString` form of the corresponding field.
 *
 * @param row - Row whose answer and question should be keyed.
 * @returns The normalised answer as `answerHash` and the normalised question
 *   as `questionHash`.
 */
export function generateHashes(row: Row): { answerHash: string; questionHash: string } {
  const { answer, question } = row
  const answerHash = normalizeString(answer)
  const questionHash = normalizeString(question)

  return { answerHash, questionHash }
}

/**
 * Reports whether two texts, once normalised, are at least `threshold` similar.
 *
 * Similarity is `1 - levenshteinDistance / lengthOfLongerText`, so 1 means the
 * normalised texts are identical and 0 means they share nothing positionally.
 */
function meetsSimilarityThreshold(text1: string, text2: string, threshold: number): boolean {
  // Normalise each input exactly once and reuse it for both distance and length
  const normalized1 = normalizeString(text1)
  const normalized2 = normalizeString(text2)
  const longestLength = Math.max(normalized1.length, normalized2.length)

  // Both texts are empty after normalisation: the ratio would be 0/0 (NaN).
  // Two blank texts are not treated as evidence of similarity.
  if (longestLength === 0) {
    return false
  }

  return 1 - levenshtein.get(normalized1, normalized2) / longestLength >= threshold
}

/**
 * Decides whether two rows look like near-duplicates of each other.
 *
 * The questions and the answers are compared independently using a normalised
 * Levenshtein similarity ratio (computed on `normalizeString` output). The rows
 * are considered similar when EITHER the question ratio reaches
 * `thresholds.question` OR the answer ratio reaches `thresholds.answer`.
 *
 * A field that is empty in both rows after normalisation never counts as a
 * match on its own.
 *
 * @param row1 - First row to compare.
 * @param row2 - Second row to compare.
 * @param options - Similarity thresholds in the range 0..1; defaults to 0.75
 *   for answers and 0.93 for questions.
 * @returns `true` when the question or the answer meets its threshold.
 */
export function isSimilar(
  row1: Row,
  row2: Row,
  options = { thresholds: { answer: 0.75, question: 0.93 } }
): boolean {
  const { thresholds } = options

  return (
    meetsSimilarityThreshold(row1.question, row2.question, thresholds.question) ||
    meetsSimilarityThreshold(row1.answer, row2.answer, thresholds.answer)
  )
}

/**
 * Prepares uploaded rows for import alongside rows that already exist.
 *
 * Each uploaded row is given a fresh id that continues after the highest
 * existing id (starting at 1 when there are no existing rows). The rows are
 * then passed to `validateTriviaListItems` together with the existing rows,
 * and the validated items are split according to their validation flags.
 *
 * The three result lists are filtered independently, so an item flagged as
 * similar may also appear in `deduplicatedRows` or `duplicates`.
 *
 * @param existingRows - Rows already present; used for id allocation and
 *   passed to the validator.
 * @param uploadedRows - Newly uploaded rows without ids.
 * @returns `deduplicatedRows` (items that are neither duplicate nor blank),
 *   `duplicates` (items flagged `isDuplicate`), and `similar` (items flagged
 *   `isSimilar`).
 */
export function processImportedRows(existingRows: Row[], uploadedRows: InputRow[]) {
  // Fold rather than `Math.max(...ids)`: spreading a very large array into call
  // arguments can exceed the engine's argument limit and throw a RangeError.
  const maxExistingId = existingRows.reduce((max, row) => Math.max(max, row.id ?? 0), 0)

  // Assign unique IDs to the uploaded rows
  const uploadedRowsWithIds = uploadedRows.map((row, index) => ({
    ...row,
    id: maxExistingId + index + 1
  }))

  const validatedTriviaList = validateTriviaListItems(uploadedRowsWithIds, existingRows)

  // Rows safe to import: not an exact duplicate and not blank
  const deduplicatedRows = validatedTriviaList.items.filter(
    item => !item.validation.isDuplicate && !item.validation.isBlank
  )

  const duplicates = validatedTriviaList.items.filter(item => item.validation.isDuplicate)

  const similar = validatedTriviaList.items.filter(item => item.validation.isSimilar)

  return { deduplicatedRows, duplicates, similar }
}

/** Shape of the object returned by `processImportedRows` (`deduplicatedRows`, `duplicates`, `similar`). */
export type ProcessedRows = ReturnType<typeof processImportedRows>
