import { parse } from 'date-fns';

/**
 * Tells whether `date` is a `Date` instance whose internal timestamp is a
 * real number (i.e. not the "Invalid Date" sentinel).
 *
 * @param date - Value to inspect.
 * @returns `true` for a `Date` with a non-NaN timestamp, `false` otherwise.
 */
function isValidDate(date: Date): boolean {
  // Guard first: anything that is not a Date can never be a valid date.
  if (!(date instanceof Date)) {
    return false;
  }

  const timestamp = date.getTime();
  return !isNaN(timestamp);
}

// Candidate format patterns used to recognise date strings.
// Order is significant to findAllDateColumns, which walks this list front to back.
const dateFormats: string[] = [
  'yyyy-MM-dd', // e.g. 2023-10-25
  'MM/dd/yyyy', // e.g. 10/25/2023
  'dd/MM/yyyy', // e.g. 25/10/2023
  'MMM dd, yyyy', // e.g. Oct 25, 2023
  'dd MMM yyyy', // e.g. 25 Oct 2023
  'yyyy/MM/dd', // e.g. 2023/10/25
  'dd.MM.yyyy', // e.g. 25.10.2023
  'MM.dd.yyyy', // e.g. 10.25.2023
  'yyyyMMdd', // e.g. 20231025
  // Append further patterns here when new input shapes need to be supported.
];

  /** Describes one column that findAllDateColumns classified as a date column. */
  interface DateColumnInfo {
    /** Zero-based position of the column within each row. */
    columnIndex: number;
    /** Number of cells that parsed as a date, divided by the total number of rows. */
    successRate: number;
    /** The format pattern (taken from the candidate list) selected for the column. */
    dateFormat: string;
  }
  /** Distinct-value count for a single column, as returned by findIdentifierColumns. */
  interface ValueFrequencyColumnInfo {
    /** Zero-based position of the column within each row. */
    columnIndex: number;
    /**
     * Number of distinct values collected for the column.
     * NOTE: the name is a misspelling of "frequency"; it is part of the
     * returned object shape, so renaming it would affect callers.
     */
    frequenceny: number;
  }

  /**
   * Scans a row-major table and reports every column whose cells mostly look
   * like dates.
   *
   * A cell counts as a date when it is a non-blank string that at least one
   * pattern in `dateFormats` parses into a valid `Date`. The success rate of a
   * column is the number of such cells divided by the total number of rows
   * (blank and non-string cells therefore lower the rate).
   *
   * The reported `dateFormat` is the pattern that parses the most cells of the
   * column. Every pattern is tried against every cell, so an ambiguous value
   * such as `01/02/2023` does not lock the column to whichever pattern happens
   * to come first; when several patterns parse the same number of cells, the
   * one listed earliest in `dateFormats` wins.
   *
   * @param data - Table as an array of rows; the width of the first row
   *   determines how many columns are inspected.
   * @param threshold - Minimum success rate (inclusive) for a column to be
   *   reported. Defaults to 0.8.
   * @returns One entry per qualifying column, in ascending column order.
   *   Columns without a single parseable cell are never reported, whatever
   *   the threshold.
   */
  export function findAllDateColumns(
    data: any[][],
    threshold: number = 0.8
  ): DateColumnInfo[] {
    const dateColumns: DateColumnInfo[] = [];
    if (data.length === 0) return dateColumns;

    const numberOfColumns = data[0].length;
    const totalRows = data.length;
    // Only the validity of each parse result is used, so a single reference
    // date per call is sufficient.
    const referenceDate = new Date();

    for (let c = 0; c < numberOfColumns; c++) {
      let dateParseSuccessCount = 0;
      const matchesPerFormat = new Map<string, number>();

      for (const row of data) {
        const value = row[c];
        if (typeof value !== 'string' || !value.trim()) {
          continue;
        }

        let parsedByAnyFormat = false;
        for (const format of dateFormats) {
          if (isValidDate(parse(value, format, referenceDate))) {
            parsedByAnyFormat = true;
            matchesPerFormat.set(format, (matchesPerFormat.get(format) || 0) + 1);
          }
        }

        if (parsedByAnyFormat) {
          dateParseSuccessCount++;
        }
      }

      const parseSuccessRate = dateParseSuccessCount / totalRows;

      // The explicit "> 0" guard keeps a threshold of 0 (or below) from
      // reporting columns for which no format information exists at all.
      if (dateParseSuccessCount > 0 && parseSuccessRate >= threshold) {
        let bestFormat = '';
        let bestCount = 0;
        // Walking dateFormats in order with a strict ">" makes the earliest
        // pattern win ties, which keeps the result deterministic.
        for (const format of dateFormats) {
          const count = matchesPerFormat.get(format) || 0;
          if (count > bestCount) {
            bestFormat = format;
            bestCount = count;
          }
        }

        dateColumns.push({
          columnIndex: c,
          successRate: parseSuccessRate,
          dateFormat: bestFormat,
        });
      }
    }

    return dateColumns;
  }
  /**
   * Counts, per column, the distinct values that occur in the leading run of
   * rows sharing the first row's date.
   *
   * The table is expected to be sorted by the date column. Rows are read from
   * the top until the value in the date column differs (strict equality) from
   * the value in the first row. Within that run, a column whose distinct-value
   * count equals the number of rows in the run is a candidate identifier
   * column; this function returns the raw counts and leaves that decision to
   * the caller.
   *
   * @param data - Table as an array of rows; the width of the first row
   *   determines how many columns are reported. Cells beyond that width in
   *   longer rows are ignored.
   * @param dateColumns - Detected date columns; only the first entry is used
   *   to locate the date column.
   * @returns One entry per column of the first row, in column order. Returns
   *   an empty array when `data` or `dateColumns` is empty.
   */
  export default function findIdentifierColumns(
    data: any[][],
    dateColumns: DateColumnInfo[]
  ): ValueFrequencyColumnInfo[] {
    if (data.length === 0 || dateColumns.length === 0) return [];

    const dateColumnIndex = dateColumns[0].columnIndex;
    const firstDate = data[0][dateColumnIndex];
    const columnCount = data[0].length;

    // One set of seen values per column of the first row.
    const distinctValues: Set<unknown>[] = Array.from(
      { length: columnCount },
      () => new Set<unknown>()
    );

    for (const row of data) {
      // Data is sorted by date, so the first differing date ends the run.
      if (row[dateColumnIndex] !== firstDate) break;

      // Clamp to the known width so a ragged (longer) row cannot index past
      // the prepared sets.
      const width = Math.min(row.length, columnCount);
      for (let j = 0; j < width; j++) {
        distinctValues[j].add(row[j]);
      }
    }

    return distinctValues.map((values, index) => ({
      columnIndex: index,
      frequenceny: values.size,
    }));
  }
  