import { ModeledData } from '../../domain/ModeledData';
import { Matrix } from 'ml-matrix';
import { kmeans } from 'ml-kmeans';
import { hierarchicalClustering, findOptimalClusters } from './algorithms/hierarchicalClustering';
import { GMM } from './algorithms/gmm';
import { FEATURE_NAMES, CLUSTERING_CONFIG } from './constants';
import { ClusteringMethod, ClusteringOptions } from './types';
import { normalize, standardize } from './utils/mathUtils';
import { 
  extractDateFeatures, 
  extractProfileFeatures, 
  extractTemperatureFeatures 
} from './featureExtraction';

/**
 * Build one numeric feature vector per entry of `rawData`.
 *
 * Date, profile and temperature features are extracted for the whole data set,
 * several of them are passed through `normalize` / `standardize`, and the
 * columns named in `selectedFeatures` are concatenated row by row in the order
 * the names are given. Names that are not known features, or whose column is
 * missing, are skipped. A column whose per-row entry is itself an array
 * contributes all of that entry's elements to the row.
 *
 * @param rawData Array of ModeledData objects
 * @param selectedFeatures Feature names to include (defaults to every key of FEATURE_NAMES)
 * @returns One feature vector per element of `rawData`
 */
function processFeatures(
  rawData: ModeledData[],
  selectedFeatures: string[] = Object.keys(FEATURE_NAMES)
): number[][] {
  // Extract the raw feature groups once for the whole data set
  const dateFeatures = extractDateFeatures(rawData);
  const profileFeatures = extractProfileFeatures(rawData);
  const temperatureFeatures = extractTemperatureFeatures(rawData);

  // Position of each reading within the input, rescaled by `normalize`
  const readingIndex = normalize(rawData.map((_, i) => i));

  // Feature name -> column (one entry per row of rawData)
  const featureMap: Record<string, number[] | number[][]> = {
    MAX_INCLINE: normalize(profileFeatures.maxIncline),
    MAX_DECLINE: normalize(profileFeatures.maxDecline),
    MAX_INDEX: normalize(profileFeatures.maxIndices),
    MIN_INDEX: normalize(profileFeatures.minIndices),
    MAX_INCLINE_INDEX: normalize(profileFeatures.maxInclineIndices),
    MAX_DECLINE_INDEX: normalize(profileFeatures.maxDeclineIndices),
    SHARPNESS: profileFeatures.sharpness,
    NORMALIZED_RATE: normalize(profileFeatures.rates),
    MID_RANGE: profileFeatures.midRange,
    START_STOP_DIFF: profileFeatures.startStopDiff,
    CLUSTER_ERROR: profileFeatures.clusterError,
    POWER_HIGH_CLUSTER: profileFeatures.powerHighCluster,
    RATES: standardize(profileFeatures.rates),
    READING_INDEX: readingIndex,
    MODE_CHANGES: normalize(profileFeatures.modeChanges),
    ON_OFF_DIFF: profileFeatures.onOffDiff,
    MEAN_RATES: profileFeatures.meanRates,
    // NOTE(review): STD_RATES reuses the meanRates column, which looks like a
    // copy-paste slip. Kept as-is because the correct source property is not
    // visible from this file - confirm against extractProfileFeatures.
    STD_RATES: profileFeatures.meanRates,
    MIN_MAX_DIFF: profileFeatures.minMaxDiff,
    PEAK_TO_MEAN: profileFeatures.peakToMeanRatio,
    ISO_DOW: normalize(dateFeatures.isoDow),
    CALENDAR_WEEK: dateFeatures.calendarWeek,
    OAT: standardize(temperatureFeatures.oat),
    SYMMETRY: normalize(profileFeatures.symmetry),
    SEASON: normalize(dateFeatures.season),
    YEAR: dateFeatures.year,
    MONTH: dateFeatures.month,
    IS_WORKING_DAY: dateFeatures.isWorkingDay,
    IS_SATURDAY: dateFeatures.isSaturday,
    IS_SUNDAY: dateFeatures.isSunday,
    DAYLIGHT_PERCENTAGE: profileFeatures.daylightPercentage
  };

  // Resolve the requested columns once instead of once per row. Only own
  // properties count, so a name such as "constructor" cannot resolve to an
  // inherited Object.prototype member.
  const selectedColumns: Array<number[] | number[][]> = [];
  for (const name of selectedFeatures) {
    if (!Object.prototype.hasOwnProperty.call(featureMap, name)) {
      continue;
    }
    const column = featureMap[name];
    // The extractor may not have produced this column at all
    if (column) {
      selectedColumns.push(column);
    }
  }

  // Assemble one vector per input row, preserving the selection order
  return rawData.map((_, rowIndex) => {
    const row: number[] = [];

    for (const column of selectedColumns) {
      const entry = column[rowIndex];
      if (Array.isArray(entry)) {
        // Multi-valued feature: flatten it into the row
        row.push(...entry);
      } else {
        // Scalar feature; a column shorter than rawData yields undefined here
        row.push(entry as number);
      }
    }

    return row;
  });
}

/**
 * Check that a set of feature vectors can be handed to a clustering routine.
 *
 * Fails (after logging an error) when there are no vectors at all or when any
 * vector is empty. NaN and +/-Infinity entries do not fail validation: a
 * warning is logged and they are overwritten with 0 in place, so the caller's
 * arrays are mutated.
 *
 * @param features Array of feature vectors (sanitised in place)
 * @returns true if the features can be clustered, false otherwise
 */
function validateFeatures(features: number[][]): boolean {
  if (features.length === 0) {
    console.error("No features to cluster");
    return false;
  }

  // Every vector must carry at least one value
  const everyVectorPopulated = features.every(vector => vector.length > 0);
  if (!everyVectorPopulated) {
    console.error("Some feature arrays are empty");
    return false;
  }

  const isUsable = (value: number): boolean => !isNaN(value) && isFinite(value);

  // Fast path: nothing to repair
  if (features.every(vector => vector.every(value => isUsable(value)))) {
    return true;
  }

  console.warn("Features contain NaN or Infinity values - replacing with 0");
  features.forEach(vector => {
    // entries() walks every index, so each slot of the vector is inspected
    for (const [position, value] of vector.entries()) {
      if (!isUsable(value)) {
        vector[position] = 0;
      }
    }
  });

  return true;
}

/**
 * Relabel cluster assignments so that labels reflect cluster size.
 *
 * The most frequent original label becomes 1, the next most frequent 2, and
 * so on. The input array is not modified.
 *
 * @param clusters Array of cluster assignments
 * @returns New array of 1-based, frequency-ranked cluster assignments
 */
function applyFrequencyOrdering(clusters: number[]): number[] {
  // Tally how many members each original label has
  const tally: Record<string, number> = {};
  for (const label of clusters) {
    tally[label] = (tally[label] || 0) + 1;
  }

  // Original labels, largest cluster first
  const rankedLabels = Object.keys(tally).sort(
    (left, right) => tally[right] - tally[left]
  );

  // Original label -> 1-based rank
  const rankOf: Record<string, number> = {};
  rankedLabels.forEach((labelKey, position) => {
    rankOf[parseInt(labelKey)] = position + 1;
  });

  return clusters.map(label => rankOf[label]);
}

/**
 * Cluster a set of profiles and tag each one with its cluster as `mode`.
 *
 * Feature vectors are built and validated, the cluster count is auto-detected
 * when `options.numClusters` is zero or negative, and the selected algorithm
 * is run. If k-means or GMM throws, hierarchical clustering is used instead.
 * The resulting labels are reordered by cluster frequency before being
 * attached to copies of the input profiles.
 *
 * The input is returned as-is when it is undefined or empty, when feature
 * validation fails, or when any step throws (the error is logged).
 *
 * @param rawData Array of ModeledData objects to cluster
 * @param options Clustering options
 * @returns Array of ModeledData with cluster assignments as 'mode' property
 */
export function clusterData(
  rawData: ModeledData[] | undefined,
  options: ClusteringOptions = {
    numClusters: -1,
    selectedFeatures: Object.keys(FEATURE_NAMES),
    method: ClusteringMethod.HIERARCHICAL
  }
): ModeledData[] | undefined {
  // Nothing to cluster: hand the input straight back
  if (!rawData || rawData.length === 0) {
    return rawData;
  }

  try {
    const featureVectors = processFeatures(rawData, options.selectedFeatures);
    if (!validateFeatures(featureVectors)) {
      return rawData;
    }

    // A non-positive requested count means "pick one for me"
    let clusterCount = options.numClusters;
    if (clusterCount <= 0) {
      console.log('Auto-detecting optimal number of clusters...');
      clusterCount = findOptimalClusters(
        featureVectors,
        CLUSTERING_CONFIG.DEFAULT_MIN_CLUSTERS,
        CLUSTERING_CONFIG.DEFAULT_MAX_CLUSTERS
      );
      console.log(`Auto-detected optimal number of clusters: ${clusterCount}`);
    }

    const k = clusterCount;
    // Default algorithm, also used as the fallback for the other two
    const runHierarchical = (): number[] => hierarchicalClustering(featureVectors, k);

    let assignments: number[];
    if (options.method === ClusteringMethod.KMEANS) {
      try {
        assignments = kmeans(featureVectors, k, {
          maxIterations: CLUSTERING_CONFIG.MAX_ITERATIONS,
          initialization: 'kmeans++'
        }).clusters;
      } catch (e) {
        console.error('Error in k-means clustering:', e);
        assignments = runHierarchical();
      }
    } else if (options.method === ClusteringMethod.GMM) {
      try {
        const model = new GMM(k, featureVectors[0].length);
        assignments = model.fit(new Matrix(featureVectors));
      } catch (e) {
        console.error('Error in GMM clustering:', e);
        assignments = runHierarchical();
      }
    } else {
      // HIERARCHICAL and any unrecognised method
      assignments = runHierarchical();
    }

    // Rank labels by cluster size, then attach them to copies of the profiles
    const modes = applyFrequencyOrdering(assignments);
    return rawData.map((profile, index) => ({
      ...profile,
      mode: modes[index]
    }));
  } catch (error) {
    console.error('Error clustering data:', error);
    return rawData;
  }
}

// Export all clustering utilities and constants
export * from './constants';
export * from './types';
export * from './featureExtraction';
export * from './algorithms/hierarchicalClustering';
export * from './utils/mathUtils';
