import { Face } from '@tensorflow-models/face-landmarks-detection'
import {
  WEBCAM_WIDTH,
  WEBCAM_HEIGHT,
  BLINKING_THRESHOLDED,
  YAWNING_THRESHOLDED,
} from '../constant'

import attentionStore, { AttentionData } from './attentionStore'
import cv from "@techstark/opencv-js"

/**
 * Computes an aspect ratio for an opening (eye or mouth) described by six
 * landmark points: the sum of the two vertical spans divided by twice the
 * horizontal span.
 *
 * Only the `x` and `y` properties of each point are read.
 *
 * @param upper0 - first upper-edge point, measured against `lower0`
 * @param upper1 - second upper-edge point, measured against `lower1`
 * @param lower0 - first lower-edge point
 * @param lower1 - second lower-edge point
 * @param left - one horizontal extreme of the opening
 * @param right - the opposite horizontal extreme
 * @returns `(A + B) / (2 * C)`, where A and B are the vertical spans and C is
 *   the horizontal span. No guard is applied for `C === 0`, so the result is
 *   `NaN` or `Infinity` when `left` and `right` coincide.
 */
const calculateEAR = (
  upper0: any,
  upper1: any,
  lower0: any,
  lower1: any,
  left: any,
  right: any
) => {
  // Euclidean distance between two points in the x/y plane.
  const span = (p: any, q: any) => {
    const dx = p.x - q.x
    const dy = p.y - q.y
    return Math.sqrt(dx ** 2 + dy ** 2)
  }

  const verticalSum = span(upper0, lower0) + span(upper1, lower1)
  const horizontal = span(left, right)

  return verticalSum / (2.0 * horizontal)
}

/**
 * Scores the eye state from twelve landmarks (six per eye).
 *
 * The aspect ratio of each eye is computed with `calculateEAR`, the two ratios
 * are averaged, and the average is compared with `BLINKING_THRESHOLDED`.
 *
 * Parameters prefixed `RE` describe one eye and those prefixed `LE` the other;
 * within each group the order is upper0, upper1, lower0, lower1, left, right,
 * matching the parameter order of `calculateEAR`.
 *
 * @returns `0` when the averaged ratio is below the threshold, otherwise `1`
 */
const blinking = (
  REUpper0: any,
  REUpper1: any,
  RELower0: any,
  RELower1: any,
  RELeft: any,
  RERight: any,
  LEUpper0: any,
  LEUpper1: any,
  LELower0: any,
  LELower1: any,
  LELeft: any,
  LERight: any
) => {
  const rightRatio = calculateEAR(
    REUpper0,
    REUpper1,
    RELower0,
    RELower1,
    RELeft,
    RERight
  )
  const leftRatio = calculateEAR(
    LEUpper0,
    LEUpper1,
    LELower0,
    LELower1,
    LELeft,
    LERight
  )

  const meanRatio = (rightRatio + leftRatio) / 2
  return meanRatio < BLINKING_THRESHOLDED ? 0 : 1
}

/**
 * Scores the mouth state from six mouth landmarks.
 *
 * The mouth's aspect ratio is computed with `calculateEAR` and compared with
 * `YAWNING_THRESHOLDED`. The parameters are forwarded to `calculateEAR` in the
 * same order (upper0, upper1, lower0, lower1, left, right).
 *
 * @returns `0` when the ratio exceeds the threshold, otherwise `1`
 */
const yawning = (
  mouthUpper0: any,
  mouthUpper1: any,
  mouthLower0: any,
  mouthLower1: any,
  mouthLeft: any,
  mouthRight: any
) => {
  const mouthRatio = calculateEAR(
    mouthUpper0,
    mouthUpper1,
    mouthLower0,
    mouthLower1,
    mouthLeft,
    mouthRight
  )

  return mouthRatio > YAWNING_THRESHOLDED ? 0 : 1
}

/**
 * Estimates a head-pose angle from three face landmarks by running OpenCV's
 * `solvePnP` against a small, fixed 3D face model.
 *
 * Only the `x` and `y` properties of each landmark are read. The camera is
 * approximated from the webcam frame size: focal length equals the frame
 * width and the principal point is the frame centre; no lens distortion is
 * assumed.
 *
 * Every `cv.Mat` allocated here is released before returning. OpenCV.js
 * matrices are not garbage collected, so skipping `delete()` would leak
 * memory on every call.
 *
 * @param ns - nose-tip landmark
 * @param le - landmark matched to the model's "left eye left corner"
 * @param re - landmark matched to the model's "right eye right corner"
 * @returns the first component of the solved rotation vector, converted from
 *   radians to degrees, or `0` when `solvePnP` reports failure
 */
const estimate_pose = (ns: any, le: any, re: any) => {
  const focalLength = WEBCAM_WIDTH
  const centerX = WEBCAM_WIDTH / 2
  const centerY = WEBCAM_HEIGHT / 2

  const cameraMatrix = cv.matFromArray(3, 3, cv.CV_64FC1, [
    focalLength,
    0,
    centerX,
    0,
    focalLength,
    centerY,
    0,
    0,
    1,
  ])

  const numRows = 4
  const modelPoints = cv.matFromArray(numRows, 3, cv.CV_64FC1, [
    0.0,
    0.0,
    0.0, // Nose tip
    0.0,
    0.0,
    0.0, // HACK! solvePnP doesn't work with 3 points, so the first point is
    //   duplicated to make the input 4 points
    // 0.0, -330.0, -65.0,  // Chin
    -225.0,
    170.0,
    -135.0, // Left eye left corner
    225.0,
    170.0,
    -135.0, // Right eye right corner
    // -150.0, -150.0, -125.0,  // Left mouth corner
    // 150.0, -150.0, -125.0,  // Right mouth corner
  ])

  const imagePoints = cv.Mat.zeros(numRows, 2, cv.CV_64FC1)
  const distCoeffs = cv.Mat.zeros(4, 1, cv.CV_64FC1) // Assuming no lens distortion
  const rvec = new cv.Mat({ width: 1, height: 3 }, cv.CV_64FC1)
  const tvec = new cv.Mat({ width: 1, height: 3 }, cv.CV_64FC1)

  try {
    // 2D image points, in the same row order as modelPoints.
    const imageCoords = [
      ns.x,
      ns.y, // Nose tip
      ns.x,
      ns.y, // Nose tip (see HACK! above)
      // 399, 561, // Chin
      le.x,
      le.y, // Left eye left corner
      re.x,
      re.y, // Right eye right corner
      // 345, 465, // Left mouth corner
      // 453, 469 // Right mouth corner
    ]
    imageCoords.forEach((v, i) => {
      imagePoints.data64F[i] = v
    })

    // Hack! Seed the translation and rotation vectors to improve estimation;
    // they are used as the initial guess (last solvePnP argument is `true`).
    tvec.data64F[0] = -100
    tvec.data64F[1] = 100
    tvec.data64F[2] = 1000

    // Pick the sign of the initial rotation guess from which eye appears
    // horizontally closer to the nose.
    const distToLeftEyeX = Math.abs(le.x - ns.x)
    const distToRightEyeX = Math.abs(re.x - ns.x)
    const lookingLeft = distToLeftEyeX < distToRightEyeX
    rvec.data64F[0] = lookingLeft ? -1.0 : 1.0
    rvec.data64F[1] = -0.75
    rvec.data64F[2] = -3.0

    const success = cv.solvePnP(
      modelPoints,
      imagePoints,
      cameraMatrix,
      distCoeffs,
      rvec,
      tvec,
      true
    )
    if (!success) {
      return 0
    }

    // Copy the result out of the Mat before it is released below.
    const degrees = rvec.data64F.map((d: any) => (d / Math.PI) * 180)
    return degrees[0]
  } finally {
    // Release WASM-heap memory on every exit path (success, failure, throw).
    for (const mat of [
      cameraMatrix,
      modelPoints,
      imagePoints,
      distCoeffs,
      rvec,
      tvec,
    ]) {
      mat.delete()
    }
  }
}

/**
 * Builds one attention sample from face-landmark predictions and appends it
 * to `attentionStore`.
 *
 * The sample always carries the current timestamp. When `predictions` is
 * empty, `face`, `pose` and `blinking` stay `0` and `yawning` is left unset.
 * Otherwise only the first prediction is used: `face` becomes `1` and `pose`,
 * `yawning` and `blinking` are derived from fixed keypoint indices.
 *
 * @param predictions - detected faces; only `predictions[0].keypoints` is read
 */
export const attention = (predictions: Face[]) => {
  const sample: AttentionData = {
    time: Date.now(),
    blinking: 0,
    face: 0,
    pose: 0,
  }

  if (predictions.length !== 0) {
    const { keypoints: kp } = predictions[0]

    sample.face = 1

    // Arguments are (nose tip, left-eye point, right-eye point).
    sample.pose = estimate_pose(kp[1], kp[359], kp[130])

    // Mouth: upper pair, lower pair, then the two corners.
    sample.yawning = yawning(kp[39], kp[269], kp[181], kp[405], kp[78], kp[308])

    // Eyes: six points for the first eye followed by six for the second, each
    // ordered upper pair, lower pair, corners.
    sample.blinking = blinking(
      kp[160],
      kp[158],
      kp[144],
      kp[153],
      kp[33],
      kp[133],
      kp[385],
      kp[387],
      kp[380],
      kp[373],
      kp[362],
      kp[263]
    )
  }

  attentionStore.add(sample)
}
