/* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */

import { times, size, isNil, identity } from "lodash";
import { isBrowser } from "browser-or-node";

/**
 * Tell whether the enzyme's forward snip position lies strictly before
 * its reverse snip position.
 * @param {Object} enzyme Object with `forwardSnipPosition` and `reverseSnipPosition`.
 * @returns {boolean} `true` only when forward < reverse (equal positions give `false`).
 */
export const producesOverhang = enzyme => {
  const { forwardSnipPosition, reverseSnipPosition } = enzyme;
  return forwardSnipPosition < reverseSnipPosition;
};

/**
 * Compute how many bps separate the forward and reverse snip positions
 * of a restriction enzyme, i.e. the length of the overhang it leaves.
 * @param {Object} enzyme Object with `forwardSnipPosition` and `reverseSnipPosition`.
 * @returns {int} Absolute distance between the two snip positions.
 */
export const getOverhangSize = enzyme => {
  const { forwardSnipPosition, reverseSnipPosition } = enzyme;
  const distance = forwardSnipPosition - reverseSnipPosition;
  return Math.abs(distance);
};

/**
 * A given regex may contain quantifiers of the form `g{3}`, which
 * corresponds to the character `g` repeated three times. This function
 * replaces those instances with the quantified unit repeated. The unit is
 * either a single character or one whole bracketed character set.
 * For example, `atc{2}g{3}t[ag]{2}` becomes `atccgggt[ag][ag]`, and
 * `[ag]{2}[ct]{2}` becomes `[ag][ag][ct][ct]`.
 * @param {string} regex
 * @returns {string} The regex with every `{n}` quantifier expanded.
 */
export const removeQuantifiers = function(regex) {
  // `[^\]]+` keeps the match inside ONE bracketed set. A greedy `.+` would
  // run from the first `[` to the last `]` that is followed by a quantifier,
  // swallowing every set in between.
  return regex.replace(/(\[[^\]]+\]|.)\{(\d+)\}/g, (match, unit, count) =>
    unit.repeat(Number(count))
  );
};

/**
 * A given regex may contain character sets of the form `[agtc]`, which
 * match any one of the characters in the list. This function replaces
 * every such set with whatever the collapse fn returns for it.
 * For example, `at[gc]` could become `atg`.
 * @param {string} regex
 * @param {Function} [collapseFn] Called with the matched set (brackets
 * included) as its only argument; defaults to the identity function.
 * @returns {string}
 */
export const collapseCharSets = (regex, collapseFn = identity) => {
  const charSetPattern = /(\[.*?\])/g;
  // Wrap the callback so it only ever sees the matched text, not the extra
  // arguments String.prototype.replace passes to replacers.
  const collapseOne = charSet => collapseFn(charSet);
  return regex.replace(charSetPattern, collapseOne);
};

/**
 * Given a restriction enzyme object, get the length of its recognition
 * site in bps. For example, a restriction enzyme with recognition site
 * `gatcta` will have a recognition site length of 6.
 *
 * `enzyme.sequence` is used when truthy; otherwise the site is derived from
 * `enzyme.recognitionRegex` with its quantifiers expanded. Each bracketed
 * character set counts as a single position.
 * @param {Object} enzyme
 * @returns {int}
 */
export const getRecognitionSiteLength = enzyme => {
  const site = enzyme.sequence || removeQuantifiers(enzyme.recognitionRegex);
  // Swap every character set for one placeholder so it contributes one bp.
  const onePlaceholderPerSet = collapseCharSets(site, () => "*");
  return onePlaceholderPerSet.length;
};

/**
 * Type IIs enzymes will have a recognition site distant from the cut site.
 * This function gives the length of that separation in bps: the smaller of
 * the two snip positions minus the recognition site length.
 * @param {Object} enzyme Should be a type IIs enzyme.
 * @returns {int}
 */
export const getGapLength = enzyme => {
  const { forwardSnipPosition, reverseSnipPosition } = enzyme;
  const nearestSnip = Math.min(forwardSnipPosition, reverseSnipPosition);
  return nearestSnip - getRecognitionSiteLength(enzyme);
};

/**
 * Build a filler sequence as long as the enzyme's gap (see `getGapLength`),
 * made of one repeated character.
 * @param {Object} enzyme
 * @param {string} [bp="n"] Character used for every gap position.
 * @returns {string}
 */
export const getGapSequence = (enzyme, bp = "n") => {
  const gapLength = getGapLength(enzyme);
  const gapBps = times(gapLength, () => bp);
  return gapBps.join("");
};

/**
 * Compute the j5 Golden Gate parameters for a restriction enzyme.
 *
 * @param {Object} restrictionEnzyme Needs `forwardSnipPosition`,
 * `reverseSnipPosition` and a non-empty `sequence` or `recognitionRegex`;
 * `name` is only used in the misconfiguration message.
 * @returns {Object} Object with the keys `GOLDEN_GATE_RECOGNITION_SEQ`,
 * `GOLDEN_GATE_OVERHANG_BPS`, `GOLDEN_GATE_TERMINI_EXTRA_SEQ` and
 * `GOLDEN_GATE_TERMINI_EXTRA_SEQ_DNA_SYNTHESIS_ALT`.
 * @throws {Error} When required fields are missing, when the recognition
 * sequence contains anything other than a/c/g/t, or (outside the browser)
 * when the gap between recognition site and cut site is not positive.
 */
export const getParamsForRestrictionEnzyme = restrictionEnzyme => {
  const {
    sequence: _sequence,
    recognitionRegex,
    forwardSnipPosition,
    reverseSnipPosition
  } = restrictionEnzyme || {};

  const hasRecognitionSite = size(_sequence) > 0 || size(recognitionRegex) > 0;
  if (
    !hasRecognitionSite ||
    isNil(forwardSnipPosition) ||
    isNil(reverseSnipPosition)
  ) {
    throw new Error(
      `Restriction Enzyme is missing fields: ${JSON.stringify(
        restrictionEnzyme
      )}`
    );
  }

  const sequence = _sequence || removeQuantifiers(recognitionRegex);

  /**
   * J5 doesn't support degenerate base pairs, so any recognition sequence
   * that is not made purely of a/c/g/t is rejected here.
   *
   * Support could eventually be added, e.g. by collapsing degenerate
   * character sets to "n" with `collapseCharSets`.
   */
  if (!/^[gatc]*$/i.test(sequence)) {
    throw new Error(
      `Recognition sequence '${sequence}' must be only base pairs.`
    );
  }

  const spacerLength = getGapLength(restrictionEnzyme);

  if (spacerLength <= 0) {
    const message = `Restriction Enzyme ${restrictionEnzyme.name} is wrongfully configured as TypeIIs`;
    if (isBrowser) {
      // Browser: warn only and keep going with best-effort params.
      window.toastr.warning(message);
    } else {
      throw new Error(message);
    }
  }

  const ggateRecognitionSeq = sequence.toUpperCase();
  const ggateOverhangBps = Math.abs(reverseSnipPosition - forwardSnipPosition);
  /**
   * NOTE: From the j5 manual it is not clear how the upstream sequence
   * influences the assembly; "CACACCA" will be used.
   * NOTE: From the j5 manual it is clear that the downstream sequence should
   * have a length of 'forwardSnipPosition - sequence.length', however the actual
   * base pairs to choose seem like they could be anything; pure "A"s will be used.
   *
   * ref: https://j5.jbei.org/j5manual/pages/33.html
   */
  const upstreamTermini = "CACACCA";
  // The browser branch above continues with a non-positive gap, and
  // String.prototype.repeat throws a RangeError on a negative count, so clamp.
  const downstreamTermini = "A".repeat(Math.max(spacerLength, 0));
  const ggateTerminiExtraSeq =
    upstreamTermini + ggateRecognitionSeq + downstreamTermini;
  // NOTE: same as 'ggateTerminiExtraSeq' until needed otherwise
  const ggateTerminiExtraSeqAlt = ggateTerminiExtraSeq;

  return {
    GOLDEN_GATE_RECOGNITION_SEQ: ggateRecognitionSeq,
    GOLDEN_GATE_OVERHANG_BPS: ggateOverhangBps,
    GOLDEN_GATE_TERMINI_EXTRA_SEQ: ggateTerminiExtraSeq,
    GOLDEN_GATE_TERMINI_EXTRA_SEQ_DNA_SYNTHESIS_ALT: ggateTerminiExtraSeqAlt
  };
};

/**
 * Lookup from a set of bases to the single letter that stands for it.
 * Keys are the member bases concatenated in alphabetical order (e.g. `ag`),
 * so callers must sort before looking up; values are the one-letter code
 * (e.g. `r`). Single bases, including `u`, map to themselves.
 * NOTE(review): the letters look like the IUPAC nucleotide ambiguity codes.
 */
export const bpsToBpMap = {
  a: "a",
  c: "c",
  g: "g",
  t: "t",
  u: "u",
  at: "w",
  cg: "s",
  ac: "m",
  gt: "k",
  ag: "r",
  ct: "y",
  cgt: "b",
  agt: "d",
  act: "h",
  acg: "v",
  acgt: "n"
};
/**
 * Lookup from a one-letter code to the bases it stands for, written as an
 * alphabetically ordered string (e.g. `r` -> `ag`, `n` -> `acgt`).
 * Single bases, including `u`, map to themselves.
 */
export const bpToBpsMap = {
  a: "a",
  c: "c",
  g: "g",
  t: "t",
  u: "u",
  w: "at",
  s: "cg",
  m: "ac",
  k: "gt",
  r: "ag",
  y: "ct",
  b: "cgt",
  d: "agt",
  h: "act",
  v: "acg",
  n: "acgt"
};

/**
 * Lookup from a one-letter code to the regex fragment used to match it.
 * Plain bases map to themselves. Each degenerate code maps to a character
 * set holding its member bases plus the code letter itself (e.g. `w` ->
 * `[atw]`); `n` maps to a set listing every code in this table except `u`.
 */
export const bpToRegexStrMap = {
  a: "a",
  c: "c",
  g: "g",
  t: "t",
  u: "u",
  w: "[atw]",
  s: "[cgs]",
  m: "[acm]",
  k: "[gtk]",
  r: "[agr]",
  y: "[cty]",
  b: "[cgtb]",
  d: "[agtd]",
  h: "[acth]",
  v: "[acgv]",
  n: "[abcdghkmnrstvwy]"
};

/**
 * Collapse a string of base codes into the single code that covers all of
 * them. Every character is expanded through `bpToBpsMap`, the distinct
 * bases are sorted and joined, and the result is looked up in `bpsToBpMap`.
 * For example, `ag` gives `r`.
 * @param {string} s Characters that all have an entry in `bpToBpsMap`.
 * @returns {string|undefined} `undefined` when the combined set has no
 * entry in `bpsToBpMap`.
 */
const stringOfBpsToSingleBp = s => {
  const distinctBps = new Set();
  for (const code of s) {
    for (const bp of bpToBpsMap[code]) {
      distinctBps.add(bp);
    }
  }
  const lookupKey = Array.from(distinctBps)
    .sort()
    .join("");
  return bpsToBpMap[lookupKey];
};

/**
 * Turn a recognition regex string back into a plain sequence string.
 * First every bracketed set of base codes is collapsed into the single
 * code covering it (via `stringOfBpsToSingleBp`), then every `x{n}`
 * quantifier is expanded into `x` repeated `n` times.
 * @param {string} s
 * @returns {string}
 */
export const regexStrToRegularSequence = s => {
  const collapseSet = (match, val) => stringOfBpsToSingleBp(val);
  const expandRepeat = (match, char, num) =>
    times(Number(num), () => char).join("");

  const withoutCharSets = s.replace(/\[([acgtwsmkrybdhvn]+)\]/g, collapseSet);
  return withoutCharSets.replace(/(.)\{(\d+)\}/g, expandRepeat);
};

/**
 * Convert a recognition pattern written with base codes into a JS regex
 * source string. The input is lower-cased, every character with an entry
 * in `bpToRegexStrMap` is replaced by its regex fragment (other characters
 * are kept as-is), and `{n}` quantifiers are expanded by repetition.
 * For example, `at{3}c` becomes `atttc` and `n{2}` becomes two copies of
 * the `n` character set.
 * @param {string} [regex] Falsy values are treated as the empty string.
 * @returns {string}
 */
export const toJsRegexStr = regex => {
  const pattern = (regex || "").toLowerCase();
  let translated = "";
  for (const c of pattern) {
    translated += bpToRegexStrMap[c] || c;
  }
  // Degenerate codes are bracketed sets by now, so a quantifier has to
  // repeat the whole `[...]` unit, not just the closing bracket before it.
  return translated.replace(/(\[[^\]]+\]|.)\{(\d+)\}/g, (match, unit, num) =>
    unit.repeat(Number(num))
  );
};

/**
 * Lookup from a one-letter base code to the code of its complement.
 * Plain bases pair a<->t and c<->g; degenerate codes map to the code whose
 * members are the complements of their own (e.g. `m` <-> `k`), and `w`,
 * `s` and `n` map to themselves. There is no entry for `u`.
 */
const bpToComplementMap = {
  a: "t",
  c: "g",
  g: "c",
  t: "a",
  w: "w",
  s: "s",
  m: "k",
  k: "m",
  r: "y",
  y: "r",
  b: "v",
  d: "h",
  h: "d",
  v: "b",
  n: "n"
};

/**
 * Complement a sequence base by base, degenerate codes included.
 * The result is lower case and in the same order as the input; characters
 * without an entry in `bpToComplementMap` are dropped.
 * @param {string} seq
 * @returns {string}
 */
export const generalComplement = seq => {
  const complemented = [];
  for (const base of seq.toLowerCase().split("")) {
    complemented.push(bpToComplementMap[base]);
  }
  // join() renders the `undefined` left by unknown characters as "".
  return complemented.join("");
};

/**
 * Reverse-complement a sequence, degenerate codes included.
 * The result is lower case; characters without an entry in
 * `bpToComplementMap` are dropped.
 * @param {string} seq
 * @returns {string}
 */
export const generalReverseComplement = seq => {
  const bases = seq.toLowerCase();
  const complemented = [];
  // Walk from the last base to the first so the output comes out reversed.
  for (let i = bases.length - 1; i >= 0; i--) {
    complemented.push(bpToComplementMap[bases[i]]);
  }
  // join() renders the `undefined` left by unknown characters as "".
  return complemented.join("");
};

/**
 * Complement a sequence position by position (the other strand, NOT
 * reversed), degenerate codes included. Each character is expanded to its
 * member bases through `bpToBpsMap`, every a/c/g/t member is complemented,
 * and the sorted result is mapped back to one code through `bpsToBpMap`.
 * @param {string} s Characters that all have an entry in `bpToBpsMap`
 * (case-insensitive).
 * @returns {string} Lower-case complement, same length and order as `s`
 * for a/c/g/t and degenerate codes.
 */
const reverseBps = s => {
  const partnerOf = { a: "t", c: "g", g: "c", t: "a" };
  let complement = "";
  for (const code of s.toLowerCase()) {
    const partners = new Set();
    for (const bp of bpToBpsMap[code]) {
      const partner = partnerOf[bp];
      // Members other than a/c/g/t (e.g. `u`) contribute nothing.
      if (partner) {
        partners.add(partner);
      }
    }
    const lookupKey = Array.from(partners)
      .sort()
      .join("");
    // String concatenation on purpose: a failed lookup appends "undefined".
    complement += bpsToBpMap[lookupKey];
  }
  return complement;
};

/**
 * Build the regex string for the reverse complement of a sequence.
 * The sequence is complemented with `reverseBps` and reversed. Runs of one
 * repeated plain base (a/c/g/t) are then written as `x{n}`, single plain
 * bases are kept as-is, and every other character is replaced by its
 * `bpToRegexStrMap` fragment.
 * @param {string} sequence
 * @returns {string}
 */
export const reverseRegex = sequence => {
  const reverseComp = reverseBps(sequence)
    .split("")
    .reverse()
    .join("");

  // A token is either a maximal run of one plain base (captured in `base`)
  // or any other single character.
  return reverseComp.replace(/([acgt])\1*|[\s\S]/g, (token, base) => {
    if (base === undefined) {
      return String(bpToRegexStrMap[token]);
    }
    if (token.length > 1) {
      return base + "{" + token.length + "}";
    }
    return base;
  });
};

/**
 * Tell whether an enzyme's recognition site contains any degenerate base
 * code (one of b, d, h, k, m, n, r, s, v, w, y; case-insensitive).
 * The site is taken from the first truthy of `enzyme.site`,
 * `enzyme.sequence`, or `enzyme.recognitionRegex` with quantifiers expanded.
 * @param {Object} enzyme
 * @returns {boolean}
 */
export const isRecognitionSiteDegenerate = enzyme => {
  let site = enzyme.site;
  if (!site) {
    site = enzyme.sequence;
  }
  if (!site) {
    site = removeQuantifiers(enzyme.recognitionRegex);
  }
  return /[bdhkmnrsvwy]/i.test(site);
};

/**
 * Tell whether a sequence matches the enzyme's forward or reverse regex
 * (case-insensitive).
 * @param {Object} enzyme Object with `forwardRegex` and `reverseRegex` regex sources.
 * @param {string} sequence
 * @returns {boolean}
 */
export const sequenceHasRecognitionSite = (enzyme, sequence) => {
  const { forwardRegex: forwardSource, reverseRegex: reverseSource } = enzyme;
  // Both patterns are compiled and tested up front, forward first.
  const matches = [forwardSource, reverseSource]
    .map(source => new RegExp(source, "i"))
    .map(pattern => pattern.test(sequence));
  return matches.includes(true);
};

/**
 * Validate the editable fields of a restriction enzyme.
 *
 * - `forwardSnipPosition` / `reverseSnipPosition` are required (0 counts as
 *   present) and must lie between 0 and 100 inclusive.
 * - `recognitionRegex` may only contain nucleotide letters (degenerate
 *   codes included, case-insensitive) and one-digit `{n}` quantifiers.
 *
 * @param {Object} restrictionEnzyme
 * @param {Object} [opts]
 * @param {string} [opts.REQUIRED_ERROR="Required"] Message stored for a
 * missing snip position.
 * @returns {Object} Map from field name to error message; empty when valid.
 */
export const validRestrictionEnzyme = (restrictionEnzyme, opts = {}) => {
  const { REQUIRED_ERROR = "Required" } = opts;
  const errors = {};

  // Both snip positions follow the same rules; only the label differs.
  const snipPositionFields = [
    ["forwardSnipPosition", "Forward"],
    ["reverseSnipPosition", "Reverse"]
  ];
  for (const [field, label] of snipPositionFields) {
    const position = restrictionEnzyme[field];
    if (!position && position !== 0) {
      errors[field] = REQUIRED_ERROR;
    } else if (position > 100) {
      errors[field] = `${label} snip position is too large`;
    } else if (position < 0) {
      errors[field] = `${label} snip position cannot be negative`;
    }
  }

  // Pattern should have valid nucleotide letters (including degenerate)
  // and repetition quantifiers with up to one digit (two digits is highly unlikely).
  if (
    !/^([acgtwsmkrybdhvn]|\{[0-9]\})*$/i.test(
      restrictionEnzyme.recognitionRegex
    )
  ) {
    errors.recognitionRegex = "Invalid value";
  }

  return errors;
};
