/* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */
import { groupBy, keyBy, some } from "lodash";
import {
  getAminoAcidStringFromSequenceString,
  getReverseComplementSequenceString
} from "@teselagen/sequence-utils";
import { getSequenceWithinRange } from "@teselagen/range-utils";
import shortid from "shortid";
import {
  computeSequenceHash,
  aaSequenceJSONtoGraphQLInput,
  sequenceJSONtoGraphQLInput
} from "./utils";
import { checkDuplicateSequences } from "./checkDuplicateSequences";
import { isoContext } from "@teselagen/utils";
import upsertUniqueAliases from "./upsertUniqueAliases";
import { getSequence } from "../utils/getSequence";
import { isBrowser } from "browser-or-node";

// Tag attached to newly created CDS / amino acid records whose translation contains "*".
// NOTE(review): presumably this id belongs to the "Contains Internal Stop Codon" tag — confirm.
const INTERNAL_STOP_CODON_TAG_ID = "e20c3465-1ff4-498d-87fc-ec92f97b330e";
const INTERNAL_STOP_CODON_TAG_NAME = "Contains Internal Stop Codon";

/**
 * Reports an unrecoverable data problem: shows a toastr error in the browser,
 * throws everywhere else.
 * @param {string} errMsg - message to show or throw
 * @returns {*} whatever `window.toastr.error` returns (browser only)
 * @throws {Error} when not running in a browser
 */
function reportProcessingError(errMsg) {
  if (isBrowser) return window.toastr.error(errMsg);
  throw new Error(errMsg);
}

/**
 * Builds the inventory-item part of an alias record that points at a cds.
 * Reuses the first inventory item already present on the cds, otherwise asks
 * for a new DNA_SEQUENCE inventory item referencing the cds by id or by cid.
 * @param {object} cds - an existing cds record or a cds about to be created
 * @returns {object} either `{ targetInventoryItemId }` or `{ targetInventoryItem }`
 */
function getCdsInventoryItemFields(cds) {
  if (cds.inventoryItems && cds.inventoryItems.length) {
    return { targetInventoryItemId: cds.inventoryItems[0].id };
  }
  // a cds without an id has not been saved yet, so it needs a cid to be referenced
  if (!cds.id && !cds.cid) cds.cid = shortid();
  return {
    targetInventoryItem: {
      inventoryItemTypeCode: "DNA_SEQUENCE",
      sequenceId: cds.id || `&${cds.cid}`
    }
  };
}

/**
 * Tells whether a list of tagged items already carries the internal stop codon tag,
 * matching either on the raw tag id or on a loaded `tag.name`.
 * @param {Array<object>} [taggedItems]
 * @returns {boolean}
 */
function hasInternalStopCodonTag(taggedItems) {
  return some(
    taggedItems,
    t =>
      t.tagId === INTERNAL_STOP_CODON_TAG_ID ||
      t.tag?.name === INTERNAL_STOP_CODON_TAG_NAME
  );
}

/**
 * Takes in sequence ids and extracts their CDS features. Creates new cds sequences for those
 * features (or links the already existing ones), translates them and creates the matching
 * amino acid sequences, sequence<->cds join records and name aliases.
 *
 * @param {Array<string>} sequenceIds - ids of the sequences to process
 * @param {object} ctx - context providing `safeQuery` and `safeUpsert` (defaults to isoContext)
 * @returns {Promise<*>} resolves to undefined on success (or when no ids are given); in the
 *   browser a data error resolves to the result of `window.toastr.error`
 * @throws {Error} outside the browser when an amino acid cannot be resolved for a cds or an
 *   existing cds is linked to a different amino acid sequence
 */
export default async function processSequences(
  sequenceIds = [],
  ctx = isoContext
) {
  const { safeQuery, safeUpsert } = ctx;
  if (!sequenceIds.length) return;

  // rebuild the full sequence string of every requested sequence from its fragments
  const sequenceFragments = await safeQuery(
    ["sequenceFragment", "id index fragment sequenceId"],
    {
      pageSize: 400,
      variables: {
        filter: {
          sequenceId: sequenceIds
        }
      }
    }
  );
  const fragmentsBySequenceId = groupBy(sequenceFragments, "sequenceId");
  const sequenceMap = {};
  sequenceIds.forEach(sequenceId => {
    sequenceMap[sequenceId] = getSequence({
      sequenceFragments: fragmentsBySequenceId[sequenceId] || []
    });
  });

  // get all of the sequence features of type CDS that are linked to those sequences
  const sequenceFeatures = await safeQuery(
    ["sequenceFeature", "id name type start end strand sequenceId"],
    {
      pageSize: 400,
      variables: {
        filter: { sequenceId: sequenceIds, type: "CDS" }
      }
    }
  );
  const taggedItems = `taggedItems {
      id
      tagId
      tagOptionId
    }`;

  // ordered, de-duplicated hash lists (the Sets only back the membership checks)
  const cdsHashes = [];
  const seenCdsHashes = new Set();
  const aminoAcidHashes = [];
  const seenAminoAcidHashes = new Set();

  const aminoAcidHashToCdsHashes = {};
  const cdsHashToCds = {};
  const cdsHashToAminoAcidHash = {};
  const aminoAcidHashToAA = {};
  const aminoAcidHashToId = {};
  const cdsHashToSequenceIdsMap = {};

  let aminoAcidSequencesToCreate = [];
  let cdsToCreate = [];
  const aliasesToCreate = [];
  const cdsUpdates = [];
  const sequenceCDSToCreate = [];
  let showInternalStopWarning;

  // For each sequence feature, get the base pairs, translate and hash them,
  // and collect one candidate cds / amino acid per distinct hash
  sequenceFeatures.forEach(seqFeature => {
    let featureFragment = getSequenceWithinRange(
      { start: seqFeature.start, end: seqFeature.end },
      sequenceMap[seqFeature.sequenceId]
    );
    if (seqFeature.strand === -1) {
      featureFragment = getReverseComplementSequenceString(featureFragment);
    }
    const aaSequenceString =
      getAminoAcidStringFromSequenceString(featureFragment);
    // tgreen: because users can add cds label to any feature sometimes we can see an invalid cds
    // which doesn't translate to an amino acid. Ignore them.
    if (!aaSequenceString || !aaSequenceString.replace(/\*/g, "")) {
      return;
    }

    const aminoAcidSequence = aaSequenceJSONtoGraphQLInput({
      name: seqFeature.name,
      sequence: aaSequenceString
    });
    const aminoAcidHash = aminoAcidSequence.hash;
    if (!seenAminoAcidHashes.has(aminoAcidHash)) {
      seenAminoAcidHashes.add(aminoAcidHash);
      aminoAcidHashes.push(aminoAcidHash);
      aminoAcidSequencesToCreate.push(aminoAcidSequence);
    }

    const cdsHash = computeSequenceHash(featureFragment, "CDS");
    const cdsInfo = sequenceJSONtoGraphQLInput({
      name: seqFeature.name,
      sequence: featureFragment,
      isCds: true,
      sequenceTypeCode: "LINEAR_DNA",
      circular: false
    });
    // proteinSequence is only kept temporarily for the stop codon check below
    cdsInfo.proteinSequence = aaSequenceString;
    cdsInfo.inventoryItems = [];
    if (!seenCdsHashes.has(cdsHash)) {
      seenCdsHashes.add(cdsHash);
      cdsHashes.push(cdsHash);
      cdsToCreate.push(cdsInfo);
    }

    cdsHashToAminoAcidHash[cdsHash] = aminoAcidHash;
    if (!aminoAcidHashToCdsHashes[aminoAcidHash]) {
      aminoAcidHashToCdsHashes[aminoAcidHash] = [];
    }
    if (!aminoAcidHashToCdsHashes[aminoAcidHash].includes(cdsHash)) {
      aminoAcidHashToCdsHashes[aminoAcidHash].push(cdsHash);
    }
    if (!cdsHashToSequenceIdsMap[cdsHash]) {
      cdsHashToSequenceIdsMap[cdsHash] = [];
    }
    if (!cdsHashToSequenceIdsMap[cdsHash].includes(seqFeature.sequenceId)) {
      cdsHashToSequenceIdsMap[cdsHash].push(seqFeature.sequenceId);
    }
  });

  // split amino acids into already existing ones (referenced by id)
  // and new ones (referenced by "&cid")
  const existingAminoAcidSequences = await checkDuplicateSequences(
    aminoAcidHashes,
    {
      fragment: `id name hash aliases { id name } ${taggedItems}`,
      isProtein: true
    },
    ctx
  );
  const keyedExistingAA = keyBy(existingAminoAcidSequences, "hash");

  aminoAcidSequencesToCreate = aminoAcidSequencesToCreate.filter(aa => {
    const existingAminoAcidSequence = keyedExistingAA[aa.hash];
    if (existingAminoAcidSequence) {
      aminoAcidHashToAA[aa.hash] = existingAminoAcidSequence;
      aminoAcidHashToId[aa.hash] = existingAminoAcidSequence.id;
      return false;
    }
    aa.cid = shortid();
    aminoAcidHashToAA[aa.hash] = aa;
    aminoAcidHashToId[aa.hash] = `&${aa.cid}`;
    return true;
  });

  const newSequenceFragment = `id name isCds hash sequenceFragments { id index fragment } aminoAcidSequenceId sequenceCodingSequences { id sequenceId } codingDnaSequenceSequenceCodingSequences { id sequenceId }`;
  // If the CDS sequence already exists, create a sequenceCDS join record that links
  // the recently upserted sequence with its new CDS (if that linkage does not already exist)
  const existingCds = await checkDuplicateSequences(
    cdsHashes,
    {
      fragment: newSequenceFragment
    },
    ctx
  );
  for (const cds of existingCds) {
    const aminoAcidHash = cdsHashToAminoAcidHash[cds.hash];
    const aminoAcid = aminoAcidHashToAA[aminoAcidHash];
    // only amino acids that are about to be created carry a cid; name them after the existing cds
    if (aminoAcid && aminoAcid.cid) {
      aminoAcid.name = cds.name;
    }
    const aminoAcidId = aminoAcidHashToId[aminoAcidHash];
    if (!aminoAcidId) {
      console.error("cds amino acid id not found:", cds);
      return reportProcessingError("Corrupted data.");
    }
    if (cds.aminoAcidSequenceId && cds.aminoAcidSequenceId !== aminoAcidId) {
      return reportProcessingError(
        `The cds ${cds.name} was linked to a corrupted amino acid sequence.`
      );
    }
    if (!cds.isCds) {
      cdsUpdates.push({
        id: cds.id,
        isCds: true
      });
    }
    if (!cds.aminoAcidSequenceId) {
      cdsUpdates.push({
        id: cds.id,
        aminoAcidSequenceId: aminoAcidId
      });
    }
    cdsHashToSequenceIdsMap[cds.hash].forEach(sequenceId => {
      const alreadyLinked = cds.codingDnaSequenceSequenceCodingSequences.some(
        sequenceCds => sequenceCds.sequenceId === sequenceId
      );
      if (!alreadyLinked) {
        sequenceCDSToCreate.push({
          codingDnaSequenceId: cds.id,
          sequenceId
        });
      }
    });
  }

  // keep only the cds that do not exist yet, remembering the record to use for every hash
  const keyedExistingCds = keyBy(existingCds, "hash");
  cdsToCreate = cdsToCreate.filter(cds => {
    const existing = keyedExistingCds[cds.hash];
    cdsHashToCds[cds.hash] = existing || cds;
    return !existing;
  });
  for (const cds of cdsToCreate) {
    cds.cid = shortid();
    const aminoAcidId = aminoAcidHashToId[cdsHashToAminoAcidHash[cds.hash]];
    if (!aminoAcidId) {
      return reportProcessingError(
        `No amino acid found for the cds ${cds.name}`
      );
    }
    cds.aminoAcidSequenceId = aminoAcidId;
    cdsHashToSequenceIdsMap[cds.hash].forEach(sequenceId => {
      sequenceCDSToCreate.push({
        sequenceId,
        codingDnaSequenceId: `&${cds.cid}`
      });
    });
  }

  // existing amino acids get an alias for every differently named cds that translates to them
  existingAminoAcidSequences.forEach(aa => {
    aminoAcidHashToCdsHashes[aa.hash].forEach(cdsHash => {
      const cds = cdsHashToCds[cdsHash];
      const cdsName = cds.name;
      if (!cdsName || aa.name === cdsName) return;
      const hasAlias = aa.aliases.some(alias => alias.name === cdsName);
      if (hasAlias) return;
      aliasesToCreate.push({
        name: cdsName,
        aminoAcidSequenceId: aa.id,
        ...getCdsInventoryItemFields(cds)
      });
    });
  });

  // loop thru aa's and cds's and check if there is a stop codon in the aa sequence and tag them
  // (records that already carry the tag are left untouched)
  [...aminoAcidSequencesToCreate, ...cdsToCreate].forEach(aaOrCds => {
    if (
      aaOrCds.proteinSequence.includes("*") &&
      !hasInternalStopCodonTag(aaOrCds.taggedItems)
    ) {
      showInternalStopWarning = true;
      aaOrCds.taggedItems = [
        ...(aaOrCds.taggedItems || []),
        {
          tagId: INTERNAL_STOP_CODON_TAG_ID
        }
      ];
    }
  });
  // proteinSequence was only attached to the cds records for the check above
  cdsToCreate.forEach(cds => {
    delete cds.proteinSequence;
  });

  // new amino acids get an alias for every cds whose name differs from theirs
  aminoAcidSequencesToCreate.forEach(aaSequence => {
    aaSequence.cid = aaSequence.cid || shortid();
    aminoAcidHashToCdsHashes[aaSequence.hash].forEach(cdsHash => {
      const cds = cdsHashToCds[cdsHash];
      if (cds.name === aaSequence.name) return;
      aliasesToCreate.push({
        name: cds.name,
        aminoAcidSequenceId: `&${aaSequence.cid}`,
        ...getCdsInventoryItemFields(cds)
      });
    });
  });

  // persist sequentially, in the same order as before
  await safeUpsert("aminoAcidSequence", aminoAcidSequencesToCreate, {
    excludeResults: true
  });
  await safeUpsert("sequence", cdsToCreate, {
    excludeResults: true
  });
  await safeUpsert("sequence", cdsUpdates, {
    excludeResults: true
  });
  await safeUpsert("sequenceCodingSequence", sequenceCDSToCreate, {
    excludeResults: true
  });
  await upsertUniqueAliases(aliasesToCreate, ctx);

  if (showInternalStopWarning && typeof window !== "undefined") {
    window?.toastr?.warning(
      `Internal stop codons were detected on your CDS features and auto-translated AAs. We've tagged those features and AAs with an "Internal Stop Codon" tag. Please review and edit the sequences as needed.`
    );
  }
}
