/* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */
import shortid from "shortid";
import {
  aaSequenceJSONtoGraphQLInput,
  chunkSequenceToFragments,
  computeSequenceHash
} from "./utils";
import {
  cleanCommaSeparatedCell,
  parseCsvFile,
  parseCsvString
} from "../utils/fileUtils";
import { getBoundExtendedPropertyUploadHelpers } from "../utils/extendedPropertiesUtils";
import {
  filterSequenceString,
  getFeatureTypes,
  filterRnaString
} from "@teselagen/sequence-utils";
import { flatMap } from "lodash";
import { extname } from "path";
import preventDuplicationOfSequenceCsvTags from "./preventDuplicationOfSequenceCsvTags";
import addSequenceTypeOrDefault from "./addSequenceTypeOrDefault";

/**
 * Normalizes the well-known string columns of one parsed CSV row.
 *
 * Missing columns fall back to defaults (`name` -> "_unnamed",
 * `circularLinear` -> "linear", every other known column -> ""). Each known
 * column is trimmed and `circularLinear` is additionally lower-cased. Columns
 * that are not listed here are passed through untouched.
 *
 * @param {Object} row - One parsed CSV row; the known columns are expected to
 *   hold strings (or be absent).
 * @param {Array} data - All parsed rows. Only its length is consulted.
 * @param {Object} [opts]
 * @param {boolean} [opts.useFilenameAsSequenceName] - When truthy and the CSV
 *   holds exactly one row, the name is taken from `opts.filename`.
 * @param {string} [opts.filename] - Name of the uploaded file.
 * @returns {Object} A new object with the normalized columns plus the
 *   untouched remaining columns.
 */
function trimRowData(row, data, opts = {}) {
  const {
    name = "_unnamed",
    sequence = "",
    description = "",
    circularLinear = "linear",
    tags = "",
    part = "",
    partTags = "",
    feature = "",
    featureType = "",
    ...rest
  } = row;
  const { useFilenameAsSequenceName, filename } = opts;

  // Only fall back to the filename when one was actually supplied; otherwise
  // keep the row's own name instead of crashing on `undefined`.
  const shouldUseFilename =
    data.length === 1 &&
    useFilenameAsSequenceName &&
    typeof filename === "string";

  let resolvedName;
  if (shouldUseFilename) {
    // Strip the extension from the END of the filename. A plain
    // `replace(ext, "")` would remove the first occurrence of the extension
    // text, mangling names such as "seq.csv.backup.csv".
    const extension = extname(filename);
    resolvedName = filename.slice(0, filename.length - extension.length);
  } else {
    resolvedName = name.trim();
  }

  return {
    name: resolvedName,
    sequence: sequence.trim(),
    description: description.trim(),
    circularLinear: circularLinear.trim().toLowerCase(),
    tags: tags.trim(),
    part: part.trim(),
    partTags: partTags.trim(),
    feature: feature.trim(),
    featureType: featureType.trim(),
    ...rest
  };
}

/**
 * Forwards its arguments to `console.log`, but only while the
 * DEBUG_SEQUENCE_UPLOAD environment variable holds a non-empty value.
 *
 * @param {...*} args - Values to print.
 */
const logDebug = (...args) => {
  const isDebugEnabled = Boolean(process.env.DEBUG_SEQUENCE_UPLOAD);
  if (!isDebugEnabled) return;

  // eslint-disable-next-line no-console
  console.log(...args);
};

/**
 * Converts a comma separated list of tag names into tagged-item inputs.
 *
 * Whitespace around each name is ignored and empty entries (for example from
 * a trailing comma) are dropped, so "a, b", "a,b" and "a , b," all yield the
 * same two tags.
 *
 * @param {string} [tagsCsvList] - Comma separated tag names.
 * @returns {Array<{tag: {name: string}}>} One entry per tag name, or an empty
 *   array when no usable tag names were given.
 */
function getTaggedItems(tagsCsvList) {
  if (typeof tagsCsvList !== "string") return [];

  return tagsCsvList
    .split(",")
    .map(name => name.trim())
    .filter(name => name.length > 0)
    .map(name => ({ tag: { name } }));
}
/**
 * Builds the part annotations for a CSV row.
 *
 * When the row names a part, a single forward-strand part spanning the whole
 * sequence is produced; for proteins the sequence length is multiplied by 3
 * when computing the end coordinate. Without a part name the result is empty.
 *
 * @param {Object} row - Row with `sequence`, `part`, `partTags` and `partType`.
 * @param {Object} options
 * @param {boolean} options.isProtein - Whether the row holds a protein sequence.
 * @returns {Array<Object>} Zero or one part definitions.
 */
function getParts(row, { isProtein }) {
  if (!row.part) return [];

  const unitsPerResidue = isProtein ? 3 : 1;
  const wholeSequencePart = {
    name: row.part,
    start: 0,
    end: row.sequence.length * unitsPerResidue - 1,
    strand: 1,
    taggedItems: getTaggedItems(row.partTags),
    type: getFeatureType(row.partType)
  };

  return [wholeSequencePart];
}

/**
 * Builds the feature annotations for a CSV row.
 *
 * When the row names a feature, a single forward-strand feature spanning the
 * whole sequence is produced; for proteins the sequence length is multiplied
 * by 3 when computing the end coordinate. Without a feature name the result
 * is empty.
 *
 * @param {Object} row - Row with `sequence`, `feature` and `featureType`.
 * @param {Object} options
 * @param {boolean} options.isProtein - Whether the row holds a protein sequence.
 * @returns {Array<Object>} Zero or one feature definitions.
 */
function getFeatures(row, { isProtein }) {
  if (!row.feature) return [];

  const unitsPerResidue = isProtein ? 3 : 1;
  const wholeSequenceFeature = {
    name: row.feature,
    start: 0,
    end: row.sequence.length * unitsPerResidue - 1,
    strand: 1,
    type: getFeatureType(row.featureType)
  };

  return [wholeSequenceFeature];
}

/**
 * Returns the given feature type when it is one of the types reported by
 * `getFeatureTypes()`, and "misc_feature" otherwise (including when no type
 * was given at all).
 *
 * @param {string} [featureType] - Feature type read from the CSV.
 * @returns {string} The accepted feature type or "misc_feature".
 */
function getFeatureType(featureType) {
  // The emptiness check comes first so the type list is only looked up when
  // there is something to look for.
  const isKnownType =
    Boolean(featureType) && getFeatureTypes().includes(featureType);

  return isKnownType ? featureType : "misc_feature";
}

/**
 * Interprets the raw "externallyAvailable" CSV cell of a material row.
 *
 * A missing or blank cell means "not available". Otherwise the trimmed,
 * lower-cased text is parsed as JSON (so "true", "TRUE", "false", "1", "0"
 * all work). Text that cannot be parsed is reported through `warnings` and
 * treated as `false` instead of aborting the whole import.
 *
 * @param {*} cell - Raw cell value.
 * @param {Array<string>} warnings - Collector for user-facing warnings.
 * @param {string} label - Name used to identify the row in the warning.
 * @returns {*} The parsed value, or `false` when it is falsy/unusable.
 */
function parseExternallyAvailable(cell, warnings, label) {
  if (cell === undefined || cell === null) return false;

  const normalized = String(cell).trim().toLowerCase();
  if (normalized === "") return false;

  try {
    return JSON.parse(normalized) || false;
  } catch (e) {
    warnings.push(
      `Could not interpret the "externallyAvailable" value "${cell}" for "${label}"; defaulting to false`
    );
    return false;
  }
}

/**
 * Parses an uploaded sequence CSV and converts every usable row into a
 * sequence input object.
 *
 * Rows lacking a name or sequence (for guide RNAs: a name, scaffold sequence
 * and spacer sequence) are skipped. Skipped rows that contain some data are
 * reported through `invalidLines` and summarized in `warnings`.
 *
 * @param {Object} options
 * @param {string|*} options.file - CSV text, or a value handed to
 *   `parseCsvFile` when it is not a string.
 * @param {Object} options.apolloMethods - Must expose `safeQuery`; also passed
 *   to the extended-property helpers.
 * @param {boolean} [options.isProtein] - Treat rows as amino acid sequences.
 * @param {boolean} [options.isGuideRNA] - Build the sequence from the
 *   `spacerSequence` and `scaffoldSequence` columns.
 * @param {boolean} [options.isMaterial] - Also create a nested material
 *   (`polynucleotideMaterial`) for every sequence.
 * @param {string} [options.sequenceTypeCode] - Overrides the type derived
 *   from the `circularLinear` column for non-protein rows.
 * @param {boolean} [options.useFilenameAsSequenceName] - Forwarded to
 *   `trimRowData`.
 * @param {string} [options.filename] - Forwarded to `trimRowData`.
 * @param {Array<string>} [options.invalidLines] - Receives one message per
 *   skipped, non-empty row.
 * @param {Array<string>} [options.warnings] - Receives user-facing warnings.
 * @returns {Promise<Array<Object>>} The sequence input objects.
 * @throws {Error} When the CSV parser reports errors; the message is the
 *   JSON-serialized error list.
 */
async function processSequenceUploadCSVContent(options) {
  const {
    isProtein,
    useFilenameAsSequenceName,
    filename,
    // Defaulted so callers that do not collect invalid lines do not crash.
    invalidLines = [],
    isMaterial,
    warnings = [],
    file,
    apolloMethods,
    sequenceTypeCode,
    isGuideRNA
  } = options;

  const csvParseOptions = { header: true, camelCaseHeaders: true };
  const parsedInfo =
    typeof file === "string"
      ? await parseCsvString(file.trim(), csvParseOptions)
      : await parseCsvFile(file, csvParseOptions);
  const { data, errors, meta } = parsedInfo;

  logDebug(`CSV Data:`, data);
  logDebug(`CSV Errors:`, errors);
  logDebug(`CSV Meta:`, meta);

  if (errors.length) throw new Error(JSON.stringify(errors));

  const { getCsvRowExtProps } = await getBoundExtendedPropertyUploadHelpers(
    meta.fields,
    apolloMethods
  );

  let filteredDataCount = 0;
  const filteredData = data.filter(
    (
      { name, sequence, description, scaffoldSequence, spacerSequence },
      index
    ) => {
      const hasNameAndSequence = isGuideRNA
        ? name && scaffoldSequence && spacerSequence
        : name && sequence;
      const hasAnyContent =
        name || sequence || description || scaffoldSequence || spacerSequence;

      // A row with none of these columns filled in is probably just an empty
      // line, so it is dropped silently. Only partially filled rows are
      // reported back to the user.
      if (!hasNameAndSequence && hasAnyContent) {
        filteredDataCount++;
        invalidLines.push(
          `Line ${index + 1} is missing "${!name ? "name" : "sequence"}"`
        );
      }
      return hasNameAndSequence;
    }
  );

  if (filteredDataCount > 0) {
    warnings.push(
      `${filteredDataCount} lines in the CSV were left out of the import due to either a missing name or sequence`
    );
  }

  // Maps an already stored scaffold sequence string to its id.
  const scaffoldSequences = {};
  if (isGuideRNA) {
    const gRNAScaffoldSequences = Array.from(
      new Set(filteredData.map(r => filterRnaString(r.scaffoldSequence)))
    );

    (
      await apolloMethods.safeQuery(["scaffoldSequence", "id sequence"], {
        variables: {
          filter: { sequence: gRNAScaffoldSequences }
        }
      })
    ).forEach(({ id, sequence }) => {
      scaffoldSequences[sequence] = id;
    });
  }

  const result = flatMap(filteredData, (rawRow, index) => {
    const row = trimRowData(rawRow, data, {
      useFilenameAsSequenceName,
      filename,
      index
    });

    const {
      name,
      aliases = "",
      description,
      tags,
      circularLinear: _circ = "",
      hisTagLoc = "",
      isoPoint = "",
      uniprotId = "",
      designId = "",
      pdbId = "",
      cofactor = ""
    } = row;
    // Reassigned below for guide RNAs, where it is spacer + scaffold.
    let { sequence } = row;

    const cleanedAliases = cleanCommaSeparatedCell(aliases);
    const aliasInputs = cleanedAliases.map(a => ({
      name: a
    }));

    // When materials are uploaded, the material is nested under the sequence
    // (see `polynucleotideMaterial` below) and carries the aliases.
    let materialProps;
    if (isMaterial) {
      const materialName = row.materialName || row.name;
      const materialTypeCode = sequenceTypeCode === "RNA" ? "RNA" : "DNA";
      materialProps = {
        name: materialName,
        externalAvailability: parseExternallyAvailable(
          row.externallyAvailable,
          warnings,
          materialName
        ),
        materialTypeCode,
        aliases: aliasInputs,
        ...getCsvRowExtProps({
          row,
          rowIndex: index,
          returnValues: true,
          model: "material",
          recordId: shortid(),
          warnings
        })
      };
    }

    const extValues = getCsvRowExtProps({
      row,
      rowIndex: index,
      returnValues: true,
      model: isProtein ? "aminoAcidSequence" : "sequence",
      recordId: shortid(),
      warnings
    });
    const parts = getParts(row, { isProtein });
    const features = getFeatures(row, { isProtein });

    let scaffoldSequenceId;
    let scaffoldSequence;
    let scaffoldSequenceStart;
    if (isGuideRNA) {
      const spacerSequence = filterRnaString(row.spacerSequence);
      const tmpScaffoldSequence = filterRnaString(row.scaffoldSequence);
      sequence = spacerSequence + tmpScaffoldSequence;
      scaffoldSequenceStart = spacerSequence.length;
      // Reuse the scaffold when it already exists in the database, otherwise
      // ask for a new one to be created alongside the sequence.
      if (tmpScaffoldSequence in scaffoldSequences) {
        scaffoldSequenceId = scaffoldSequences[tmpScaffoldSequence];
      } else {
        scaffoldSequence = {
          sequence: tmpScaffoldSequence
        };
      }
    } else {
      // NOTE(review): the filtered string is only used to skip rows whose
      // sequence filters down to nothing; the unfiltered `sequence` is what
      // gets hashed and stored below. Confirm this is intended.
      const [filteredSequence] = filterSequenceString(sequence, {
        name: name,
        isProtein
      });
      if (!filteredSequence) return [];
    }

    const circularLinear = _circ.toLowerCase();
    const isCircular = circularLinear === "circular";
    const _sequenceTypeCode = isProtein
      ? "AMINO_ACID_SEQUENCE"
      : sequenceTypeCode || (isCircular ? "CIRCULAR_DNA" : "LINEAR_DNA");
    const hash = computeSequenceHash(sequence, _sequenceTypeCode);

    const seq = {
      ...(isProtein
        ? {
            ...aaSequenceJSONtoGraphQLInput({
              sequence,
              features,
              parts
            }),
            hisTagLoc,
            isoPoint,
            uniprotId,
            designId,
            pdbId,
            cofactor
          }
        : {
            parts,
            hash,
            size: sequence.length,
            scaffoldSequenceId,
            scaffoldSequence,
            scaffoldSequenceStart,
            sequenceFeatures: features,
            sequenceFragments: chunkSequenceToFragments(sequence),
            // Previously `!!circularLinear === "circular"`, which compared a
            // boolean with a string and was therefore always false.
            circular: isCircular,
            sequenceTypeCode: _sequenceTypeCode
          }),
      name,
      description,
      taggedItems: getTaggedItems(tags),
      ...(isMaterial && { polynucleotideMaterial: materialProps }),
      ...(!isMaterial && {
        aliases: aliasInputs
      }),
      ...extValues
    };
    if (!isProtein) {
      addSequenceTypeOrDefault(seq, sequenceTypeCode);
    }
    return seq;
  });

  logDebug(`CSV Parsing Result:`, result);
  logDebug(`Warnings:`, warnings);
  logDebug(`Invalid Lines:`, invalidLines);
  const { tagsNotFoundInDb } = await preventDuplicationOfSequenceCsvTags(
    apolloMethods.safeQuery,
    result
  );

  if (tagsNotFoundInDb.length) {
    warnings.push(
      `These tags were specified in the uploaded file(s) but were not found in the database: ${tagsNotFoundInDb.join(
        ", "
      )}`
    );
  }
  return result;
}

export default processSequenceUploadCSVContent;
