/* Copyright (C) 2018 TeselaGen Biotechnology, Inc. */
import React from "react";

/**
 * Creates the display descriptor for one model-training status.
 *
 * A fresh object is returned on every call so that no two statuses share a
 * reference.
 *
 * @param {string} icon - Icon name (string form of an `IconNames.*` value).
 * @param {string} intent - Intent name (string form of an `Intent.*` value).
 * @param {string} description - Full status text.
 * @param {string} [shortDescription=description] - Compact status text;
 *   falls back to `description` when both texts are the same.
 * @returns {{icon: string, intent: string, description: string, shortDescription: string}}
 */
const describeStatus = (
  icon,
  intent,
  description,
  shortDescription = description
) => ({ icon, intent, description, shortDescription });

/**
 * Maps every known model status key to the icon, intent and texts used to
 * present it.
 */
const statusConstants = {
  // Statuses shown before training has actually started.
  creating: describeStatus(
    "info-sign",
    "warning",
    "Model will soon start training"
  ),
  created: describeStatus(
    "refresh",
    "warning",
    "Model is being trained",
    "Model being trained"
  ),
  pending: describeStatus(
    "info-sign",
    "warning",
    "Model will soon start training"
  ),
  submitting: describeStatus(
    "info-sign",
    "warning",
    "Model will soon start training"
  ),

  // Statuses shown while training is running.
  "in-progress": describeStatus("refresh", "primary", "Model is being trained"),
  completing: describeStatus("refresh", "warning", "Model is being trained"),

  // Terminal statuses.
  "completed-successfully": describeStatus(
    "tick-circle",
    "success",
    "Completed training",
    "Completed"
  ),
  "completed-failed": describeStatus(
    "error",
    "danger",
    "There was an error while training the model",
    "Error while training the model"
  ),
  cancel: describeStatus(
    "disable",
    "warning",
    "This model has been cancelled",
    "Model cancelled"
  ),
  "completed-cancelled": describeStatus(
    "disable",
    "warning",
    "This model has been cancelled",
    "Model cancelled"
  )
};

/**
 * Builds the `modeling_tool` section used by the predictive and evolutive
 * model configs. Both share every value except `model_type`.
 *
 * A fresh object is returned on every call so the two configs never share
 * (and cannot accidentally co-mutate) nested objects.
 *
 * @param {string} modelType - Value stored under `general.model_type`.
 * @returns {object} The `modeling_tool` config section.
 */
const buildModelingToolParams = modelType => ({
  general: {
    model_type: modelType,
    // This is strictly related to the RAM available on the AI Worker instance.
    max_number_of_output_designs: 400000,
    prediction_filter: {
      n_max_samples: 1000,
      score_columns: "prediction"
    }
    // `hardware` is intentionally left unset here (a form-driven value
    // defaulting to "cpu" was sketched at some point but never enabled).
  }
});

/**
 * Builds the `sequence_encoder_params` section used by the predictive and
 * evolutive model configs: one encoder per supported sequence-like column
 * value type.
 *
 * A fresh object is returned on every call (see buildModelingToolParams).
 *
 * @returns {object} The `sequence_encoder_params` config section.
 */
const buildSequenceEncoderParams = () => ({
  general: {
    encoders_cfg: [
      {
        column_value_type: "aa-sequence",
        algorithm: "esm_encoding",
        algorithm_params: {
          model_name: "esm1_t6_43M_UR50S",
          num_workers: 1
        }
      },
      {
        column_value_type: "smiles",
        algorithm: "random_encoding",
        algorithm_params: {}
      }
    ]
  }
});

/**
 * Static configuration for the Evolve app: view toggles, string/route/
 * validation constants, default per-model-type job configs, and the app modes
 * offered to the user.
 *
 * The object is deliberately left mutable: further sections are attached to it
 * after creation (see `noticeMessages`).
 */
// TODO: refactor this into a Singleton so we can override it with frontEndConfigs.
const EvolveConfig = {
  evolveViews: {
    dashboard: true,
    settings: false,
    models: true,
    evolutions: true,
    generations: true
  },
  constants: {
    // NOTE(review): this value lives in front-end code, so it is presumably
    // readable by anyone who can inspect the bundle. Do not rely on it for
    // real access control.
    GENERATIONS_ADVANCED_MODE_SECRET: "IAMATESELAGENIUS",

    // Evolve API endpoints.
    EVOLVE_BASEURL_ENDPOINT: "/evolve-routes/",
    TRAIN_MODEL_ENDPOINT: "/evolve-routes/submit-model/",
    GET_MODEL_ENDPOINT: "/evolve-routes/get-model/",
    GET_MODELS_ENDPOINT: "/evolve-routes/get-models-by-type/",
    GET_MODEL_DATAPOINTS_ENDPOINT: "/evolve-routes/get-model-datapoints/",
    DELETE_MODEL_ENDPOINT: "/evolve-routes/delete-model/",
    CANCEL_MODEL_ENDPOINT: "/evolve-routes/cancel-model/",

    // Evolve UI URLs.
    EVOLVE_APP_ROUTE: "/",
    EVOLVE_SETTINGS_ROUTE: "/settings",
    VIEW_ALL_PREDICTIVE_MODELS_ROUTE: "/predictive-models",
    VIEW_ALL_EVOLUTIVE_MODELS_ROUTE: "/evolutive-models",
    VIEW_ALL_GENERATIVE_MODEL_ROUTE: "/generative-models",
    TRAIN_PREDICTIVE_MODEL_ROUTE: "/predictive-models/prediction-steps",
    TRAIN_EVOLUTIVE_MODEL_ROUTE: "/evolutive-models/evolution-steps",
    TRAIN_GENERATIVE_MODEL_ROUTE: "/generative-models/generation-steps",
    MODEL_DETAIL_VIEW_ROUTE: "/models/:id",
    PREDICTIVE_MODEL_DETAIL_VIEW_ROUTE: "/predictive-models/models/:id",
    EVOLUTIVE_MODEL_DETAIL_VIEW_ROUTE: "/evolutive-models/models/:id",
    GENERATIVE_MODEL_DETAIL_VIEW_ROUTE: "/generative-models/models/:id",

    // Other string constants.
    TRAIN_MODEL_JOB: "modeling-tool",
    TRAIN_GENERATIVE_MODEL_JOB: "gan-tool",
    AA_SEQUENCE_VALUE: "aa-sequence",
    SMILES_VALUE: "smiles",
    CATEGORIC_VALUE: "categoric",
    NUMERIC_VALUE: "numeric",
    UNASSIGNED_VALUE: "",
    UNASSIGNED_COLUMN_TYPE: "unassigned",
    DESCRIPTOR_COLUMN_TYPE: "descriptor",
    TARGET_COLUMN_TYPE: "target",
    PREDICTIVE_MODEL: "predictive",
    EVOLUTIVE_MODEL: "evolutive",
    GENERATIVE_MODEL: "generative",

    // Validation constants.
    GENERATIVE_MODEL_MIN_ALLOWED_SEQUENCES: 1000,
    GENERATIVE_MODEL_MAX_ALLOWED_SEQUENCES: 10000,
    MIN_ALLOWED_AA_SEQ_LENGTH: 5,
    MAX_ALLOWED_AA_SEQ_LENGTH: 50,
    VALID_AMINOACIDS_TOKENS: "ACDEFGHIKLMNPQRSTVWY"
  },
  modelConfigs: {
    predictive: {
      modeling_tool: buildModelingToolParams("predictive"),
      predictive_params: {
        general: {
          model: "XGBRegressor",
          model_params: {}
        }
      },
      sequence_encoder_params: buildSequenceEncoderParams()
    },
    evolutive: {
      modeling_tool: buildModelingToolParams("evolutive"),
      evolutive_params: {
        general: {
          n_repetitions: 2,
          model_params: {
            n_estimators: 2048,
            max_depth: 8
          },
          gen_new_batch_params: {
            batch_size: 30,
            ask_n_factor: 0,
            use_ask_from_skopt: false,
            top_factor: 0.6
          }
        }
      },
      sequence_encoder_params: buildSequenceEncoderParams()
    },
    generative: {
      modeling_tool: {
        general: {
          model_type: "generative",
          hardware: "gpu"
        }
      },
      // TODO: evaluate whether some of these parameters should be configurable
      // from the UI, especially "iterations" and "max_number_of_samples".
      main: {
        general: {
          sigmas: [0.01, 0.1, 1.0],
          max_number_of_samples: 100,
          max_sequence_length_allowed: 50
        }
      },
      model: {
        general: {
          iterations: 10000,
          batch_size: 64,
          model_dimensionality: 128,
          latent_dim: 128,
          n_critic: 10
        }
      }
    }
  },
  appModes: {
    // This app mode is hidden for now because there is no view that lets the
    // user set advanced configs.
    advancedModelConfiguration: {
      hide: true,
      label: "Enable advanced model configurations",
      tooltip:
        "This allows access to advanced model-specific configurations when creating a new model.",
      default: false
    },
    // This app mode is visible and turned on by default. It enables the
    // "aa-sequence" (constants.AA_SEQUENCE_VALUE) value type for descriptors.
    aaSequenceDescriptors: {
      hide: false,
      label: "Enable amino acid sequences as model descriptors",
      tooltip:
        "This allows using amino acid sequences as descriptors for Predictive and Evolutive models.",
      default: true
    },
    // This app mode is visible but turned off by default. It enables support
    // for degenerate "aa-sequence" (constants.AA_SEQUENCE_VALUE) descriptors.
    aaDegenerateSequences: {
      hide: false,
      label: "Support degenerate amino acid sequences as model descriptors",
      tooltip:
        "This allows using degenerate amino acid sequences as descriptors for Predictive and Evolutive models.",
      default: false
    },
    // This app mode is visible but turned off by default. It enables support
    // for the SMILES value type for molecular/chemical compound descriptors.
    smilesDescriptors: {
      hide: false,
      label: "Support SMILES notation as model descriptors",
      tooltip:
        "This allows using molecular descriptors represented by the SMILES notation for Predictive models.",
      default: false
    }
  }
};

// User-facing notice and error messages. They are attached after EvolveConfig
// is created because the first message interpolates the sequence-length limits
// from EvolveConfig.constants.
EvolveConfig.noticeMessages = {
  GENERATIVE_MODEL_INPUT_DATA_RESTRICTIONS: `Currently, generative models only support training sequences of ${EvolveConfig.constants.MIN_ALLOWED_AA_SEQ_LENGTH} to ${EvolveConfig.constants.MAX_ALLOWED_AA_SEQ_LENGTH} amino acids. Only IUPAC 20 amino acids are supported.`,
  EVOLVE_MODEL_DETAIL_UNKNOWN_ERROR:
    "Error rendering model details. Please contact the TeselaGen Team",
  // Fixed spelling: previously read "Uknown error."
  EVOLVE_GENERAL_UNKNOWN_ERROR: "Unknown error."
};

const tooltips = {
  designComplexity:
    "This is the number of all possible combinations of your descriptors or features.",
  cvTestR2:
    "Estimated Coefficient of Determination of the predictions of the model in a new and independent dataset. This value was obtained by using k-fold cross-validation.",
  cvTestMae:
    "The mean of the Mean Absolute Error\nfor the test sets obtained during cross-validation.",
  trainingR2Score:
    "The coefficient of determination of the predicted target given the predictions of the model on the training samples.",
  trainingMetrics: (
    <span>
      These metrics show the performance of the model on the training set, which
      includes the entire dataset.
      <br />
      <br /> These metrics are printed for reference, but they don’t predict the
      performance of the model on new and unseen sets of samples.
    </span>
  ),
  crossValidationMetrics: (
    <span>
      Cross-validation is a technique for assessing how the results of a
      statistical analysis will generalize to an independent data set. This
      technique consists of evaluating the performance of the model when trained
      and tested with different partitions of the dataset.
      <br />
      <br />
      These metrics are computed from a 5-fold cross-validation run, repeated 5
      times for different dataset partitions. On each of the 25 experiments, a
      model is trained on 80% of samples (train set) and evaluated on the
      remaining 20%, (the holdout or test set). Metrics are computed for both
      the train and test sets and their statistics over all experiments are
      reported in this section.
    </span>
  ),
  predictiveModelData: (
    <span>
      The <b>output data</b> option shows predictions for new samples generated
      from the permutations of features variants. If the design space complexity
      is too big to list all possible combinations, only the predicted samples
      with the highest estimated target values are shown. You can also set this
      view to show the predictions for the training samples.
      <br />
      <br />
      The <b>input data</b> option shows the data provided for model training.
    </span>
  ),
  evolutiveModelData: (
    <span>
      The <b>output data</b> option shows the candidates that the tool
      recommends for experimental evaluation. You can also set this view to show
      the predicted samples with the highest estimated target values. Those
      samples weren’t considered as recommended candidates because the predicted
      target value, however important, is not the only criterion when looking
      for promising designs.
      <br />
      <br />
      The <b>input data</b> option shows the data provided for model training.
    </span>
  ),
  generativeModelData: (
    <span>
      The <b>output data</b> option shows a set of new generated sequences by
      the model.
      <br />
      <br />
      The <b>input data</b> option shows the sequences provided for model
      training.
    </span>
  ),
  targetVsPredictionChart:
    "This scatter plot shows the actual values from the target (horizontal axis) of the samples \
    provided for training versus the predictions from the model (vertical axis).",
  cvR2LowWarning:
    "The R2 score of the cross-validation testing set is too low and probably the model \
    won’t perform well on new samples. Please ask for advice from TeselaGen's Data Science team."
};

/**
 * Help-panel content, one entry per topic. Each topic carries:
 *   - title:      heading text for the topic,
 *   - code:       identifier that repeats the topic's own key,
 *   - definition: list of { keyWord, description } entries,
 *   - FAQ:        list of { title, msg } question/answer entries.
 *
 * NOTE(review): the description/msg strings look like Markdown ("**bold**",
 * "######" headings, "  \n" hard line breaks) - confirm against the renderer.
 *
 * The long strings use backslash line continuations, so the leading
 * indentation of every continued source line is part of the runtime string.
 * Re-indenting these lines changes the text that is emitted.
 */
const evolveHelperContent = {
  // Topic: how the size of the design space is computed.
  designComplexity: {
    title: "Design Complexity",
    code: "designComplexity",
    definition: [
      {
        keyWord: "Design Complexity",
        description:
          "This is the number of all possible combinations of your descriptors.\
            In other words, it defines the total quantity of designs that can be\
            built from different combinations of the descriptor values.  \n  \nIn the case of \
            **categorical descriptors**, the design space complexity is the\
            multiplication of the number of levels (categories) per descriptor.\
            For example, if a design has 2 descriptors: a promoter and a gene,\
            with 2 different possible promoters and 6 different gene variants,\
            the design space complexity is 2 x 6 = 12.  \n  \nWhen **numerical descriptors** are \
            included in the design, the number of combinations is calculated by discretizing the \
            numerical range of these descriptors into a finite number of 'categories'."
        // Alternate, longer wording of the numerical-descriptor paragraph
        // (currently disabled):
        // the number of combinations is calculated by considering just a finite (and small) number of\
        // levels for these features. Thus, when calculating the complexity for\
        // these designs, numerical features are considered the same as a\
        // categorical descriptor with 4 levels.  \n  \n When the design contains **sequence descriptors**, \
        // these features aren’t considered as **control variables** so, as dependent features, \
        // they can’t be obtained for new combinations and no new designs are generated. \
        // However, when present, the complexity space is calculated by omitting the \
        // sequence columns (number of variants = 1). We are looking to change this restriction in the near future."
      }
    ],
    FAQ: [
      {
        title: "How new samples are generated?",
        msg:
          "In the case of categorical descriptors, DISCOVER just builds candidates from the combinatory \
          of the categories of different descriptors. When numeric descriptors are included, DISCOVER first discretizes \
          numeric descriptors into a finite number of levels, evenly spaced, within the numeric range of the feature in the training dataset. \
          These levels are combined in the same way as categorical features."
      }
    ]
  },
  // Topic: cross-validation in general, plus the CV statistics that are reported.
  crossvalidation: {
    title: "Cross-validation",
    code: "crossvalidation",
    definition: [
      {
        keyWord: "Cross-validation",
        description:
          "Cross-validation (CV) is any of various similar model validation techniques\
         for assessing how the results of a statistical analysis will generalize to an independent data set. \
         In a prediction problem, a model is usually given a dataset of known data on which training is run (training dataset), \
         and a dataset of unknown data (or first seen data) against which the model is tested (called the validation dataset or testing set). \
         The goal of cross-validation is to test the model's ability to predict new data that was not used in training, \
         in order to flag problems like overfitting or selection bias and to give an insight on how the model will generalize to an independent dataset \
         (i.e., an unknown dataset, for instance from a real problem).  \n  \nOne round of cross-validation involves partitioning a sample of data \
         into complementary subsets, performing the analysis on one subset (called the training set), and validating the analysis on the other subset \
         (called the validation set or testing set). To reduce variability, in most methods multiple rounds of cross-validation are performed using \
         different partitions, and the validation results are combined (e.g. averaged) over the rounds to give an estimate of \
         the model's predictive performance.  \n  \n In k-fold cross-validation, the original sample is randomly partitioned into k equal sized subsamples. \
         Of the k subsamples, a single subsample is retained as the validation data for testing the model, and the remaining k − 1 subsamples are used as training data. \
         The cross-validation process is then repeated k times, with each of the k subsamples used exactly once as the validation data. \
         The k results can then be averaged to produce a single estimation. The advantage of this method over repeated random sub-sampling is that all observations \
         are used for both training and validation, and each observation is used for validation exactly once. 10-fold cross-validation is commonly used, \
         but in general k remains an unfixed parameter.  \n  \n In repeated k-fold CV the data is randomly split into k partitions r times. \
         The performance of the model can thereby be averaged over several runs. Repetition is useful to reduce variance in score estimation, \
         and especially useful when datasets are small (<= 200 samples)."
      },
      {
        keyWord: "DISCOVER CV Metrics",
        description:
          "By default DISCOVER uses k = 5 and r = 5 and the CV metrics displayed are in terms of their statistics over the many CV partitions.  \n  \n  ###### These \
        statistics are:  \n  \n  **μ ± σ:** The mean (μ) and standard deviations (σ) of the metric over all evaluation sets. \
        Standard deviation is calculated with 0 degrees of freedom.  \n  \n  **Median**: The median of the metric for all sets in evaluation. \
        It is the value separating the higher half from the lower half of the calculated score values. The median is a location estimator that \
        is commonly considered to be more robust to outliers than the mean. \n  \n  **IQR:** The interquartile range, also called the midspread, \
        middle 50%, or H‑spread, is a measure of statistical dispersion, being equal to the difference between 75th and 25th percentiles. \
        It is usually considered more robust to outliers than the standard deviation."
      }
    ],
    FAQ: [
      {
        title: "Can I change cross-validation parameters used by the platform?",
        msg:
          "Not right now, but you can ask the TeselaGen’s Data Science team to do it for you. In future releases we are planning to enable configurable CV parameters to our users."
      }
    ]
  },
  // Topic: how candidates are ranked and selected after training.
  candidateSelection: {
    title: "Candidate Selection",
    code: "candidateSelection",
    definition: [
      {
        keyWord: "Candidate selection",
        description:
          "After training, the model is used to evaluate hundreds or thousands of new samples that are generated from the combinatory of available feature values. \
        This evaluation not only considers the predicted value of the target to optimize, but also the estimated uncertainty of each prediction. \
        This value may represent the information that can be learnt from testing a particular sample, and its use can help to attain a better \
        optima after 2 or more experimental iterations.  \n  \nOnce the evaluation is finished, the predicted value and uncertainty are weighted into a new score (Acquisition) \
        that is used to rank the candidates. Candidates are selected sequentially and Acquisition scores are changed dynamically to reflect \
        the expected information gain of each selected candidate.  \n  \nAs only the original acquisition scores are reported in the platform, \
        sometimes it may seem that some candidates with high Acquisition scores were omitted from the list of suggested candidates. \
        However, this is due to the fact that previous selected candidates, on sequential selection, can modify the original Acquisition scores for the remaining samples."
      }
    ],
    FAQ: [
      {
        title: "When is candidate selection used?",
        msg:
          "Candidate selection is only performed for DISCOVER Evolutive models."
      }
    ]
  },
  // Topic: the regression metrics (MAE, RMSE, R2) shown for a model.
  statisticalMetrics: {
    title: "Statistical Metrics",
    code: "statisticalMetrics",
    definition: [
      {
        keyWord: "Regression metrics",
        description:
          "**MAE**: Mean Absolute Error. It is the average of the absolute distances between the predicted and the real values. \
          MAE units are the same as the target’s.  \n  \n  **RMSE**: Root Mean Square Error. It is the square root of the average of \
          the squared errors.  \n  \n  **R2 score**: Coefficient of determination, is the proportion of the variation in the real target \
          that is predictable from the prediction. It normally ranges from 0 to 1, but can also be negative when the fit is worse than \
          a horizontal prediction located at the target's average. A value of 1 indicates that the regression predictions perfectly fits the data."
      }
    ],
    FAQ: [
      {
        title: "What are these metrics used for?",
        msg:
          "These metrics help to evaluate the performance of a model. Cross-validation test metrics tell us what can be expected \
      from the model when applied to new and unseen samples or designs.  \n  \nCV test MAE error is easy to interpret, as it is in the same units as the target, \
      it signals the average absolute error we can expect on the predictions. RMSE and R2 can be more helpful when comparing 2 or more models' performance in similar datasets."
      }
    ]
  }
};

// Module-level view toggles, keyed by view name (true = enabled).
// This is a separate object from `EvolveConfig.evolveViews` and uses a
// different set of keys.
const evolveViews = {};
["studies", "dashboard", "importer", "settings"].forEach(viewName => {
  evolveViews[viewName] = true;
});
// MvpAssays is the only view that is switched off.
evolveViews.MvpAssays = false;

export {
  statusConstants,
  EvolveConfig,
  evolveHelperContent,
  evolveViews,
  tooltips
};
