Tripal Plant PopGen Submit
submit_all.php File Reference

Go to the source code of this file.

Functions

 tpps_check_organisms ($row, array &$options=array())
 
 tpps_clean_state (array &$form_state)
 
 tpps_generate_popstruct ($study_accession, $vcf_location)
 
 tpps_get_code_parts ($part)
 
 tpps_get_env_response ($layer_id, $lat, $long)
 
 tpps_get_environmental_layer_data ($layer_id, $lat, $long, $param)
 
 tpps_get_species_codes ($genus, $species)
 
 tpps_job_logger_write ($string, $replacements=[])
 
 tpps_other_marker_headers ($fid, array $cols)
 
 tpps_process_accession ($row, array &$options, $job=NULL)
 
 tpps_process_environment_layers ($row, array &$options=array())
 
 tpps_process_genotype_spreadsheet ($row, array &$options=array())
 
 tpps_process_phenotype_data ($row, array &$options=array())
 
 tpps_process_phenotype_meta ($row, array &$options=array())
 
 tpps_process_snp_association ($row, array &$options=array())
 
 tpps_refine_phenotype_meta (array &$meta, array $time_options=array(), TripalJob &$job=NULL)
 
 tpps_ssrs_headers ($fid, $ploidy)
 
 tpps_submit_all ($accession, TripalJob $job=NULL)
 
 tpps_submit_environment (array &$form_state, $i, TripalJob &$job=NULL)
 
 tpps_submit_genotype (array &$form_state, array $species_codes, $i, TripalJob &$job=NULL)
 
 tpps_submit_page_1 (array &$form_state, TripalJob &$job=NULL)
 
 tpps_submit_page_2 (array &$form_state, TripalJob &$job=NULL)
 
 tpps_submit_page_3 (array &$form_state, TripalJob &$job=NULL)
 
 tpps_submit_page_4 (array &$form_state, TripalJob &$job=NULL)
 
 tpps_submit_phenotype (array &$form_state, $i, TripalJob &$job=NULL)
 
 tpps_submit_summary (array &$form_state)
 
 tpps_submit_vcf_render_genotype_combination ($raw_value, $ref, $alt)
 

Variables

 $tpps_job = NULL
 
 $tpps_job_logger = NULL
 

Detailed Description

Defines function tpps_submit_all and its helper functions.

The functions defined in this file do not actually submit the genotype, phenotype, or environmental data collected from page 4. That data is instead submitted through a Tripal job due to the size of the data.

Definition in file submit_all.php.

Function Documentation

◆ tpps_check_organisms()

tpps_check_organisms (   $row,
array &  $options = array() 
)

Checks whether a row from an accession file belongs to the organism named in $options['search'], and sets $options['found'] to TRUE when it does.

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 2206 of file submit_all.php.

/**
 * Flags whether an accession-file row belongs to the organism being sought.
 *
 * The organism name of the row is read from the 'org' column when that cell
 * is set; otherwise it is assembled as "<genus> <species>" from the 'genus'
 * and 'species' columns. When the name equals $options['search'],
 * $options['found'] is set to TRUE. Nothing is written when it differs.
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator(). Reads 'cols'
 *   and 'search'; may write 'found'.
 */
function tpps_check_organisms($row, array &$options = array()) {
  $column_ids = $options['cols'];
  $wanted = &$options['search'];
  $row_organism = $row[$column_ids['org']] ?? "{$row[$column_ids['genus']]} {$row[$column_ids['species']]}";

  // Guard clause: rows for any other organism leave the options untouched.
  if ($wanted != $row_organism) {
    return;
  }

  $options['found'] = TRUE;
}

◆ tpps_clean_state()

tpps_clean_state ( array &  $form_state)

Cleans unnecessary information from the form state.

Uses tpps_form_state_info() as a helper function.

Parameters
array $form_state: The form state to be cleaned.

Definition at line 3471 of file submit_all.php.

/**
 * Cleans unnecessary information from the form state.
 *
 * The 'ids' entry is dropped, then tpps_form_state_info() fills a fresh
 * array from what remains, and that fresh array replaces $form_state.
 *
 * @param array $form_state
 *   The form state to be cleaned. Overwritten in place.
 */
function tpps_clean_state(array &$form_state) {
  unset($form_state['ids']);

  $cleaned = array();
  tpps_form_state_info($cleaned, $form_state);

  $form_state = $cleaned;
}
tpps_form_state_info(array &$new, array &$old)
Definition: form_utils.inc:290

◆ tpps_generate_popstruct()

tpps_generate_popstruct (   $study_accession,
  $vcf_location 
)

TPPS Generate Population Structure.

Note: FastStructure requires `pip install pip==9.0.1` in order to install its dependencies.

Definition at line 1821 of file submit_all.php.

/**
 * TPPS Generate Population Structure.
 *
 * Pipeline, all run inside public://popstruct_temp/<study accession>:
 *   1. PLINK converts the VCF (gunzipped first when needed) to bed/bim/fam.
 *   2. fastStructure is run for K = 1..10.
 *   3. chooseK.py suggests K; the larger of the two values it reports is used.
 *   4. awk/sed build the ID panel files from the .fam file.
 *   5. wc -l counts the population.
 *   6. popstruct_from_panel.R builds the PopPanel file.
 *   7. cut removes the first column of the PopPanel file.
 *   8. The rows of public.cartogratree_popstruct_layer for this study are
 *      replaced with the computed population group of every tree that is
 *      found in public.ct_trees.
 *
 * FastStructure requires pip install pip==9.0.1 to install dependencies.
 *
 * Changes compared to the previous implementation:
 *   - The longitude column now receives the longitude (it used to receive
 *     the latitude a second time).
 *   - All SQL uses placeholders instead of string concatenation, and the
 *     tree lookup is skipped when no tree was parsed (no more "IN ()").
 *   - Every path or value placed on a shell command line is escaped.
 *   - The work directory is only created when missing, and the run aborts
 *     when it cannot be created or when chooseK reports no usable K.
 *
 * @param string $study_accession
 *   The study accession; used as directory name, file prefix and tree id
 *   prefix.
 * @param string $vcf_location
 *   Path of the VCF file. A location containing ".gz" is gunzipped into the
 *   work directory first.
 */
function tpps_generate_popstruct($study_accession, $vcf_location) {
  // Perform basic checks.
  if ($study_accession == "") {
    tpps_job_logger_write("[FATAL ERROR] You must enter a non-empty study accession. Aborting.\n");
    return;
  }

  if ($vcf_location == "") {
    tpps_job_logger_write("[FATAL ERROR] You must enter a non-empty vcf_location. Aborting.\n");
    return;
  }

  // Get the correct path of the public directory.
  $public_path = drupal_realpath('public://');
  tpps_job_logger_write('[PUBLIC PATH] ' . $public_path . "\n");
  echo('[PUBLIC PATH] ' . $public_path . "\n");

  // Get the module path.
  $module_path = DRUPAL_ROOT . '/' . drupal_get_path('module', 'tpps');
  tpps_job_logger_write('[MODULE PATH] ' . $module_path . "\n");
  echo('[MODULE PATH] ' . $module_path . "\n");

  // Tools path.
  $tools_path = $module_path . "/tools";
  tpps_job_logger_write('[TOOLS PATH] ' . $tools_path . "\n");
  echo('[TOOLS PATH] ' . $tools_path . "\n");

  // Make the work directory. It already exists on every re-run for the same
  // study, so only create it when missing; the final is_dir() covers a
  // concurrent creation between the check and the mkdir() call.
  $popstruct_temp_dir = $public_path . '/popstruct_temp/' . $study_accession;
  if (!is_dir($popstruct_temp_dir) and !mkdir($popstruct_temp_dir, 0755, TRUE) and !is_dir($popstruct_temp_dir)) {
    tpps_job_logger_write("[FATAL ERROR] Could not create directory $popstruct_temp_dir. Aborting.\n");
    return;
  }

  // In case there are already files in here, delete them. glob() returns
  // FALSE on error, hence the fallback to an empty list.
  $files = glob($popstruct_temp_dir . '/*') ?: array();
  foreach ($files as $file) {
    if (is_file($file)) {
      tpps_job_logger_write("[CLEAN UP BEFORE BEGIN] Removing $file from the popstruct directory");
      echo("[CLEAN UP BEFORE BEGIN] Removing $file from the popstruct directory\n");
      tpps_job_logger_write("[FILE CLEAN/DELETE] $file");
      echo("[FILE CLEAN/DELETE] $file\n");
      unlink($file);
    }
  }

  // The original location is kept untouched; $vcf_location_temp is the file
  // that PLINK actually reads. A gunzipped copy lives in the work directory
  // and is therefore removed by the clean-up loop of the next run.
  $vcf_location_temp = $vcf_location;
  if (stripos($vcf_location, '.gz') !== FALSE) {
    // Use the file name without the .gz extension as the gunzipped name.
    $file_name_without_ext = basename($vcf_location, ".gz");
    $vcf_location_temp = $popstruct_temp_dir . "/" . $file_name_without_ext;
    shell_exec("gunzip -c " . escapeshellarg($vcf_location) . " > " . escapeshellarg($vcf_location_temp));
  }

  tpps_job_logger_write("[VCF_LOCATION_TEMP] $vcf_location_temp");
  echo("[VCF_LOCATION_TEMP] $vcf_location_temp");

  // Common prefix of every PLINK / fastStructure output file.
  $plink_prefix = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_plink';
  $plink_prefix_arg = escapeshellarg($plink_prefix);

  // Step 1 - Perform PLINK.
  tpps_job_logger_write("PERFORM PLINK");
  echo("PERFORM PLINK");
  echo shell_exec(escapeshellarg($tools_path . '/plink/plink') . ' --vcf ' . escapeshellarg($vcf_location_temp) . ' --allow-extra-chr --double-id --make-bed --out ' . $plink_prefix_arg);

  // Step 2 - Fast Structure run.
  // To get fastStruct installed, we need the dependencies.
  // These dependencies seem to need Python 3.8 / pip3.
  // For CENTOS:
  // sudo yum -y groupinstall "Development Tools"
  // sudo yum -y install openssl-devel bzip2-devel libffi-devel xz-devel.
  $python_env = 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib; export CFLAGS="-I/usr/local/include"; export LDFLAGS="-L/usr/local/lib"; python ';
  for ($i = 1; $i <= 10; $i++) {
    tpps_job_logger_write("Performing FastStructure for k = $i\n");
    echo("Performing FastStructure for k = $i\n");
    $fast_structure_cmd = $python_env . escapeshellarg($tools_path . '/fastStructure/structure.py') . ' -K ' . $i . ' --input=' . $plink_prefix_arg . ' --output=' . $plink_prefix_arg . ' --full;';
    echo shell_exec($fast_structure_cmd);
  }

  // Step 3 - select K from the previous runs.
  tpps_job_logger_write("[INFO] Perform chooseK...\n");
  echo("[INFO] Perform chooseK...\n");
  $chooseK_cmd = $python_env . escapeshellarg($tools_path . '/fastStructure/chooseK.py') . ' --input=' . $plink_prefix_arg;
  $chooseK_output = shell_exec($chooseK_cmd);
  echo $chooseK_output . "\n";

  // Step 3b - from the output, take the highest of the two suggested values.
  $chooseK_optimal = 0;
  $chooseK_markers = array(
    'Model complexity that maximizes marginal likelihood = ',
    'Model components used to explain structure in data = ',
  );
  foreach ($chooseK_markers as $marker) {
    if (preg_match('/' . preg_quote($marker, '/') . '\s*(\d+)/', (string) $chooseK_output, $matches)) {
      $chooseK_optimal = max($chooseK_optimal, (int) $matches[1]);
    }
  }
  tpps_job_logger_write("Optimal K is " . $chooseK_optimal . "\n");
  echo("Optimal K is " . $chooseK_optimal . "\n");

  // Without a usable K there is no meanQ file to read; stop before the
  // existing popstruct rows of this study are deleted.
  if ($chooseK_optimal < 1) {
    tpps_job_logger_write("[FATAL ERROR] chooseK did not report a usable K value. Aborting.\n");
    return;
  }

  // Step 4 - awk and sed to clean up files.
  tpps_job_logger_write("AWK AND SED adjustments");
  echo("AWK AND SED adjustments");
  $id_panel = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_IDPanel.txt';
  $id_panel_tab = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_IDPaneltab.txt';
  $id_fam_panel = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_IDfamPanel.txt';
  $cmd_custom_cmds1 = "awk 'BEGIN { OFS = \"_\" } ;{print $1,$2}' " . escapeshellarg($plink_prefix . '.fam') . ' > ' . escapeshellarg($id_panel) . ';';
  $cmd_custom_cmds1 .= "sed 's/_/\t/g' " . escapeshellarg($id_panel) . ' > ' . escapeshellarg($id_panel_tab) . ';';
  $cmd_custom_cmds1 .= "awk '{print $1,$2}' " . escapeshellarg($id_panel_tab) . ' > ' . escapeshellarg($id_fam_panel) . ';';
  echo shell_exec($cmd_custom_cmds1);

  // Step 5 - count the population. wc prints "<count> <file>"; the integer
  // cast keeps the leading number only.
  $count_output = shell_exec("wc -l " . escapeshellarg($id_panel));
  tpps_job_logger_write($count_output . "\n");
  echo($count_output . "\n");
  $population_count = (int) trim((string) $count_output);
  tpps_job_logger_write("Population count:" . $population_count . "\n");
  echo("Population count:" . $population_count . "\n");

  // Step 6 - Execute R script which generates popstruct from Panel using
  // the chooseK optimal value.
  tpps_job_logger_write("RScript popstruct_from_panel execution\n");
  echo("RScript popstruct_from_panel execution\n");
  $pop_panel = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_PopPanel.txt';
  $pop_panel_final = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_PopPanel_final.txt';
  $cmd_custom_r_code = 'Rscript ' . escapeshellarg($tools_path . '/popstruct_from_panel.R') . ' ';
  $cmd_custom_r_code .= escapeshellarg($study_accession) . ' ';
  $cmd_custom_r_code .= $population_count . ' ';
  $cmd_custom_r_code .= escapeshellarg($plink_prefix . '.' . $chooseK_optimal . '.meanQ') . ' ';
  $cmd_custom_r_code .= escapeshellarg($id_fam_panel) . ' ';
  $cmd_custom_r_code .= escapeshellarg($pop_panel);
  echo shell_exec($cmd_custom_r_code);

  // Step 7 - Cleaning up PopPanel columns (drop the first space-delimited
  // column).
  $cmd_remove_column_code = "cut -d\\ -f2- " . escapeshellarg($pop_panel) . ' > ' . escapeshellarg($pop_panel_final);
  echo shell_exec($cmd_remove_column_code);

  // Step 8 - read the output file and collect the population group of each
  // tree.
  $file_handle = fopen($pop_panel_final, "r");
  if (!$file_handle) {
    tpps_job_logger_write("[ERROR] Could not open $pop_panel_final for reading.\n");
    return;
  }

  $tree_data = array();
  while (($line = fgets($file_handle)) !== FALSE) {
    $line_space_parts = explode(" ", $line);
    if (count($line_space_parts) < 4) {
      continue;
    }

    $tree_id = $study_accession . '-' . $line_space_parts[0];
    // The 4th column may carry the line ending; trim before converting.
    $population_group = trim($line_space_parts[3]);
    if (strpos($population_group, 'e') !== FALSE) {
      // Values written in scientific notation are assigned to group 1.
      $population_group = 1;
    }
    else {
      $population_group = intval(ceil((float) $population_group)) + 1;
    }
    echo $population_group . ',';

    $tree_data[$tree_id] = array(
      'tree_id' => $tree_id,
      'population' => $population_group,
      'latitude' => 0,
      'longitude' => 0,
      'study_accession' => $study_accession,
    );
  }
  fclose($file_handle);

  // Remove all records from the popstruct table for this study.
  tpps_job_logger_write("Removing all popstruct data for accession $study_accession\n");
  echo("Removing all popstruct data for accession $study_accession\n");
  chado_query('DELETE FROM public.cartogratree_popstruct_layer WHERE study_accession = :study_accession', array(
    ':study_accession' => $study_accession,
  ));

  // Look up the location of every tree and store one layer row per tree.
  // The ids are queried in chunks so the number of bound parameters per
  // statement stays small; array_chunk() yields nothing for an empty list,
  // so no query with an empty IN () is ever sent.
  $insert_sql = 'INSERT INTO public.cartogratree_popstruct_layer (uniquename,population,study_accession,latitude,longitude) ';
  $insert_sql .= 'VALUES (:uniquename, :population, :study_accession, :latitude, :longitude)';
  foreach (array_chunk(array_column($tree_data, 'tree_id'), 1000) as $tree_id_chunk) {
    $results = chado_query('SELECT * FROM public.ct_trees WHERE uniquename IN (:tree_ids)', array(
      ':tree_ids' => $tree_id_chunk,
    ));
    foreach ($results as $row) {
      $tree_id = $row->uniquename;
      $tree_data[$tree_id]['latitude'] = $row->latitude;
      $tree_data[$tree_id]['longitude'] = $row->longitude;
      chado_query($insert_sql, array(
        ':uniquename' => $tree_id,
        ':population' => $tree_data[$tree_id]['population'],
        ':study_accession' => $study_accession,
        ':latitude' => $tree_data[$tree_id]['latitude'],
        ':longitude' => $tree_data[$tree_id]['longitude'],
      ));
    }
  }

  tpps_job_logger_write("POPSTRUCT completed.\n");
  echo("POPSTRUCT completed.\n");
}
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160

◆ tpps_get_code_parts()

tpps_get_code_parts (   $part)

Helper function for tpps_get_species_codes().

Generate all possible 2-letter organism code parts.

Parameters
string $part: The part of the organism name, either genus or species.
Returns
Generator|array Yields each possible code part in the desired order.

Definition at line 3514 of file submit_all.php.

/**
 * Helper function for tpps_get_species_codes().
 *
 * Generates all possible 2-letter organism code parts: every ordered pair of
 * characters (first character before the second one in $part), lower-cased,
 * walking the first position from left to right and, for each, the second
 * position from left to right.
 *
 * Code parts must not repeat a letter. The comparison is done on the
 * lower-cased name, so "Aa" no longer slips through as the code part "aa".
 *
 * @param string $part
 *   The part of the organism name, either genus or species.
 *
 * @return Generator|array
 *   Yields each possible code part in the desired order. Names shorter than
 *   two characters yield nothing.
 */
function tpps_get_code_parts($part) {
  // Lower-case once so the repeat check and the yielded value agree.
  $letters = strtolower((string) $part);
  $length = strlen($letters);

  for ($first = 0; $first <= $length - 2; $first++) {
    for ($second = $first + 1; $second <= $length - 1; $second++) {
      // Code parts should not repeat letters.
      if ($letters[$first] === $letters[$second]) {
        continue;
      }

      yield $letters[$first] . $letters[$second];
    }
  }
}

◆ tpps_get_env_response()

tpps_get_env_response (   $layer_id,
  $lat,
  $long 
)

This function loads data for a CartograPlant layer at a lat/long coordinate.

Parameters
int $layer_id: The identifier of the CartograPlant environmental layer.
float $lat: The latitude coordinate being queried.
float $long: The longitude coordinate being queried.
Returns
string The environmental data for that layer at that lat/long coordinate.

Definition at line 3122 of file submit_all.php.

/**
 * This function loads data for a CartograPlant layer at a lat/long coordinate.
 *
 * The layer name is looked up in the cartogratree_layers table and a WMS
 * GetFeatureInfo request is sent to the GeoServer instance for a 1x1 pixel
 * bounding box that starts at the requested coordinate.
 *
 * @param int $layer_id
 *   The identifier of the CartograPlant environmental layer.
 * @param float $lat
 *   The latitude coordinate being queried.
 * @param float $long
 *   The longitude coordinate being queried.
 *
 * @return string|false|null
 *   The raw response body as returned by file_get_contents() (FALSE when the
 *   request fails). NULL when the cartogratree_layers table does not exist
 *   or holds no named layer for $layer_id.
 */
function tpps_get_env_response($layer_id, $lat, $long) {
  if (!db_table_exists('cartogratree_layers')) {
    return NULL;
  }

  $layer = db_select('cartogratree_layers', 'l')
    ->fields('l', array('name'))
    ->condition('layer_id', $layer_id)
    ->execute()
    ->fetchObject();

  // fetchObject() gives FALSE for an unknown layer id; there is nothing to
  // ask the server for in that case.
  if (empty($layer->name)) {
    return NULL;
  }
  // The name is data from the database, so encode it before it becomes part
  // of the query string.
  $layers = rawurlencode($layer->name);

  $url = "http://treegenesdev.cam.uchc.edu:8080/geoserver/ct/wms?";
  $serv = "WMS";
  $ver = "1.3.0";
  $req = "GetFeatureInfo";
  $srs = "EPSG:4326";
  $format = "application/json";
  // The bounding box only needs to be a tiny, non-empty area.
  $bigger_lat = $lat + 0.0000001;
  $bigger_long = $long + 0.0000001;
  $bbox = "$lat,$long,$bigger_lat,$bigger_long";
  $pixels = "width=1&height=1&X=0&Y=0";

  $url .= "service=$serv&version=$ver&request=$req&layers=$layers&srs=$srs&format=$format&query_layers=$layers&bbox=$bbox&$pixels";

  return file_get_contents($url);
}

◆ tpps_get_environmental_layer_data()

tpps_get_environmental_layer_data (   $layer_id,
  $lat,
  $long,
  $param 
)

This function parses and returns a data point from a CartograPlant layer.

The data point for the layer at the specified location is obtained by calling tpps_get_env_response, and the resulting response string is parsed to return the specified parameter.

Parameters
int $layer_id: The identifier of the CartograPlant environmental layer.
float $lat: The latitude coordinate being queried.
float $long: The longitude coordinate being queried.
string $param: The name of the parameter type.
Returns
mixed The parsed environmental data. If no valid data was found, return NULL.

Definition at line 3093 of file submit_all.php.

/**
 * This function parses and returns a data point from a CartograPlant layer.
 *
 * The response for the layer at the specified location is obtained by
 * calling tpps_get_env_response(). The response is split into lines, the
 * first two and last two lines are discarded, and the remaining lines are
 * read as "name = value" pairs.
 *
 * @param int $layer_id
 *   The identifier of the CartograPlant environmental layer.
 * @param float $lat
 *   The latitude coordinate being queried.
 * @param float $long
 *   The longitude coordinate being queried.
 * @param string $param
 *   The name of the parameter type.
 *
 * @return mixed
 *   The trimmed value of the first line whose name equals $param. NULL when
 *   there is no response or no such line.
 */
function tpps_get_environmental_layer_data($layer_id, $lat, $long, $param) {
  $response = tpps_get_env_response($layer_id, $lat, $long);
  // The helper gives NULL/FALSE when no response could be loaded.
  if (!is_string($response) or $response === '') {
    return NULL;
  }

  $lines = array_slice(explode("\n", $response), 2, -2);
  foreach ($lines as $line) {
    $item = explode("=", $line);
    // Lines without "=" carry no value and cannot match.
    if (!isset($item[1])) {
      continue;
    }
    if (trim($item[0]) === (string) $param) {
      return trim($item[1]);
    }
  }

  return NULL;
}
tpps_get_env_response($layer_id, $lat, $long)

◆ tpps_get_species_codes()

tpps_get_species_codes (   $genus,
  $species 
)

Generate all possible 4-letter TreeGenes organism codes.

Parameters
string $genus: The genus of the organism.
string $species: The species of the organism.
Returns
Generator|array Yields each possible organism code in the desired order.

Definition at line 3489 of file submit_all.php.

/**
 * Generate all possible 4-letter TreeGenes organism codes.
 *
 * Each code is a genus code part followed by a species code part (both from
 * tpps_get_code_parts()), with the first letter upper-cased. The genus parts
 * form the outer loop. A code that was already produced is not yielded
 * again.
 *
 * @param string $genus
 *   The genus of the organism.
 * @param string $species
 *   The species of the organism.
 *
 * @return Generator|array
 *   Yields each possible organism code in the desired order.
 */
function tpps_get_species_codes($genus, $species) {
  // Codes produced so far, kept as array keys for cheap look-ups.
  $seen = array();

  foreach (tpps_get_code_parts($genus) as $genus_half) {
    foreach (tpps_get_code_parts($species) as $species_half) {
      $candidate = ucfirst($genus_half . $species_half);
      if (isset($seen[$candidate])) {
        continue;
      }

      yield $candidate;
      $seen[$candidate] = TRUE;
    }
  }
}
tpps_get_code_parts($part)

◆ tpps_job_logger_write()

tpps_job_logger_write (   $string,
  $replacements = [] 
)

Writes a timestamped message to the job log file whose handle is stored in the global $tpps_job_logger.

Parameters
string $string: The string to write to the job log file using the tpps_job_logger object.

Definition at line 160 of file submit_all.php.

/**
 * Writes a timestamped line to the TPPS job log file.
 *
 * Every key of $replacements found in $string is replaced by its value (in
 * array order), the current time is put in front, and the result is written
 * to the handle stored in the global $tpps_job_logger and flushed at once.
 *
 * The logger global starts out as NULL. When no open log file handle is
 * available the message is dropped silently instead of passing NULL to
 * fwrite(), which raises a TypeError on PHP 8 that the catch block below
 * would not intercept.
 *
 * @param string $string
 *   Write string to the job log file using the tpps_job_logger object.
 * @param array $replacements
 *   Map of search string => replacement applied to $string before writing.
 */
function tpps_job_logger_write($string, $replacements = []) {
  global $tpps_job_logger;

  $handle = $tpps_job_logger['log_file_handle'] ?? NULL;
  if (!is_resource($handle)) {
    return;
  }

  try {
    // str_replace() with arrays applies the pairs one after the other, just
    // like replacing them in a loop.
    $string = str_replace(array_keys($replacements), array_values($replacements), $string);

    // Add timestamp.
    $timestamp_now = date('m/d/y g:i:s A', time());
    $string = "\n" . $timestamp_now . " " . $string;

    fwrite($handle, $string);
    fflush($handle);
  }
  catch (Exception $e) {
    print_r($e->getMessage());
  }
}
$tpps_job_logger
Definition: submit_all.php:13

◆ tpps_other_marker_headers()

tpps_other_marker_headers (   $fid,
array  $cols 
)

This function formats headers for the "other" type genotype markers.

The headers for the "other" genotype marker types are set by the users, so we need to return the names of the headers they have indicated, rather than the values provided in the file-groups array.

Parameters
int $fid: The Drupal managed file id of the file.
array $cols: An array of columns indicating which of the columns contain genotype data.
Returns
array The array of standardized headers for the spreadsheet.

Definition at line 2957 of file submit_all.php.

/**
 * This function formats headers for the "other" type genotype markers.
 *
 * The headers of the file are loaded with tpps_file_headers() and the entry
 * of every column listed in $cols is copied over, keyed by the column.
 *
 * @param int $fid
 *   The Drupal managed file id of the file.
 * @param array $cols
 *   An array of columns indicating which of the columns contain genotype
 *   data.
 *
 * @return array
 *   The header of each requested column, keyed by column.
 */
function tpps_other_marker_headers($fid, array $cols) {
  $all_headers = tpps_file_headers($fid);

  $selected = array();
  foreach ($cols as $column_id) {
    $selected[$column_id] = $all_headers[$column_id];
  }

  return $selected;
}
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:972

◆ tpps_process_accession()

tpps_process_accession (   $row,
array &  $options,
  $job = NULL 
)

This function processes a single row of an accession file.

This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 3218 of file submit_all.php.

/**
 * Builds a stockprop record that is linked to a stock through '#fk'.
 *
 * @param mixed $type_id
 *   The cvterm id describing the property.
 * @param mixed $value
 *   The property value.
 * @param string $stock_key
 *   The key of the stock record (in the same record batch) it belongs to.
 *
 * @return array
 *   The record in the shape used for the 'stockprop' record group.
 */
function _tpps_process_accession_stockprop($type_id, $value, $stock_key) {
  return array(
    'type_id' => $type_id,
    'value' => $value,
    '#fk' => array(
      'stock' => $stock_key,
    ),
  );
}

/**
 * Sends a location string to the OpenCage geocoder.
 *
 * @param string $location
 *   The free-text location.
 * @param string $geo_api_key
 *   The OpenCage API key.
 *
 * @return object|null
 *   The decoded JSON response, or NULL when the request failed or the body
 *   could not be decoded to an object.
 */
function _tpps_process_accession_geocode($location, $geo_api_key) {
  $query = urlencode($location);
  $url = "https://api.opencagedata.com/geocode/v1/json?q=$query&key=$geo_api_key";
  $body = file_get_contents($url);
  if ($body === FALSE) {
    return NULL;
  }
  $decoded = json_decode($body);
  return is_object($decoded) ? $decoded : NULL;
}

/**
 * This function processes a single row of an accession file.
 *
 * This function is meant to be used with tpps_file_iterator().
 *
 * For the row it queues, in $options['records']:
 *   - a stock and a project_stock record for the tree;
 *   - when the clone column holds a value other than $options['empty'], the
 *     same for the clone plus a stock_relationship, after which the clone is
 *     the stock that receives the location properties;
 *   - location stockprops, taken from (in this order of preference) the
 *     lat/lng columns, the state/country (+ county, district) columns, or
 *     the population group; the last two are geocoded when an API key is
 *     configured;
 *   - latitude, longitude, GPS type and (for approximate coordinates)
 *     precision stockprops when coordinates are available.
 * Once 'tpps_record_group' rows have been queued they are inserted with
 * tpps_chado_insert_multi() and the resulting stock ids are stored in
 * $options['tree_info'].
 *
 * NOTE(review): coordinates are tested with empty(), so a latitude or
 * longitude of exactly 0 is treated as missing. Left unchanged; confirm
 * whether that is intended.
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator().
 * @param mixed $job
 *   Job used for log messages, but only when the global $tpps_job is not
 *   set; the global keeps precedence as before. Logging is skipped when
 *   neither is available.
 */
function tpps_process_accession($row, array &$options, $job = NULL) {
  global $tpps_job;
  $job = $tpps_job ?? $job;
  $cvterm = $options['cvterms'];
  $records = &$options['records'];
  $accession = $options['accession'];
  $cols = $options['column_ids'];
  $saved_ids = &$options['saved_ids'];
  $stock_count = &$options['stock_count'];
  $multi_insert_options = $options['multi_insert'];
  $tree_info = &$options['tree_info'];
  $record_group = variable_get('tpps_record_group', 10000);
  $geo_api_key = variable_get('tpps_geocode_api_key', NULL);
  $site_based = FALSE;
  $exact = $options['exact'] ?? NULL;
  $precision = $options['precision'] ?? NULL;
  // Stay NULL when none of the location branches below finds coordinates.
  $lat = NULL;
  $lng = NULL;

  $tree_id = $row[$cols['id']];
  $id = $saved_ids['organism_ids'][$options['org_num']];
  if ($options['org_names']['number'] != 1 and $options['single_file']) {
    // One file for several organisms: the row itself names its organism.
    $org_full_name = $row[$cols['org']] ?? "{$row[$cols['genus']]} {$row[$cols['species']]}";
    $id = $saved_ids['organism_ids'][array_search($org_full_name, $options['org_names'])];
  }

  $records['stock'][$tree_id] = array(
    'uniquename' => "$accession-$tree_id",
    'type_id' => $cvterm['org'],
    'organism_id' => $id,
  );
  $tree_info[$tree_id] = array(
    'organism_id' => $id,
  );

  $records['project_stock'][$tree_id] = array(
    'project_id' => $saved_ids['project_id'],
    '#fk' => array(
      'stock' => $tree_id,
    ),
  );

  if (isset($row[$cols['clone']]) and $row[$cols['clone']] !== $options['empty']) {
    $clone_name = $tree_id . '-' . $row[$cols['clone']];

    $records['stock'][$clone_name] = array(
      'uniquename' => $accession . '-' . $clone_name,
      'type_id' => $cvterm['clone'],
      'organism_id' => $id,
    );
    $tree_info[$clone_name] = array(
      'organism_id' => $id,
    );

    $records['project_stock'][$clone_name] = array(
      'project_id' => $saved_ids['project_id'],
      '#fk' => array(
        'stock' => $clone_name,
      ),
    );
    if ($job) {
      $job->logMessage('[INFO] CV Terms Data' . print_r($cvterm, 1));
    }
    $records['stock_relationship'][$clone_name] = array(
      'type_id' => $cvterm['has_part'],
      '#fk' => array(
        'subject' => $tree_id,
        'object' => $clone_name,
      ),
    );

    // From here on the properties are attached to the clone.
    $tree_id = $clone_name;
  }

  if (!empty($row[$cols['lat']]) and !empty($row[$cols['lng']])) {
    $raw_coord = $row[$cols['lat']] . ',' . $row[$cols['lng']];
    $standard_coord = explode(',', (string) tpps_standard_coord($raw_coord));
    $lat = $standard_coord[0];
    // Absent when the coordinate could not be standardised.
    $lng = $standard_coord[1] ?? NULL;
  }
  elseif (!empty($row[$cols['state']]) and !empty($row[$cols['country']])) {
    $exact = FALSE;
    $records['stockprop']["$tree_id-country"] = _tpps_process_accession_stockprop($cvterm['country'], $row[$cols['country']], $tree_id);
    $records['stockprop']["$tree_id-state"] = _tpps_process_accession_stockprop($cvterm['state'], $row[$cols['state']], $tree_id);

    $location = "{$row[$cols['state']]}, {$row[$cols['country']]}";

    if (!empty($row[$cols['county']])) {
      $records['stockprop']["$tree_id-county"] = _tpps_process_accession_stockprop($cvterm['county'], $row[$cols['county']], $tree_id);
      $location = "{$row[$cols['county']]}, $location";
    }

    if (!empty($row[$cols['district']])) {
      $records['stockprop']["$tree_id-district"] = _tpps_process_accession_stockprop($cvterm['district'], $row[$cols['district']], $tree_id);
      $location = "{$row[$cols['district']]}, $location";
    }

    $tree_info[$tree_id]['location'] = $location;

    // Geocode each distinct location only once per run.
    if (isset($geo_api_key) and !array_key_exists($location, $options['locations'])) {
      $response = _tpps_process_accession_geocode($location, $geo_api_key);
      $options['locations'][$location] = $response->results[0]->geometry ?? NULL;

      // With several hits and only state-level input, prefer the hit that
      // the geocoder itself classifies as a state.
      if ($response and !empty($response->total_results) and $response->total_results > 1 and !isset($cols['district']) and !isset($cols['county'])) {
        foreach ($response->results as $item) {
          if ($item->components->_type == 'state') {
            $options['locations'][$location] = $item->geometry;
            break;
          }
        }
      }
    }
    $lat = $options['locations'][$location]->lat ?? NULL;
    $lng = $options['locations'][$location]->lng ?? NULL;
  }
  elseif (!empty($row[$cols['pop_group']])) {
    $site_based = TRUE;
    $location = $options['pop_group'][$row[$cols['pop_group']]];
    $coord = tpps_standard_coord($location);

    if ($coord) {
      $parts = explode(',', $coord);
      $lat = $parts[0];
      $lng = $parts[1];
    }
    else {
      // The population group is described by a place name, not coordinates.
      $records['stockprop']["$tree_id-location"] = _tpps_process_accession_stockprop($cvterm['loc'], $location, $tree_id);

      $tree_info[$tree_id]['location'] = $location;

      if (isset($geo_api_key)) {
        $result = $options['locations'][$location] ?? NULL;
        if (empty($result)) {
          $response = _tpps_process_accession_geocode($location, $geo_api_key);
          $result = ($response and !empty($response->total_results)) ? $response->results[0]->geometry : NULL;
          $options['locations'][$location] = $result;
        }

        if (!empty($result)) {
          $lat = $result->lat;
          $lng = $result->lng;
        }
      }
    }
  }

  if (!empty($lat) and !empty($lng)) {
    $records['stockprop']["$tree_id-lat"] = _tpps_process_accession_stockprop($cvterm['lat'], $lat, $tree_id);
    $records['stockprop']["$tree_id-long"] = _tpps_process_accession_stockprop($cvterm['lng'], $lng, $tree_id);
    $tree_info[$tree_id]['lat'] = $lat;
    $tree_info[$tree_id]['lng'] = $lng;

    $gps_type = "Site-based";
    if (!$site_based) {
      $gps_type = $exact ? "Exact" : "Approximate";
    }

    $records['stockprop']["$tree_id-gps-type"] = _tpps_process_accession_stockprop($cvterm['gps_type'], $gps_type, $tree_id);

    if ($gps_type == "Approximate" and !empty($precision)) {
      $records['stockprop']["$tree_id-precision"] = _tpps_process_accession_stockprop($cvterm['precision'], $precision, $tree_id);
    }
  }

  $stock_count++;
  if ($stock_count >= $record_group) {
    // Batch is full: insert it and start a new one.
    tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
    if ($job) {
      $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
    }
    $new_ids = tpps_chado_insert_multi($records, $multi_insert_options);
    tpps_job_logger_write('[INFO] - Done.');
    if ($job) {
      $job->logMessage('[INFO] - Done.');
    }
    foreach ($new_ids as $t_id => $stock_id) {
      $tree_info[$t_id]['stock_id'] = $stock_id;
    }

    $records = array(
      'stock' => array(),
      'stockprop' => array(),
      'stock_relationship' => array(),
      'project_stock' => array(),
    );
    $stock_count = 0;
  }
}
tpps_standard_coord($raw_coordinate)
$tpps_job
Definition: submit_all.php:14
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160

◆ tpps_process_environment_layers()

tpps_process_environment_layers (   $row,
array &  $options = array() 
)

This function processes a single row of a plant accession file.

This function populates the db with environmental data provided through CartograPlant layers. This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 2978 of file submit_all.php.

/**
 * This function processes a single row of a plant accession file.
 *
 * This function populates the db with environmental data provided through
 * CartograPlant layers. This function is meant to be used with
 * tpps_file_iterator().
 *
 * For the tree of the row, the stored GPS latitude and longitude stockprops
 * are read back from the database. Then, for every selected layer parameter,
 * the value at that coordinate is fetched with
 * tpps_get_environmental_layer_data() and queued as a phenotype plus
 * stock_phenotype record. Queued records are inserted with
 * tpps_chado_insert_multi() once 'tpps_record_group' of them have
 * accumulated. $options['suffix'] is incremented once per row.
 *
 * A tree without stored GPS stockprops is processed with NULL coordinates
 * instead of dereferencing a missing record, and job logging is skipped
 * when the global $tpps_job is not set.
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator().
 */
function tpps_process_environment_layers($row, array &$options = array()) {
  global $tpps_job;
  $job = $tpps_job;
  $id_col = $options['tree_id'];
  $records = &$options['records'];
  $tree_info = &$options['tree_info'];
  $layers_params = $options['layers_params'];
  $env_count = &$options['env_count'];
  $accession = $options['accession'];
  $suffix = &$options['suffix'];
  $env_cvterm = $options['env_cvterm'];
  $record_group = variable_get('tpps_record_group', 10000);

  $tree_id = $row[$id_col];
  $stock_id = $tree_info[$tree_id]['stock_id'];

  // Read the stored coordinates, latitude first and longitude second.
  $coords = array();
  foreach (array('lat' => 'gps_latitude', 'long' => 'gps_longitude') as $axis => $term) {
    $gps_query = chado_select_record('stockprop', array('value'), array(
      'stock_id' => $stock_id,
      'type_id' => tpps_load_cvterm($term)->cvterm_id,
    ), array(
      'limit' => 1,
    ));
    $first = is_array($gps_query) ? current($gps_query) : FALSE;
    $coords[$axis] = is_object($first) ? $first->value : NULL;
  }
  $lat = $coords['lat'];
  $long = $coords['long'];

  foreach ($layers_params as $layer_id => $params) {
    $layer_query = db_select('cartogratree_layers', 'l')
      ->fields('l', array('title'))
      ->condition('layer_id', $layer_id)
      ->execute();

    $layer_name = $layer_query->fetchObject()->title;

    foreach (array_keys($params) as $param_id) {
      $param_query = db_select('cartogratree_fields', 'f')
        ->fields('f', array('field_name'))
        ->condition('field_id', $param_id)
        ->execute();

      $param_name = $param_query->fetchObject()->field_name;
      $phenotype_name = "$accession-$tree_id-$layer_name-$param_name-$suffix";

      $value = tpps_get_environmental_layer_data($layer_id, $lat, $long, $param_name);
      $type = variable_get("tpps_param_{$param_id}_type", 'attr_id');

      $records['phenotype'][$phenotype_name] = array(
        'uniquename' => $phenotype_name,
        'name' => "$param_name",
        'value' => "$value",
      );

      $records['stock_phenotype'][$phenotype_name] = array(
        'stock_id' => $stock_id,
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );

      // The environment cvterm is stored either directly on the phenotype
      // or through a phenotype_cvterm link, depending on the parameter type.
      if ($type == 'attr_id') {
        $records['phenotype'][$phenotype_name]['attr_id'] = $env_cvterm;
      }
      else {
        $records['phenotype_cvterm'][$phenotype_name] = array(
          'cvterm_id' => $env_cvterm,
          '#fk' => array(
            'phenotype' => $phenotype_name,
          ),
        );
      }

      $env_count++;
      if ($env_count >= $record_group) {
        // Batch is full: insert it and start a new one.
        tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
        if ($job) {
          $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
        }
        tpps_chado_insert_multi($records);
        tpps_job_logger_write('[INFO] - Done.');
        if ($job) {
          $job->logMessage('[INFO] - Done.');
        }
        $records = array(
          'phenotype' => array(),
          'phenotype_cvterm' => array(),
          'stock_phenotype' => array(),
        );
        $env_count = 0;
      }
    }
  }
  $suffix++;
}
$tpps_job
Definition: submit_all.php:14
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
tpps_get_environmental_layer_data($layer_id, $lat, $long, $param)

◆ tpps_process_genotype_spreadsheet()

tpps_process_genotype_spreadsheet (   $row,
array &  $options = array() 
)

This function processes a single row of a genotype spreadsheet.

This function is used for SNP assay files, SSR spreadsheets, and other marker type spreadsheets. The functionality is slightly different based on the type of marker being processed (this is set in the options array). This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 2625 of file submit_all.php.

/**
 * Processes a single row of a genotype spreadsheet.
 *
 * Used for SNP assay files, SSR spreadsheets and other marker spreadsheets;
 * the marker type is read from $options['type']. Meant to be used as a
 * callback for tpps_file_iterator().
 *
 * For every data cell with a known header the function queues feature,
 * genotype, genotype_call and stock_genotype records in $options['records'].
 * When $options['associations'] has an entry for the variant, the association
 * records (scaffold feature, featureloc, featureprop, feature_relationship,
 * and optionally feature_cvterm / feature_cvtermprop) are queued as well.
 * Once $options['genotype_count'] reaches the 'tpps_record_group' variable,
 * the queued records are flushed through tpps_chado_insert_multi().
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator(). 'records',
 *   'tree_info', 'genotype_count' and 'genotype_total' are updated in place.
 */
function tpps_process_genotype_spreadsheet($row, array &$options = array()) {
  global $tpps_job;
  $job = $tpps_job;
  $type = $options['type'];
  $records = &$options['records'];
  $headers = $options['headers'];
  $tree_info = &$options['tree_info'];
  $species_codes = $options['species_codes'];
  $genotype_count = &$options['genotype_count'];
  $project_id = $options['project_id'];
  $marker = $options['marker'];
  $type_cvterm = $options['type_cvterm'];
  $seq_var_cvterm = $options['seq_var_cvterm'];
  $multi_insert_options = $options['multi_insert'];
  $associations = $options['associations'] ?? array();

  $record_group = variable_get('tpps_record_group', 10000);
  $stock_id = NULL;

  // When the tree id column is known up front, resolve the stock before
  // walking the row.
  if (!empty($options['tree_id'])) {
    $val = $row[$options['tree_id']];
    $stock_id = $tree_info[trim($val)]['stock_id'];
    $current_id = $tree_info[trim($val)]['organism_id'];
    $species_code = $species_codes[$current_id];
  }
  foreach ($row as $key => $val) {
    // Columns without a header are not marker columns.
    if (empty($headers[$key])) {
      continue;
    }

    // Otherwise the first headed column is taken as the tree id.
    if (!isset($stock_id)) {
      $stock_id = $tree_info[trim($val)]['stock_id'];
      $current_id = $tree_info[trim($val)]['organism_id'];
      $species_code = $species_codes[$current_id];
      continue;
    }
    $genotype_count++;

    // Skip cells holding the user-declared "empty" marker (SSRs only).
    if ($type == 'ssrs' and !empty($options['empty']) and $val == $options['empty']) {
      continue;
    }

    if ($type == 'ssrs' and ($val === 0 or $val === "0")) {
      $val = "NA";
    }

    $variant_name = $headers[$key];
    $marker_name = $variant_name . $marker;
    $genotype_name = "$marker-$variant_name-$species_code-$val";

    $records['feature'][$marker_name] = array(
      'organism_id' => $current_id,
      'uniquename' => $marker_name,
      'type_id' => $seq_var_cvterm,
    );

    $records['feature'][$variant_name] = array(
      'organism_id' => $current_id,
      'uniquename' => $variant_name,
      'type_id' => $seq_var_cvterm,
    );

    if (!empty($associations) and !empty($associations[$variant_name])) {
      $association = $associations[$variant_name];
      $assoc_feature_name = "{$variant_name}-{$options['associations_type']}-{$association['trait']}";

      $records['feature'][$association['scaffold']] = array(
        'organism_id' => $current_id,
        'uniquename' => $association['scaffold'],
        'type_id' => $options['scaffold_cvterm'],
      );

      $records['feature'][$assoc_feature_name] = array(
        'organism_id' => $current_id,
        'uniquename' => $assoc_feature_name,
        'type_id' => $seq_var_cvterm,
      );

      if (!empty($association['trait_attr'])) {
        $records['feature_cvterm'][$assoc_feature_name] = array(
          'cvterm_id' => $association['trait_attr'],
          'pub_id' => $options['pub_id'],
          '#fk' => array(
            'feature' => $assoc_feature_name,
          ),
        );

        if (!empty($association['trait_obs'])) {
          $records['feature_cvtermprop'][$assoc_feature_name] = array(
            'type_id' => $association['trait_obs'],
            '#fk' => array(
              'feature_cvterm' => $assoc_feature_name,
            ),
          );
        }
      }

      $records['featureprop'][$assoc_feature_name] = array(
        'type_id' => $options['associations_type'],
        '#fk' => array(
          'feature' => $assoc_feature_name,
        ),
      );

      $records['featureloc'][$variant_name] = array(
        'fmin' => $association['start'],
        'fmax' => $association['stop'],
        'residue_info' => $association['allele'],
        '#fk' => array(
          'feature' => $variant_name,
          'srcfeature' => $association['scaffold'],
        ),
      );

      $records['feature_relationship'][$assoc_feature_name] = array(
        'type_id' => $options['associations_type'],
        'value' => $association['confidence'],
        '#fk' => array(
          'subject' => $variant_name,
          'object' => $assoc_feature_name,
        ),
      );
    }

    $records['genotype'][$genotype_name] = array(
      'name' => $genotype_name,
      'uniquename' => $genotype_name,
      'description' => $val,
      'type_id' => $type_cvterm,
    );

    $records['genotype_call']["$stock_id-$genotype_name"] = array(
      'project_id' => $project_id,
      'stock_id' => $stock_id,
      '#fk' => array(
        'genotype' => $genotype_name,
        'variant' => $variant_name,
        'marker' => $marker_name,
      ),
    );

    $records['stock_genotype']["$stock_id-$genotype_name"] = array(
      'stock_id' => $stock_id,
      '#fk' => array(
        'genotype' => $genotype_name,
      ),
    );

    // Flush the queued records once the batch size has been reached.
    if ($genotype_count >= $record_group) {
      tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
      $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
      tpps_chado_insert_multi($records, $multi_insert_options);
      tpps_job_logger_write('[INFO] - Done.');
      $job->logMessage('[INFO] - Done.');
      $records = array(
        'feature' => array(),
        'genotype' => array(),
        'genotype_call' => array(),
        'stock_genotype' => array(),
      );
      if (!empty($associations)) {
        $records['featureloc'] = array();
        $records['featureprop'] = array();
      }
      $options['genotype_total'] += $genotype_count;
      // String concatenation is '.', not '+': the arithmetic operator would
      // try to add the message text to the counter.
      $progress = '[INFO] - Genotypes inserted:' . $options['genotype_total'];
      tpps_job_logger_write($progress);
      $job->logMessage($progress);
      $genotype_count = 0;
    }
  }
}
$tpps_job
Definition: submit_all.php:14
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160

◆ tpps_process_phenotype_data()

tpps_process_phenotype_data (   $row,
array &  $options = array() 
)

This function will process a row from a phenotype data file.

This function is used for standard phenotypes of both phenotype formats, as well as phenotype isotope files. The functionality is slightly different based on the type of phenotype file being processed (set in the options array). This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 2342 of file submit_all.php.

/**
 * Processes one row from a phenotype data file.
 *
 * Used for standard phenotypes of both phenotype formats as well as phenotype
 * isotope files ($options['iso']). Meant to be used as a callback for
 * tpps_file_iterator().
 *
 * For every phenotype value in the row the function queues phenotype,
 * stock_phenotype, phenotypeprop and phenotype_cvterm records in
 * $options['records'], adds a summary entry to $options['data'], and flushes
 * the queue through tpps_chado_insert_multi() once
 * $options['phenotype_count'] exceeds the 'tpps_record_group' variable.
 *
 * NOTE(review): the organism, cvterm and organismprop lookups below run on
 * every call, i.e. once per file row; consider resolving the 4 letter code
 * once in the caller and passing it in through $options.
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator(). 'records',
 *   'data', 'suffix', 'tree_info', 'phenotype_count' and 'organism_name' are
 *   bound by reference.
 *
 * @throws Exception
 *   When the organism, its 4 letter code, the tree id, a phenotype name or an
 *   attribute id cannot be resolved.
 */
function tpps_process_phenotype_data($row, array &$options = array()) {
  global $tpps_job;
  $job = $tpps_job;
  $iso = $options['iso'] ?? FALSE;
  $records = &$options['records'];
  $meta_headers = $options['meta_headers'] ?? NULL;
  $file_headers = $options['file_headers'] ?? NULL;
  $cvterms = $options['cvterms'];
  $meta = $options['meta'];
  $empty = $options['file-empty'] ?? NULL;
  $accession = $options['accession'];
  $suffix = &$options['suffix'];
  $tree_info = &$options['tree_info'];
  $phenotype_count = &$options['phenotype_count'];
  $organism_name = &$options['organism_name'];
  $record_group = variable_get('tpps_record_group', 10000);

  // Get genus and species from the organism name. A name without a space has
  // no species part; default it to '' so the check below reports the problem
  // instead of an undefined offset.
  $organism_name_parts = explode(' ', $organism_name, 2);
  $genus = $organism_name_parts[0];
  $species = $organism_name_parts[1] ?? '';

  // Ensure that we got the genus and species or error out.
  if ($genus == "" || $species == "") {
    throw new Exception('Organism genus and species could not be processed. Please ensure you added an organism that exists within the chado.organism table!');
  }

  // Query the organism table to get the organism id.
  $organism_id_results = chado_query('SELECT * FROM chado.organism WHERE genus = :genus and species = :species ORDER BY organism_id ASC LIMIT 1', array(
    ':genus' => $genus,
    ':species' => $species
  ));

  // Dummy value for organism_id until we get it from the sql results row.
  $organism_id = -1;
  foreach ($organism_id_results as $organism_id_row) {
    $organism_id = $organism_id_row->organism_id;
  }

  // Check that the organism id is valid.
  if ($organism_id == -1 || $organism_id == "") {
    throw new Exception('Could not find organism id for ' . $organism_name. '. This organism does not seem to exist in the chado.organism table!');
  }

  // Get the cvterm_id (which is the type_id) for the organism 4 letter code.
  $cvterm_id_4lettercode = -1;
  $cvterm_results = chado_query('SELECT * FROM chado.cvterm WHERE name = :name LIMIT 1', array(
    ':name' => 'organism 4 letter code'
  ));
  foreach ($cvterm_results as $cvterm_row) {
    $cvterm_id_4lettercode = $cvterm_row->cvterm_id;
  }
  if ($cvterm_id_4lettercode == -1 || $cvterm_id_4lettercode == "") {
    throw new Exception('Could not find the cvterm id for organism 4 letter code within the chado.cvterm table. This is needed to generate the phenotype name.');
  }

  // Use the 4 letter code cvterm id to find the actual code within the
  // organismprop table (using the organism_id).
  $value_4lettercode = "";
  $organismprop_results = chado_query('SELECT * FROM chado.organismprop WHERE type_id = :type_id AND organism_id = :organism_id LIMIT 1', array(
    ':type_id' => $cvterm_id_4lettercode,
    ':organism_id' => $organism_id
  ));
  foreach ($organismprop_results as $organismprop_row) {
    $value_4lettercode = $organismprop_row->value;
  }

  if ($value_4lettercode == "" || $value_4lettercode == null) {
    throw new Exception('4 letter code could not be found for ' . $organism_name . ' in the chado.organismprop table. This is needed to create the phenotype_name.');
  }

  // Column id => phenotype name pairs to process for this row. Initialised so
  // that a row with nothing to process is skipped without warnings.
  $values = array();
  $tree_id = NULL;

  if (!$iso) {
    if (isset($meta_headers['name']) and (isset($meta_headers['value']))) {
      $id = $row[$meta_headers['value']];
      $values = array($id => $row[$meta_headers['name']]);
    }

    if (!empty($options['data_columns'])) {
      $values = $options['data_columns'];
    }

    $tree_id = $row[$options['tree_id']];
    $clone_col = $meta_headers['clone'] ?? NULL;
    if (isset($clone_col) and !empty($row[$clone_col]) and $row[$clone_col] !== $empty) {
      $tree_id .= "-" . $row[$clone_col];
    }
  }
  if ($iso) {
    // The first non-empty cell is the tree id; every later column is a
    // phenotype named after its file header.
    foreach ($row as $id => $value) {
      if (empty($tree_id)) {
        $tree_id = $value;
        continue;
      }
      $values[$id] = $file_headers[$id];
    }
  }

  if ($tree_id == null || $tree_id == "") {
    throw new Exception('tree_id was null or empty - there might be a problem with the format of the phenotype data file or selected column options for the file via the user information, cannot continue until resolved.');
  }

  foreach ($values as $id => $name) {
    if ($name == null || $name == "") {
      throw new Exception('Phenotype name was null or empty - there might be a problem with the format of the phenotype data file or selected column options for the file via the user information, cannot continue until resolved.');
    }
    // Metadata is keyed by the lower-cased phenotype name.
    $meta_key = strtolower($name);
    $attr_id = $iso ? ($meta['attr_id'] ?? NULL) : ($meta[$meta_key]['attr_id'] ?? NULL);
    if ($attr_id == null || $attr_id == "") {
      print_r('$meta[attr_id]:' . ($meta['attr_id'] ?? '') . "\n");
      print_r('$name:' . $name . "\n");
      print_r('$meta[$name]:' . ($meta[$meta_key]['attr_id'] ?? '') . "\n");
      print_r('$attr_id:' . $attr_id . "\n");
      throw new Exception('Attribute id is null which causes phenotype data to not be added to database correctly.');
    }
    $value = $row[$id];
    $phenotype_name = "$accession-$tree_id-$name-$suffix";
    $phenotype_name .= '-' . $value_4lettercode;

    // Key of this phenotype's summary entry in $options['data']. The time
    // value below is written under the same key so it lands on this entry
    // rather than creating a stray one keyed by $phenotype_name.
    $data_key = "$tree_id-$name-$suffix";
    $options['data'][$data_key] = array(
      'uniquename' => $data_key,
      'name' => $name,
      'stock_id' => $tree_info[$tree_id]['stock_id'],
      'time' => NULL,
      'value' => $value,
    );

    $records['phenotype'][$phenotype_name] = array(
      'uniquename' => $phenotype_name,
      'name' => $name,
      'attr_id' => $attr_id,
      'observable_id' => $meta[$meta_key]['struct_id'] ?? NULL,
      'value' => $value,
    );

    $records['stock_phenotype'][$phenotype_name] = array(
      'stock_id' => $tree_info[$tree_id]['stock_id'],
      '#fk' => array(
        'phenotype' => $phenotype_name,
      ),
    );

    // A time set in the metadata wins over a per-row time column.
    if (isset($meta[$meta_key]['time'])) {
      $records['phenotypeprop']["$phenotype_name-time"] = array(
        'type_id' => $cvterms['time'],
        'value' => $meta[$meta_key]['time'],
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );
      $options['data'][$data_key]['time'] = $meta[$meta_key]['time'];
    }
    elseif (isset($meta_headers['time'])) {
      $val = $row[$meta_headers['time']];
      if (is_int($val)) {
        $val = tpps_xlsx_translate_date($val);
      }
      $records['phenotypeprop']["$phenotype_name-time"] = array(
        'type_id' => $cvterms['time'],
        'value' => $val,
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );
      $options['data'][$data_key]['time'] = $val;
    }

    $records['phenotypeprop']["$phenotype_name-desc"] = array(
      'type_id' => $cvterms['desc'],
      'value' => $iso ? $meta['desc'] : $meta[$meta_key]['desc'],
      '#fk' => array(
        'phenotype' => $phenotype_name,
      ),
    );

    // Isotope files store the unit as a property; standard phenotypes link
    // the unit cvterm.
    if ($iso) {
      $records['phenotypeprop']["$phenotype_name-unit"] = array(
        'type_id' => $cvterms['unit'],
        'value' => $meta['unit'],
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );
    }

    if (!$iso) {
      $records['phenotype_cvterm']["$phenotype_name-unit"] = array(
        'cvterm_id' => $meta[$meta_key]['unit_id'],
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );
    }

    if (isset($meta[$meta_key]['min'])) {
      $records['phenotypeprop']["$phenotype_name-min"] = array(
        'type_id' => $cvterms['min'],
        'value' => $meta[$meta_key]['min'],
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );
    }

    if (isset($meta[$meta_key]['max'])) {
      $records['phenotypeprop']["$phenotype_name-max"] = array(
        'type_id' => $cvterms['max'],
        'value' => $meta[$meta_key]['max'],
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );
    }

    if (!empty($meta[$meta_key]['env'])) {
      $records['phenotype_cvterm']["$phenotype_name-env"] = array(
        'cvterm_id' => $cvterms['environment'],
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );
    }

    // Flush the queued records once the batch size has been exceeded.
    if ($phenotype_count > $record_group) {
      tpps_job_logger_write('[INFO] -- Inserting data into database using insert_multi...');
      $job->logMessage('[INFO] -- Inserting data into database using insert_multi...');
      tpps_chado_insert_multi($records);
      tpps_job_logger_write('[INFO] - Done.');
      $job->logMessage('[INFO] - Done.');

      $records = array(
        'phenotype' => array(),
        'phenotypeprop' => array(),
        'stock_phenotype' => array(),
      );
      $phenotype_count = 0;
    }

    $phenotype_count++;
  }
  $suffix++;
}
$tpps_job
Definition: submit_all.php:14
tpps_xlsx_translate_date($date)
Definition: file_utils.inc:791
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160

◆ tpps_process_phenotype_meta()

tpps_process_phenotype_meta (   $row,
array &  $options = array() 
)

This function will process a row from a phenotype metadata file.

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 2223 of file submit_all.php.

/**
 * Processes one row from a phenotype metadata file.
 *
 * Stores the row in $options['meta'] under the lower-cased phenotype name.
 * Attribute and unit are always recorded as 'other' with the file's text in
 * the matching '-other' key; structure, min and max are only recorded when
 * their column is configured and the cell is not blank.
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator(). Reads
 *   'meta_columns' and writes 'meta'.
 */
function tpps_process_phenotype_meta($row, array &$options = array()) {
  $cols = $options['meta_columns'];

  // Required fields.
  $entry = array(
    'attr' => 'other',
    'attr-other' => $row[$cols['attr']],
    'desc' => $row[$cols['desc']],
    'unit' => 'other',
    'unit-other' => $row[$cols['unit']],
  );

  // Optional fields, in the order they appear in the stored entry.
  foreach (array('struct', 'min', 'max') as $field) {
    if (empty($cols[$field])) {
      continue;
    }
    $col = $cols[$field];
    if (!isset($row[$col]) or $row[$col] == '') {
      continue;
    }
    if ($field == 'struct') {
      $entry['struct'] = 'other';
      $entry['struct-other'] = $row[$col];
      continue;
    }
    $entry[$field] = $row[$col];
  }

  $options['meta'][strtolower($row[$cols['name']])] = $entry;
}
$tpps_job
Definition: submit_all.php:14

◆ tpps_process_snp_association()

tpps_process_snp_association (   $row,
array &  $options = array() 
)

This function processes a single row of a genotype association file.

This function is used for SNP association files. This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 2808 of file submit_all.php.

/**
 * Processes a single row of a SNP association file.
 *
 * Stores the row in $options['associations'] keyed by SNP id. The position
 * cell is expected to look like "<digits>:<digits>"; the two numbers are
 * stored as 'start' and 'stop' with the smaller one first. A position that
 * does not match leaves both values NULL. Meant to be used as a callback for
 * tpps_file_iterator().
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator(). Reads
 *   'associations_groups' and 'phenotype_meta', writes 'associations'.
 */
function tpps_process_snp_association($row, array &$options = array()) {
  $groups = $options['associations_groups'];
  $associations = &$options['associations'];

  $id = $row[$groups['SNP ID'][1]];

  // Only read the capture groups when the position actually matched;
  // otherwise $matches is empty and the offsets do not exist.
  $start = NULL;
  $stop = NULL;
  if (preg_match('/^(\d+):(\d+)$/', (string) $row[$groups['Position'][3]], $matches)) {
    $start = $matches[1];
    $stop = $matches[2];
    // Both captures are numeric strings, so this compares numerically.
    if ($start > $stop) {
      list($start, $stop) = array($stop, $start);
    }
  }

  $trait = $row[$groups['Associated Trait'][5]];
  // Phenotype metadata is keyed by the lower-cased trait name. A trait with
  // no metadata yields NULL ids instead of an undefined-offset notice.
  $trait_key = strtolower($trait);

  $associations[$id] = array(
    'id' => $id,
    'scaffold' => $row[$groups['Scaffold'][2]],
    'start' => $start,
    'stop' => $stop,
    'allele' => $row[$groups['Allele'][4]],
    'trait' => $trait,
    'trait_attr' => $options['phenotype_meta'][$trait_key]['attr_id'] ?? NULL,
    'trait_obs' => $options['phenotype_meta'][$trait_key]['struct_id'] ?? NULL,
    'confidence' => $row[$groups['Confidence Value'][6]],
  );
}
$tpps_job
Definition: submit_all.php:14

◆ tpps_refine_phenotype_meta()

tpps_refine_phenotype_meta ( array &  $meta,
array  $time_options = array(),
TripalJob &  $job = NULL 
)

This function will further refine existing phenotype metadata.

The function mostly just adds cvterm ids where applicable.

Parameters
array $meta: The existing metadata array.
array $time_options: The array of options for time-based phenotypes.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 2260 of file submit_all.php.

/**
 * Further refines existing phenotype metadata.
 *
 * For each phenotype and each of the attribute / unit / structure term types
 * this fills in "<type>_id". A value other than 'other' is copied as-is. For
 * 'other', the user-supplied term in "<type>-other" is resolved, in order:
 * from the per-call cache, by installing it through tpps_ols_install_term(),
 * by a name lookup in the chado cvterm table, and finally by inserting a new
 * term into the local cv. Phenotypes flagged in
 * $time_options['time_phenotypes'] also get a 'time' entry.
 *
 * @param array $meta
 *   The existing metadata array, updated in place.
 * @param array $time_options
 *   The array of options for time-based phenotypes ('time_phenotypes' and
 *   'time_values', both keyed by lower-cased phenotype name).
 * @param TripalJob $job
 *   The TripalJob object for the submission job. May be NULL, in which case
 *   nothing is logged.
 */
function tpps_refine_phenotype_meta(array &$meta, array $time_options = array(), TripalJob &$job = NULL) {
  $cvt_cache = array();
  $local_cv = chado_get_cv(array('name' => 'local'));
  $local_db = variable_get('tpps_local_db');
  $term_types = array(
    'attr' => array(
      'label' => 'Attribute',
      'ontology' => 'pato',
    ),
    'unit' => array(
      'label' => 'Unit',
      'ontology' => 'po',
    ),
    'struct' => array(
      'label' => 'Structure',
      'ontology' => 'po',
    ),
  );
  foreach ($meta as $name => $data) {
    foreach ($term_types as $type => $info) {
      // Not every phenotype carries every term type (structure is optional).
      $selected = $data[$type] ?? NULL;
      $meta[$name]["{$type}_id"] = $selected;
      if ($selected == 'other') {
        $other = $data["{$type}-other"];
        $meta[$name]["{$type}_id"] = $cvt_cache[$other] ?? NULL;
        if (empty($meta[$name]["{$type}_id"])) {
          $result = tpps_ols_install_term("{$info['ontology']}:{$other}");
          if ($result !== FALSE) {
            $meta[$name]["{$type}_id"] = $result->cvterm_id;
            if ($job !== NULL) {
              $job->logMessage("[INFO] New OLS Term {$info['ontology']}:{$other} installed");
            }
          }

          // Fall back to an existing cvterm with a matching name.
          if (empty($meta[$name]["{$type}_id"])) {
            $term = chado_select_record('cvterm', array('cvterm_id'), array(
              'name' => array(
                'data' => $other,
                'op' => 'LIKE',
              ),
            ), array(
              'limit' => 1,
            ));
            $meta[$name]["{$type}_id"] = current($term)->cvterm_id ?? NULL;
          }

          // Last resort: create the term in the local cv. The term is named
          // after the user-supplied text so that distinct terms do not all
          // share the literal name 'other-other'.
          if (empty($meta[$name]["{$type}_id"])) {
            $inserted = chado_insert_cvterm(array(
              'id' => "{$local_db->name}:{$other}",
              'name' => $other,
              'definition' => '',
              'cv_name' => $local_cv->name,
            ));
            $meta[$name]["{$type}_id"] = $inserted->cvterm_id ?? NULL;
            if (!empty($meta[$name]["{$type}_id"]) and $job !== NULL) {
              $job->logMessage("[INFO] New Local {$info['label']} Term {$other} installed");
            }
          }
          $cvt_cache[$other] = $meta[$name]["{$type}_id"];
        }
      }
    }

    if (!empty($time_options['time_phenotypes'][strtolower($name)])) {
      $meta[$name]['time'] = $time_options['time_values'][strtolower($name)] ?? NULL;
      // A time phenotype without an explicit value is flagged with TRUE.
      if (empty($meta[$name]['time'])) {
        $meta[$name]['time'] = TRUE;
      }
    }
  }
}
tpps_ols_install_term($info)

◆ tpps_ssrs_headers()

tpps_ssrs_headers (   $fid,
  $ploidy 
)

This function formats headers for a microsatellite spreadsheet.

SSR/cpSSR spreadsheets will often have blank or duplicate headers, depending on the ploidy of the organism they are meant for. This function standardizes the headers for the spreadsheet so that they can be used with the tpps_process_genotype_spreadsheet() function.

Parameters
int $fid: The Drupal managed file id of the file.
string $ploidy: The ploidy of the organism, as indicated by the user.
Returns
array The array of standardized headers for the spreadsheet.

Definition at line 2856 of file submit_all.php.

/**
 * Formats headers for a microsatellite spreadsheet.
 *
 * SSR/cpSSR spreadsheets often have blank or duplicate marker headers,
 * depending on ploidy. This returns headers where every marker column has a
 * distinct name: "_A"/"_B" suffixes for diploids, "_1" .. "_N" for
 * polyploids. Haploid headers are returned untouched. The first column is
 * never renamed.
 *
 * @param int $fid
 *   The Drupal managed file id of the file.
 * @param string $ploidy
 *   The ploidy of the organism, as indicated by the user ('Haploid',
 *   'Diploid' or 'Polyploid').
 *
 * @return array
 *   The array of standardized headers for the spreadsheet.
 */
function tpps_ssrs_headers($fid, $ploidy) {
  $headers = tpps_file_headers($fid);
  if ($ploidy == 'Haploid') {
    return $headers;
  }
  $row_len = count($headers);
  $results = $headers;

  // Drop blank headers so that $results only holds the named columns.
  while (($k = array_search(NULL, $results))) {
    unset($results[$k]);
  }

  $marker_num = 0;
  $first = TRUE;
  // Name of the most recent named marker column, reused for blank columns.
  $last = '';
  reset($headers);
  $num_headers = count($results);
  $num_unique_headers = count(array_unique($results));

  // Copies per marker, excluding the first (id) column from both counts.
  // The parentheses matter: "a - 1 / b - 1" would divide 1 by b first.
  // Duplicate-named layout: marker columns / unique marker names.
  $copies_duplicated = max(1, intdiv($num_headers - 1, max(1, $num_unique_headers - 1)));
  // Blank-header layout: marker columns / named marker columns.
  $copies_blank = max(1, intdiv($row_len - 1, max(1, $num_headers - 1)));

  foreach (array_keys($headers) as $key) {
    // Advance the internal pointer so $next_key is the column after $key.
    next($headers);
    $next_key = key($headers);
    if ($first) {
      $first = FALSE;
      continue;
    }

    switch ($ploidy) {
      case 'Diploid':
        if ($num_headers == ($row_len + 1) / 2) {
          // Every other marker column name is left blank.
          if (array_key_exists($key, $results)) {
            $last = $results[$key];
            $results[$key] .= "_A";
            break;
          }
          $results[$key] = $last . "_B";
          break;
        }

        if ($num_headers == $row_len) {
          // All of the marker column names are filled out.
          if ($num_headers != $num_unique_headers) {
            // The marker column names are duplicates, need to append
            // _A and _B. The last column has no successor to compare with.
            $next_name = ($next_key !== NULL) ? ($results[$next_key] ?? NULL) : NULL;
            if ($results[$key] == $next_name) {
              $results[$key] .= "_A";
              break;
            }
            $results[$key] .= "_B";
          }
        }
        break;

      case 'Polyploid':
        if ($num_headers == $row_len) {
          // All of the marker column names are filled out.
          if ($num_unique_headers != $num_headers) {
            // The marker column names are duplicates, need to append
            // _1, _2, up to X ploidy.
            $ploidy_suffix = ($marker_num % $copies_duplicated) + 1;
            $results[$key] .= "_$ploidy_suffix";
          }
          $marker_num++;
          break;
        }
        // Only the first column of each marker is named.
        $ploidy_suffix = ($marker_num % $copies_blank) + 1;
        if (array_key_exists($key, $results)) {
          $last = $results[$key];
          $results[$key] .= "_$ploidy_suffix";
          $marker_num++;
          break;
        }
        $results[$key] = "{$last}_$ploidy_suffix";
        $marker_num++;
        break;

      default:
        break;
    }
  }

  return $results;
}
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:972

◆ tpps_submit_all()

tpps_submit_all (   $accession,
TripalJob  $job = NULL 
)

Creates a record for the project and calls the submission helper functions.

Parameters
string $accession: The accession number of the form being submitted.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 25 of file submit_all.php.

/**
 * Creates a record for the project and calls the submission helper functions.
 *
 * Opens a per-job log file, marks the submission 'Submission Job Running',
 * then, inside a database transaction, clears existing data for the
 * accession, inserts the project record and runs the page 1-4 and summary
 * submit helpers. On success the submission is marked 'Approved'. On any
 * Exception the transaction is rolled back, the submission is set back to
 * 'Pending Approval', the error is logged, and a generic 'Job failed.'
 * Exception is thrown.
 *
 * NOTE(review): $job defaults to NULL in the signature but is dereferenced
 * unconditionally below (getJobID(), logMessage(), setInterval()).
 */
25  {
26  global $tpps_job;
27  $tpps_job = $job;
28  // Get public path
29  $log_path = drupal_realpath('public://') . '/tpps_job_logs/';
30 
// NOTE(review): mkdir() is called without checking whether the directory
// already exists, and its return value is not checked.
31  mkdir($log_path);
32 
33  // Update the global $tpps_job_logger variable
34  global $tpps_job_logger;
35  $tpps_job_logger = [];
36  $tpps_job_logger['job_object'] = $job;
37  $tpps_job_logger['log_file_path'] = $log_path . $accession . '_' . $tpps_job_logger['job_object']->getJobID() . '.txt';
// NOTE(review): the fopen() result is stored without a failure check.
38  $tpps_job_logger['log_file_handle'] = fopen($tpps_job_logger['log_file_path'], "w+");
39 
40  tpps_job_logger_write('[INFO] Setting up...');
41  $job->logMessage('[INFO] Setting up...');
42  $job->setInterval(1);
43  $form_state = tpps_load_submission($accession);
44  $form_state['status'] = 'Submission Job Running';
45  tpps_update_submission($form_state, array('status' => 'Submission Job Running'));
// Opened after the status update above; the catch block rolls it back.
46  $transaction = db_transaction();
47 
48 
49 
50  try {
51 
52  tpps_job_logger_write('[INFO] Clearing Database...');
53  $job->logMessage('[INFO] Clearing Database...');
54  tpps_submission_clear_db($accession);
55  tpps_job_logger_write('[INFO] Database Cleared');
56  $job->logMessage('[INFO] Database Cleared.');
// Remember the previous project_id before the state is reloaded and its
// 'ids' array is reset, so the new project record can reuse it.
57  $project_id = $form_state['ids']['project_id'] ?? NULL;
58 
59  $form_state = tpps_load_submission($accession);
60  tpps_clean_state($form_state);
// NOTE(review): this listing skips source line 61. The function's reference
// list names tpps_submission_clear_default_tags($accession), which does not
// appear in the visible code; presumably it is called here. Confirm against
// the source file.
62  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
63  $form_state['file_rank'] = 0;
64  $form_state['ids'] = array();
65 
66  tpps_job_logger_write('[INFO] Creating project record...');
67  $job->logMessage('[INFO] Creating project record...');
68  $form_state['title'] = $firstpage['publication']['title'];
69  $form_state['abstract'] = $firstpage['publication']['abstract'];
70  $project_record = array(
71  'name' => $firstpage['publication']['title'],
72  'description' => $firstpage['publication']['abstract'],
73  );
74  if (!empty($project_id)) {
75  $project_record['project_id'] = $project_id;
76  }
77  $form_state['ids']['project_id'] = tpps_chado_insert_record('project', $project_record);
78  tpps_job_logger_write("[INFO] Project record created. project_id: @pid\n", array('@pid' => $form_state['ids']['project_id']));
79  $job->logMessage("[INFO] Project record created. project_id: @pid\n", array('@pid' => $form_state['ids']['project_id']));
80 
81  tpps_tripal_entity_publish('Project', array(
82  $firstpage['publication']['title'],
83  $form_state['ids']['project_id'],
84  ));
85 
// Each step below is logged to both the job log file and the Tripal job.
86  tpps_job_logger_write("[INFO] Submitting Publication/Species information...");
87  $job->logMessage("[INFO] Submitting Publication/Species information...");
88  tpps_submit_page_1($form_state, $job);
89  tpps_job_logger_write("[INFO] Publication/Species information submitted!\n");
90  $job->logMessage("[INFO] Publication/Species information submitted!\n");
91 
92  tpps_job_logger_write("[INFO] Submitting Study Details...");
93  $job->logMessage("[INFO] Submitting Study Details...");
94  tpps_submit_page_2($form_state, $job);
95  tpps_job_logger_write("[INFO] Study Details sumbitted!\n");
96  $job->logMessage("[INFO] Study Details sumbitted!\n");
97 
98  tpps_job_logger_write("[INFO] Submitting Accession information...");
99  $job->logMessage("[INFO] Submitting Accession information...");
100  tpps_submit_page_3($form_state, $job);
101  tpps_job_logger_write("[INFO] Accession information submitted!\n");
102  $job->logMessage("[INFO] Accession information submitted!\n");
103 
104  tpps_job_logger_write("[INFO] Submitting Raw data...");
105  $job->logMessage("[INFO] Submitting Raw data...");
106  tpps_submit_page_4($form_state, $job);
107  tpps_job_logger_write("[INFO] Raw data submitted!\n");
108  $job->logMessage("[INFO] Raw data submitted!\n");
109 
110  tpps_job_logger_write("[INFO] Submitting Summary information...");
111  $job->logMessage("[INFO] Submitting Summary information...");
112  tpps_submit_summary($form_state);
113  tpps_job_logger_write("[INFO] Summary information submitted!\n");
114  $job->logMessage("[INFO] Summary information submitted!\n");
115 
116  tpps_update_submission($form_state);
117 
118  tpps_job_logger_write("[INFO] Renaming files...");
119  $job->logMessage("[INFO] Renaming files...");
120  tpps_submission_rename_files($accession);
121  tpps_job_logger_write("[INFO] Files renamed!\n");
122  $job->logMessage("[INFO] Files renamed!\n");
// The state is reloaded after the rename step before being marked approved.
123  $form_state = tpps_load_submission($accession);
124  $form_state['status'] = 'Approved';
125  $form_state['loaded'] = time();
126  tpps_job_logger_write("[INFO] Finishing up...");
127  $job->logMessage("[INFO] Finishing up...");
128  tpps_update_submission($form_state, array('status' => 'Approved'));
129  tpps_job_logger_write("[INFO] Complete!");
130  $job->logMessage("[INFO] Complete!");
131 
132  fclose($tpps_job_logger['log_file_handle']);
133 
134  }
135  catch (Exception $e) {
// Undo the database work, return the submission to 'Pending Approval', log
// the failure, then close the log file.
136  $transaction->rollback();
137  $form_state = tpps_load_submission($accession);
138  $form_state['status'] = 'Pending Approval';
139  tpps_update_submission($form_state, array('status' => 'Pending Approval'));
140 
141  tpps_job_logger_write('[ERROR] Job failed');
142  $job->logMessage('[ERROR] Job failed', array(), TRIPAL_ERROR);
143  tpps_job_logger_write('[ERROR] Error message: @msg', array('@msg' => $e->getMessage()));
144  $job->logMessage('[ERROR] Error message: @msg', array('@msg' => $e->getMessage()), TRIPAL_ERROR);
145  tpps_job_logger_write("[ERROR] Trace: \n@trace", array('@trace' => $e->getTraceAsString()));
146  $job->logMessage("[ERROR] Trace: \n@trace", array('@trace' => $e->getTraceAsString()), TRIPAL_ERROR);
147 
148  fclose($tpps_job_logger['log_file_handle']);
149  watchdog_exception('tpps', $e);
// The original exception is recorded above; callers only see this one.
150  throw new Exception('Job failed.');
151  }
152 }
tpps_submission_clear_db($accession)
const TPPS_PAGE_1
Definition: tpps.module:12
tpps_update_submission(array $state, array $options=array())
$tpps_job
Definition: submit_all.php:14
tpps_submit_page_3(array &$form_state, TripalJob &$job=NULL)
Definition: submit_all.php:630
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_submit_page_1(array &$form_state, TripalJob &$job=NULL)
Definition: submit_all.php:189
tpps_submit_page_4(array &$form_state, TripalJob &$job=NULL)
Definition: submit_all.php:854
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
tpps_submission_rename_files($accession)
tpps_submission_clear_default_tags($accession)
tpps_submit_page_2(array &$form_state, TripalJob &$job=NULL)
Definition: submit_all.php:447
tpps_clean_state(array &$form_state)
tpps_tripal_entity_publish($bundle_name, array $vals, array $options=array())
tpps_load_submission($accession, $state=TRUE)
Definition: submissions.inc:27
tpps_submit_summary(array &$form_state)
$tpps_job_logger
Definition: submit_all.php:13

◆ tpps_submit_environment()

tpps_submit_environment ( array &  $form_state,
  $i,
TripalJob &  $job = NULL 
)

Submits environmental information for one species.

Parameters
array $form_state: The TPPS submission object.
int $i: The organism number we are submitting.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 2105 of file submit_all.php.

/**
 * Writes one message to both the TPPS job log file and the Tripal job log.
 *
 * @param string $message
 *   The message to record.
 * @param TripalJob|null $job
 *   The Tripal job to log to. Skipped when NULL so that the submission code
 *   can also run without a Tripal job object.
 */
function _tpps_submit_environment_log($message, $job = NULL) {
  tpps_job_logger_write($message);
  if (!empty($job)) {
    $job->logMessage($message);
  }
}

/**
 * Submits environmental information for one species.
 *
 * Collects the environmental layers/parameters selected on page 4 for
 * organism $i, then iterates over the plant accession file with
 * tpps_process_environment_layers() and bulk-inserts the resulting records.
 * Nothing is processed unless both the cartogratree_layers and
 * cartogratree_fields tables exist.
 *
 * @param array $form_state
 *   The TPPS submission object.
 * @param int $i
 *   The organism number we are submitting.
 * @param TripalJob $job
 *   The TripalJob object for the submission job. May be NULL, in which case
 *   messages are only written to the TPPS job log file.
 */
function tpps_submit_environment(array &$form_state, $i, TripalJob &$job = NULL) {
  _tpps_submit_environment_log('[INFO] - Submitting environment data...', $job);
  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
  $environment = $fourthpage["organism-$i"]['environment'] ?? NULL;
  if (empty($environment)) {
    return;
  }
  tpps_submission_add_tag($form_state['accession'], 'Environment');

  // Fall back to empty arrays (not FALSE) so the loops below are always safe.
  $env_layers = (isset($environment['env_layers']) and is_array($environment['env_layers'])) ? $environment['env_layers'] : array();
  $env_params = (isset($environment['env_params']) and is_array($environment['env_params'])) ? $environment['env_params'] : array();
  $env_count = 0;

  // When the submission uses a single accession file for all species, the
  // file information is stored under the first species.
  $species_index = "species-$i";
  if (empty($form_state['saved_values'][TPPS_PAGE_3]['tree-accession']['check'])) {
    $species_index = "species-1";
  }
  $tree_accession = $form_state['saved_values'][TPPS_PAGE_3]['tree-accession'][$species_index];
  $tree_acc_fid = $tree_accession['file'];
  // Prefer the revised copy of the accession file when one exists and loads.
  if (!empty($form_state['revised_files'][$tree_acc_fid]) and (file_load($form_state['revised_files'][$tree_acc_fid]))) {
    $tree_acc_fid = $form_state['revised_files'][$tree_acc_fid];
  }

  $env_cvterm = tpps_load_cvterm('environment')->cvterm_id;

  if (!db_table_exists('cartogratree_layers') or !db_table_exists('cartogratree_fields')) {
    return;
  }

  $layers_params = array();
  $records = array(
    'phenotype' => array(),
    'phenotype_cvterm' => array(),
    'stock_phenotype' => array(),
  );

  // Form keys that describe custom ("other") layers rather than real layers.
  // Loose comparison is kept on purpose to match the original == checks.
  $skipped_keys = array('other', 'other_db', 'other_name', 'other_params');
  foreach ($env_layers as $layer_name => $layer_id) {
    if (in_array($layer_name, $skipped_keys) or empty($layer_id)) {
      continue;
    }

    if (!empty($env_params[$layer_name])) {
      // Explicitly selected parameters for this layer.
      $layers_params[$layer_id] = array();
      foreach ($env_params[$layer_name] as $param_name => $param_id) {
        if (!empty($param_id)) {
          $layers_params[$layer_id][$param_id] = $param_name;
        }
      }
    }
    elseif (preg_match('/worldclim_subgroup_(.+)/', $layer_id, $matches)) {
      // A WorldClim subgroup: expand to every layer in the subgroup and every
      // field of each of those layers.
      $subgroup_id = $matches[1];
      $layers = db_select('cartogratree_layers', 'l')
        ->fields('l', array('layer_id'))
        ->condition('subgroup_id', $subgroup_id)
        ->execute();
      while (($layer = $layers->fetchObject())) {
        $params = db_select('cartogratree_fields', 'f')
          ->fields('f', array('field_id', 'display_name'))
          ->condition('layer_id', $layer->layer_id)
          ->execute();
        while (($param = $params->fetchObject())) {
          $layers_params[$layer->layer_id][$param->field_id] = $param->display_name;
        }
      }
    }
  }

  $options = array(
    'no_header' => !empty($tree_accession['file-no-header']),
    'records' => $records,
    'tree_id' => $tree_accession['file-groups']['Tree Id'][1],
    'accession' => $form_state['accession'],
    'tree_info' => $form_state['tree_info'],
    'layers_params' => $layers_params,
    'env_count' => &$env_count,
    'env_cvterm' => $env_cvterm,
    'suffix' => 0,
    'job' => &$job,
  );

  // The two log targets historically receive slightly different wording for
  // this message, so they are written separately to keep both strings intact.
  tpps_job_logger_write('[INFO] - Processing environment_layers file data...');
  if (!empty($job)) {
    $job->logMessage('[INFO] - Processing environmental_layers file data...');
  }
  tpps_file_iterator($tree_acc_fid, 'tpps_process_environment_layers', $options);
  _tpps_submit_environment_log('[INFO] - Done.', $job);

  _tpps_submit_environment_log('[INFO] - Inserting data into database using insert_multi...', $job);
  tpps_chado_insert_multi($options['records']);
  _tpps_submit_environment_log('[INFO] - Done.', $job);
  unset($options['records']);
  $env_count = 0;
}
tpps_submission_add_tag($accession, $tag)
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
const TPPS_PAGE_4
Definition: tpps.module:15
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
const TPPS_PAGE_3
Definition: tpps.module:14
tpps_file_iterator($fid, $function, array &$options=array())

◆ tpps_submit_genotype()

tpps_submit_genotype ( array &  $form_state,
array  $species_codes,
  $i,
TripalJob &  $job = NULL 
)

Submits genotype information for one species.

Parameters
array $form_state: The TPPS submission object.
array $species_codes: An array of 4-letter species codes associated with the submission.
int $i: The organism number we are submitting.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 1206 of file submit_all.php.

/**
 * Writes one message to both the TPPS job log file and the Tripal job log.
 *
 * @param string $message
 *   The message to record.
 * @param TripalJob|null $job
 *   The Tripal job to log to. Skipped when NULL so that the submission code
 *   can also run without a Tripal job object.
 */
function _tpps_submit_genotype_log($message, $job = NULL) {
  tpps_job_logger_write($message);
  if (!empty($job)) {
    $job->logMessage($message);
  }
}

/**
 * Parses a (optionally gzipped) VCF file and inserts its genotype records.
 *
 * Records are accumulated per VCF data line and flushed with
 * tpps_chado_insert_multi() whenever the running genotype count exceeds
 * $record_group; the remainder is flushed after the last line.
 *
 * @param array $form_state
 *   The TPPS submission object (read for 'ids' and 'tree_info').
 * @param array $species_codes
 *   Species codes keyed by organism id.
 * @param int $i
 *   The organism number we are submitting.
 * @param int $vcf_fid
 *   The Drupal managed file id of the VCF file.
 * @param int $seq_var_cvterm
 *   The cvterm_id used as the type of the marker/variant features.
 * @param int $record_group
 *   Genotype count threshold that triggers a bulk insert.
 * @param array $multi_insert_options
 *   Options forwarded to tpps_chado_insert_multi().
 * @param TripalJob|null $job
 *   The TripalJob object for the submission job, if any.
 *
 * @throws Exception
 *   When the VCF file cannot be loaded or opened.
 */
function _tpps_submit_genotype_vcf(array $form_state, array $species_codes, $i, $vcf_fid, $seq_var_cvterm, $record_group, array $multi_insert_options, $job = NULL) {
  $marker = 'SNP';
  $project_id = $form_state['ids']['project_id'];

  $records = array(
    'feature' => array(),
    'genotype' => array(),
    'genotype_call' => array(),
    'stock_genotype' => array(),
    'genotypeprop' => array(),
  );

  $snp_cvterm = tpps_load_cvterm('snp')->cvterm_id;
  $format_cvterm = tpps_load_cvterm('format')->cvterm_id;
  $qual_cvterm = tpps_load_cvterm('quality_value')->cvterm_id;
  $filter_cvterm = tpps_load_cvterm('filter')->cvterm_id;
  $freq_cvterm = tpps_load_cvterm('allelic_frequency')->cvterm_id;
  $depth_cvterm = tpps_load_cvterm('read_depth')->cvterm_id;
  $n_sample_cvterm = tpps_load_cvterm('number_samples')->cvterm_id;

  $vcf_file = file_load($vcf_fid);
  if (empty($vcf_file)) {
    throw new Exception("Unable to load VCF file with fid $vcf_fid");
  }
  $location = tpps_get_location($vcf_file->uri);
  echo "VCF location: $location\n";

  $vcf_content = gzopen($location, 'r');
  if ($vcf_content === FALSE) {
    throw new Exception("Unable to open VCF file at $location");
  }

  $stocks = array();
  $format = "";
  $current_id = $form_state['ids']['organism_ids'][$i];
  $species_code = $species_codes[$current_id];

  $genotype_count = 0;
  $file_progress_line_count = 0;
  $record_count = 0;
  $line_process_cumulative_time = 0;
  // NULL until the first bulk insert has been timed.
  $insert_elapsed_time = NULL;
  $insert_cumulative_time = 0;

  echo "[INFO] Processing Genotype VCF file\n";
  try {
    while (($vcf_line = gzgets($vcf_content)) !== FALSE) {
      $file_progress_line_count++;
      if ($file_progress_line_count % 10000 == 0) {
        echo '[INFO] [VCF PROCESSING STATUS] ' . $file_progress_line_count . " lines done\n";
      }

      if ($vcf_line[0] != '#' && stripos($vcf_line, '.vcf') === FALSE && trim($vcf_line) != "" && str_replace("\0", "", $vcf_line) != "") {
        $line_process_start_time = microtime(TRUE);
        $record_count++;
        print_r('Record count:' . $record_count . "\n");
        $genotype_count += count($stocks);

        $vcf_line = explode("\t", $vcf_line);
        $scaffold_id = $vcf_line[0] ?? NULL;
        $position = $vcf_line[1] ?? NULL;
        $variant_name = $vcf_line[2] ?? NULL;
        $ref = $vcf_line[3] ?? NULL;
        $alt = $vcf_line[4] ?? NULL;
        $qual = $vcf_line[5] ?? NULL;
        $filter = $vcf_line[6] ?? NULL;
        $info = $vcf_line[7] ?? NULL;

        if (empty($variant_name) or $variant_name == '.') {
          $variant_name = $scaffold_id . '_' . $position . 'SNP';
        }
        $marker_name = $scaffold_id . '_' . $position;
        $description = "$ref:$alt";

        // Render every sample column exactly once and group the column
        // indexes by the genotype name they produce. Keys keep the order in
        // which each genotype first appears on the line.
        // NOTE(review): assumes tpps_submit_vcf_render_genotype_combination()
        // is deterministic and side-effect free - confirm.
        $columns_by_genotype = array();
        $count_columns = count($vcf_line);
        for ($j = 9; $j < $count_columns; $j++) {
          $column_genotype_name = $marker_name . tpps_submit_vcf_render_genotype_combination($vcf_line[$j], $ref, $alt);
          $columns_by_genotype[$column_genotype_name][] = $j;
        }

        $records['feature'][$marker_name] = array(
          'organism_id' => $current_id,
          'uniquename' => $marker_name,
          'type_id' => $seq_var_cvterm,
        );

        $records['feature'][$variant_name] = array(
          'organism_id' => $current_id,
          'uniquename' => $variant_name,
          'type_id' => $seq_var_cvterm,
        );

        // The INFO column is the same for every genotype on this line, so
        // split it into a code => value map once.
        $info_vals = array();
        foreach (explode(";", (string) $info) as $val) {
          $parts = explode("=", $val);
          $info_vals[$parts[0]] = isset($parts[1]) ? $parts[1] : '';
        }

        echo "\n";
        echo "line#$file_progress_line_count ";
        print_r('genotypes per line: ' . count($columns_by_genotype) . " ");

        $genotype_name_progress_count = 0;
        foreach ($columns_by_genotype as $genotype_name => $columns) {
          $genotype_name_progress_count++;
          $genotype_desc = "$marker-$species_code-$genotype_name-$position-$description";

          $records['genotype'][$genotype_desc] = array(
            'name' => $genotype_name,
            'uniquename' => $genotype_desc,
            'description' => $description,
            'type_id' => $snp_cvterm,
          );

          if ($format != "") {
            $records['genotypeprop']["$genotype_desc-format"] = array(
              'type_id' => $format_cvterm,
              'value' => $format,
              '#fk' => array(
                'genotype' => $genotype_desc,
              ),
            );
          }

          echo "gen_name_index:$genotype_name_progress_count colcount:$count_columns ";
          // Link every sample (stock) that carries this genotype to it.
          foreach ($columns as $j) {
            $records['genotype_call']["{$stocks[$j - 9]}-$genotype_name"] = array(
              'project_id' => $project_id,
              'stock_id' => $stocks[$j - 9],
              '#fk' => array(
                'genotype' => $genotype_desc,
                'variant' => $variant_name,
                'marker' => $marker_name,
              ),
            );

            $records['stock_genotype']["{$stocks[$j - 9]}-$genotype_name"] = array(
              'stock_id' => $stocks[$j - 9],
              '#fk' => array(
                'genotype' => $genotype_desc,
              ),
            );
          }

          // Quality score.
          $records['genotypeprop']["$genotype_desc-qual"] = array(
            'type_id' => $qual_cvterm,
            'value' => $qual,
            '#fk' => array(
              'genotype' => $genotype_desc,
            ),
          );

          // filter: pass/fail.
          $records['genotypeprop']["$genotype_desc-filter"] = array(
            'type_id' => $filter_cvterm,
            'value' => ($filter == '.') ? "P" : "NP",
            '#fk' => array(
              'genotype' => $genotype_desc,
            ),
          );

          // Allele frequency, assuming that the info code for allele
          // frequency is 'AF'.
          if (isset($info_vals['AF']) and $info_vals['AF'] != '') {
            $records['genotypeprop']["$genotype_desc-freq"] = array(
              'type_id' => $freq_cvterm,
              'value' => $info_vals['AF'],
              '#fk' => array(
                'genotype' => $genotype_desc,
              ),
            );
          }

          // Depth coverage, assuming that the info code for depth coverage is
          // 'DP'.
          if (isset($info_vals['DP']) and $info_vals['DP'] != '') {
            $records['genotypeprop']["$genotype_desc-depth"] = array(
              'type_id' => $depth_cvterm,
              'value' => $info_vals['DP'],
              '#fk' => array(
                'genotype' => $genotype_desc,
              ),
            );
          }

          // Number of samples, assuming that the info code for number of
          // samples is 'NS'.
          if (isset($info_vals['NS']) and $info_vals['NS'] != '') {
            $records['genotypeprop']["$genotype_desc-n_sample"] = array(
              'type_id' => $n_sample_cvterm,
              'value' => $info_vals['NS'],
              '#fk' => array(
                'genotype' => $genotype_desc,
              ),
            );
          }
        }

        $line_process_elapsed_time = microtime(TRUE) - $line_process_start_time;
        echo " PHP Proctime: $line_process_elapsed_time seconds\n";
        $line_process_cumulative_time += $line_process_elapsed_time;
        echo "Cumulative PHP proctime: " . $line_process_cumulative_time . " seconds\n";
        echo "\nGenotype call records to insert (LINE:$file_progress_line_count): " . count($records['genotype_call']);
        echo "\nrecord group threshold: $record_group ";

        // Tripal Job has issues when all submissions are made at the same
        // time, so break them up into groups of 10,000 genotypes along with
        // their relevant genotypeprops.
        if ($genotype_count > $record_group) {
          // Timing of the previous batch only exists after the first flush.
          if ($insert_elapsed_time !== NULL) {
            _tpps_submit_genotype_log('[INFO] - Last bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds', $job);
            _tpps_submit_genotype_log('[INFO] - Last insert cumulative time: ' . $insert_cumulative_time . ' seconds', $job);
          }
          $insert_start_time = microtime(TRUE);
          _tpps_submit_genotype_log('[INFO] - Inserting data into database using insert_multi...', $job);
          tpps_chado_insert_multi($records, $multi_insert_options);
          _tpps_submit_genotype_log('[INFO] - Done.', $job);
          $insert_elapsed_time = microtime(TRUE) - $insert_start_time;
          _tpps_submit_genotype_log('[INFO] - Bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds', $job);
          $insert_cumulative_time += $insert_elapsed_time;
          _tpps_submit_genotype_log('[INFO] - Insert cumulative time: ' . $insert_cumulative_time . ' seconds', $job);

          $records = array(
            'feature' => array(),
            'genotype' => array(),
            'genotype_call' => array(),
            'genotypeprop' => array(),
            'stock_genotype' => array(),
          );
          $genotype_count = 0;
        }
      }
      elseif (preg_match('/##FORMAT=/', $vcf_line)) {
        // Accumulate the FORMAT meta-information (text after '##FORMAT=',
        // without the trailing newline).
        $format .= substr($vcf_line, 9, -1);
      }
      elseif (preg_match('/#CHROM/', $vcf_line)) {
        // Header line: columns 9+ are sample names; map each to its stock id.
        $vcf_line = explode("\t", $vcf_line);
        $header_columns = count($vcf_line);
        for ($j = 9; $j < $header_columns; $j++) {
          $stocks[] = $form_state['tree_info'][trim($vcf_line[$j])]['stock_id'];
        }
      }
    }
  }
  finally {
    // Always release the file handle, even when an insert throws.
    gzclose($vcf_content);
  }

  // Insert the last set of values.
  _tpps_submit_genotype_log('[INFO] - Inserting data into database using insert_multi...', $job);
  tpps_chado_insert_multi($records, $multi_insert_options);
  _tpps_submit_genotype_log('[INFO] - Done.', $job);
}

/**
 * Submits genotype information for one species.
 *
 * Records the reference genome / assembly information for the project, then
 * processes each genotype file type selected on page 4 (SNP assay and
 * associations, assay design, SSRs/cpSSRs, indels, other markers, VCF) and
 * bulk-inserts the resulting records.
 *
 * @param array $form_state
 *   The TPPS submission object.
 * @param array $species_codes
 *   An array of 4-letter species codes associated with the submission.
 * @param int $i
 *   The organism number we are submitting.
 * @param TripalJob $job
 *   The TripalJob object for the submission job. May be NULL, in which case
 *   messages are only written to the TPPS job log file.
 */
function tpps_submit_genotype(array &$form_state, array $species_codes, $i, TripalJob &$job = NULL) {
  _tpps_submit_genotype_log('[INFO] - Submitting genotype data...', $job);
  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
  $genotype = $fourthpage["organism-$i"]['genotype'] ?? NULL;
  if (empty($genotype)) {
    return;
  }
  tpps_submission_add_tag($form_state['accession'], 'Genotype');

  $project_id = $form_state['ids']['project_id'];
  $record_group = variable_get('tpps_record_group', 10000);

  $genotype_count = 0;
  $genotype_total = 0;
  $seq_var_cvterm = tpps_load_cvterm('sequence_variant')->cvterm_id;

  // genotype_call references the feature table twice (variant and marker),
  // so the foreign keys have to be described explicitly.
  $overrides = array(
    'genotype_call' => array(
      'variant' => array(
        'table' => 'feature',
        'columns' => array(
          'variant_id' => 'feature_id',
        ),
      ),
      'marker' => array(
        'table' => 'feature',
        'columns' => array(
          'marker_id' => 'feature_id',
        ),
      ),
    ),
  );

  $records = array(
    'feature' => array(),
    'genotype' => array(),
    'genotype_call' => array(),
    'stock_genotype' => array(),
  );

  $multi_insert_options = array(
    'fk_overrides' => $overrides,
    'entities' => array(
      'label' => 'Genotype',
      'table' => 'genotype',
    ),
  );

  $options = array(
    'records' => $records,
    'tree_info' => $form_state['tree_info'],
    'species_codes' => $species_codes,
    'genotype_count' => &$genotype_count,
    'genotype_total' => &$genotype_total,
    'project_id' => $project_id,
    'seq_var_cvterm' => $seq_var_cvterm,
    'multi_insert' => &$multi_insert_options,
    'job' => &$job,
  );

  // Reference genome / assembly.
  if ($genotype['ref-genome'] == 'manual' or $genotype['ref-genome'] == 'manual2' or $genotype['ref-genome'] == 'url') {
    if (!empty($genotype['tripal_fasta']['file_upload'])) {
      // Uploaded new file.
      $assembly_user = $genotype['tripal_fasta']['file_upload'];
      tpps_add_project_file($form_state, $assembly_user);
    }
    if (!empty($genotype['tripal_fasta']['file_upload_existing'])) {
      // Uploaded existing file.
      $assembly_user = $genotype['tripal_fasta']['file_upload_existing'];
      tpps_add_project_file($form_state, $assembly_user);
    }
    if (!empty($genotype['tripal_fasta']['file_remote'])) {
      // Provided url to file.
      $assembly_user = $genotype['tripal_fasta']['file_remote'];
      tpps_chado_insert_record('projectprop', array(
        'project_id' => $project_id,
        'type_id' => tpps_load_cvterm('file_path')->cvterm_id,
        'value' => $assembly_user,
        'rank' => $form_state['file_rank'],
      ));
      $form_state['file_rank']++;
    }
  }
  elseif ($genotype['ref-genome'] != 'none') {
    tpps_chado_insert_record('projectprop', array(
      'project_id' => $project_id,
      'type_id' => tpps_load_cvterm('reference_genome')->cvterm_id,
      'value' => $genotype['ref-genome'],
    ));
  }

  // SNP genotype assay (optionally with SNP associations).
  if (!empty($genotype['files']['file-type']['SNPs Genotype Assay'])) {
    $snp_fid = $genotype['files']['snps-assay'];
    tpps_add_project_file($form_state, $snp_fid);

    $options['type'] = 'snp';
    $options['headers'] = tpps_file_headers($snp_fid);
    $options['marker'] = 'SNP';
    $options['type_cvterm'] = tpps_load_cvterm('snp')->cvterm_id;

    if (!empty($genotype['files']['file-type']['SNPs Associations'])) {
      $assoc_fid = $genotype['files']['snps-association'];
      tpps_add_project_file($form_state, $assoc_fid);

      $options['records']['featureloc'] = array();
      $options['records']['featureprop'] = array();
      $options['records']['feature_relationship'] = array();
      $options['records']['feature_cvterm'] = array();
      $options['records']['feature_cvtermprop'] = array();

      $options['associations'] = array();
      $options['associations_tool'] = $genotype['files']['snps-association-tool'];
      $options['associations_groups'] = $genotype['files']['snps-association-groups'];
      $options['scaffold_cvterm'] = tpps_load_cvterm('scaffold')->cvterm_id;
      $options['phenotype_meta'] = $form_state['data']['phenotype_meta'];
      $options['pub_id'] = $form_state['ids']['pub_id'];

      switch ($genotype['files']['snps-association-type']) {
        case 'P value':
          $options['associations_type'] = tpps_load_cvterm('p_value')->cvterm_id;
          break;

        case 'Genomic Inflation Factor (GIF)':
          $options['associations_type'] = tpps_load_cvterm('lambda')->cvterm_id;
          break;

        case 'P-adjusted (FDR) / Q value':
          $options['associations_type'] = tpps_load_cvterm('q_value')->cvterm_id;
          break;

        case 'P-adjusted (FWE)':
          $options['associations_type'] = tpps_load_cvterm('p_adj_fwe')->cvterm_id;
          break;

        case 'P-adjusted (Bonferroni)':
          $options['associations_type'] = tpps_load_cvterm('bonferroni')->cvterm_id;
          break;

        default:
          break;
      }
      _tpps_submit_genotype_log('[INFO] - Processing snp_association file data...', $job);
      tpps_file_iterator($assoc_fid, 'tpps_process_snp_association', $options);
      _tpps_submit_genotype_log('[INFO] - Done.', $job);

      // featureloc and feature_relationship also point at the feature table
      // through non-default column names.
      $multi_insert_options['fk_overrides']['featureloc'] = array(
        'srcfeature' => array(
          'table' => 'feature',
          'columns' => array(
            'srcfeature_id' => 'feature_id',
          ),
        ),
      );
      $multi_insert_options['fk_overrides']['feature_relationship'] = array(
        'subject' => array(
          'table' => 'feature',
          'columns' => array(
            'subject_id' => 'feature_id',
          ),
        ),
        'object' => array(
          'table' => 'feature',
          'columns' => array(
            'object_id' => 'feature_id',
          ),
        ),
      );

      $pop_struct_fid = $genotype['files']['snps-pop-struct'];
      tpps_add_project_file($form_state, $pop_struct_fid);

      $kinship_fid = $genotype['files']['snps-kinship'];
      tpps_add_project_file($form_state, $kinship_fid);
    }
    _tpps_submit_genotype_log('[INFO] - Processing genotype_spreadsheet file data...', $job);
    tpps_file_iterator($snp_fid, 'tpps_process_genotype_spreadsheet', $options);
    _tpps_submit_genotype_log('[INFO] - Done.', $job);

    _tpps_submit_genotype_log('[INFO] - Inserting genotype_spreadsheet data into database using insert_multi...', $job);
    tpps_chado_insert_multi($options['records'], $multi_insert_options);
    _tpps_submit_genotype_log('[INFO] - Done', $job);
    $options['records'] = $records;
    $genotype_total += $genotype_count;
    _tpps_submit_genotype_log('[INFO] - Genotype count:' . $genotype_count, $job);
    $genotype_count = 0;
  }

  // Assay design file: either a newly uploaded file or a previously loaded
  // one, whose file id is stored directly in 'assay-load'.
  if (!empty($genotype['files']['file-type']['Assay Design']) and !empty($genotype['marker-type']['SNPs'])) {
    $design_fid = ($genotype['files']['assay-load'] == 'new') ? $genotype['files']['assay-design'] : $genotype['files']['assay-load'];
    tpps_add_project_file($form_state, $design_fid);
  }

  // SSRs/cpSSRs genotype spreadsheet (plus optional extra SSR file).
  if (!empty($genotype['files']['file-type']['SSRs/cpSSRs Genotype Spreadsheet'])) {
    $ssr_fid = $genotype['files']['ssrs'];
    tpps_add_project_file($form_state, $ssr_fid);

    $options['type'] = 'ssrs';
    $options['headers'] = tpps_ssrs_headers($ssr_fid, $genotype['files']['ploidy']);
    $options['marker'] = $genotype['SSRs/cpSSRs'];
    $options['type_cvterm'] = tpps_load_cvterm('ssr')->cvterm_id;
    $options['empty'] = $genotype['files']['ssrs-empty'];
    _tpps_submit_genotype_log('[INFO] - Processing genotype_spreadsheet file data...', $job);
    tpps_file_iterator($ssr_fid, 'tpps_process_genotype_spreadsheet', $options);
    _tpps_submit_genotype_log('[INFO] - Done.', $job);

    _tpps_submit_genotype_log('[INFO] - Inserting data into database using insert_multi...', $job);
    tpps_chado_insert_multi($options['records'], $multi_insert_options);
    // The two log targets historically receive slightly different wording
    // here, so they are written separately to keep both strings intact.
    tpps_job_logger_write('[INFO] - Done');
    if (!empty($job)) {
      $job->logMessage('[INFO] - Done.');
    }
    $options['records'] = $records;
    $genotype_count = 0;

    if (!empty($genotype['files']['ssr-extra-check'])) {
      $extra_fid = $genotype['files']['ssrs_extra'];
      tpps_add_project_file($form_state, $extra_fid);

      $options['marker'] = $genotype['files']['extra-ssr-type'];
      $options['headers'] = tpps_ssrs_headers($extra_fid, $genotype['files']['extra-ploidy']);
      _tpps_submit_genotype_log('[INFO] - Processing genotype_spreadsheet file data...', $job);
      tpps_file_iterator($extra_fid, 'tpps_process_genotype_spreadsheet', $options);
      _tpps_submit_genotype_log('[INFO] - Done.', $job);

      _tpps_submit_genotype_log('[INFO] - Inserting data into database using insert_multi...', $job);
      tpps_chado_insert_multi($options['records'], $multi_insert_options);
      _tpps_submit_genotype_log('[INFO] - Done.', $job);
      $options['records'] = $records;
      $genotype_count = 0;
    }
  }

  // Indel genotype spreadsheet.
  if (!empty($genotype['files']['file-type']['Indel Genotype Spreadsheet'])) {
    $indel_fid = $genotype['files']['indels'];
    tpps_add_project_file($form_state, $indel_fid);

    $options['type'] = 'indel';
    $options['headers'] = tpps_file_headers($indel_fid);
    $options['marker'] = 'Indel';
    $options['type_cvterm'] = tpps_load_cvterm('indel')->cvterm_id;
    _tpps_submit_genotype_log('[INFO] - Processing genotype_spreadsheet file data...', $job);
    tpps_file_iterator($indel_fid, 'tpps_process_genotype_spreadsheet', $options);
    _tpps_submit_genotype_log('[INFO] - Done.', $job);

    _tpps_submit_genotype_log('[INFO] - Inserting data into database using insert_multi...', $job);
    tpps_chado_insert_multi($options['records'], $multi_insert_options);
    _tpps_submit_genotype_log('[INFO] - Done.', $job);
    $options['records'] = $records;
    $genotype_total += $genotype_count;
    _tpps_submit_genotype_log('[INFO] - Genotype count:' . $genotype_total, $job);
    $genotype_count = 0;
  }

  // Other marker genotype spreadsheet.
  if (!empty($genotype['files']['file-type']['Other Marker Genotype Spreadsheet'])) {
    $other_fid = $genotype['files']['other'];
    tpps_add_project_file($form_state, $other_fid);

    $options['headers'] = tpps_file_headers($other_fid);
    if (!empty($genotype['files']['other-groups'])) {
      $groups = $genotype['files']['other-groups'];
      $options['headers'] = tpps_other_marker_headers($other_fid, $groups['Genotype Data'][0]);
      $options['tree_id'] = $groups['Tree Id'][1];
    }

    $options['type'] = 'other';
    $options['marker'] = $genotype['other-marker'];
    $options['type_cvterm'] = tpps_load_cvterm('genetic_marker')->cvterm_id;
    _tpps_submit_genotype_log('[INFO] - Processing genotype_spreadsheet file data...', $job);
    tpps_file_iterator($other_fid, 'tpps_process_genotype_spreadsheet', $options);
    _tpps_submit_genotype_log('[INFO] - Done.', $job);

    _tpps_submit_genotype_log('[INFO] - Inserting data into database using insert_multi...', $job);
    tpps_chado_insert_multi($options['records'], $multi_insert_options);
    _tpps_submit_genotype_log('[INFO] - Done.', $job);
    $options['records'] = $records;
    $genotype_count = 0;
  }

  // Check to make sure admin has not set disable_vcf_import.
  $disable_vcf_import = $firstpage['disable_vcf_import'] ?? 0;
  tpps_job_logger_write('[INFO] Disable VCF Import is set to ' . $disable_vcf_import . ' (0 means allow vcf import, 1 ignore vcf import)');

  if (!empty($genotype['files']['file-type']['VCF']) and $disable_vcf_import == 0) {
    // @todo we probably want to use tpps_file_iterator to parse vcf files.
    $vcf_fid = $genotype['files']['vcf'];
    tpps_add_project_file($form_state, $vcf_fid);
    _tpps_submit_genotype_vcf($form_state, $species_codes, $i, $vcf_fid, $seq_var_cvterm, $record_group, $multi_insert_options, $job);
    $genotype_count = 0;
  }
}
const TPPS_PAGE_1
Definition: tpps.module:12
tpps_ssrs_headers($fid, $ploidy)
tpps_other_marker_headers($fid, array $cols)
tpps_add_project_file(array &$state, &$fid)
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_submission_add_tag($accession, $tag)
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:972
const TPPS_PAGE_4
Definition: tpps.module:15
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
tpps_get_location($location)
Definition: file_utils.inc:640
tpps_file_iterator($fid, $function, array &$options=array())
tpps_submit_vcf_render_genotype_combination($raw_value, $ref, $alt)

◆ tpps_submit_page_1()

tpps_submit_page_1 ( array &  $form_state,
TripalJob &  $job = NULL 
)

Submits Publication and Species data to the database.

Parameters
array $form_state: The state of the form being submitted.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 189 of file submit_all.php.

/**
 * Submits Publication and Species data to the database.
 *
 * Links the project to its dbxref(s), stores the optional project photo,
 * inserts contact records for the authors and the organization, inserts the
 * publication with its properties and authors, and then inserts each organism
 * together with its properties and its project/publication links. The ids of
 * the publication and of the organisms are saved in $form_state['ids'].
 *
 * @param array $form_state
 *   The state of the form being submitted.
 * @param TripalJob $job
 *   The TripalJob object for the submission job. May be NULL, in which case
 *   messages are only written through tpps_job_logger_write().
 */
function tpps_submit_page_1(array &$form_state, TripalJob &$job = NULL) {

  $dbxref_id = $form_state['dbxref_id'];
  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
  $thirdpage = $form_state['saved_values'][TPPS_PAGE_3];
  $seconds = $firstpage['publication']['secondaryAuthors'];

  tpps_chado_insert_record('project_dbxref', array(
    'project_id' => $form_state['ids']['project_id'],
    'dbxref_id' => $dbxref_id,
    'is_current' => TRUE,
  ));

  // 'tppsc' submissions that supplied a DOI are additionally linked to a
  // dbxref in the 'dryad' database.
  if (!empty($form_state['tpps_type']) and $form_state['tpps_type'] == 'tppsc' and !empty($form_state['saved_values'][TPPS_PAGE_1]['doi'])) {
    $dryad_db = chado_get_db(array('name' => 'dryad'));
    $dryad_dbxref = chado_insert_dbxref(array(
      'db_id' => $dryad_db->db_id,
      'accession' => $form_state['saved_values'][TPPS_PAGE_1]['doi'],
    ))->dbxref_id;
    tpps_chado_insert_record('project_dbxref', array(
      'project_id' => $form_state['ids']['project_id'],
      'dbxref_id' => $dryad_dbxref,
      'is_current' => TRUE,
    ));
  }

  if (!empty($firstpage['photo'])) {
    tpps_add_project_file($form_state, $firstpage['photo']);
  }

  $primary_author_id = tpps_chado_insert_record('contact', array(
    'name' => $firstpage['primaryAuthor'],
    'type_id' => tpps_load_cvterm('person')->cvterm_id,
  ));

  tpps_chado_insert_record('project_contact', array(
    'project_id' => $form_state['ids']['project_id'],
    'contact_id' => $primary_author_id,
  ));

  // Secondary authors become contacts and are remembered (with their rank)
  // so they can be attached to the publication once it exists.
  $authors = array($firstpage['primaryAuthor']);
  $pubauthors = array();
  if ($seconds['number'] != 0) {
    for ($i = 1; $i <= $seconds['number']; $i++) {
      // isset() avoids an undefined index notice when fewer names than
      // 'number' were saved.
      if (isset($seconds[$i]) and $seconds[$i] != "") {
        tpps_chado_insert_record('contact', array(
          'name' => $seconds[$i],
          'type_id' => tpps_load_cvterm('person')->cvterm_id,
        ));

        // Everything before the last space is the given name(s); the last
        // word is the surname.
        $names = explode(" ", $seconds[$i]);
        $first_name = implode(" ", array_slice($names, 0, -1));
        $last_name = end($names);
        $pubauthors[] = array(
          'rank' => "$i",
          'surname' => $last_name,
          'givennames' => $first_name,
        );
        $authors[] = $seconds[$i];
      }
      else {
        $message = '[INFO] - Secondary publishers error - found an empty secondary publisher name. Ignoring this input.';
        tpps_job_logger_write($message);
        // $job defaults to NULL, so only log to it when it was provided.
        if ($job !== NULL) {
          $job->logMessage($message);
        }
      }
    }
  }

  $publication_id = tpps_chado_insert_record('pub', array(
    'title' => $firstpage['publication']['title'],
    'series_name' => $firstpage['publication']['journal'],
    'type_id' => tpps_load_cvterm('article')->cvterm_id,
    'pyear' => $firstpage['publication']['year'],
    'uniquename' => implode('; ', $authors) . " {$firstpage['publication']['title']}. {$firstpage['publication']['journal']}; {$firstpage['publication']['year']}",
  ));
  $form_state['ids']['pub_id'] = $publication_id;
  tpps_tripal_entity_publish('Publication', array(
    $firstpage['publication']['title'],
    $publication_id,
  ));
  $form_state['pyear'] = $firstpage['publication']['year'];
  $form_state['journal'] = $firstpage['publication']['journal'];

  if (!empty($firstpage['publication']['abstract'])) {
    tpps_chado_insert_record('pubprop', array(
      'pub_id' => $publication_id,
      'type_id' => tpps_load_cvterm('abstract')->cvterm_id,
      'value' => $firstpage['publication']['abstract'],
    ));
  }

  tpps_chado_insert_record('pubprop', array(
    'pub_id' => $publication_id,
    'type_id' => tpps_load_cvterm('authors')->cvterm_id,
    'value' => implode(', ', $authors),
  ));
  $form_state['authors'] = $authors;

  tpps_chado_insert_record('project_pub', array(
    'project_id' => $form_state['ids']['project_id'],
    'pub_id' => $publication_id,
  ));

  if (!empty($firstpage['organization'])) {
    $organization_id = tpps_chado_insert_record('contact', array(
      'name' => $firstpage['organization'],
      'type_id' => tpps_load_cvterm('organization')->cvterm_id,
    ));

    tpps_chado_insert_record('contact_relationship', array(
      'type_id' => tpps_load_cvterm('contact_part_of')->cvterm_id,
      'subject_id' => $primary_author_id,
      'object_id' => $organization_id,
    ));
  }

  // The primary author is always publication author rank 0.
  $names = explode(" ", $firstpage['primaryAuthor']);
  $first_name = implode(" ", array_slice($names, 0, -1));
  $last_name = end($names);

  tpps_chado_insert_record('pubauthor', array(
    'pub_id' => $publication_id,
    'rank' => '0',
    'surname' => $last_name,
    'givennames' => $first_name,
  ));

  foreach ($pubauthors as $info) {
    $info['pub_id'] = $publication_id;
    tpps_chado_insert_record('pubauthor', $info);
  }

  $form_state['ids']['organism_ids'] = array();
  $organism_number = $firstpage['organism']['number'];

  for ($i = 1; $i <= $organism_number; $i++) {
    // The first word is the genus; the rest of the name is the species.
    $parts = explode(" ", $firstpage['organism'][$i]['name']);
    $genus = $parts[0];
    $species = implode(" ", array_slice($parts, 1));
    $infra = NULL;
    // Everything from a 'var.' or 'subsp.' marker in third position onward is
    // stored as the infraspecific name.
    if (isset($parts[2]) and ($parts[2] == 'var.' or $parts[2] == 'subsp.')) {
      $infra = implode(" ", array_slice($parts, 2));
    }

    $record = array(
      'genus' => $genus,
      'species' => $species,
      'infraspecific_name' => $infra,
    );

    // Species names containing ' x ' get the 'speciesaggregate' type.
    if (preg_match('/ x /', $species)) {
      $record['type_id'] = tpps_load_cvterm('speciesaggregate')->cvterm_id;
    }
    $form_state['ids']['organism_ids'][$i] = tpps_chado_insert_record('organism', $record);

    $type_cvterm = tpps_load_cvterm('Type');
    if (!empty($type_cvterm)) {
      tpps_chado_insert_record('organismprop', array(
        'organism_id' => $form_state['ids']['organism_ids'][$i],
        'type_id' => $type_cvterm->cvterm_id,
        'value' => $firstpage['organism'][$i]['is_tree'] ? 'Tree' : 'Non-tree',
      ));
    }

    // With several organisms, the remaining records (code, taxonomy, project
    // and publication links, published entity) are only created for
    // organisms that have accession data.
    if ($organism_number != 1) {
      // 'check' is set: skip organisms without their own accession file.
      if (!empty($thirdpage['tree-accession']['check']) and empty($thirdpage['tree-accession']["species-$i"]['file'])) {
        continue;
      }

      // 'check' is not set: search the "species-1" accession file for this
      // organism's name and skip the organism when it is not found.
      if (empty($thirdpage['tree-accession']['check'])) {
        $options = array(
          'cols' => array(),
          'search' => $firstpage['organism'][$i]['name'],
          'found' => FALSE,
        );
        $tree_accession = $thirdpage['tree-accession']["species-1"];
        $groups = $tree_accession['file-groups'];
        if ($groups['Genus and Species']['#type'] == 'separate') {
          $options['cols']['genus'] = $groups['Genus and Species']['6'];
          $options['cols']['species'] = $groups['Genus and Species']['7'];
        }
        else {
          $options['cols']['org'] = $groups['Genus and Species']['10'];
        }
        $fid = $tree_accession['file'];
        tpps_file_iterator($fid, 'tpps_check_organisms', $options);
        if (!$options['found']) {
          continue;
        }
      }
    }

    $code_exists = tpps_chado_prop_exists('organism', $form_state['ids']['organism_ids'][$i], 'organism 4 letter code');

    if (!$code_exists) {
      // Use the first trial code that no organismprop record holds yet.
      // NOTE(review): if every trial code is already taken, the last one
      // tried is inserted anyway, and $trial_code is undefined when
      // tpps_get_species_codes() yields nothing - confirm that it always
      // yields an unused code.
      foreach (tpps_get_species_codes($genus, $species) as $trial_code) {
        $new_code_query = chado_select_record('organismprop', array('value'), array(
          'type_id' => tpps_load_cvterm('organism 4 letter code')->cvterm_id,
          'value' => $trial_code,
        ));

        if (empty($new_code_query)) {
          break;
        }
      }

      tpps_chado_insert_record('organismprop', array(
        'organism_id' => $form_state['ids']['organism_ids'][$i],
        'type_id' => tpps_load_cvterm('organism 4 letter code')->cvterm_id,
        'value' => $trial_code,
      ));
    }

    $ranks = array(
      'family',
      'order',
      'subkingdom',
    );

    // Add each taxonomic rank that is missing and that tpps_get_taxon() can
    // resolve for this organism.
    foreach ($ranks as $rank) {
      $exists = tpps_chado_prop_exists('organism', $form_state['ids']['organism_ids'][$i], $rank);
      if (!$exists) {
        $taxon = tpps_get_taxon($firstpage['organism'][$i]['name'], $rank);
        if ($taxon) {
          tpps_chado_insert_record('organismprop', array(
            'organism_id' => $form_state['ids']['organism_ids'][$i],
            'type_id' => tpps_load_cvterm($rank)->cvterm_id,
            'value' => $taxon,
          ));
        }
      }
    }

    tpps_chado_insert_record('project_organism', array(
      'organism_id' => $form_state['ids']['organism_ids'][$i],
      'project_id' => $form_state['ids']['project_id'],
    ));

    tpps_chado_insert_record('pub_organism', array(
      'organism_id' => $form_state['ids']['organism_ids'][$i],
      'pub_id' => $publication_id,
    ));

    tpps_tripal_entity_publish('Organism', array(
      "$genus $species",
      $form_state['ids']['organism_ids'][$i],
    ));
  }
}
const TPPS_PAGE_1
Definition: tpps.module:12
tpps_add_project_file(array &$state, &$fid)
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_get_species_codes($genus, $species)
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
tpps_tripal_entity_publish($bundle_name, array $vals, array $options=array())
tpps_chado_prop_exists($base_table, $id, $name, array $options=array())
tpps_get_taxon($org_name, $rank)
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
const TPPS_PAGE_3
Definition: tpps.module:14
tpps_file_iterator($fid, $function, array &$options=array())

◆ tpps_submit_page_2()

tpps_submit_page_2 ( array &  $form_state,
TripalJob &  $job = NULL 
)

Submits Study Design data to the database.

Parameters
array $form_state: The state of the form being submitted.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 447 of file submit_all.php.

/**
 * Submits Study Design data to the database.
 *
 * Every value collected on the second page is stored as a projectprop record
 * of the current project.
 *
 * @param array $form_state
 *   The state of the form being submitted.
 * @param TripalJob $job
 *   The TripalJob object for the submission job. Not used by this function.
 */
function tpps_submit_page_2(array &$form_state, TripalJob &$job = NULL) {

  $design = $form_state['saved_values'][TPPS_PAGE_2];
  $project_id = $form_state['ids']['project_id'];

  // Inserts one projectprop record for the project. The 'rank' column is
  // only part of the record when a rank is given.
  $add_prop = function ($term, $value, $rank = NULL) use ($project_id) {
    $prop = array(
      'project_id' => $project_id,
      'type_id' => tpps_load_cvterm($term)->cvterm_id,
      'value' => $value,
    );
    if ($rank !== NULL) {
      $prop['rank'] = $rank;
    }
    tpps_chado_insert_record('projectprop', $prop);
  };

  // Stores the "<prefix>_control" flag of a factor and, when a level was
  // provided, the matching "<prefix>_level" value.
  $add_factor = function ($prefix, $factor) use ($add_prop) {
    $is_controlled = ($factor['option'] == '1');
    $add_prop("{$prefix}_control", $is_controlled ? 'True' : 'False');

    if ($is_controlled) {
      $add_prop("{$prefix}_level", $factor['controlled']);
    }
    elseif (!empty($factor['uncontrolled'])) {
      $add_prop("{$prefix}_level", $factor['uncontrolled']);
    }
  };

  // Study start and end are both written whenever a starting date exists.
  if (!empty($design['StartingDate'])) {
    $add_prop('study_start', $design['StartingDate']['month'] . " " . $design['StartingDate']['year']);
    $add_prop('study_end', $design['EndingDate']['month'] . " " . $design['EndingDate']['year']);
  }

  $add_prop('association_results_type', $design['data_type']);

  // The form stores the study type as an index into this list.
  $studytype_options = array(
    0 => '- Select -',
    1 => 'Natural Population (Landscape)',
    2 => 'Growth Chamber',
    3 => 'Greenhouse',
    4 => 'Experimental/Common Garden',
    5 => 'Plantation',
  );
  $add_prop('study_type', $studytype_options[$design['study_type']]);

  $info = $design['study_info'];

  if (!empty($info['season'])) {
    // Single-argument implode() joins the seasons without any separator.
    $add_prop('assession_season', implode($info['season']));
  }

  if (!empty($info['assessions'])) {
    $add_prop('assession_number', $info['assessions']);
  }

  if (!empty($info['temp'])) {
    $add_prop('temperature_high', $info['temp']['high']);
    $add_prop('temperature_low', $info['temp']['low']);
  }

  foreach (array('co2', 'humidity', 'light', 'salinity') as $factor_name) {
    if (!empty($info[$factor_name])) {
      $add_factor($factor_name, $info[$factor_name]);
    }
  }

  if (empty($info['rooting'])) {
    return;
  }

  $rooting = $info['rooting'];
  $add_prop('rooting_type', $rooting['option']);

  if ($rooting['option'] == 'Soil') {
    $soil = $rooting['soil'];
    // A soil type of 'Other' is replaced by the free-text value.
    $add_prop('soil_type', ($soil['type'] == 'Other') ? $soil['other'] : $soil['type']);
    $add_prop('soil_container', $soil['container']);
  }

  if (!empty($rooting['ph'])) {
    $add_factor('pH', $rooting['ph']);
  }

  // The treatment values alternate between a selection flag and the
  // treatment value it applies to; only flagged treatments are stored, ranked
  // in the order they are encountered.
  $expect_flag = TRUE;
  $selected = FALSE;
  $treatment_rank = 0;
  foreach ($rooting['treatment'] as $entry) {
    if ($expect_flag) {
      $selected = ((bool) $entry);
      $expect_flag = FALSE;
      continue;
    }
    if ($selected) {
      $add_prop('treatment', $entry, $treatment_rank);
      $treatment_rank++;
    }
    $expect_flag = TRUE;
  }
}
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
const TPPS_PAGE_2
Definition: tpps.module:13

◆ tpps_submit_page_3()

tpps_submit_page_3 ( array &  $form_state,
TripalJob &  $job = NULL 
)

Submits Plant Accession data to the database.

Parameters
array $form_state: The state of the form being submitted.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 630 of file submit_all.php.

/**
 * Submits Plant Accession data to the database.
 *
 * Stores the study locations as projectprop records, then runs every
 * accession file through tpps_process_accession() and inserts the collected
 * stock records with tpps_chado_insert_multi(). The ids returned by that
 * insert are saved in $form_state['tree_info'], and geocoded study locations
 * are saved in $form_state['locations'].
 *
 * @param array $form_state
 *   The state of the form being submitted.
 * @param TripalJob $job
 *   The TripalJob object for the submission job. May be NULL, in which case
 *   messages are only written through tpps_job_logger_write().
 */
function tpps_submit_page_3(array &$form_state, TripalJob &$job = NULL) {
  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
  $thirdpage = $form_state['saved_values'][TPPS_PAGE_3];
  $organism_number = $firstpage['organism']['number'];
  $form_state['locations'] = array();
  $form_state['tree_info'] = array();
  $stock_count = 0;
  $loc_name = 'Location (latitude/longitude or country/state or population group)';

  // Writes a message to the TPPS job log and, when a job was provided, to
  // the Tripal job as well. $job defaults to NULL, so it must be checked.
  $log = function ($message) use ($job) {
    tpps_job_logger_write($message);
    if ($job !== NULL) {
      $job->logMessage($message);
    }
  };

  if (!empty($thirdpage['skip_validation'])) {
    tpps_submission_add_tag($form_state['accession'], 'No Location Information');
  }

  if (!empty($thirdpage['study_location'])) {
    $type = $thirdpage['study_location']['type'];
    $locs = $thirdpage['study_location']['locations'];
    $geo_api_key = variable_get('tpps_geocode_api_key', NULL);

    for ($i = 1; $i <= $locs['number']; $i++) {
      // Every location type except '2' is handled as a coordinate pair.
      if ($type !== '2') {
        $standard_coordinate = explode(',', tpps_standard_coord($locs[$i]));
        $latitude = $standard_coordinate[0];
        $longitude = $standard_coordinate[1];

        tpps_chado_insert_record('projectprop', array(
          'project_id' => $form_state['ids']['project_id'],
          'type_id' => tpps_load_cvterm('gps_latitude')->cvterm_id,
          'value' => $latitude,
          'rank' => $i,
        ));

        tpps_chado_insert_record('projectprop', array(
          'project_id' => $form_state['ids']['project_id'],
          'type_id' => tpps_load_cvterm('gps_longitude')->cvterm_id,
          'value' => $longitude,
          'rank' => $i,
        ));
        continue;
      }

      // Type '2' locations are stored as entered and, when a geocoding key is
      // configured, also resolved to coordinates.
      $loc = $locs[$i];
      tpps_chado_insert_record('projectprop', array(
        'project_id' => $form_state['ids']['project_id'],
        'type_id' => tpps_load_cvterm('experiment_location')->cvterm_id,
        'value' => $loc,
        'rank' => $i,
      ));

      if (isset($geo_api_key)) {
        $query = urlencode($loc);
        $url = "https://api.opencagedata.com/geocode/v1/json?q=$query&key=$geo_api_key";
        // The request or the decoding can fail; in that case there is no
        // response object to read from and the location is left without
        // coordinates.
        $raw_response = file_get_contents($url);
        $response = ($raw_response !== FALSE) ? json_decode($raw_response) : NULL;

        if (!empty($response->total_results) and !empty($response->results[0]->geometry)) {
          $result = $response->results[0]->geometry;
          $form_state['locations'][$loc] = $result;

          tpps_chado_insert_record('projectprop', array(
            'project_id' => $form_state['ids']['project_id'],
            'type_id' => tpps_load_cvterm('gps_latitude')->cvterm_id,
            'value' => $result->lat,
            'rank' => $i,
          ));

          tpps_chado_insert_record('projectprop', array(
            'project_id' => $form_state['ids']['project_id'],
            'type_id' => tpps_load_cvterm('gps_longitude')->cvterm_id,
            'value' => $result->lng,
            'rank' => $i,
          ));
        }
      }
    }
  }

  $cvterms = array(
    'org' => tpps_load_cvterm('organism')->cvterm_id,
    'clone' => tpps_load_cvterm('clone')->cvterm_id,
    'has_part' => tpps_load_cvterm('has_part')->cvterm_id,
    'lat' => tpps_load_cvterm('gps_latitude')->cvterm_id,
    'lng' => tpps_load_cvterm('gps_longitude')->cvterm_id,
    'country' => tpps_load_cvterm('country')->cvterm_id,
    'state' => tpps_load_cvterm('state')->cvterm_id,
    'county' => tpps_load_cvterm('county')->cvterm_id,
    'district' => tpps_load_cvterm('district')->cvterm_id,
    'loc' => tpps_load_cvterm('location')->cvterm_id,
    'gps_type' => tpps_load_cvterm('gps_type')->cvterm_id,
    'precision' => tpps_load_cvterm('gps_precision')->cvterm_id,
  );

  // Empty record groups, one per table filled while processing a file.
  $records = array(
    'stock' => array(),
    'stockprop' => array(),
    'stock_relationship' => array(),
    'project_stock' => array(),
  );
  // Both ends of a stock_relationship refer to the stock table.
  $overrides = array(
    'stock_relationship' => array(
      'subject' => array(
        'table' => 'stock',
        'columns' => array(
          'subject_id' => 'stock_id',
        ),
      ),
      'object' => array(
        'table' => 'stock',
        'columns' => array(
          'object_id' => 'stock_id',
        ),
      ),
    ),
  );

  $multi_insert_options = array(
    'fk_overrides' => $overrides,
    'fks' => 'stock',
    'entities' => array(
      'label' => 'Stock',
      'table' => 'stock',
      'prefix' => $form_state['accession'] . '-',
    ),
  );

  $names = array();
  for ($i = 1; $i <= $organism_number; $i++) {
    $names[$i] = $firstpage['organism'][$i]['name'];
  }
  $names['number'] = $firstpage['organism']['number'];
  $options = array(
    'cvterms' => $cvterms,
    'records' => $records,
    'overrides' => $overrides,
    'locations' => &$form_state['locations'],
    'accession' => $form_state['accession'],
    'single_file' => empty($thirdpage['tree-accession']['check']),
    'org_names' => $names,
    'saved_ids' => &$form_state['ids'],
    'stock_count' => &$stock_count,
    'multi_insert' => $multi_insert_options,
    'tree_info' => &$form_state['tree_info'],
    'job' => &$job,
  );

  for ($i = 1; $i <= $organism_number; $i++) {
    $tree_accession = $thirdpage['tree-accession']["species-$i"];
    $fid = $tree_accession['file'];

    tpps_add_project_file($form_state, $fid);

    $column_vals = $tree_accession['file-columns'];
    $groups = $tree_accession['file-groups'];

    // The record groups are unset after each file, so start every file with
    // the empty groups again instead of relying on implicit array creation.
    $options['records'] = $records;
    $options['org_num'] = $i;
    $options['no_header'] = !empty($tree_accession['file-no-header']);
    $options['empty'] = $tree_accession['file-empty'];
    $options['pop_group'] = $tree_accession['pop-group'];
    $options['exact'] = $tree_accession['exact_coords'] ?? NULL;
    $options['precision'] = NULL;
    if (!$options['exact']) {
      $options['precision'] = $tree_accession['coord_precision'] ?? NULL;
      // Only tag the coordinates as approximate when the submission is not
      // already tagged as having no location information.
      if (!array_key_exists(tpps_get_tag_id('No Location Information'), tpps_submission_get_tags($form_state['accession']))) {
        tpps_submission_add_tag($form_state['accession'], 'Approximate Coordinates');
      }
    }
    // County, district and clone columns are looked up by their column type
    // ('8', '9' and '13') among the file's column selections.
    $county = array_search('8', $column_vals);
    $district = array_search('9', $column_vals);
    $clone = array_search('13', $column_vals);
    $options['column_ids'] = array(
      'id' => $groups['Tree Id']['1'],
      'lat' => $groups[$loc_name]['4'] ?? NULL,
      'lng' => $groups[$loc_name]['5'] ?? NULL,
      'country' => $groups[$loc_name]['2'] ?? NULL,
      'state' => $groups[$loc_name]['3'] ?? NULL,
      'county' => ($county !== FALSE) ? $county : NULL,
      'district' => ($district !== FALSE) ? $district : NULL,
      'clone' => ($clone !== FALSE) ? $clone : NULL,
      'pop_group' => $groups[$loc_name]['12'] ?? NULL,
    );

    // A single file shared by several organisms needs the organism columns.
    if ($organism_number != 1 and empty($thirdpage['tree-accession']['check'])) {
      if ($groups['Genus and Species']['#type'] == 'separate') {
        $options['column_ids']['genus'] = $groups['Genus and Species']['6'];
        $options['column_ids']['species'] = $groups['Genus and Species']['7'];
      }
      else {
        $options['column_ids']['org'] = $groups['Genus and Species']['10'];
      }
    }

    $log('[INFO] - Processing accession file data...');
    tpps_file_iterator($fid, 'tpps_process_accession', $options);
    $log('[INFO] - Done.');

    $log('[INFO] - Inserting data into database using insert_multi...');
    $new_ids = tpps_chado_insert_multi($options['records'], $multi_insert_options);
    $log('[INFO] - Done.');

    foreach ($new_ids as $t_id => $stock_id) {
      $form_state['tree_info'][$t_id]['stock_id'] = $stock_id;
    }
    unset($options['records']);
    $stock_count = 0;
    // Without per-species files, the one file covers every organism.
    if (empty($thirdpage['tree-accession']['check'])) {
      break;
    }
  }

  if (!empty($thirdpage['existing_trees'])) {
    tpps_matching_trees($form_state['ids']['project_id']);
  }
}
tpps_submission_get_tags($accession)
tpps_standard_coord($raw_coordinate)
const TPPS_PAGE_1
Definition: tpps.module:12
tpps_get_tag_id($name)
tpps_add_project_file(array &$state, &$fid)
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_matching_trees($project_id)
Definition: match_trees.inc:14
tpps_submission_add_tag($accession, $tag)
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
const TPPS_PAGE_3
Definition: tpps.module:14
tpps_file_iterator($fid, $function, array &$options=array())

◆ tpps_submit_page_4()

tpps_submit_page_4 ( array &  $form_state,
TripalJob &  $job = NULL 
)

Submits Tripal FASTAImporter job for reference genome.

The remaining data for the fourth page is submitted during the TPPS File Parsing Tripal Job due to its size.

Parameters
array $form_state: The state of the form being submitted.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 854 of file submit_all.php.

/**
 * Submits Tripal FASTAImporter job for reference genome.
 *
 * For every organism, the stored 4 letter species code is looked up and, if a
 * reference genome was provided through a URL/manual upload or a BioProject,
 * the matching Tripal importer job is created and submitted. Afterwards the
 * phenotype, genotype and environment data of each organism are submitted.
 *
 * @param array $form_state
 *   The state of the form being submitted.
 * @param TripalJob $job
 *   The TripalJob object for the submission job. It is passed on unchanged to
 *   the phenotype, genotype and environment submit functions.
 */
function tpps_submit_page_4(array &$form_state, TripalJob &$job = NULL) {
  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
  $organism_number = $form_state['saved_values'][TPPS_PAGE_1]['organism']['number'];
  $species_codes = array();

  for ($i = 1; $i <= $organism_number; $i++) {
    $organism_id = $form_state['ids']['organism_ids'][$i];

    // Get species codes.
    $code_rows = chado_select_record('organismprop', array('value'), array(
      'type_id' => tpps_load_cvterm('organism 4 letter code')->cvterm_id,
      'organism_id' => $organism_id,
    ), array(
      'limit' => 1,
    ));
    // The select may fail or match nothing; in both cases there is no row to
    // read the code from, so the code is recorded as NULL.
    $code_row = is_array($code_rows) ? current($code_rows) : FALSE;
    $species_codes[$organism_id] = $code_row ? $code_row->value : NULL;

    // Submit importer jobs.
    if (!isset($fourthpage["organism-$i"]['genotype'])) {
      continue;
    }
    $ref_genome = $fourthpage["organism-$i"]['genotype']['ref-genome'];

    if (in_array($ref_genome, array('url', 'manual', 'manual2'), TRUE)) {
      // Create job for tripal fasta importer.
      $class = 'FASTAImporter';
      tripal_load_include_importer_class($class);

      $fasta = $fourthpage["organism-$i"]['genotype']['tripal_fasta'];

      $file_upload = isset($fasta['file']['file_upload']) ? trim($fasta['file']['file_upload']) : 0;
      $file_existing = isset($fasta['file']['file_upload_existing']) ? trim($fasta['file']['file_upload_existing']) : 0;
      $file_remote = isset($fasta['file']['file_remote']) ? trim($fasta['file']['file_remote']) : 0;
      $analysis_id = $fasta['analysis_id'];
      $seqtype = $fasta['seqtype'];
      $re_accession = $fasta['db']['re_accession'];
      $db_id = $fasta['db']['db_id'];

      $run_args = array(
        'importer_class' => $class,
        'file_remote' => $file_remote,
        'analysis_id' => $analysis_id,
        'seqtype' => $seqtype,
        'organism_id' => $organism_id,
        'method' => '2',
        'match_type' => '0',
        're_name' => '',
        're_uname' => '',
        're_accession' => $re_accession,
        'db_id' => $db_id,
        'rel_type' => '',
        're_subject' => '',
        'parent_type' => '',
      );

      // Exactly one file source is used: an existing upload takes precedence
      // over a new upload, which takes precedence over a remote file.
      $file_details = array();

      if ($file_existing) {
        $file_details['fid'] = $file_existing;
      }
      elseif ($file_upload) {
        $file_details['fid'] = $file_upload;
      }
      elseif ($file_remote) {
        $file_details['file_remote'] = $file_remote;
      }

      try {
        $importer = new $class();
        $form = array();
        $importer->formSubmit($form, $form_state);

        $importer->create($run_args, $file_details);

        $importer->submitJob();
      }
      catch (Exception $ex) {
        // A failed import is reported to the user; the submission continues.
        drupal_set_message(t('Cannot submit import: @msg', array('@msg' => $ex->getMessage())), 'error');
      }
    }
    elseif ($ref_genome === 'bio') {
      $eutils = $fourthpage["organism-$i"]['genotype']['tripal_eutils'];
      $class = 'EutilsImporter';
      tripal_load_include_importer_class($class);

      $run_args = array(
        'importer_class' => $class,
        'db' => $eutils['db'],
        'accession' => $eutils['accession'],
        'linked_records' => $eutils['options']['linked_records'],
      );

      try {
        $importer = new $class();
        $importer->create($run_args);
        $importer->submitJob();
      }
      catch (Exception $ex) {
        // A failed import is reported to the user; the submission continues.
        drupal_set_message(t('Cannot submit BioProject: @msg', array('@msg' => $ex->getMessage())), 'error');
      }
    }
  }

  $form_state['data']['phenotype'] = array();
  $form_state['data']['phenotype_meta'] = array();

  // Submit raw data.
  for ($i = 1; $i <= $organism_number; $i++) {
    tpps_submit_phenotype($form_state, $i, $job);
    tpps_submit_genotype($form_state, $species_codes, $i, $job);
    tpps_submit_environment($form_state, $i, $job);
  }
}
const TPPS_PAGE_1
Definition: tpps.module:12
tpps_submit_environment(array &$form_state, $i, TripalJob &$job=NULL)
tpps_submit_genotype(array &$form_state, array $species_codes, $i, TripalJob &$job=NULL)
tpps_submit_phenotype(array &$form_state, $i, TripalJob &$job=NULL)
Definition: submit_all.php:976
const TPPS_PAGE_4
Definition: tpps.module:15
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)

◆ tpps_submit_phenotype()

tpps_submit_phenotype ( array &  $form_state,
  $i,
TripalJob &  $job = NULL 
)

Submits phenotype information for one species.

Parameters
array $form_state: The TPPS submission object.
int $i: The organism number we are submitting.
TripalJob $job: The TripalJob object for the submission job.

Definition at line 976 of file submit_all.php.

/**
 * Submits phenotype information for one species.
 *
 * Reads the phenotype section of page 4 for organism $i, builds the phenotype
 * metadata (from the manually entered fields and, optionally, a metadata
 * file), iterates over the phenotype data file and/or the mass spectrometry
 * ("iso") file, and inserts the collected records with
 * tpps_chado_insert_multi().
 *
 * @param array $form_state
 *   The TPPS submission object.
 * @param int $i
 *   The organism number we are submitting.
 * @param TripalJob $job
 *   The TripalJob object for the submission job. May be NULL, in which case
 *   progress messages are only written through tpps_job_logger_write().
 */
function tpps_submit_phenotype(array &$form_state, $i, TripalJob &$job = NULL) {
  // Mirror progress messages to the TPPS job log and, when a job object was
  // supplied, to the Tripal job. $job defaults to NULL, so it must be guarded.
  $log = function ($message) use ($job) {
    tpps_job_logger_write($message);
    if ($job !== NULL) {
      $job->logMessage($message);
    }
  };

  // array_search() returns FALSE when nothing matches, but a legitimate key
  // can itself be falsy (e.g. 0), so compare against FALSE explicitly.
  $find_column = function ($needle, array $haystack) {
    $key = array_search($needle, $haystack);
    return ($key !== FALSE) ? $key : NULL;
  };

  $log('[INFO] - Submitting phenotype data...');
  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
  $phenotype = $fourthpage["organism-$i"]['phenotype'] ?? NULL;
  $organism_name = $firstpage['organism'][$i]['name'];
  if (empty($phenotype)) {
    return;
  }
  tpps_submission_add_tag($form_state['accession'], 'Phenotype');

  // Get appropriate cvterms.
  $phenotype_cvterms = array(
    'time' => tpps_load_cvterm('time')->cvterm_id,
    'desc' => tpps_load_cvterm('description')->cvterm_id,
    'unit' => tpps_load_cvterm('unit')->cvterm_id,
    'min' => tpps_load_cvterm('minimum')->cvterm_id,
    'max' => tpps_load_cvterm('maximum')->cvterm_id,
    'environment' => tpps_load_cvterm('environment')->cvterm_id,
    'intensity' => tpps_load_cvterm('intensity')->cvterm_id,
  );

  // Empty record groups; reused to reset the options before the iso pass.
  $records = array(
    'phenotype' => array(),
    'phenotypeprop' => array(),
    'stock_phenotype' => array(),
    'phenotype_cvterm' => array(),
  );
  $phenotype_count = 0;

  $options = array(
    'records' => $records,
    'cvterms' => $phenotype_cvterms,
    'accession' => $form_state['accession'],
    'tree_info' => $form_state['tree_info'],
    'suffix' => 0,
    'phenotype_count' => $phenotype_count,
    'data' => &$form_state['data']['phenotype'],
    'job' => &$job,
  );

  if (!empty($phenotype['normal-check'])) {
    $phenotype_number = $phenotype['phenotypes-meta']['number'];
    $phenotypes_meta = array();
    $data_fid = $phenotype['file'];
    $phenos_edit = $form_state['phenotypes_edit'] ?? NULL;

    tpps_add_project_file($form_state, $data_fid);

    $env_phenotypes = FALSE;
    // Populate $phenotypes_meta with manually entered metadata.
    for ($j = 1; $j <= $phenotype_number; $j++) {
      // The name is taken before any edits are merged in.
      $name = strtolower($phenotype['phenotypes-meta'][$j]['name']);
      if (!empty($phenos_edit[$j])) {
        // (Rish) BUGFIX related to sex -> age: keep track of the cvterm id,
        // merge the edits over the original values, then restore the
        // attribute from the original, since it comes from the verified
        // cvterm table that populated the select list on the TPPS form.
        $cvterm_id = $phenotype['phenotypes-meta'][$j]['attribute'];
        $phenotype['phenotypes-meta'][$j] = $phenos_edit[$j] + $phenotype['phenotypes-meta'][$j];
        $phenotype['phenotypes-meta'][$j]['attribute'] = $cvterm_id;
      }
      $entry = $phenotype['phenotypes-meta'][$j];

      $phenotypes_meta[$name] = array();
      $phenotypes_meta[$name]['attr'] = $entry['attribute'];
      if ($entry['attribute'] == 'other') {
        $phenotypes_meta[$name]['attr-other'] = $entry['attr-other'];
      }
      $phenotypes_meta[$name]['desc'] = $entry['description'];
      $phenotypes_meta[$name]['unit'] = $entry['units'];
      if ($entry['units'] == 'other') {
        $phenotypes_meta[$name]['unit-other'] = $entry['unit-other'];
      }
      $phenotypes_meta[$name]['struct'] = $entry['structure'];
      if ($entry['structure'] == 'other') {
        $phenotypes_meta[$name]['struct-other'] = $entry['struct-other'];
      }
      // Each condition is tested on its own: previously the closing
      // parenthesis of the second empty() also wrapped the units comparison,
      // which dropped the unset-index protection for 'bin-check'.
      if (!empty($entry['val-check']) or !empty($entry['bin-check']) or $entry['units'] == tpps_load_cvterm('boolean')->cvterm_id) {
        $phenotypes_meta[$name]['min'] = $entry['min'];
        $phenotypes_meta[$name]['max'] = $entry['max'];
      }
      $phenotypes_meta[$name]['env'] = !empty($entry['env-check']);
      if ($phenotypes_meta[$name]['env']) {
        $env_phenotypes = TRUE;
      }
    }
    if ($env_phenotypes) {
      tpps_submission_add_tag($form_state['accession'], 'Environment');
    }

    if ($phenotype['check'] == '1' || $phenotype['check'] == 'upload_file') {
      $meta_fid = $phenotype['metadata'];
      print_r('META_FID:' . $meta_fid . "\n");
      // Added because 009 META FID was 0 which caused failures.
      if ($meta_fid > 0) {

        tpps_add_project_file($form_state, $meta_fid);

        // Get metadata column values.
        $groups = $phenotype['metadata-groups'];
        $column_vals = $phenotype['metadata-columns'];
        $columns = array(
          'name' => $groups['Phenotype Id']['1'],
          'attr' => $groups['Attribute']['2'],
          'desc' => $groups['Description']['3'],
          'unit' => $groups['Units']['4'],
          'struct' => $find_column('5', $column_vals),
          'min' => $find_column('6', $column_vals),
          'max' => $find_column('7', $column_vals),
        );

        $meta_options = array(
          'no_header' => $phenotype['metadata-no-header'],
          'meta_columns' => $columns,
          'meta' => &$phenotypes_meta,
        );

        $log('[INFO] - Processing phenotype_meta file data...');
        tpps_file_iterator($meta_fid, 'tpps_process_phenotype_meta', $meta_options);
        $log('[INFO] - Done.');
      }
      else {
        tpps_job_logger_write('[WARNING] - phenotype_meta file id looks incorrect but the UI checkbox was selected. Need to double check this!');
      }
    }

    $time_options = array();
    // Guarded with empty() so a missing 'time-check' key is treated as off.
    if (!empty($phenotype['time']['time-check'])) {
      $time_options = $phenotype['time'];
    }
    tpps_refine_phenotype_meta($phenotypes_meta, $time_options, $job);

    // Get metadata header values.
    $groups = $phenotype['file-groups'];
    $column_vals = $phenotype['file-columns'];
    // The column-type ids for time and clone depend on the file format.
    $time_index = ($phenotype['format'] == 0) ? '2' : '4';
    $clone_index = ($phenotype['format'] == 0) ? '3' : '5';
    $meta_headers = array(
      'name' => $groups['Phenotype Name/Identifier']['2'] ?? NULL,
      'value' => $groups['Phenotype Value(s)']['3'] ?? NULL,
      'time' => $find_column($time_index, $column_vals),
      'clone' => $find_column($clone_index, $column_vals),
    );

    // Get data header values.
    if ($phenotype['format'] == 0) {
      $file_headers = tpps_file_headers($data_fid, $phenotype['file-no-header']);
      $data_columns = array();
      if (is_array($groups['Phenotype Data']['0']) && !empty($groups['Phenotype Data']['0'])) {
        foreach ($groups['Phenotype Data']['0'] as $col) {
          $data_columns[$col] = $file_headers[$col];
        }
      }
      else {
        $col = $groups['Phenotype Data'][0];
        $data_columns[$col] = $file_headers[$col];
      }
      unset($file_headers);
    }

    $options['no_header'] = $phenotype['file-no-header'];
    $options['tree_id'] = $groups['Tree Identifier']['1'];
    $options['meta_headers'] = $meta_headers;
    $options['data_columns'] = $data_columns ?? NULL;
    $options['meta'] = $phenotypes_meta;
    $options['file_empty'] = $phenotype['file-empty'];
    $options['organism_name'] = $organism_name;

    print_r('DATA_FID:' . $data_fid . "\n");
    $log('[INFO] - Processing phenotype_data file data...');
    tpps_file_iterator($data_fid, 'tpps_process_phenotype_data', $options);
    $form_state['data']['phenotype_meta'] += $phenotypes_meta;
    $log('[INFO] - Inserting data into database using insert_multi...');
    tpps_chado_insert_multi($options['records']);
    $log('[INFO] - Done.');
  }

  if (!empty($phenotype['iso-check'])) {
    $iso_fid = $phenotype['iso'];
    tpps_add_project_file($form_state, $iso_fid);

    // Reset the record groups and cvterms before the second file pass.
    $options['iso'] = TRUE;
    $options['records'] = $records;
    $options['cvterms'] = $phenotype_cvterms;
    $options['file_headers'] = tpps_file_headers($iso_fid);
    $options['organism_name'] = $organism_name;
    $options['meta'] = array(
      'desc' => "Mass Spectrometry",
      'unit' => "intensity (arbitrary units)",
      'attr_id' => tpps_load_cvterm('intensity')->cvterm_id,
    );

    print_r('ISO_FID:' . $iso_fid . "\n");
    $log('[INFO] - Processing phenotype_data file data...');
    tpps_file_iterator($iso_fid, 'tpps_process_phenotype_data', $options);
    $log('[INFO] - Inserting phenotype_data into database using insert_multi...');
    tpps_chado_insert_multi($options['records']);
    $log('[INFO] - Done.');
  }
}
const TPPS_PAGE_1
Definition: tpps.module:12
tpps_refine_phenotype_meta(array &$meta, array $time_options=array(), TripalJob &$job=NULL)
tpps_add_project_file(array &$state, &$fid)
tpps_submission_add_tag($accession, $tag)
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:972
const TPPS_PAGE_4
Definition: tpps.module:15
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
tpps_file_iterator($fid, $function, array &$options=array())

◆ tpps_submit_summary()

tpps_submit_summary ( array &  $form_state)

Submits additional data provided in the summary page to the database.

Parameters
array $form_state: The state of the form being submitted.

Definition at line 3155 of file submit_all.php.

/**
 * Submits additional data provided in the summary page to the database.
 *
 * For every analysis type checked on the summary page, a projectprop record
 * is inserted, plus a second one describing the uploaded analysis file when a
 * file id is present. Tree picture file ids are then registered in
 * $form_state['file_info'] and, if the treepictures_metadata table exists,
 * their source/attribution/license values are inserted there.
 *
 * @param array $form_state
 *   The state of the form being submitted.
 */
function tpps_submit_summary(array &$form_state) {
  // Form field prefix => label stored as the projectprop value.
  $analysis_labels = array(
    'diversity' => 'Diversity',
    'population_structure' => 'Population Structure',
    'association_genetics' => 'Association Genetics',
    'landscape_genomics' => 'Landscape Genomics',
    'phenotype_environment' => 'Phenotype-Environment',
  );

  foreach ($analysis_labels as $key => $text) {
    if (empty($form_state['saved_values']['summarypage']['analysis']["{$key}_check"])) {
      continue;
    }

    tpps_chado_insert_record('projectprop', array(
      'project_id' => $form_state['ids']['project_id'],
      'type_id' => tpps_load_cvterm('analysis_type')->cvterm_id,
      'value' => $text,
    ));

    $analysis_fid = $form_state['saved_values']['summarypage']['analysis']["{$key}_file"];
    if (empty($analysis_fid)) {
      continue;
    }

    tpps_add_project_file($form_state, $analysis_fid);

    tpps_chado_insert_record('projectprop', array(
      'project_id' => $form_state['ids']['project_id'],
      'type_id' => tpps_load_cvterm('source_description')->cvterm_id,
      'value' => $form_state['saved_values']['summarypage']['analysis']["{$key}_file_description"],
    ));
  }

  if (empty($form_state['saved_values']['summarypage']['tree_pictures'])) {
    return;
  }

  // Entries ending in one of these suffixes hold metadata about a picture
  // rather than a picture file id, so they are not processed as pictures.
  $metadata_suffixes = array('_url', '_attribution', '_license');

  foreach ($form_state['saved_values']['summarypage']['tree_pictures'] as $picture_name => $picture_fid) {
    $is_metadata = FALSE;
    foreach ($metadata_suffixes as $suffix) {
      if (substr($picture_name, -strlen($suffix)) == $suffix) {
        $is_metadata = TRUE;
        break;
      }
    }
    if ($is_metadata or empty($picture_fid)) {
      continue;
    }

    // File name is the entry name with spaces replaced by underscores.
    $form_state['file_info']['summarypage'][$picture_fid] = str_replace(' ', '_', $picture_name) . '.jpg';

    if (!db_table_exists('treepictures_metadata')) {
      continue;
    }

    db_insert('treepictures_metadata')
      ->fields(array('species', 'source', 'attribution', 'license'))
      ->values(array(
        'species' => $form_state['file_info']['summarypage'][$picture_fid],
        'source' => $form_state['saved_values']['summarypage']['tree_pictures']["{$picture_name}_url"],
        'attribution' => $form_state['saved_values']['summarypage']['tree_pictures']["{$picture_name}_attribution"],
        'license' => $form_state['saved_values']['summarypage']['tree_pictures']["{$picture_name}_license"],
      ))
      ->execute();
  }
}
tpps_add_project_file(array &$state, &$fid)
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)

◆ tpps_submit_vcf_render_genotype_combination()

tpps_submit_vcf_render_genotype_combination (   $raw_value,
  $ref,
  $alt 
)

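Renders the genotype combination for one sample by replacing the allele indices in its VCF genotype value with the REF and ALT values.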

Parameters
string $raw_value: Tree ID genotype value from the VCF file.
string $ref: REF value.
string $alt: ALT value.

Definition at line 2073 of file submit_all.php.

/**
 * Render genotype combination.
 *
 * Converts the genotype (GT) part of a VCF sample value into the
 * corresponding alleles, joined with ':'. For example '0/1:27,0:27' with
 * REF 'A' and ALT 'G' renders as 'A:G'.
 *
 * Handles unphased ('/') and phased ('|') separators, missing alleles ('.'),
 * and multi-allelic sites where ALT is a comma-separated list and allele
 * index N refers to the N-th ALT value.
 *
 * @param string $raw_value
 *   Tree ID genotype value from VCF file. Format looks like this:
 *   0/0:27,0:27:81:0,81,1065.
 * @param string $ref
 *   REF value.
 * @param string $alt
 *   ALT value. May contain several comma-separated alleles.
 *
 * @return string
 *   The rendered alleles separated by ':'. Missing or non-numeric allele
 *   indices are rendered as '.'.
 */
function tpps_submit_vcf_render_genotype_combination($raw_value, $ref, $alt) {
  // The genotype is the first colon-separated field of the sample value.
  $sample_fields = explode(':', (string) $raw_value);
  // Alleles are separated by '/' (unphased) or '|' (phased).
  $allele_indices = preg_split('/[\/|]/', $sample_fields[0]);
  $alt_alleles = explode(',', (string) $alt);

  $rendered = array();
  foreach ($allele_indices as $allele_index) {
    $allele_index = trim($allele_index);
    if (!preg_match('/^[0-9]+$/D', $allele_index)) {
      // Missing call ('.') or malformed index: do not guess an allele.
      $rendered[] = '.';
      continue;
    }
    $allele_number = (int) $allele_index;
    if ($allele_number === 0) {
      $rendered[] = $ref;
    }
    else {
      // Index N maps to the N-th ALT allele; fall back to the full ALT value
      // when the index is outside the ALT list.
      $rendered[] = $alt_alleles[$allele_number - 1] ?? $alt;
    }
  }
  return implode(':', $rendered);
}

Variable Documentation

◆ $tpps_job

$tpps_job = NULL

Definition at line 14 of file submit_all.php.

◆ $tpps_job_logger

$tpps_job_logger = NULL

Definition at line 13 of file submit_all.php.