Tripal Plant PopGen Submit
submit_all.php
Go to the documentation of this file.
1 <?php
2 
// Global variables shared by the submission job functions in this file.
//
// $tpps_job is assigned the TripalJob passed to tpps_submit_all().
// $tpps_job_logger is filled in by tpps_submit_all() with the keys
// 'job_object', 'log_file_path' and 'log_file_handle', and is read by
// tpps_job_logger_write(). It is declared here so that it is always defined,
// even before a job has started.
$tpps_job = NULL;
$tpps_job_logger = NULL;
15 
16 
/**
 * Runs the complete submission job for one TPPS accession.
 *
 * Opens a per-job log file, marks the submission as running, clears any data
 * previously loaded for the accession, recreates the project record and then
 * submits each form page in order. Every progress message is written both to
 * the job log file and to the Tripal job log. On success the submission is
 * marked 'Approved'. On any exception the database transaction is rolled back,
 * the submission is reset to 'Pending Approval', the error is logged and a new
 * exception is thrown.
 *
 * @param string $accession
 *   The accession of the submission to process.
 * @param TripalJob $job
 *   The Tripal job running the submission. The parameter defaults to NULL for
 *   signature compatibility, but methods are called on it unconditionally, so
 *   a job object must be supplied.
 *
 * @throws Exception
 *   'Job failed.' with the original exception attached as the previous
 *   exception.
 */
function tpps_submit_all($accession, TripalJob $job = NULL) {
  global $tpps_job;
  $tpps_job = $job;

  // Per-job log files live under the public files directory. Only create the
  // directory when it is missing: an unconditional mkdir() raises a warning on
  // every run after the first.
  $log_path = drupal_realpath('public://') . '/tpps_job_logs/';
  if (!is_dir($log_path)) {
    mkdir($log_path, 0777, TRUE);
  }

  // Update the global $tpps_job_logger variable.
  global $tpps_job_logger;
  $tpps_job_logger = [];
  $tpps_job_logger['job_object'] = $job;
  $tpps_job_logger['log_file_path'] = $log_path . $accession . '_' . $tpps_job_logger['job_object']->getJobID() . '.txt';
  $tpps_job_logger['log_file_handle'] = fopen($tpps_job_logger['log_file_path'], "w+");

  tpps_job_logger_write('[INFO] Setting up...');
  $job->logMessage('[INFO] Setting up...');
  $job->setInterval(1);
  $form_state = tpps_load_submission($accession);
  $form_state['status'] = 'Submission Job Running';
  tpps_update_submission($form_state, array('status' => 'Submission Job Running'));
  $transaction = db_transaction();

  try {
    tpps_job_logger_write('[INFO] Clearing Database...');
    $job->logMessage('[INFO] Clearing Database...');
    tpps_submission_clear_db($accession);
    tpps_job_logger_write('[INFO] Database Cleared');
    $job->logMessage('[INFO] Database Cleared.');
    // Remember the previous project id (if any) so the recreated project
    // record keeps the same id.
    $project_id = $form_state['ids']['project_id'] ?? NULL;

    $form_state = tpps_load_submission($accession);
    tpps_clean_state($form_state);
    $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
    $form_state['file_rank'] = 0;
    $form_state['ids'] = array();

    tpps_job_logger_write('[INFO] Creating project record...');
    $job->logMessage('[INFO] Creating project record...');
    $form_state['title'] = $firstpage['publication']['title'];
    $form_state['abstract'] = $firstpage['publication']['abstract'];
    $project_record = array(
      'name' => $firstpage['publication']['title'],
      'description' => $firstpage['publication']['abstract'],
    );
    if (!empty($project_id)) {
      $project_record['project_id'] = $project_id;
    }
    $form_state['ids']['project_id'] = tpps_chado_insert_record('project', $project_record);
    tpps_job_logger_write("[INFO] Project record created. project_id: @pid\n", array('@pid' => $form_state['ids']['project_id']));
    $job->logMessage("[INFO] Project record created. project_id: @pid\n", array('@pid' => $form_state['ids']['project_id']));

    tpps_tripal_entity_publish('Project', array(
      $firstpage['publication']['title'],
      $form_state['ids']['project_id'],
    ));

    tpps_job_logger_write("[INFO] Submitting Publication/Species information...");
    $job->logMessage("[INFO] Submitting Publication/Species information...");
    tpps_submit_page_1($form_state, $job);
    tpps_job_logger_write("[INFO] Publication/Species information submitted!\n");
    $job->logMessage("[INFO] Publication/Species information submitted!\n");

    tpps_job_logger_write("[INFO] Submitting Study Details...");
    $job->logMessage("[INFO] Submitting Study Details...");
    tpps_submit_page_2($form_state, $job);
    tpps_job_logger_write("[INFO] Study Details sumbitted!\n");
    $job->logMessage("[INFO] Study Details sumbitted!\n");

    tpps_job_logger_write("[INFO] Submitting Accession information...");
    $job->logMessage("[INFO] Submitting Accession information...");
    tpps_submit_page_3($form_state, $job);
    tpps_job_logger_write("[INFO] Accession information submitted!\n");
    $job->logMessage("[INFO] Accession information submitted!\n");

    tpps_job_logger_write("[INFO] Submitting Raw data...");
    $job->logMessage("[INFO] Submitting Raw data...");
    tpps_submit_page_4($form_state, $job);
    tpps_job_logger_write("[INFO] Raw data submitted!\n");
    $job->logMessage("[INFO] Raw data submitted!\n");

    tpps_job_logger_write("[INFO] Submitting Summary information...");
    $job->logMessage("[INFO] Submitting Summary information...");
    tpps_submit_summary($form_state);
    tpps_job_logger_write("[INFO] Summary information submitted!\n");
    $job->logMessage("[INFO] Summary information submitted!\n");

    tpps_update_submission($form_state);

    tpps_job_logger_write("[INFO] Renaming files...");
    $job->logMessage("[INFO] Renaming files...");
    tpps_submission_rename_files($accession);
    tpps_job_logger_write("[INFO] Files renamed!\n");
    $job->logMessage("[INFO] Files renamed!\n");
    $form_state = tpps_load_submission($accession);
    $form_state['status'] = 'Approved';
    $form_state['loaded'] = time();
    tpps_job_logger_write("[INFO] Finishing up...");
    $job->logMessage("[INFO] Finishing up...");
    tpps_update_submission($form_state, array('status' => 'Approved'));
    tpps_job_logger_write("[INFO] Complete!");
    $job->logMessage("[INFO] Complete!");
  }
  catch (Exception $e) {
    $transaction->rollback();
    $form_state = tpps_load_submission($accession);
    $form_state['status'] = 'Pending Approval';
    tpps_update_submission($form_state, array('status' => 'Pending Approval'));

    tpps_job_logger_write('[ERROR] Job failed');
    $job->logMessage('[ERROR] Job failed', array(), TRIPAL_ERROR);
    tpps_job_logger_write('[ERROR] Error message: @msg', array('@msg' => $e->getMessage()));
    $job->logMessage('[ERROR] Error message: @msg', array('@msg' => $e->getMessage()), TRIPAL_ERROR);
    tpps_job_logger_write("[ERROR] Trace: \n@trace", array('@trace' => $e->getTraceAsString()));
    $job->logMessage("[ERROR] Trace: \n@trace", array('@trace' => $e->getTraceAsString()), TRIPAL_ERROR);

    watchdog_exception('tpps', $e);
    // Keep the original exception reachable through getPrevious().
    throw new Exception('Job failed.', 0, $e);
  }
  finally {
    // Close the log file on every exit path, including a failure inside the
    // catch block itself. The handle is not a resource if fopen() failed.
    if (is_resource($tpps_job_logger['log_file_handle'])) {
      fclose($tpps_job_logger['log_file_handle']);
    }
  }
}
153 
/**
 * Appends one timestamped message to the current job's log file.
 *
 * The message is written to the handle stored in the global
 * $tpps_job_logger['log_file_handle'] and flushed immediately. When no log
 * file is open (the global is unset or the handle is not a resource) the call
 * does nothing, so logging can never abort the job.
 *
 * @param string $string
 *   The message. May contain placeholder tokens such as '@pid'.
 * @param array $replacements
 *   Map of placeholder token => replacement value. All tokens are substituted
 *   in a single pass, so a token that is a prefix of another token (e.g. '@p'
 *   and '@pid') cannot corrupt it, and replacement text is never re-scanned.
 */
function tpps_job_logger_write($string, $replacements = []) {
  global $tpps_job_logger;

  // fwrite() on a missing handle raises a TypeError on PHP 8, which a
  // catch (Exception) block does not intercept; check explicitly instead.
  $handle = $tpps_job_logger['log_file_handle'] ?? NULL;
  if (!is_resource($handle)) {
    return;
  }

  // An empty token can never match; strtr() rejects it on older PHP versions.
  unset($replacements['']);
  if (!empty($replacements)) {
    $string = strtr($string, $replacements);
  }

  // Each entry starts on a new line, prefixed with the current time.
  $entry = "\n" . date('m/d/y g:i:s A', time()) . " " . $string;

  fwrite($handle, $entry);
  fflush($handle);
}
180 
/**
 * Submits the publication and species data from page 1 of the form.
 *
 * Links the project to its dbxref (and to a dryad dbxref for 'tppsc'
 * submissions with a DOI), then creates the author and organization contacts,
 * the publication with its properties and authors, and one organism record
 * per species with its properties. Ids of created records are stored under
 * $form_state['ids'].
 *
 * @param array $form_state
 *   The submission form state. Read from 'saved_values', 'ids', 'dbxref_id'
 *   and 'tpps_type'; 'ids', 'pyear', 'journal' and 'authors' are written.
 * @param TripalJob $job
 *   The Tripal job to log to, or NULL to log to the job log file only.
 */
function tpps_submit_page_1(array &$form_state, TripalJob &$job = NULL) {
  $dbxref_id = $form_state['dbxref_id'];
  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
  $thirdpage = $form_state['saved_values'][TPPS_PAGE_3];
  $seconds = $firstpage['publication']['secondaryAuthors'];

  tpps_chado_insert_record('project_dbxref', array(
    'project_id' => $form_state['ids']['project_id'],
    'dbxref_id' => $dbxref_id,
    'is_current' => TRUE,
  ));

  if (!empty($form_state['tpps_type']) and $form_state['tpps_type'] == 'tppsc' and !empty($firstpage['doi'])) {
    // NOTE(review): assumes a 'dryad' db record exists; confirm that
    // chado_get_db() cannot return an empty result here.
    $dryad_db = chado_get_db(array('name' => 'dryad'));
    $dryad_dbxref = chado_insert_dbxref(array(
      'db_id' => $dryad_db->db_id,
      'accession' => $firstpage['doi'],
    ))->dbxref_id;
    tpps_chado_insert_record('project_dbxref', array(
      'project_id' => $form_state['ids']['project_id'],
      'dbxref_id' => $dryad_dbxref,
      'is_current' => TRUE,
    ));
  }

  if (!empty($firstpage['photo'])) {
    tpps_add_project_file($form_state, $firstpage['photo']);
  }

  $primary_author_id = tpps_chado_insert_record('contact', array(
    'name' => $firstpage['primaryAuthor'],
    'type_id' => tpps_load_cvterm('person')->cvterm_id,
  ));

  tpps_chado_insert_record('project_contact', array(
    'project_id' => $form_state['ids']['project_id'],
    'contact_id' => $primary_author_id,
  ));

  // Secondary authors: each gets a contact record now; the pubauthor rows are
  // collected and inserted once the publication id is known.
  $authors = array($firstpage['primaryAuthor']);
  $pubauthors = array();
  if ($seconds['number'] != 0) {
    for ($i = 1; $i <= $seconds['number']; $i++) {
      if (!isset($seconds[$i]) || $seconds[$i] == "") {
        $message = '[INFO] - Secondary publishers error - found an empty secondary publisher name. Ignoring this input.';
        tpps_job_logger_write($message);
        // $job is optional in this function's signature.
        if ($job) {
          $job->logMessage($message);
        }
        continue;
      }

      tpps_chado_insert_record('contact', array(
        'name' => $seconds[$i],
        'type_id' => tpps_load_cvterm('person')->cvterm_id,
      ));

      // The last word is the surname; everything before it the given names.
      $names = explode(" ", $seconds[$i]);
      $first_name = implode(" ", array_slice($names, 0, -1));
      $last_name = end($names);
      $pubauthors[] = array(
        'rank' => "$i",
        'surname' => $last_name,
        'givennames' => $first_name,
      );
      $authors[] = $seconds[$i];
    }
  }

  $publication_id = tpps_chado_insert_record('pub', array(
    'title' => $firstpage['publication']['title'],
    'series_name' => $firstpage['publication']['journal'],
    'type_id' => tpps_load_cvterm('article')->cvterm_id,
    'pyear' => $firstpage['publication']['year'],
    'uniquename' => implode('; ', $authors) . " {$firstpage['publication']['title']}. {$firstpage['publication']['journal']}; {$firstpage['publication']['year']}",
  ));
  $form_state['ids']['pub_id'] = $publication_id;
  tpps_tripal_entity_publish('Publication', array(
    $firstpage['publication']['title'],
    $publication_id,
  ));
  $form_state['pyear'] = $firstpage['publication']['year'];
  $form_state['journal'] = $firstpage['publication']['journal'];

  if (!empty($firstpage['publication']['abstract'])) {
    tpps_chado_insert_record('pubprop', array(
      'pub_id' => $publication_id,
      'type_id' => tpps_load_cvterm('abstract')->cvterm_id,
      'value' => $firstpage['publication']['abstract'],
    ));
  }

  tpps_chado_insert_record('pubprop', array(
    'pub_id' => $publication_id,
    'type_id' => tpps_load_cvterm('authors')->cvterm_id,
    'value' => implode(', ', $authors),
  ));
  $form_state['authors'] = $authors;

  tpps_chado_insert_record('project_pub', array(
    'project_id' => $form_state['ids']['project_id'],
    'pub_id' => $publication_id,
  ));

  if (!empty($firstpage['organization'])) {
    $organization_id = tpps_chado_insert_record('contact', array(
      'name' => $firstpage['organization'],
      'type_id' => tpps_load_cvterm('organization')->cvterm_id,
    ));

    tpps_chado_insert_record('contact_relationship', array(
      'type_id' => tpps_load_cvterm('contact_part_of')->cvterm_id,
      'subject_id' => $primary_author_id,
      'object_id' => $organization_id,
    ));
  }

  // The primary author is always pubauthor rank 0.
  $names = explode(" ", $firstpage['primaryAuthor']);
  $first_name = implode(" ", array_slice($names, 0, -1));
  $last_name = end($names);

  tpps_chado_insert_record('pubauthor', array(
    'pub_id' => $publication_id,
    'rank' => '0',
    'surname' => $last_name,
    'givennames' => $first_name,
  ));

  foreach ($pubauthors as $info) {
    $info['pub_id'] = $publication_id;
    tpps_chado_insert_record('pubauthor', $info);
  }

  $form_state['ids']['organism_ids'] = array();
  $organism_number = $firstpage['organism']['number'];

  for ($i = 1; $i <= $organism_number; $i++) {
    $organism_name = $firstpage['organism'][$i]['name'];
    $parts = explode(" ", $organism_name);
    $genus = $parts[0];
    $species = implode(" ", array_slice($parts, 1));
    $infra = NULL;
    if (isset($parts[2]) and ($parts[2] == 'var.' or $parts[2] == 'subsp.')) {
      $infra = implode(" ", array_slice($parts, 2));
    }

    $record = array(
      'genus' => $genus,
      'species' => $species,
      'infraspecific_name' => $infra,
    );

    // A ' x ' inside the species part gets the 'speciesaggregate' type.
    if (preg_match('/ x /', $species)) {
      $record['type_id'] = tpps_load_cvterm('speciesaggregate')->cvterm_id;
    }
    $organism_id = tpps_chado_insert_record('organism', $record);
    $form_state['ids']['organism_ids'][$i] = $organism_id;

    $type_cvterm = tpps_load_cvterm('Type');
    if (!empty($type_cvterm)) {
      tpps_chado_insert_record('organismprop', array(
        'organism_id' => $organism_id,
        'type_id' => $type_cvterm->cvterm_id,
        'value' => $firstpage['organism'][$i]['is_tree'] ? 'Tree' : 'Non-tree',
      ));
    }

    if ($organism_number != 1) {
      // Separate accession file per species: skip species without a file.
      if (!empty($thirdpage['tree-accession']['check']) and empty($thirdpage['tree-accession']["species-$i"]['file'])) {
        continue;
      }

      // One shared accession file: skip species that never appear in it.
      if (empty($thirdpage['tree-accession']['check'])) {
        $options = array(
          'cols' => array(),
          'search' => $organism_name,
          'found' => FALSE,
        );
        $tree_accession = $thirdpage['tree-accession']["species-1"];
        $groups = $tree_accession['file-groups'];
        if ($groups['Genus and Species']['#type'] == 'separate') {
          $options['cols']['genus'] = $groups['Genus and Species']['6'];
          $options['cols']['species'] = $groups['Genus and Species']['7'];
        }
        else {
          $options['cols']['org'] = $groups['Genus and Species']['10'];
        }
        $fid = $tree_accession['file'];
        tpps_file_iterator($fid, 'tpps_check_organisms', $options);
        if (!$options['found']) {
          continue;
        }
      }
    }

    $code_exists = tpps_chado_prop_exists('organism', $organism_id, 'organism 4 letter code');

    if (!$code_exists) {
      // Reset for each organism so a code picked for a previous organism can
      // never be reused when no candidate is produced for this one.
      $trial_code = NULL;
      // Use the first candidate code not yet stored for any organism.
      foreach (tpps_get_species_codes($genus, $species) as $trial_code) {
        $new_code_query = chado_select_record('organismprop', array('value'), array(
          'type_id' => tpps_load_cvterm('organism 4 letter code')->cvterm_id,
          'value' => $trial_code,
        ));

        if (empty($new_code_query)) {
          break;
        }
      }

      tpps_chado_insert_record('organismprop', array(
        'organism_id' => $organism_id,
        'type_id' => tpps_load_cvterm('organism 4 letter code')->cvterm_id,
        'value' => $trial_code,
      ));
    }

    // Add any taxonomic rank that is not stored yet and can be looked up.
    $ranks = array(
      'family',
      'order',
      'subkingdom',
    );

    foreach ($ranks as $rank) {
      if (tpps_chado_prop_exists('organism', $organism_id, $rank)) {
        continue;
      }
      $taxon = tpps_get_taxon($organism_name, $rank);
      if ($taxon) {
        tpps_chado_insert_record('organismprop', array(
          'organism_id' => $organism_id,
          'type_id' => tpps_load_cvterm($rank)->cvterm_id,
          'value' => $taxon,
        ));
      }
    }

    tpps_chado_insert_record('project_organism', array(
      'organism_id' => $organism_id,
      'project_id' => $form_state['ids']['project_id'],
    ));

    tpps_chado_insert_record('pub_organism', array(
      'organism_id' => $organism_id,
      'pub_id' => $publication_id,
    ));

    tpps_tripal_entity_publish('Organism', array(
      "$genus $species",
      $organism_id,
    ));
  }
}
438 
/**
 * Submits the study design data from page 2 of the form.
 *
 * Every value is stored as a projectprop record on the project identified by
 * $form_state['ids']['project_id'].
 *
 * @param array $form_state
 *   The submission form state. Only read here.
 * @param TripalJob $job
 *   The Tripal job running the submission. Not used by this function.
 */
function tpps_submit_page_2(array &$form_state, TripalJob &$job = NULL) {
  $secondpage = $form_state['saved_values'][TPPS_PAGE_2];
  $project_id = $form_state['ids']['project_id'];

  // Inserts one projectprop for the cvterm named $term. 'rank' is only part
  // of the record when a rank is given.
  $add_prop = function ($term, $value, $rank = NULL) use ($project_id) {
    $record = array(
      'project_id' => $project_id,
      'type_id' => tpps_load_cvterm($term)->cvterm_id,
      'value' => $value,
    );
    if ($rank !== NULL) {
      $record['rank'] = $rank;
    }
    return tpps_chado_insert_record('projectprop', $record);
  };

  // Stores "<prefix>_control" as 'True'/'False', plus "<prefix>_level" from
  // the controlled value or, when present, the uncontrolled value.
  $add_control = function ($prefix, $set) use ($add_prop) {
    $is_controlled = ($set['option'] == '1');
    $add_prop("{$prefix}_control", $is_controlled ? 'True' : 'False');
    if ($is_controlled) {
      $add_prop("{$prefix}_level", $set['controlled']);
    }
    elseif (!empty($set['uncontrolled'])) {
      $add_prop("{$prefix}_level", $set['uncontrolled']);
    }
  };

  if (!empty($secondpage['StartingDate'])) {
    $add_prop('study_start', $secondpage['StartingDate']['month'] . " " . $secondpage['StartingDate']['year']);
    $add_prop('study_end', $secondpage['EndingDate']['month'] . " " . $secondpage['EndingDate']['year']);
  }

  $add_prop('association_results_type', $secondpage['data_type']);

  $studytype_options = array(
    0 => '- Select -',
    1 => 'Natural Population (Landscape)',
    2 => 'Growth Chamber',
    3 => 'Greenhouse',
    4 => 'Experimental/Common Garden',
    5 => 'Plantation',
  );
  $add_prop('study_type', $studytype_options[$secondpage['study_type']]);

  if (!empty($secondpage['study_info']['season'])) {
    // The season values are concatenated without a separator.
    $add_prop('assession_season', implode('', $secondpage['study_info']['season']));
  }

  if (!empty($secondpage['study_info']['assessions'])) {
    $add_prop('assession_number', $secondpage['study_info']['assessions']);
  }

  if (!empty($secondpage['study_info']['temp'])) {
    $add_prop('temperature_high', $secondpage['study_info']['temp']['high']);
    $add_prop('temperature_low', $secondpage['study_info']['temp']['low']);
  }

  foreach (array('co2', 'humidity', 'light', 'salinity') as $type) {
    if (!empty($secondpage['study_info'][$type])) {
      $add_control($type, $secondpage['study_info'][$type]);
    }
  }

  if (empty($secondpage['study_info']['rooting'])) {
    return;
  }
  $root = $secondpage['study_info']['rooting'];

  $add_prop('rooting_type', $root['option']);

  if ($root['option'] == 'Soil') {
    $add_prop('soil_type', ($root['soil']['type'] == 'Other') ? $root['soil']['other'] : $root['soil']['type']);
    $add_prop('soil_container', $root['soil']['container']);
  }

  if (!empty($root['ph'])) {
    $add_control('pH', $root['ph']);
  }

  // The treatment values are read in pairs: the first value of a pair decides
  // whether the second one is stored. Stored values get consecutive ranks. A
  // missing 'treatment' entry is treated as empty.
  $expect_value = FALSE;
  $record_next = FALSE;
  $rank = 0;
  foreach (($root['treatment'] ?? array()) as $value) {
    if (!$expect_value) {
      $record_next = ((bool) $value);
      $expect_value = TRUE;
      continue;
    }
    if ($record_next) {
      $add_prop('treatment', $value, $rank);
      $rank++;
    }
    $expect_value = FALSE;
  }
}
621 
/**
 * Submits the accession (plant location) data from page 3 of the form.
 *
 * Stores the study locations as projectprop records, optionally geocoding
 * named locations through the OpenCage API. Then iterates over each accession
 * file with tpps_process_accession() and bulk-inserts the collected records
 * with tpps_chado_insert_multi().
 *
 * @param array $form_state
 *   The submission form state. 'locations' and 'tree_info' are reset and
 *   filled in; 'ids' is shared by reference with the file processor.
 * @param TripalJob $job
 *   The Tripal job to log to, or NULL to log to the job log file only.
 */
function tpps_submit_page_3(array &$form_state, TripalJob &$job = NULL) {
  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
  $thirdpage = $form_state['saved_values'][TPPS_PAGE_3];
  $organism_number = $firstpage['organism']['number'];
  $form_state['locations'] = array();
  $form_state['tree_info'] = array();
  $stock_count = 0;
  $loc_name = 'Location (latitude/longitude or country/state or population group)';

  // Writes a message to the job log file and, when a job object was supplied,
  // to the Tripal job log as well.
  $log = function ($message) use ($job) {
    tpps_job_logger_write($message);
    if ($job) {
      $job->logMessage($message);
    }
  };

  if (!empty($thirdpage['skip_validation'])) {
    tpps_submission_add_tag($form_state['accession'], 'No Location Information');
  }

  if (!empty($thirdpage['study_location'])) {
    $type = $thirdpage['study_location']['type'];
    $locs = $thirdpage['study_location']['locations'];
    $geo_api_key = variable_get('tpps_geocode_api_key', NULL);
    $project_id = $form_state['ids']['project_id'];

    // Inserts one ranked projectprop for the cvterm named $term.
    $add_location_prop = function ($term, $value, $rank) use ($project_id) {
      tpps_chado_insert_record('projectprop', array(
        'project_id' => $project_id,
        'type_id' => tpps_load_cvterm($term)->cvterm_id,
        'value' => $value,
        'rank' => $rank,
      ));
    };

    for ($i = 1; $i <= $locs['number']; $i++) {
      if ($type !== '2') {
        // Coordinate entry: standardise it, then split into lat and lng.
        // NOTE(review): assumes tpps_standard_coord() always returns
        // "lat,lng"; confirm for invalid input.
        $standard_coordinate = explode(',', tpps_standard_coord($locs[$i]));
        $add_location_prop('gps_latitude', $standard_coordinate[0], $i);
        $add_location_prop('gps_longitude', $standard_coordinate[1], $i);
        continue;
      }

      // Named location: store the text and geocode it when a key is set.
      $loc = $locs[$i];
      $add_location_prop('experiment_location', $loc, $i);

      if (isset($geo_api_key)) {
        $query = urlencode($loc);
        $url = "https://api.opencagedata.com/geocode/v1/json?q=$query&key=$geo_api_key";
        // A failed request or an undecodable body leaves $response NULL and
        // is treated the same as "no results".
        $raw = file_get_contents($url);
        $response = ($raw !== FALSE) ? json_decode($raw) : NULL;

        if (!empty($response->total_results) and !empty($response->results[0]->geometry)) {
          $result = $response->results[0]->geometry;
          $form_state['locations'][$loc] = $result;

          $add_location_prop('gps_latitude', $result->lat, $i);
          $add_location_prop('gps_longitude', $result->lng, $i);
        }
      }
    }
  }

  $cvterms = array(
    'org' => tpps_load_cvterm('organism')->cvterm_id,
    'clone' => tpps_load_cvterm('clone')->cvterm_id,
    'has_part' => tpps_load_cvterm('has_part')->cvterm_id,
    'lat' => tpps_load_cvterm('gps_latitude')->cvterm_id,
    'lng' => tpps_load_cvterm('gps_longitude')->cvterm_id,
    'country' => tpps_load_cvterm('country')->cvterm_id,
    'state' => tpps_load_cvterm('state')->cvterm_id,
    'county' => tpps_load_cvterm('county')->cvterm_id,
    'district' => tpps_load_cvterm('district')->cvterm_id,
    'loc' => tpps_load_cvterm('location')->cvterm_id,
    'gps_type' => tpps_load_cvterm('gps_type')->cvterm_id,
    'precision' => tpps_load_cvterm('gps_precision')->cvterm_id,
  );

  $records = array(
    'stock' => array(),
    'stockprop' => array(),
    'stock_relationship' => array(),
    'project_stock' => array(),
  );

  // Both stock_relationship foreign keys point at the stock table.
  $overrides = array(
    'stock_relationship' => array(
      'subject' => array(
        'table' => 'stock',
        'columns' => array(
          'subject_id' => 'stock_id',
        ),
      ),
      'object' => array(
        'table' => 'stock',
        'columns' => array(
          'object_id' => 'stock_id',
        ),
      ),
    ),
  );

  $multi_insert_options = array(
    'fk_overrides' => $overrides,
    'fks' => 'stock',
    'entities' => array(
      'label' => 'Stock',
      'table' => 'stock',
      'prefix' => $form_state['accession'] . '-',
    ),
  );

  $names = array();
  for ($i = 1; $i <= $organism_number; $i++) {
    $names[$i] = $firstpage['organism'][$i]['name'];
  }
  $names['number'] = $firstpage['organism']['number'];

  // Options handed to tpps_process_accession(). The entries bound by
  // reference let the processor write back into $form_state, $stock_count
  // and $job.
  $options = array(
    'cvterms' => $cvterms,
    'records' => $records,
    'overrides' => $overrides,
    'locations' => &$form_state['locations'],
    'accession' => $form_state['accession'],
    'single_file' => empty($thirdpage['tree-accession']['check']),
    'org_names' => $names,
    'saved_ids' => &$form_state['ids'],
    'stock_count' => &$stock_count,
    'multi_insert' => $multi_insert_options,
    'tree_info' => &$form_state['tree_info'],
    'job' => &$job,
  );

  for ($i = 1; $i <= $organism_number; $i++) {
    $tree_accession = $thirdpage['tree-accession']["species-$i"];
    $fid = $tree_accession['file'];

    tpps_add_project_file($form_state, $fid);

    $column_vals = $tree_accession['file-columns'];
    $groups = $tree_accession['file-groups'];

    $options['org_num'] = $i;
    $options['no_header'] = !empty($tree_accession['file-no-header']);
    $options['empty'] = $tree_accession['file-empty'];
    $options['pop_group'] = $tree_accession['pop-group'];
    $options['exact'] = $tree_accession['exact_coords'] ?? NULL;
    $options['precision'] = NULL;
    if (!$options['exact']) {
      $options['precision'] = $tree_accession['coord_precision'] ?? NULL;
      // Only tag as approximate when the submission has location data.
      if (!array_key_exists(tpps_get_tag_id('No Location Information'), tpps_submission_get_tags($form_state['accession']))) {
        tpps_submission_add_tag($form_state['accession'], 'Approximate Coordinates');
      }
    }

    // County, district and clone columns are found by their column type in
    // 'file-columns'; the other columns come from 'file-groups'.
    $county = array_search('8', $column_vals);
    $district = array_search('9', $column_vals);
    $clone = array_search('13', $column_vals);
    $options['column_ids'] = array(
      'id' => $groups['Tree Id']['1'],
      'lat' => $groups[$loc_name]['4'] ?? NULL,
      'lng' => $groups[$loc_name]['5'] ?? NULL,
      'country' => $groups[$loc_name]['2'] ?? NULL,
      'state' => $groups[$loc_name]['3'] ?? NULL,
      'county' => ($county !== FALSE) ? $county : NULL,
      'district' => ($district !== FALSE) ? $district : NULL,
      'clone' => ($clone !== FALSE) ? $clone : NULL,
      'pop_group' => $groups[$loc_name]['12'] ?? NULL,
    );

    // One file shared by several species: the file says which species each
    // row belongs to.
    if ($organism_number != 1 and empty($thirdpage['tree-accession']['check'])) {
      if ($groups['Genus and Species']['#type'] == 'separate') {
        $options['column_ids']['genus'] = $groups['Genus and Species']['6'];
        $options['column_ids']['species'] = $groups['Genus and Species']['7'];
      }
      else {
        $options['column_ids']['org'] = $groups['Genus and Species']['10'];
      }
    }

    $log('[INFO] - Processing accession file data...');
    tpps_file_iterator($fid, 'tpps_process_accession', $options);
    $log('[INFO] - Done.');

    $log('[INFO] - Inserting data into database using insert_multi...');
    $new_ids = tpps_chado_insert_multi($options['records'], $multi_insert_options);
    $log('[INFO] - Done.');

    foreach ($new_ids as $t_id => $stock_id) {
      $form_state['tree_info'][$t_id]['stock_id'] = $stock_id;
    }
    unset($options['records']);
    $stock_count = 0;

    // A single shared file has now been fully processed.
    if (empty($thirdpage['tree-accession']['check'])) {
      break;
    }
  }

  if (!empty($thirdpage['existing_trees'])) {
    tpps_matching_trees($form_state['ids']['project_id']);
  }
}
842 
/**
 * Submits the raw data from page 4 of the form.
 *
 * For each organism this looks up the stored 4 letter species code and, when
 * genotype data with a reference genome was supplied, submits a Tripal
 * importer job (FASTAImporter or EutilsImporter). It then submits the
 * phenotype, genotype and environment data of every organism.
 *
 * @param array $form_state
 *   The submission form state. $form_state['data']['phenotype'] and
 *   $form_state['data']['phenotype_meta'] are reset to empty arrays.
 * @param TripalJob $job
 *   The Tripal job running the submission; passed on to the phenotype,
 *   genotype and environment submit functions.
 */
function tpps_submit_page_4(array &$form_state, TripalJob &$job = NULL) {
  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
  $organism_number = $form_state['saved_values'][TPPS_PAGE_1]['organism']['number'];
  $species_codes = array();

  for ($i = 1; $i <= $organism_number; $i++) {
    $organism_id = $form_state['ids']['organism_ids'][$i];

    // Get species codes. A failed or empty lookup yields NULL instead of
    // dereferencing a non-object.
    $code_rows = chado_select_record('organismprop', array('value'), array(
      'type_id' => tpps_load_cvterm('organism 4 letter code')->cvterm_id,
      'organism_id' => $organism_id,
    ), array(
      'limit' => 1,
    ));
    $code_row = is_array($code_rows) ? current($code_rows) : FALSE;
    $species_codes[$organism_id] = is_object($code_row) ? $code_row->value : NULL;

    // Submit importer jobs.
    if (!isset($fourthpage["organism-$i"]['genotype'])) {
      continue;
    }
    $genotype = $fourthpage["organism-$i"]['genotype'];
    $ref_genome = $genotype['ref-genome'];

    if ($ref_genome === 'url' or $ref_genome === 'manual' or $ref_genome === 'manual2') {
      // Create job for tripal fasta importer.
      $class = 'FASTAImporter';
      tripal_load_include_importer_class($class);

      $fasta = $genotype['tripal_fasta'];

      $file_upload = isset($fasta['file']['file_upload']) ? trim($fasta['file']['file_upload']) : 0;
      $file_existing = isset($fasta['file']['file_upload_existing']) ? trim($fasta['file']['file_upload_existing']) : 0;
      $file_remote = isset($fasta['file']['file_remote']) ? trim($fasta['file']['file_remote']) : 0;

      $run_args = array(
        'importer_class' => $class,
        'file_remote' => $file_remote,
        'analysis_id' => $fasta['analysis_id'],
        'seqtype' => $fasta['seqtype'],
        'organism_id' => $organism_id,
        'method' => '2',
        'match_type' => '0',
        're_name' => '',
        're_uname' => '',
        're_accession' => $fasta['db']['re_accession'],
        'db_id' => $fasta['db']['db_id'],
        'rel_type' => '',
        're_subject' => '',
        'parent_type' => '',
      );

      // An existing file wins over a new upload, which wins over a remote
      // file.
      $file_details = array();
      if ($file_existing) {
        $file_details['fid'] = $file_existing;
      }
      elseif ($file_upload) {
        $file_details['fid'] = $file_upload;
      }
      elseif ($file_remote) {
        $file_details['file_remote'] = $file_remote;
      }

      try {
        $importer = new $class();
        $form = array();
        $importer->formSubmit($form, $form_state);
        $importer->create($run_args, $file_details);
        $importer->submitJob();
      }
      catch (Exception $ex) {
        // An importer failure is reported but does not stop the submission.
        drupal_set_message(t('Cannot submit import: @msg', array('@msg' => $ex->getMessage())), 'error');
      }
    }
    elseif ($ref_genome === 'bio') {
      $eutils = $genotype['tripal_eutils'];
      $class = 'EutilsImporter';
      tripal_load_include_importer_class($class);

      $run_args = array(
        'importer_class' => $class,
        'db' => $eutils['db'],
        'accession' => $eutils['accession'],
        'linked_records' => $eutils['options']['linked_records'],
      );

      try {
        $importer = new $class();
        $importer->create($run_args);
        $importer->submitJob();
      }
      catch (Exception $ex) {
        drupal_set_message(t('Cannot submit BioProject: @msg', array('@msg' => $ex->getMessage())), 'error');
      }
    }
  }

  $form_state['data']['phenotype'] = array();
  $form_state['data']['phenotype_meta'] = array();

  // Submit raw data.
  for ($i = 1; $i <= $organism_number; $i++) {
    tpps_submit_phenotype($form_state, $i, $job);
    tpps_submit_genotype($form_state, $species_codes, $i, $job);
    tpps_submit_environment($form_state, $i, $job);
  }
}
965 
/**
 * Submits the phenotype data of one organism of a TPPS submission.
 *
 * Handles two optional inputs from the fourth page of the form:
 *  - the "normal" phenotype data file ('normal-check'), together with its
 *    manually entered metadata and an optional metadata file;
 *  - the mass spectrometry / isotope file ('iso-check').
 * Each file is walked with tpps_file_iterator() and the collected records are
 * written with tpps_chado_insert_multi(). Returns early, doing nothing else,
 * when the organism has no phenotype section.
 *
 * @param array $form_state
 *   The TPPS submission form state. Read for 'saved_values', 'accession',
 *   'tree_info' and 'phenotypes_edit'; $form_state['data']['phenotype'] is
 *   handed to the row processor by reference and
 *   $form_state['data']['phenotype_meta'] receives the collected metadata.
 * @param int $i
 *   The organism number, used to select the "organism-$i" section.
 * @param TripalJob|null $job
 *   The job to mirror log messages to. May be NULL, in which case messages are
 *   only written through tpps_job_logger_write().
 */
function tpps_submit_phenotype(array &$form_state, $i, TripalJob &$job = NULL) {
  // Every progress message goes to the TPPS log file and, when a job is
  // attached, to the job log as well. $job defaults to NULL, so it must never
  // be dereferenced without this check.
  $log = function ($message) use (&$job) {
    tpps_job_logger_write($message);
    if ($job !== NULL) {
      $job->logMessage($message);
    }
  };

  $log('[INFO] - Submitting phenotype data...');
  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
  $phenotype = $fourthpage["organism-$i"]['phenotype'] ?? NULL;
  $organism_name = $firstpage['organism'][$i]['name'];
  if (empty($phenotype)) {
    return;
  }
  tpps_submission_add_tag($form_state['accession'], 'Phenotype');

  // Get appropriate cvterms.
  $phenotype_cvterms = array(
    'time' => tpps_load_cvterm('time')->cvterm_id,
    'desc' => tpps_load_cvterm('description')->cvterm_id,
    'unit' => tpps_load_cvterm('unit')->cvterm_id,
    'min' => tpps_load_cvterm('minimum')->cvterm_id,
    'max' => tpps_load_cvterm('maximum')->cvterm_id,
    'environment' => tpps_load_cvterm('environment')->cvterm_id,
    'intensity' => tpps_load_cvterm('intensity')->cvterm_id,
  );

  // Empty record buckets, one per table passed to tpps_chado_insert_multi().
  // Kept separately so the iso branch can start from a clean set.
  $records = array(
    'phenotype' => array(),
    'phenotypeprop' => array(),
    'stock_phenotype' => array(),
    'phenotype_cvterm' => array(),
  );

  // Shared state handed to the tpps_file_iterator() row processor.
  $options = array(
    'records' => $records,
    'cvterms' => $phenotype_cvterms,
    'accession' => $form_state['accession'],
    'tree_info' => $form_state['tree_info'],
    'suffix' => 0,
    'phenotype_count' => 0,
    'data' => &$form_state['data']['phenotype'],
    'job' => &$job,
  );

  if (!empty($phenotype['normal-check'])) {
    $phenotype_number = $phenotype['phenotypes-meta']['number'];
    $phenotypes_meta = array();
    $data_fid = $phenotype['file'];
    $phenos_edit = $form_state['phenotypes_edit'] ?? NULL;

    tpps_add_project_file($form_state, $data_fid);

    $env_phenotypes = FALSE;
    // Populate $phenotypes_meta with manually entered metadata.
    for ($j = 1; $j <= $phenotype_number; $j++) {
      $meta = $phenotype['phenotypes-meta'][$j];
      // The lookup key is always the originally submitted name, even when
      // edits exist for this phenotype.
      $name = strtolower($meta['name']);
      if (!empty($phenos_edit[$j])) {
        // Edited values take precedence over the submitted ones, except for
        // the attribute: the submitted cvterm id is kept because it comes
        // from the verified cvterm list behind the form's select box
        // (bugfix related to sex -> age).
        $cvterm_id = $meta['attribute'];
        $meta = $phenos_edit[$j] + $meta;
        $meta['attribute'] = $cvterm_id;
      }

      $phenotypes_meta[$name] = array();
      $phenotypes_meta[$name]['attr'] = $meta['attribute'];
      if ($meta['attribute'] == 'other') {
        $phenotypes_meta[$name]['attr-other'] = $meta['attr-other'];
      }
      $phenotypes_meta[$name]['desc'] = $meta['description'];
      $phenotypes_meta[$name]['unit'] = $meta['units'];
      if ($meta['units'] == 'other') {
        $phenotypes_meta[$name]['unit-other'] = $meta['unit-other'];
      }
      $phenotypes_meta[$name]['struct'] = $meta['structure'];
      if ($meta['structure'] == 'other') {
        $phenotypes_meta[$name]['struct-other'] = $meta['struct-other'];
      }
      // Min/max are recorded for value-range phenotypes, binary phenotypes and
      // phenotypes whose unit is the 'boolean' cvterm. Each flag is tested
      // with its own empty() so that a missing 'bin-check' key is tolerated.
      if (!empty($meta['val-check']) or !empty($meta['bin-check']) or $meta['units'] == tpps_load_cvterm('boolean')->cvterm_id) {
        $phenotypes_meta[$name]['min'] = $meta['min'];
        $phenotypes_meta[$name]['max'] = $meta['max'];
      }
      $phenotypes_meta[$name]['env'] = !empty($meta['env-check']);
      if ($phenotypes_meta[$name]['env']) {
        $env_phenotypes = TRUE;
      }
    }
    if ($env_phenotypes) {
      tpps_submission_add_tag($form_state['accession'], 'Environment');
    }

    if ($phenotype['check'] == '1' || $phenotype['check'] == 'upload_file') {
      $meta_fid = $phenotype['metadata'];
      print_r('META_FID:' . $meta_fid . "\n");
      // A metadata file id of 0 was observed on a past submission (referred to
      // as "009" in the original note) and caused failures, so the metadata
      // file is only processed when the id is positive.
      if ($meta_fid > 0) {
        tpps_add_project_file($form_state, $meta_fid);

        // Get metadata column values.
        $groups = $phenotype['metadata-groups'];
        $column_vals = $phenotype['metadata-columns'];
        $struct = array_search('5', $column_vals);
        $min = array_search('6', $column_vals);
        $max = array_search('7', $column_vals);
        $columns = array(
          'name' => $groups['Phenotype Id']['1'],
          'attr' => $groups['Attribute']['2'],
          'desc' => $groups['Description']['3'],
          'unit' => $groups['Units']['4'],
          'struct' => !empty($struct) ? $struct : NULL,
          'min' => !empty($min) ? $min : NULL,
          'max' => !empty($max) ? $max : NULL,
        );

        // 'meta' is passed by reference so the row processor can add to the
        // manually entered metadata collected above.
        $meta_options = array(
          'no_header' => $phenotype['metadata-no-header'],
          'meta_columns' => $columns,
          'meta' => &$phenotypes_meta,
        );

        $log('[INFO] - Processing phenotype_meta file data...');
        tpps_file_iterator($meta_fid, 'tpps_process_phenotype_meta', $meta_options);
        $log('[INFO] - Done.');
      }
      else {
        tpps_job_logger_write('[WARNING] - phenotype_meta file id looks incorrect but the UI checkbox was selected. Need to double check this!');
      }
    }

    // Time options are only forwarded when the time checkbox is set; a
    // missing 'time' section is treated the same as an unchecked box.
    $time_options = array();
    if (!empty($phenotype['time']['time-check'])) {
      $time_options = $phenotype['time'];
    }
    tpps_refine_phenotype_meta($phenotypes_meta, $time_options, $job);

    // Get metadata header values. The column codes used for time and clone
    // depend on the selected file format.
    $groups = $phenotype['file-groups'];
    $column_vals = $phenotype['file-columns'];
    $time_index = ($phenotype['format'] == 0) ? '2' : '4';
    $clone_index = ($phenotype['format'] == 0) ? '3' : '5';
    $time = array_search($time_index, $column_vals);
    $clone = array_search($clone_index, $column_vals);
    $meta_headers = array(
      'name' => $groups['Phenotype Name/Identifier']['2'] ?? NULL,
      'value' => $groups['Phenotype Value(s)']['3'] ?? NULL,
      'time' => !empty($time) ? $time : NULL,
      'clone' => !empty($clone) ? $clone : NULL,
    );

    // Get data header values. Only format 0 maps data columns to their file
    // headers; for other formats 'data_columns' stays NULL.
    if ($phenotype['format'] == 0) {
      $file_headers = tpps_file_headers($data_fid, $phenotype['file-no-header']);
      $data_columns = array();
      if (is_array($groups['Phenotype Data']['0']) && !empty($groups['Phenotype Data']['0'])) {
        foreach ($groups['Phenotype Data']['0'] as $col) {
          $data_columns[$col] = $file_headers[$col];
        }
      }
      else {
        // A single data column is stored as a scalar rather than a list.
        $col = $groups['Phenotype Data'][0];
        $data_columns[$col] = $file_headers[$col];
      }
      unset($file_headers);
    }

    $options['no_header'] = $phenotype['file-no-header'];
    $options['tree_id'] = $groups['Tree Identifier']['1'];
    $options['meta_headers'] = $meta_headers;
    $options['data_columns'] = $data_columns ?? NULL;
    $options['meta'] = $phenotypes_meta;
    $options['file_empty'] = $phenotype['file-empty'];
    $options['organism_name'] = $organism_name;

    print_r('DATA_FID:' . $data_fid . "\n");
    $log('[INFO] - Processing phenotype_data file data...');
    tpps_file_iterator($data_fid, 'tpps_process_phenotype_data', $options);
    $form_state['data']['phenotype_meta'] += $phenotypes_meta;
    $log('[INFO] - Inserting data into database using insert_multi...');
    tpps_chado_insert_multi($options['records']);
    $log('[INFO] - Done.');
  }

  if (!empty($phenotype['iso-check'])) {
    $iso_fid = $phenotype['iso'];
    tpps_add_project_file($form_state, $iso_fid);

    // Start from empty record buckets again. NOTE(review): other keys set by
    // the normal branch (when it ran) are left in $options - confirm the row
    // processor ignores them when 'iso' is set.
    $options['iso'] = TRUE;
    $options['records'] = $records;
    $options['cvterms'] = $phenotype_cvterms;
    $options['file_headers'] = tpps_file_headers($iso_fid);
    $options['organism_name'] = $organism_name;
    $options['meta'] = array(
      'desc' => "Mass Spectrometry",
      'unit' => "intensity (arbitrary units)",
      'attr_id' => tpps_load_cvterm('intensity')->cvterm_id,
    );

    print_r('ISO_FID:' . $iso_fid . "\n");
    $log('[INFO] - Processing phenotype_data file data...');
    tpps_file_iterator($iso_fid, 'tpps_process_phenotype_data', $options);
    $log('[INFO] - Inserting phenotype_data into database using insert_multi...');
    tpps_chado_insert_multi($options['records']);
    $log('[INFO] - Done.');
  }
}
1193 
1206 function tpps_submit_genotype(array &$form_state, array $species_codes, $i, TripalJob &$job = NULL) {
1207  tpps_job_logger_write('[INFO] - Submitting genotype data...');
1208  $job->logMessage('[INFO] - Submitting genotype data...');
1209  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
1210  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
1211  $genotype = $fourthpage["organism-$i"]['genotype'] ?? NULL;
1212  if (empty($genotype)) {
1213  return;
1214  }
1215  tpps_submission_add_tag($form_state['accession'], 'Genotype');
1216 
1217  $project_id = $form_state['ids']['project_id'];
1218  $record_group = variable_get('tpps_record_group', 10000);
1219 
1220  $genotype_count = 0;
1221  $genotype_total = 0;
1222  $seq_var_cvterm = tpps_load_cvterm('sequence_variant')->cvterm_id;
1223  $overrides = array(
1224  'genotype_call' => array(
1225  'variant' => array(
1226  'table' => 'feature',
1227  'columns' => array(
1228  'variant_id' => 'feature_id',
1229  ),
1230  ),
1231  'marker' => array(
1232  'table' => 'feature',
1233  'columns' => array(
1234  'marker_id' => 'feature_id',
1235  ),
1236  ),
1237  ),
1238  );
1239 
1240  $records = array(
1241  'feature' => array(),
1242  'genotype' => array(),
1243  'genotype_call' => array(),
1244  'stock_genotype' => array(),
1245  );
1246 
1247  $multi_insert_options = array(
1248  'fk_overrides' => $overrides,
1249  'entities' => array(
1250  'label' => 'Genotype',
1251  'table' => 'genotype',
1252  ),
1253  );
1254 
1255  $options = array(
1256  'records' => $records,
1257  'tree_info' => $form_state['tree_info'],
1258  'species_codes' => $species_codes,
1259  'genotype_count' => &$genotype_count,
1260  'genotype_total' => &$genotype_total,
1261  'project_id' => $project_id,
1262  'seq_var_cvterm' => $seq_var_cvterm,
1263  'multi_insert' => &$multi_insert_options,
1264  'job' => &$job,
1265  );
1266 
1267  if ($genotype['ref-genome'] == 'manual' or $genotype['ref-genome'] == 'manual2' or $genotype['ref-genome'] == 'url') {
1268  if ($genotype['tripal_fasta']['file_upload']) {
1269  // Uploaded new file.
1270  $assembly_user = $genotype['tripal_fasta']['file_upload'];
1271  tpps_add_project_file($form_state, $assembly_user);
1272  }
1273  if ($genotype['tripal_fasta']['file_upload_existing']) {
1274  // Uploaded existing file.
1275  $assembly_user = $genotype['tripal_fasta']['file_upload_existing'];
1276  tpps_add_project_file($form_state, $assembly_user);
1277  }
1278  if ($genotype['tripal_fasta']['file_remote']) {
1279  // Provided url to file.
1280  $assembly_user = $genotype['tripal_fasta']['file_remote'];
1281  tpps_chado_insert_record('projectprop', array(
1282  'project_id' => $project_id,
1283  'type_id' => tpps_load_cvterm('file_path')->cvterm_id,
1284  'value' => $assembly_user,
1285  'rank' => $form_state['file_rank'],
1286  ));
1287  $form_state['file_rank']++;
1288  }
1289  }
1290  elseif ($genotype['ref-genome'] != 'none') {
1291  tpps_chado_insert_record('projectprop', array(
1292  'project_id' => $project_id,
1293  'type_id' => tpps_load_cvterm('reference_genome')->cvterm_id,
1294  'value' => $genotype['ref-genome'],
1295  ));
1296  }
1297 
1298  if (!empty($genotype['files']['file-type']['SNPs Genotype Assay'])) {
1299  $snp_fid = $genotype['files']['snps-assay'];
1300  tpps_add_project_file($form_state, $snp_fid);
1301 
1302  $options['type'] = 'snp';
1303  $options['headers'] = tpps_file_headers($snp_fid);
1304  $options['marker'] = 'SNP';
1305  $options['type_cvterm'] = tpps_load_cvterm('snp')->cvterm_id;
1306 
1307  if (!empty($genotype['files']['file-type']['SNPs Associations'])) {
1308  $assoc_fid = $genotype['files']['snps-association'];
1309  tpps_add_project_file($form_state, $assoc_fid);
1310 
1311  $options['records']['featureloc'] = array();
1312  $options['records']['featureprop'] = array();
1313  $options['records']['feature_relationship'] = array();
1314  $options['records']['feature_cvterm'] = array();
1315  $options['records']['feature_cvtermprop'] = array();
1316 
1317  $options['associations'] = array();
1318  $options['associations_tool'] = $genotype['files']['snps-association-tool'];
1319  $options['associations_groups'] = $genotype['files']['snps-association-groups'];
1320  $options['scaffold_cvterm'] = tpps_load_cvterm('scaffold')->cvterm_id;
1321  $options['phenotype_meta'] = $form_state['data']['phenotype_meta'];
1322  $options['pub_id'] = $form_state['ids']['pub_id'];
1323 
1324  switch ($genotype['files']['snps-association-type']) {
1325  case 'P value':
1326  $options['associations_type'] = tpps_load_cvterm('p_value')->cvterm_id;
1327  break;
1328 
1329  case 'Genomic Inflation Factor (GIF)':
1330  $options['associations_type'] = tpps_load_cvterm('lambda')->cvterm_id;
1331  break;
1332 
1333  case 'P-adjusted (FDR) / Q value':
1334  $options['associations_type'] = tpps_load_cvterm('q_value')->cvterm_id;
1335  break;
1336 
1337  case 'P-adjusted (FWE)':
1338  $options['associations_type'] = tpps_load_cvterm('p_adj_fwe')->cvterm_id;
1339  break;
1340 
1341  case 'P-adjusted (Bonferroni)':
1342  $options['associations_type'] = tpps_load_cvterm('bonferroni')->cvterm_id;
1343  break;
1344 
1345  default:
1346  break;
1347  }
1348  tpps_job_logger_write('[INFO] - Processing snp_association file data...');
1349  $job->logMessage('[INFO] - Processing snp_association file data...');
1350  tpps_file_iterator($assoc_fid, 'tpps_process_snp_association', $options);
1351  tpps_job_logger_write('[INFO] - Done.');
1352  $job->logMessage('[INFO] - Done.');
1353 
1354  $multi_insert_options['fk_overrides']['featureloc'] = array(
1355  'srcfeature' => array(
1356  'table' => 'feature',
1357  'columns' => array(
1358  'srcfeature_id' => 'feature_id',
1359  ),
1360  ),
1361  );
1362  $multi_insert_options['fk_overrides']['feature_relationship'] = array(
1363  'subject' => array(
1364  'table' => 'feature',
1365  'columns' => array(
1366  'subject_id' => 'feature_id',
1367  ),
1368  ),
1369  'object' => array(
1370  'table' => 'feature',
1371  'columns' => array(
1372  'object_id' => 'feature_id',
1373  ),
1374  ),
1375  );
1376 
1377  $pop_struct_fid = $genotype['files']['snps-pop-struct'];
1378  tpps_add_project_file($form_state, $pop_struct_fid);
1379 
1380  $kinship_fid = $genotype['files']['snps-kinship'];
1381  tpps_add_project_file($form_state, $kinship_fid);
1382  }
1383  tpps_job_logger_write('[INFO] - Processing genotype_spreadsheet file data...');
1384  $job->logMessage('[INFO] - Processing genotype_spreadsheet file data...');
1385  tpps_file_iterator($snp_fid, 'tpps_process_genotype_spreadsheet', $options);
1386  tpps_job_logger_write('[INFO] - Done.');
1387  $job->logMessage('[INFO] - Done.');
1388 
1389  tpps_job_logger_write('[INFO] - Inserting genotype_spreadsheet data into database using insert_multi...');
1390  $job->logMessage('[INFO] - Inserting genotype_spreadsheet data into database using insert_multi...');
1391  tpps_chado_insert_multi($options['records'], $multi_insert_options);
1392  tpps_job_logger_write('[INFO] - Done');
1393  $job->logMessage('[INFO] - Done');
1394  $options['records'] = $records;
1395  $genotype_total += $genotype_count;
1396  tpps_job_logger_write('[INFO] - Genotype count:' . $genotype_count);
1397  $job->logMessage('[INFO] - Genotype count:' . $genotype_count);
1398  $genotype_count = 0;
1399  }
1400 
1401  if (!empty($genotype['files']['file-type']['Assay Design']) and $genotype['marker-type']['SNPs']) {
1402  if ($genotype['files']['assay-load'] == 'new') {
1403  $design_fid = $genotype['files']['assay-design'];
1404  }
1405  if ($genotype['files']['assay-load'] != 'new') {
1406  $design_fid = $genotype['files']['assay-load'];
1407  }
1408  tpps_add_project_file($form_state, $design_fid);
1409  }
1410 
1411  if (!empty($genotype['files']['file-type']['SSRs/cpSSRs Genotype Spreadsheet'])) {
1412  $ssr_fid = $genotype['files']['ssrs'];
1413  tpps_add_project_file($form_state, $ssr_fid);
1414 
1415  $options['type'] = 'ssrs';
1416  $options['headers'] = tpps_ssrs_headers($ssr_fid, $genotype['files']['ploidy']);
1417  $options['marker'] = $genotype['SSRs/cpSSRs'];
1418  $options['type_cvterm'] = tpps_load_cvterm('ssr')->cvterm_id;
1419  $options['empty'] = $genotype['files']['ssrs-empty'];
1420  tpps_job_logger_write('[INFO] - Processing genotype_spreadsheet file data...');
1421  $job->logMessage('[INFO] - Processing genotype_spreadsheet file data...');
1422  tpps_file_iterator($ssr_fid, 'tpps_process_genotype_spreadsheet', $options);
1423  tpps_job_logger_write('[INFO] - Done.');
1424  $job->logMessage('[INFO] - Done.');
1425 
1426  tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
1427  $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
1428  tpps_chado_insert_multi($options['records'], $multi_insert_options);
1429  tpps_job_logger_write('[INFO] - Done');
1430  $job->logMessage('[INFO] - Done.');
1431  $options['records'] = $records;
1432  $genotype_count = 0;
1433 
1434  if (!empty($genotype['files']['ssr-extra-check'])) {
1435  $extra_fid = $genotype['files']['ssrs_extra'];
1436  tpps_add_project_file($form_state, $extra_fid);
1437 
1438  $options['marker'] = $genotype['files']['extra-ssr-type'];
1439  $options['headers'] = tpps_ssrs_headers($extra_fid, $genotype['files']['extra-ploidy']);
1440  tpps_job_logger_write('[INFO] - Processing genotype_spreadsheet file data...');
1441  $job->logMessage('[INFO] - Processing genotype_spreadsheet file data...');
1442  tpps_file_iterator($extra_fid, 'tpps_process_genotype_spreadsheet', $options);
1443  tpps_job_logger_write('[INFO] - Done.');
1444  $job->logMessage('[INFO] - Done.');
1445 
1446  tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
1447  $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
1448  tpps_chado_insert_multi($options['records'], $multi_insert_options);
1449  tpps_job_logger_write('[INFO] - Done.');
1450  $job->logMessage('[INFO] - Done.');
1451  $options['records'] = $records;
1452  $genotype_count = 0;
1453  }
1454  }
1455 
1456  if (!empty($genotype['files']['file-type']['Indel Genotype Spreadsheet'])) {
1457  $indel_fid = $genotype['files']['indels'];
1458  tpps_add_project_file($form_state, $indel_fid);
1459 
1460  $options['type'] = 'indel';
1461  $options['headers'] = tpps_file_headers($indel_fid);
1462  $options['marker'] = 'Indel';
1463  $options['type_cvterm'] = tpps_load_cvterm('indel')->cvterm_id;
1464  tpps_job_logger_write('[INFO] - Processing genotype_spreadsheet file data...');
1465  $job->logMessage('[INFO] - Processing genotype_spreadsheet file data...');
1466  tpps_file_iterator($indel_fid, 'tpps_process_genotype_spreadsheet', $options);
1467  tpps_job_logger_write('[INFO] - Done.');
1468  $job->logMessage('[INFO] - Done.');
1469 
1470  tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
1471  $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
1472  tpps_chado_insert_multi($options['records'], $multi_insert_options);
1473  tpps_job_logger_write('[INFO] - Done.');
1474  $job->logMessage('[INFO] - Done.');
1475  $options['records'] = $records;
1476  $genotype_total += $genotype_count;
1477  tpps_job_logger_write('[INFO] - Genotype count:' . $genotype_total);
1478  $job->logMessage('[INFO] - Genotype count:' . $genotype_total);
1479  $genotype_count = 0;
1480  }
1481 
1482  if (!empty($genotype['files']['file-type']['Other Marker Genotype Spreadsheet'])) {
1483  $other_fid = $genotype['files']['other'];
1484  tpps_add_project_file($form_state, $other_fid);
1485 
1486  $options['headers'] = tpps_file_headers($other_fid);
1487  if (!empty($genotype['files']['other-groups'])) {
1488  $groups = $genotype['files']['other-groups'];
1489  $options['headers'] = tpps_other_marker_headers($other_fid, $groups['Genotype Data'][0]);
1490  $options['tree_id'] = $groups['Tree Id'][1];
1491  }
1492 
1493  $options['type'] = 'other';
1494  $options['marker'] = $genotype['other-marker'];
1495  $options['type_cvterm'] = tpps_load_cvterm('genetic_marker')->cvterm_id;
1496  tpps_job_logger_write('[INFO] - Processing genotype_spreadsheet file data...');
1497  $job->logMessage('[INFO] - Processing genotype_spreadsheet file data...');
1498  tpps_file_iterator($other_fid, 'tpps_process_genotype_spreadsheet', $options);
1499  tpps_job_logger_write('[INFO] - Done.');
1500  $job->logMessage('[INFO] - Done.');
1501 
1502  tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
1503  $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
1504  tpps_chado_insert_multi($options['records'], $multi_insert_options);
1505  tpps_job_logger_write('[INFO] - Done.');
1506  $job->logMessage('[INFO] - Done.');
1507  $options['records'] = $records;
1508  $genotype_count = 0;
1509  }
1510 
1511  // check to make sure admin has not set disable_vcf_importing
1512  $disable_vcf_import = 0;
1513  if(isset($firstpage['disable_vcf_import'])) {
1514  $disable_vcf_import = $firstpage['disable_vcf_import'];
1515  }
1516  tpps_job_logger_write('[INFO] Disable VCF Import is set to ' . $disable_vcf_import . ' (0 means allow vcf import, 1 ignore vcf import)');
1517 
1518 
1519  if (!empty($genotype['files']['file-type']['VCF'])) {
1520  if($disable_vcf_import == 0) {
1521  // @todo we probably want to use tpps_file_iterator to parse vcf files.
1522  $vcf_fid = $genotype['files']['vcf'];
1523  tpps_add_project_file($form_state, $vcf_fid);
1524 
1525  $marker = 'SNP';
1526 
1527  $records['genotypeprop'] = array();
1528 
1529  $snp_cvterm = tpps_load_cvterm('snp')->cvterm_id;
1530  $format_cvterm = tpps_load_cvterm('format')->cvterm_id;
1531  $qual_cvterm = tpps_load_cvterm('quality_value')->cvterm_id;
1532  $filter_cvterm = tpps_load_cvterm('filter')->cvterm_id;
1533  $freq_cvterm = tpps_load_cvterm('allelic_frequency')->cvterm_id;
1534  $depth_cvterm = tpps_load_cvterm('read_depth')->cvterm_id;
1535  $n_sample_cvterm = tpps_load_cvterm('number_samples')->cvterm_id;
1536 
1537  $vcf_file = file_load($vcf_fid);
1538  $location = tpps_get_location($vcf_file->uri);
1539  echo "VCF location: $location\n";
1540 
1541  $vcf_content = gzopen($location, 'r');
1542  $stocks = array();
1543  $format = "";
1544  $current_id = $form_state['ids']['organism_ids'][$i];
1545  $species_code = $species_codes[$current_id];
1546 
1547  // dpm('start: ' . date('r'));.
1548  echo "[INFO] Processing Genotype VCF file\n";
1549  $file_progress_line_count = 0;
1550  $record_count = 0;
1551  while (($vcf_line = gzgets($vcf_content)) !== FALSE) {
1552  $file_progress_line_count++;
1553  if($file_progress_line_count % 10000 == 0 && $file_progress_line_count != 0) {
1554  echo '[INFO] [VCF PROCESSING STATUS] ' . $file_progress_line_count . " lines done\n";
1555  }
1556  if ($vcf_line[0] != '#' && stripos($vcf_line,'.vcf') === FALSE && trim($vcf_line) != "" && str_replace("\0", "", $vcf_line) != "") {
1557  $line_process_start_time = microtime(true);
1558  $record_count = $record_count + 1;
1559  print_r('Record count:' . $record_count . "\n");
1560  $genotype_count += count($stocks);
1561  $vcf_line = explode("\t", $vcf_line);
1562  $scaffold_id = &$vcf_line[0];
1563  $position = &$vcf_line[1];
1564  $variant_name = &$vcf_line[2];
1565  $ref = &$vcf_line[3];
1566  $alt = &$vcf_line[4];
1567  $qual = &$vcf_line[5];
1568  $filter = &$vcf_line[6];
1569  $info = &$vcf_line[7];
1570 
1571  if (empty($variant_name) or $variant_name == '.') {
1572  // $variant_name = "{$scaffold_id}{$position}$ref:$alt";
1573  $variant_name = $scaffold_id . '_' . $position . 'SNP';
1574  }
1575  // $marker_name = $variant_name . $marker; // Original by Peter
1576  $marker_name = $scaffold_id . '_' . $position; // Emily updated suggestion on Tuesday August 9th 2022
1577  $description = "$ref:$alt";
1578  // $genotype_name = "$marker-$species_code-$scaffold_id-$position"; // Original by Peter
1579 
1580  // Instead, we have multiple genotypes we need to generate, so lets do a key val array
1581  $detected_genotypes = array();
1582  $first_genotypes = array(); // used to save the first genotype in each row of the VCF (used for genotype_call table)
1583  $count_columns = count($vcf_line);
1584  for ($j = 9; $j < $count_columns; $j++) {
1585 
1586  $genotype_combination = tpps_submit_vcf_render_genotype_combination($vcf_line[$j], $ref, $alt);
1587 
1588  $detected_genotypes[$marker_name . $genotype_combination] = TRUE;
1589 
1590  // Record the first genotype name to use for genotype_call table
1591  if($j == 9) {
1592  // print_r('[First Genotype]:' . $marker_name . $genotype_combination . "\n");
1593  $first_genotypes[$marker_name . $genotype_combination] = TRUE;
1594  }
1595 
1596  }
1597 
1598  // print_r('[New Feature]: ' . $marker_name . "\n");
1599  $records['feature'][$marker_name] = array(
1600  'organism_id' => $current_id,
1601  'uniquename' => $marker_name,
1602  'type_id' => $seq_var_cvterm,
1603  );
1604 
1605  // print_r('[New Feature variant_name]: ' . $variant_name . "\n");
1606  $records['feature'][$variant_name] = array(
1607  'organism_id' => $current_id,
1608  'uniquename' => $variant_name,
1609  'type_id' => $seq_var_cvterm,
1610  );
1611 
1612  // Rish 12/08/2022: So we have multiple genotypes created
1613  // So I adjusted some of this code into a for statement
1614  // since the genotype_desc seems important and so I modified it to be unique
1615  // and based on the genotype_name
1616  $genotype_names = array_keys($detected_genotypes);
1617 
1618  // print_r($detected_genotypes);
1619  echo "\n";
1620  echo "line#$file_progress_line_count ";
1621  print_r('genotypes per line: ' . count($genotype_names) . " ");
1622 
1623  $genotype_name_progress_count = 0;
1624  foreach ($genotype_names as $genotype_name) {
1625  $genotype_name_progress_count++;
1626  $genotype_desc = "$marker-$species_code-$genotype_name-$position-$description";
1627  // print_r('[DEBUG: Genotype] genotype_name: ' . $genotype_name . ' ' . 'genotype_desc: ' . $genotype_desc . "\n");
1628 
1629 
1630  $records['genotype'][$genotype_desc] = array(
1631  'name' => $genotype_name,
1632  'uniquename' => $genotype_desc,
1633  'description' => $description,
1634  'type_id' => $snp_cvterm,
1635  );
1636 
1637  if ($format != "") {
1638  $records['genotypeprop']["$genotype_desc-format"] = array(
1639  'type_id' => $format_cvterm,
1640  'value' => $format,
1641  '#fk' => array(
1642  'genotype' => $genotype_desc,
1643  ),
1644  );
1645  }
1646 
1647  $vcf_cols_count = count($vcf_line);
1648 
1649  echo "gen_name_index:$genotype_name_progress_count colcount:$vcf_cols_count ";
1650  for ($j = 9; $j < $vcf_cols_count; $j++) {
1651  // Rish: This was added on 09/12/2022
1652  // This gets the name of the current genotype for the tree_id column
1653  // being checked.
1654  $column_genotype_name = $marker_name . tpps_submit_vcf_render_genotype_combination($vcf_line[$j], $ref, $alt);
1655  if($column_genotype_name == $genotype_name) {
1656  // Found a match between the tree_id genotype and the genotype_name from records
1657 
1658  // print_r('[genotype_call insert]: ' . "{$stocks[$j - 9]}-$genotype_name" . "\n");
1659  $records['genotype_call']["{$stocks[$j - 9]}-$genotype_name"] = array(
1660  'project_id' => $project_id,
1661  'stock_id' => $stocks[$j - 9],
1662  '#fk' => array(
1663  'genotype' => $genotype_desc,
1664  'variant' => $variant_name,
1665  'marker' => $marker_name,
1666  ),
1667  );
1668 
1669  $records['stock_genotype']["{$stocks[$j - 9]}-$genotype_name"] = array(
1670  'stock_id' => $stocks[$j - 9],
1671  '#fk' => array(
1672  'genotype' => $genotype_desc,
1673  ),
1674  );
1675  }
1676 
1677  }
1678 
1679  // Quality score.
1680  $records['genotypeprop']["$genotype_desc-qual"] = array(
1681  'type_id' => $qual_cvterm,
1682  'value' => $qual,
1683  '#fk' => array(
1684  'genotype' => $genotype_desc,
1685  ),
1686  );
1687 
1688  // filter: pass/fail.
1689  $records['genotypeprop']["$genotype_desc-filter"] = array(
1690  'type_id' => $filter_cvterm,
1691  'value' => ($filter == '.') ? "P" : "NP",
1692  '#fk' => array(
1693  'genotype' => $genotype_desc,
1694  ),
1695  );
1696 
1697  // Break up info column.
1698  $info_vals = explode(";", $info);
1699  foreach ($info_vals as $key => $val) {
1700  $parts = explode("=", $val);
1701  unset($info_vals[$key]);
1702  $info_vals[$parts[0]] = isset($parts[1]) ? $parts[1] : '';
1703  }
1704 
1705  // Allele frequency, assuming that the info code for allele
1706  // frequency is 'AF'.
1707  if (isset($info_vals['AF']) and $info_vals['AF'] != '') {
1708  $records['genotypeprop']["$genotype_desc-freq"] = array(
1709  'type_id' => $freq_cvterm,
1710  'value' => $info_vals['AF'],
1711  '#fk' => array(
1712  'genotype' => $genotype_desc,
1713  ),
1714  );
1715  }
1716 
1717  // Depth coverage, assuming that the info code for depth coverage is
1718  // 'DP'.
1719  if (isset($info_vals['DP']) and $info_vals['DP'] != '') {
1720  $records['genotypeprop']["$genotype_desc-depth"] = array(
1721  'type_id' => $depth_cvterm,
1722  'value' => $info_vals['DP'],
1723  '#fk' => array(
1724  'genotype' => $genotype_desc,
1725  ),
1726  );
1727  }
1728 
1729  // Number of samples, assuming that the info code for number of
1730  // samples is 'NS'.
1731  if (isset($info_vals['NS']) and $info_vals['NS'] != '') {
1732  $records['genotypeprop']["$genotype_desc-n_sample"] = array(
1733  'type_id' => $n_sample_cvterm,
1734  'value' => $info_vals['NS'],
1735  '#fk' => array(
1736  'genotype' => $genotype_desc,
1737  ),
1738  );
1739  }
1740  }
1741  $line_process_end_time = microtime(true);
1742  $line_process_elapsed_time = $line_process_end_time - $line_process_start_time;
1743  echo " PHP Proctime: $line_process_elapsed_time seconds\n";
1744  if(!isset($line_process_cumulative_time)) {
1745  $line_process_cumulative_time = 0;
1746  }
1747  $line_process_cumulative_time += $line_process_elapsed_time;
1748  echo "Cumulative PHP proctime: " . $line_process_cumulative_time . " seconds\n";
1749  echo "\nGenotype call records to insert (LINE:$file_progress_line_count): " . count($records['genotype_call']);
1750  echo "\nrecord group threshold: $record_group ";
1751  // throw new Exception('DEBUG');
1752  // Tripal Job has issues when all submissions are made at the same
1753  // time, so break them up into groups of 10,000 genotypes along with
1754  // their relevant genotypeprops.
1755  if ($genotype_count > $record_group) {
1756  tpps_job_logger_write('[INFO] - Last bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds');
1757  $job->logMessage('[INFO] - Last bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds');
1758  tpps_job_logger_write('[INFO] - Last bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds');
1759  $job->logMessage('[INFO] - Last bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds');
1760  tpps_job_logger_write('[INFO] - Last insert cumulative time: ' . $insert_cumulative_time . ' seconds');
1761  $job->logMessage('[INFO] - Last insert cumulative time: ' . $insert_cumulative_time . ' seconds');
1762  $genotype_count = 0;
1763  $insert_start_time = microtime(true);
1764  tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
1765  $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
1766  tpps_chado_insert_multi($records, $multi_insert_options);
1767  tpps_job_logger_write('[INFO] - Done.');
1768  $job->logMessage('[INFO] - Done.');
1769  $insert_end_time = microtime(true);
1770  $insert_elapsed_time = $insert_end_time - $insert_start_time;
1771  tpps_job_logger_write('[INFO] - Bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds');
1772  $job->logMessage('[INFO] - Bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds');
1773  tpps_job_logger_write('[INFO] - Bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds');
1774  $job->logMessage('[INFO] - Bulk insert of ' . $record_group . ' took ' . $insert_elapsed_time . ' seconds');
1775  if(!isset($insert_cumulative_time)) {
1776  $insert_cumulative_time = 0;
1777  }
1778  $insert_cumulative_time += $insert_elapsed_time;
1779  tpps_job_logger_write('[INFO] - Insert cumulative time: ' . $insert_cumulative_time . ' seconds');
1780  $job->logMessage('[INFO] - Insert cumulative time: ' . $insert_cumulative_time . ' seconds');
1781  // throw new Exception('DEBUG');
1782  $records = array(
1783  'feature' => array(),
1784  'genotype' => array(),
1785  'genotype_call' => array(),
1786  'genotypeprop' => array(),
1787  'stock_genotype' => array(),
1788  );
1789  $genotype_count = 0;
1790  }
1791  }
1792  elseif (preg_match('/##FORMAT=/', $vcf_line)) {
1793  $format .= substr($vcf_line, 9, -1);
1794  }
1795  elseif (preg_match('/#CHROM/', $vcf_line)) {
1796  $vcf_line = explode("\t", $vcf_line);
1797  for ($j = 9; $j < count($vcf_line); $j++) {
1798  $stocks[] = $form_state['tree_info'][trim($vcf_line[$j])]['stock_id'];
1799  }
1800  }
1801  }
1802  // Insert the last set of values.
1803  tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
1804  $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
1805  tpps_chado_insert_multi($records, $multi_insert_options);
1806  tpps_job_logger_write('[INFO] - Done.');
1807  $job->logMessage('[INFO] - Done.');
1808  unset($records);
1809  $genotype_count = 0;
1810  // dpm('done: ' . date('r'));.
1811  }
1812  }
1813 }
1814 
1820  // drush php-eval 'include("/var/www/Drupal/sites/all/modules/TGDR/forms/submit/submit_all.php"); tpps_generate_popstruct("TGDR675", "/var/www/Drupal/sites/default/files/popstruct_temp/Panel4SNPv3.vcf");'
/**
 * Generates population structure data for a study from a VCF file.
 *
 * Steps, in order: PLINK (VCF to bed), fastStructure for K = 1..10, chooseK,
 * awk/sed ID panel files, popstruct_from_panel.R, then loading the resulting
 * population groups into public.cartogratree_popstruct_layer. Rows already
 * stored for the study are deleted before the new ones are inserted.
 *
 * Intermediate files are written to
 * <public files>/popstruct_temp/<study_accession>. Plain files already in that
 * directory are removed at the start of every run.
 *
 * @param string $study_accession
 *   Accession of the study, e.g. "TGDR675".
 * @param string $vcf_location
 *   Path of the VCF file. A path containing ".gz" is gunzipped into the
 *   temporary directory first; the original file is left untouched.
 */
function tpps_generate_popstruct($study_accession, $vcf_location) {
  // Perform basic checks.
  if ($study_accession == "") {
    tpps_job_logger_write("[FATAL ERROR] You must enter a non-empty study accession. Aborting.\n");
    return;
  }

  if ($vcf_location == "") {
    tpps_job_logger_write("[FATAL ERROR] You must enter a non-empty vcf_location. Aborting.\n");
    return;
  }

  // Get the correct path of the public directory.
  $public_path = drupal_realpath('public://');
  tpps_job_logger_write('[PUBLIC PATH] ' . $public_path . "\n");
  echo('[PUBLIC PATH] ' . $public_path . "\n");

  // Get the module path.
  $module_path = DRUPAL_ROOT . '/' . drupal_get_path('module', 'tpps');
  tpps_job_logger_write('[MODULE PATH] ' . $module_path . "\n");
  echo('[MODULE PATH] ' . $module_path . "\n");

  // Tools path.
  $tools_path = $module_path . "/tools";
  tpps_job_logger_write('[TOOLS PATH] ' . $tools_path . "\n");
  echo('[TOOLS PATH] ' . $tools_path . "\n");

  // Make the temp directory for vcf files etc. mkdir() emits a warning when
  // the directory already exists, so only create it when it is missing.
  $popstruct_temp_dir = $public_path . '/popstruct_temp/' . $study_accession;
  if (!is_dir($popstruct_temp_dir)) {
    mkdir($popstruct_temp_dir, 0755, TRUE);
  }

  // In case there are already files in here, delete them. glob() returns
  // FALSE on error, which must not be handed to foreach.
  $files = glob($popstruct_temp_dir . '/*');
  foreach ($files ?: array() as $file) {
    if (is_file($file)) {
      tpps_job_logger_write("[CLEAN UP BEFORE BEGIN] Removing $file from the popstruct directory");
      echo("[CLEAN UP BEFORE BEGIN] Removing $file from the popstruct directory\n");
      tpps_job_logger_write("[FILE CLEAN/DELETE] $file");
      echo("[FILE CLEAN/DELETE] $file\n");
      unlink($file);
    }
  }

  // Paths of every intermediate file produced below.
  $plink_prefix = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_plink';
  $id_panel = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_IDPanel.txt';
  $id_panel_tab = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_IDPaneltab.txt';
  $id_fam_panel = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_IDfamPanel.txt';
  $pop_panel = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_PopPanel.txt';
  $pop_panel_final = $popstruct_temp_dir . '/' . $study_accession . '_popstruct_PopPanel_final.txt';

  // The VCF may have to be gunzipped, so the original location is kept and
  // a separate variable points at the file that is actually processed. The
  // gunzipped copy lives in the temp directory and is removed by the clean
  // up at the start of the next run.
  $vcf_location_temp = $vcf_location;
  if (stripos($vcf_location, '.gz') !== FALSE) {
    // Use the file name without the .gz extension for the gunzipped copy.
    $file_name_without_ext = basename($vcf_location, ".gz");
    $vcf_location_temp = $popstruct_temp_dir . "/" . $file_name_without_ext;
    shell_exec("gunzip -c " . escapeshellarg($vcf_location) . " > " . escapeshellarg($vcf_location_temp));
  }

  tpps_job_logger_write("[VCF_LOCATION_TEMP] $vcf_location_temp");
  echo("[VCF_LOCATION_TEMP] $vcf_location_temp");

  // Step 1 - Perform PLINK.
  tpps_job_logger_write("PERFORM PLINK");
  echo("PERFORM PLINK");
  echo shell_exec(escapeshellarg($tools_path . '/plink/plink') . ' --vcf ' . escapeshellarg($vcf_location_temp) . " --allow-extra-chr --double-id --make-bed --out " . escapeshellarg($plink_prefix));

  // Step 2 - fastStructure runs for K = 1..10.
  // To get fastStructure installed, we need the dependencies.
  // These dependencies seem to need Python 3.8 / pip3.
  // For CENTOS:
  // sudo yum -y groupinstall "Development Tools"
  // sudo yum -y install openssl-devel bzip2-devel libffi-devel xz-devel.
  $python_env = 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib; export CFLAGS="-I/usr/local/include"; export LDFLAGS="-L/usr/local/lib"; ';
  for ($i = 1; $i <= 10; $i++) {
    tpps_job_logger_write("Performing FastStructure for k = $i\n");
    echo("Performing FastStructure for k = $i\n");
    $fast_structure_cmd = $python_env . 'python ' . escapeshellarg($tools_path . "/fastStructure/structure.py") . " -K " . $i . " --input=" . escapeshellarg($plink_prefix) . " --output=" . escapeshellarg($plink_prefix) . ' --full;';
    echo shell_exec($fast_structure_cmd);
  }

  // Step 3 - select K from the previous runs.
  tpps_job_logger_write("[INFO] Perform chooseK...\n");
  echo("[INFO] Perform chooseK...\n");
  $chooseK_cmd = $python_env . 'python ' . escapeshellarg($tools_path . '/fastStructure/chooseK.py') . ' --input=' . escapeshellarg($plink_prefix);
  $chooseK_output = shell_exec($chooseK_cmd);
  echo $chooseK_output . "\n";

  // Step 3b - take the highest of the two K values reported by chooseK.
  // Lines that do not contain one of the markers are ignored.
  $chooseK_markers = array(
    'Model complexity that maximizes marginal likelihood = ',
    'Model components used to explain structure in data = ',
  );
  $chooseK_optimal = 0;
  foreach (explode("\n", (string) $chooseK_output) as $line) {
    foreach ($chooseK_markers as $marker) {
      $marker_parts = explode($marker, $line);
      if (isset($marker_parts[1]) && (int) $marker_parts[1] > $chooseK_optimal) {
        $chooseK_optimal = (int) $marker_parts[1];
      }
    }
  }
  tpps_job_logger_write("Optimal K is " . $chooseK_optimal . "\n");
  echo("Optimal K is " . $chooseK_optimal . "\n");

  // Without a K there is no meanQ file to hand to the R script, so stop
  // before the existing popstruct rows of this study are deleted.
  if ($chooseK_optimal < 1) {
    tpps_job_logger_write("[FATAL ERROR] Could not determine the optimal K from the chooseK output. Aborting.\n");
    echo("[FATAL ERROR] Could not determine the optimal K from the chooseK output. Aborting.\n");
    return;
  }

  // Step 4 - awk and sed to clean up files.
  tpps_job_logger_write("AWK AND SED adjustments");
  echo("AWK AND SED adjustments");
  $cmd_custom_cmds1 = "awk 'BEGIN { OFS = \"_\" } ;{print $1,$2}' " . escapeshellarg($plink_prefix . '.fam') . ' > ' . escapeshellarg($id_panel) . ";";
  $cmd_custom_cmds1 .= "sed 's/_/\t/g' " . escapeshellarg($id_panel) . " > " . escapeshellarg($id_panel_tab) . ";";
  $cmd_custom_cmds1 .= "awk '{print $1,$2}' " . escapeshellarg($id_panel_tab) . " > " . escapeshellarg($id_fam_panel) . ";";
  echo shell_exec($cmd_custom_cmds1);

  // Step 5 - count the population. wc prints "<count> <file>", possibly with
  // leading padding, so the leading integer is taken after trimming.
  $count_output = shell_exec("wc -l " . escapeshellarg($id_panel));
  tpps_job_logger_write($count_output . "\n");
  echo($count_output . "\n");
  $population_count = (int) trim((string) $count_output);
  tpps_job_logger_write("Population count:" . $population_count . "\n");
  echo("Population count:" . $population_count . "\n");

  // Step 6 - Execute the R script which generates popstruct from the panel
  // using the chooseK optimal value.
  tpps_job_logger_write("RScript popstruct_from_panel execution\n");
  echo("RScript popstruct_from_panel execution\n");
  $cmd_custom_r_code = "Rscript " . escapeshellarg($tools_path . "/popstruct_from_panel.R") . " ";
  $cmd_custom_r_code .= escapeshellarg($study_accession) . " ";
  $cmd_custom_r_code .= $population_count . " ";
  $cmd_custom_r_code .= escapeshellarg($plink_prefix . "." . $chooseK_optimal . ".meanQ") . " ";
  $cmd_custom_r_code .= escapeshellarg($id_fam_panel) . " ";
  $cmd_custom_r_code .= escapeshellarg($pop_panel);
  echo shell_exec($cmd_custom_r_code);

  // Step 7 - drop the first space-delimited column of the PopPanel file.
  $cmd_remove_column_code = "cut -d ' ' -f2- " . escapeshellarg($pop_panel) . " > " . escapeshellarg($pop_panel_final);
  echo shell_exec($cmd_remove_column_code);

  // Step 8 - read the population group of every tree from the output file.
  $file_handle = fopen($pop_panel_final, "r");
  if (!$file_handle) {
    tpps_job_logger_write("[FATAL ERROR] Could not open $pop_panel_final. Aborting.\n");
    echo("[FATAL ERROR] Could not open $pop_panel_final. Aborting.\n");
    return;
  }

  $tree_data = array();
  while (($line = fgets($file_handle)) !== FALSE) {
    $line_space_parts = explode(" ", $line);
    // Lines with fewer than four space-delimited fields are skipped.
    if (count($line_space_parts) < 4) {
      continue;
    }
    $tree_id = $study_accession . '-' . $line_space_parts[0];
    $population_group = trim($line_space_parts[3]);
    if (strpos($population_group, 'e') !== FALSE) {
      // Values containing an "e" (e.g. scientific notation) map to group 1.
      $population_group = 1;
    }
    else {
      $population_group = intval(ceil((float) $population_group)) + 1;
    }
    echo $population_group . ',';
    $tree_data[$tree_id] = array(
      'tree_id' => $tree_id,
      'population' => $population_group,
      'latitude' => 0,
      'longitude' => 0,
      'study_accession' => $study_accession,
    );
  }
  fclose($file_handle);

  // Remove all records from the popstruct table for this study.
  tpps_job_logger_write("Removing all popstruct data for accession $study_accession\n");
  echo("Removing all popstruct data for accession $study_accession\n");
  chado_query('DELETE FROM public.cartogratree_popstruct_layer WHERE study_accession = :study_accession', array(
    ':study_accession' => $study_accession,
  ));

  // Look up the tree locations and insert one popstruct row per tree found.
  // The ids are queried in chunks so the number of bound parameters stays
  // small; an empty list would otherwise produce an invalid "IN ()".
  $tree_ids = array_map('strval', array_keys($tree_data));
  foreach (array_chunk($tree_ids, 1000) as $tree_id_chunk) {
    $results = chado_query('SELECT * FROM public.ct_trees WHERE uniquename IN (:tree_ids)', array(
      ':tree_ids' => $tree_id_chunk,
    ));
    foreach ($results as $row) {
      $tree_id = $row->uniquename;
      if (!isset($tree_data[$tree_id])) {
        continue;
      }
      $tree_data[$tree_id]['latitude'] = $row->latitude;
      $tree_data[$tree_id]['longitude'] = $row->longitude;
      chado_query('INSERT INTO public.cartogratree_popstruct_layer (uniquename,population,study_accession,latitude,longitude) VALUES (:uniquename, :population, :study_accession, :latitude, :longitude)', array(
        ':uniquename' => $tree_id,
        ':population' => $tree_data[$tree_id]['population'],
        ':study_accession' => $study_accession,
        ':latitude' => $tree_data[$tree_id]['latitude'],
        ':longitude' => $tree_data[$tree_id]['longitude'],
      ));
    }
  }

  tpps_job_logger_write("POPSTRUCT completed.\n");
  echo("POPSTRUCT completed.\n");
}
2062 
/**
 * Renders the allele combination of one VCF sample value.
 *
 * The part of $raw_value before the first colon is read as the genotype
 * call (for example "0/0" in "0/0:27,0:27:81:0,81,1065"). It is split into
 * allele indices on "/" (unphased) or "|" (phased). Each index that loosely
 * equals 0 is rendered as $ref, every other token as $alt, and the alleles
 * are joined with ":".
 *
 * NOTE(review): indices above 1 (multi-allelic sites) and missing calls (".")
 * get no special handling here - confirm callers never pass them, or extend
 * this function.
 *
 * @param string $raw_value
 *   Raw sample column value from a VCF data line.
 * @param string $ref
 *   Reference allele of the line.
 * @param string $alt
 *   Alternate allele of the line.
 *
 * @return string
 *   The rendered combination, e.g. "A:T".
 */
function tpps_submit_vcf_render_genotype_combination($raw_value, $ref, $alt) {
  // Only the first colon-delimited sub-field is used as the genotype call.
  $raw_value_colon_parts = explode(':', $raw_value);

  // Accept both VCF allele separators; splitting on "/" alone would leave a
  // phased call such as "0|1" as a single token.
  $allele_indices = preg_split('/[\/|]/', $raw_value_colon_parts[0]);

  $alleles = array();
  foreach ($allele_indices as $allele_index) {
    // Loose comparison kept on purpose: it is how the indices were matched
    // before, and callers may rely on it.
    $alleles[] = ($allele_index == 0) ? $ref : $alt;
  }
  return implode(':', $alleles);
}
2093 
2094 
/**
 * Submits the environment data of one organism of a submission.
 *
 * Returns without doing anything else when the fourth page holds no
 * environment values for organism $i. Otherwise the submission is tagged
 * 'Environment' and, if the cartogratree_layers and cartogratree_fields
 * tables exist, the selected layers and parameters are collected, the tree
 * accession file is run through tpps_process_environment_layers() and the
 * collected records are inserted with tpps_chado_insert_multi().
 *
 * @param array $form_state
 *   The form state of the submission being processed.
 * @param int $i
 *   Number of the organism, used in the "organism-$i" / "species-$i" keys.
 * @param TripalJob $job
 *   Optional job used for progress messages; nothing is logged to the job
 *   when it is NULL.
 */
function tpps_submit_environment(array &$form_state, $i, TripalJob &$job = NULL) {
  // $job defaults to NULL, so job logging has to tolerate a missing job.
  $job_log = function ($message) use (&$job) {
    if ($job !== NULL) {
      $job->logMessage($message);
    }
  };

  tpps_job_logger_write('[INFO] - Submitting environment data...');
  $job_log('[INFO] - Submitting environment data...');
  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
  $environment = $fourthpage["organism-$i"]['environment'] ?? NULL;
  if (empty($environment)) {
    return;
  }
  tpps_submission_add_tag($form_state['accession'], 'Environment');

  // Fall back to empty arrays so the loop below never iterates a non-array.
  $env_layers = (isset($environment['env_layers']) && is_array($environment['env_layers'])) ? $environment['env_layers'] : array();
  $env_params = isset($environment['env_params']) ? $environment['env_params'] : array();
  $env_count = 0;

  // When the 'check' value of the tree accession page is empty, the file of
  // species 1 is used for every organism.
  $species_index = "species-$i";
  if (empty($form_state['saved_values'][TPPS_PAGE_3]['tree-accession']['check'])) {
    $species_index = "species-1";
  }
  $tree_accession = $form_state['saved_values'][TPPS_PAGE_3]['tree-accession'][$species_index];
  $tree_acc_fid = $tree_accession['file'];
  // Prefer the revised version of the file when one is recorded and loads.
  if (!empty($form_state['revised_files'][$tree_acc_fid]) and (file_load($form_state['revised_files'][$tree_acc_fid]))) {
    $tree_acc_fid = $form_state['revised_files'][$tree_acc_fid];
  }

  $env_cvterm = tpps_load_cvterm('environment')->cvterm_id;

  if (!db_table_exists('cartogratree_layers') or !db_table_exists('cartogratree_fields')) {
    return;
  }

  // Maps layer id => (field id => parameter name).
  $layers_params = array();
  $records = array(
    'phenotype' => array(),
    'phenotype_cvterm' => array(),
    'stock_phenotype' => array(),
  );

  foreach ($env_layers as $layer_name => $layer_id) {
    // The "other" entries are not layer selections.
    if ($layer_name == 'other' or $layer_name == 'other_db' or $layer_name == 'other_name' or $layer_name == 'other_params') {
      continue;
    }
    if (empty($layer_id)) {
      continue;
    }

    if (!empty($env_params[$layer_name])) {
      // Explicitly selected parameters of a single layer.
      $layers_params[$layer_id] = array();
      foreach ($env_params[$layer_name] as $param_name => $param_id) {
        if (!empty($param_id)) {
          $layers_params[$layer_id][$param_id] = $param_name;
        }
      }
    }
    elseif (preg_match('/worldclim_subgroup_(.+)/', $layer_id, $matches)) {
      // A worldclim subgroup: take every field of every layer in it.
      $subgroup_id = $matches[1];
      $layers = db_select('cartogratree_layers', 'l')
        ->fields('l', array('layer_id'))
        ->condition('subgroup_id', $subgroup_id)
        ->execute();
      while (($layer = $layers->fetchObject())) {
        $params = db_select('cartogratree_fields', 'f')
          ->fields('f', array('field_id', 'display_name'))
          ->condition('layer_id', $layer->layer_id)
          ->execute();
        while (($param = $params->fetchObject())) {
          $layers_params[$layer->layer_id][$param->field_id] = $param->display_name;
        }
      }
    }
  }

  $options = array(
    'no_header' => !empty($tree_accession['file-no-header']),
    'records' => $records,
    'tree_id' => $tree_accession['file-groups']['Tree Id'][1],
    'accession' => $form_state['accession'],
    'tree_info' => $form_state['tree_info'],
    'layers_params' => $layers_params,
    'env_count' => &$env_count,
    'env_cvterm' => $env_cvterm,
    'suffix' => 0,
    'job' => &$job,
  );
  tpps_job_logger_write('[INFO] - Processing environment_layers file data...');
  $job_log('[INFO] - Processing environmental_layers file data...');
  tpps_file_iterator($tree_acc_fid, 'tpps_process_environment_layers', $options);
  tpps_job_logger_write('[INFO] - Done.');
  $job_log('[INFO] - Done.');

  tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
  $job_log('[INFO] - Inserting data into database using insert_multi...');
  tpps_chado_insert_multi($options['records']);
  tpps_job_logger_write('[INFO] - Done.');
  $job_log('[INFO] - Done.');
  unset($options['records']);
  $env_count = 0;
}
2197 
/**
 * Flags whether a file row names the organism being searched for.
 *
 * The organism name of the row is taken from the 'org' column when that
 * cell is set, otherwise it is built as "<genus> <species>" from the 'genus'
 * and 'species' columns. When it loosely equals $options['search'],
 * $options['found'] is set to TRUE; otherwise $options['found'] is left
 * untouched.
 *
 * @param mixed $row
 *   The row of the file, indexed by column id.
 * @param array $options
 *   Reads 'cols' (column ids) and 'search'; writes 'found'.
 */
function tpps_check_organisms($row, array &$options = array()) {
  $columns = $options['cols'];
  $wanted = &$options['search'];

  $row_organism = $row[$columns['org']] ?? ($row[$columns['genus']] . ' ' . $row[$columns['species']]);

  if ($row_organism != $wanted) {
    return;
  }
  $options['found'] = TRUE;
}
2214 
/**
 * Stores the metadata of one row of a phenotype metadata file.
 *
 * The entry is written to $options['meta'] under the lower-cased phenotype
 * name. Attribute and unit are always recorded as 'other' with the row value
 * in the matching '-other' key; structure, min and max are only recorded
 * when their column is configured and the cell is set and not ''.
 *
 * @param mixed $row
 *   The row of the metadata file, indexed by column id.
 * @param array $options
 *   Reads 'meta_columns' (column ids for 'name', 'attr', 'desc', 'unit' and
 *   optionally 'struct', 'min', 'max'); writes 'meta'.
 */
function tpps_process_phenotype_meta($row, array &$options = array()) {
  $columns = $options['meta_columns'];
  $meta = &$options['meta'];

  // TRUE when an optional column is configured and the row has a value.
  $has_value = function ($key) use ($row, $columns) {
    return !empty($columns[$key]) and isset($row[$columns[$key]]) and $row[$columns[$key]] != '';
  };

  $entry = array(
    'attr' => 'other',
    'attr-other' => $row[$columns['attr']],
    'desc' => $row[$columns['desc']],
    'unit' => 'other',
    'unit-other' => $row[$columns['unit']],
  );
  if ($has_value('struct')) {
    $entry['struct'] = 'other';
    $entry['struct-other'] = $row[$columns['struct']];
  }
  if ($has_value('min')) {
    $entry['min'] = $row[$columns['min']];
  }
  if ($has_value('max')) {
    $entry['max'] = $row[$columns['max']];
  }

  // Phenotype names are matched case-insensitively, so key by lower case.
  $meta[strtolower($row[$columns['name']])] = $entry;
}
2247 
/**
 * Resolves the attribute, unit and structure terms of phenotype metadata.
 *
 * For every phenotype and each of 'attr', 'unit' and 'struct', the selected
 * value is copied to "<type>_id". When the selection is 'other', the custom
 * term from "<type>-other" is resolved to a cvterm id instead, trying in
 * order: ids already resolved during this call, installation through
 * tpps_ols_install_term(), an existing cvterm with a matching name, and
 * finally insertion of a new term in the 'local' cv.
 *
 * Phenotypes listed in $time_options['time_phenotypes'] also get a 'time'
 * entry, taken from $time_options['time_values'] or TRUE when that is empty.
 *
 * @param array $meta
 *   Phenotype metadata keyed by phenotype name; updated in place.
 * @param array $time_options
 *   Optional 'time_phenotypes' and 'time_values' arrays keyed by lower-cased
 *   phenotype name.
 * @param TripalJob $job
 *   Optional job used for progress messages; nothing is logged to the job
 *   when it is NULL.
 */
function tpps_refine_phenotype_meta(array &$meta, array $time_options = array(), TripalJob &$job = NULL) {
  // $job defaults to NULL, so job logging has to tolerate a missing job.
  $job_log = function ($message) use (&$job) {
    if ($job !== NULL) {
      $job->logMessage($message);
    }
  };

  // Custom term => cvterm id, so each term is resolved once per call.
  // NOTE(review): the cache is keyed by term text only, so the same text
  // used for two different term types shares one id - confirm intended.
  $cvt_cache = array();
  $local_cv = chado_get_cv(array('name' => 'local'));
  $local_db = variable_get('tpps_local_db');
  $term_types = array(
    'attr' => array(
      'label' => 'Attribute',
      'ontology' => 'pato',
    ),
    'unit' => array(
      'label' => 'Unit',
      'ontology' => 'po',
    ),
    'struct' => array(
      'label' => 'Structure',
      'ontology' => 'po',
    ),
  );
  print_r($meta);
  foreach ($meta as $name => $data) {
    foreach ($term_types as $type => $info) {
      // A type may be absent from the metadata, so the key is read with a
      // NULL fallback.
      $selection = $data[$type] ?? NULL;
      $meta[$name]["{$type}_id"] = $selection;
      if ($selection != 'other') {
        continue;
      }

      $custom_term = $data["{$type}-other"];
      $term_id = $cvt_cache[$custom_term] ?? NULL;

      if (empty($term_id)) {
        // 1. Try to install the term from the ontology lookup service.
        $result = tpps_ols_install_term("{$info['ontology']}:{$custom_term}");
        if ($result !== FALSE) {
          $term_id = $result->cvterm_id;
          $job_log("[INFO] New OLS Term {$info['ontology']}:{$custom_term} installed");
        }

        // 2. Fall back to an existing cvterm with a matching name. The
        // lookup result is checked before use because it may not be an
        // array of records.
        if (empty($term_id)) {
          $matches = chado_select_record('cvterm', array('cvterm_id'), array(
            'name' => array(
              'data' => $custom_term,
              'op' => 'LIKE',
            ),
          ), array(
            'limit' => 1,
          ));
          $first_match = is_array($matches) ? reset($matches) : FALSE;
          $term_id = is_object($first_match) ? ($first_match->cvterm_id ?? NULL) : NULL;
        }

        // 3. Last resort: insert a new term into the local cv.
        if (empty($term_id)) {
          // NOTE(review): $data[$type] is 'other' here, so the inserted name
          // is always 'other-other'; the custom term text looks like the
          // intended name - confirm before changing.
          $inserted = chado_insert_cvterm(array(
            'id' => "{$local_db->name}:{$custom_term}",
            'name' => $data["{$type}"] . '-other',
            'definition' => '',
            'cv_name' => $local_cv->name,
          ));
          $term_id = is_object($inserted) ? ($inserted->cvterm_id ?? NULL) : NULL;
          if (!empty($term_id)) {
            $job_log("[INFO] New Local {$info['label']} Term {$custom_term} installed");
          }
        }
        $cvt_cache[$custom_term] = $term_id;
      }

      $meta[$name]["{$type}_id"] = $term_id;
    }

    $time_key = strtolower($name);
    if (!empty($time_options['time_phenotypes'][$time_key])) {
      $meta[$name]['time'] = $time_options['time_values'][$time_key] ?? NULL;
      // No explicit time value: only flag the phenotype as time-based.
      if (empty($meta[$name]['time'])) {
        $meta[$name]['time'] = TRUE;
      }
    }
  }
}
2328 
2342 function tpps_process_phenotype_data($row, array &$options = array()) {
2343  global $tpps_job;
2344  $job = $tpps_job;
2345  $iso = $options['iso'] ?? FALSE;
2346  $records = &$options['records'];
2347  $meta_headers = $options['meta_headers'] ?? NULL;
2348  $file_headers = $options['file_headers'] ?? NULL;
2349  $cvterms = $options['cvterms'];
2350  $meta = $options['meta'];
2351  $empty = $options['file-empty'] ?? NULL;
2352  $accession = $options['accession'];
2353  $suffix = &$options['suffix'];
2354  $tree_info = &$options['tree_info'];
2355  $phenotype_count = &$options['phenotype_count'];
2356  $organism_name = &$options['organism_name'];
2357  $record_group = variable_get('tpps_record_group', 10000);
2358  // $record_group = 1;
2359 
2360  // Get genus and species from the organism name
2361  $organism_name_parts = explode(' ', $organism_name, 2);
2362  $genus = $organism_name_parts[0];
2363  $species = $organism_name_parts[1];
2364 
2365  // Ensure that we got the genus and species or error out
2366  if ($genus == "" || $species == "") {
2367  throw new Exception('Organism genus and species could not be processed. Please ensure you added an organism that exists within the chado.organism table!');
2368  }
2369 
2370  // Query the organism table to get the organism id
2371  $organism_id_results = chado_query('SELECT * FROM chado.organism WHERE genus = :genus and species = :species ORDER BY organism_id ASC LIMIT 1', array(
2372  ':genus' => $genus,
2373  ':species' => $species
2374  ));
2375 
2376  // Dummy value for organism_id until we get it from the sql results row
2377  $organism_id = -1;
2378  foreach($organism_id_results as $organism_id_row) {
2379  $organism_id = $organism_id_row->organism_id;
2380  }
2381 
2382  // Check that the organism id is valid
2383  if($organism_id == -1 || $organism_id == "") {
2384  throw new Exception('Could not find organism id for ' . $organism_name. '. This organism does not seem to exist in the chado.organism table!');
2385  }
2386 
2387  $cvterm_id_4lettercode = -1;
2388  // Get the cvterm_id (which is the type_id) for the organism 4 letter code
2389  $cvterm_results = chado_query('SELECT * FROM chado.cvterm WHERE name = :name LIMIT 1', array(
2390  ':name' => 'organism 4 letter code'
2391  ));
2392  foreach($cvterm_results as $cvterm_row) {
2393  $cvterm_id_4lettercode = $cvterm_row->cvterm_id;
2394  }
2395  if($cvterm_id_4lettercode == -1 || $cvterm_id_4lettercode == "") {
2396  throw new Exception('Could not find the cvterm id for organism 4 letter code within the chado.cvterm table. This is needed to generate the phenotype name.');
2397  }
2398 
2399  // We need to use the cvterm_id 4 letter code to find the actual code within the organismprop table (using the organism_id)
2400  $value_4lettercode = "";
2401  $organismprop_results = chado_query('SELECT * FROM chado.organismprop WHERE type_id = :type_id AND organism_id = :organism_id LIMIT 1', array(
2402  ':type_id' => $cvterm_id_4lettercode,
2403  ':organism_id' => $organism_id
2404  ));
2405  foreach ($organismprop_results as $organismprop_row) {
2406  $value_4lettercode = $organismprop_row->value;
2407  }
2408 
2409  if($value_4lettercode == "" || $value_4lettercode == null) {
2410  throw new Exception('4 letter code could not be found for ' . $organism_name . ' in the chado.organismprop table. This is needed to create the phenotype_name.');
2411  }
2412 
2413  if (!$iso) {
2414  if (isset($meta_headers['name']) and (isset($meta_headers['value']))) {
2415  $id = $row[$meta_headers['value']];
2416  $values = array($id => $row[$meta_headers['name']]);
2417  }
2418 
2419  if (!empty($options['data_columns'])) {
2420  $values = $options['data_columns'];
2421  }
2422 
2423  $tree_id = $row[$options['tree_id']];
2424  $clone_col = $meta_headers['clone'] ?? NULL;
2425  if (isset($clone_col) and !empty($row[$clone_col]) and $row[$clone_col] !== $empty) {
2426  $tree_id .= "-" . $row[$clone_col];
2427  }
2428  }
2429  if ($iso) {
2430  foreach ($row as $id => $value) {
2431  if (empty($tree_id)) {
2432  $tree_id = $value;
2433  continue;
2434  }
2435  $values[$id] = $file_headers[$id];
2436  }
2437  }
2438 
2439  if($tree_id == null || $tree_id == "") {
2440  throw new Exception('tree_id was null or empty - there might be a problem with the format of the phenotype data file or selected column options for the file via the user information, cannot continue until resolved.');
2441  }
2442 
2443 
2444  // print_r($values);
2445  // throw new Exception('DEBUG');
2446  $phenotype_name_previous = "<none set>";
2447  foreach ($values as $id => $name) {
2448  if($name == null || $name == "") {
2449  throw new Exception('Phenotype name was null or empty - there might be a problem with the format of the phenotype data file or selected column options for the file via the user information, cannot continue until resolved.');
2450  }
2451  $attr_id = $iso ? $meta['attr_id'] : $meta[strtolower($name)]['attr_id'];
2452  // throw new Exception('debug');
2453  if($attr_id == null || $attr_id == "") {
2454  print_r('$meta[attr_id]:' . $meta['attr_id'] . "\n");
2455  print_r('$name:' . $name . "\n");
2456  print_r('$meta[$name]:' . $meta[strtolower($name)]['attr_id'] . "\n");
2457  print_r('$attr_id:' . $attr_id . "\n");
2458  throw new Exception('Attribute id is null which causes phenotype data to not be added to database correctly.');
2459  }
2460  $value = $row[$id];
2461  $phenotype_name = "$accession-$tree_id-$name-$suffix";
2462  $phenotype_name .= '-' . $value_4lettercode;
2463  $options['data']["$tree_id-$name-$suffix"] = array(
2464  'uniquename' => "$tree_id-$name-$suffix",
2465  'name' => $name,
2466  'stock_id' => $tree_info[$tree_id]['stock_id'],
2467  'time' => NULL,
2468  'value' => $value,
2469  );
2470 
2471 
2472  $records['phenotype'][$phenotype_name] = array(
2473  'uniquename' => $phenotype_name,
2474  'name' => $name,
2475  'attr_id' => $attr_id,
2476  'observable_id' => $meta[strtolower($name)]['struct_id'] ?? NULL,
2477  'value' => $value,
2478  );
2479  // print_r($records['phenotype'][$phenotype_name]);
2480 
2481  $records['stock_phenotype'][$phenotype_name] = array(
2482  'stock_id' => $tree_info[$tree_id]['stock_id'],
2483  '#fk' => array(
2484  'phenotype' => $phenotype_name,
2485  ),
2486  );
2487  // print_r($records['stock_phenotype'][$phenotype_name]);
2488 
2489  if (isset($meta[strtolower($name)]['time'])) {
2490  $records['phenotypeprop']["$phenotype_name-time"] = array(
2491  'type_id' => $cvterms['time'],
2492  'value' => $meta[strtolower($name)]['time'],
2493  '#fk' => array(
2494  'phenotype' => $phenotype_name,
2495  ),
2496  );
2497  // print_r($records['phenotypeprop']["$phenotype_name-time"]);
2498  $options['data'][$phenotype_name]['time'] = $meta[strtolower($name)]['time'];
2499  }
2500  elseif (isset($meta_headers['time'])) {
2501  $val = $row[$meta_headers['time']];
2502  if (is_int($val)) {
2503  $val = tpps_xlsx_translate_date($val);
2504  }
2505  $records['phenotypeprop']["$phenotype_name-time"] = array(
2506  'type_id' => $cvterms['time'],
2507  'value' => $val,
2508  '#fk' => array(
2509  'phenotype' => $phenotype_name,
2510  ),
2511  );
2512  // print_r($records['phenotypeprop']["$phenotype_name-time"]);
2513  $options['data'][$phenotype_name]['time'] = $val;
2514  }
2515 
2516  // print_r($meta);
2517  $records['phenotypeprop']["$phenotype_name-desc"] = array(
2518  'type_id' => $cvterms['desc'],
2519  'value' => $iso ? $meta['desc'] : $meta[strtolower($name)]['desc'],
2520  '#fk' => array(
2521  'phenotype' => $phenotype_name,
2522  ),
2523  );
2524  // print_r($phenotype_name-desc . "\n");
2525  // print_r($records['phenotypeprop']["$phenotype_name-desc"]);
2526 
2527  if ($iso) {
2528  $records['phenotypeprop']["$phenotype_name-unit"] = array(
2529  'type_id' => $cvterms['unit'],
2530  'value' => $meta['unit'],
2531  '#fk' => array(
2532  'phenotype' => $phenotype_name,
2533  ),
2534  );
2535  // print_r($records['phenotypeprop']["$phenotype_name-unit"]);
2536  }
2537 
2538  if (!$iso) {
2539  $records['phenotype_cvterm']["$phenotype_name-unit"] = array(
2540  'cvterm_id' => $meta[strtolower($name)]['unit_id'],
2541  '#fk' => array(
2542  'phenotype' => $phenotype_name,
2543  ),
2544  );
2545  // print_r($records['phenotype_cvterm']["$phenotype_name-unit"]);
2546  }
2547 
2548  if (isset($meta[strtolower($name)]['min'])) {
2549  $records['phenotypeprop']["$phenotype_name-min"] = array(
2550  'type_id' => $cvterms['min'],
2551  'value' => $meta[strtolower($name)]['min'],
2552  '#fk' => array(
2553  'phenotype' => $phenotype_name,
2554  ),
2555  );
2556  // print_r($records['phenotypeprop']["$phenotype_name-min"]);
2557  }
2558 
2559  if (isset($meta[strtolower($name)]['max'])) {
2560  $records['phenotypeprop']["$phenotype_name-max"] = array(
2561  'type_id' => $cvterms['max'],
2562  'value' => $meta[strtolower($name)]['max'],
2563  '#fk' => array(
2564  'phenotype' => $phenotype_name,
2565  ),
2566  );
2567  // print_r($records['phenotypeprop']["$phenotype_name-max"]);
2568  }
2569 
2570  if (!empty($meta[strtolower($name)]['env'])) {
2571  $records['phenotype_cvterm']["$phenotype_name-env"] = array(
2572  'cvterm_id' => $cvterms['environment'],
2573  '#fk' => array(
2574  'phenotype' => $phenotype_name,
2575  ),
2576  );
2577  // print_r($records['phenotype_cvterm']["$phenotype_name-env"]);
2578  }
2579 
2580 
2581 
2582  if ($phenotype_count > $record_group) {
2583  // print_r($records);
2584  // print_r('------------' . "\n");
2585  tpps_job_logger_write('[INFO] -- Inserting data into database using insert_multi...');
2586  $job->logMessage('[INFO] -- Inserting data into database using insert_multi...');
2587  // print_r($records);
2588  tpps_chado_insert_multi($records);
2589  tpps_job_logger_write('[INFO] - Done.');
2590  $job->logMessage('[INFO] - Done.');
2591 
2592  // $temp_results = chado_query('SELECT * FROM chado.phenotype WHERE uniquename ILIKE :phenotype_name', array(
2593  // ':phenotype_name' => $phenotype_name
2594  // ));
2595  // foreach($temp_results as $temp_row) {
2596  // echo "Found phenotype saved: " . $temp_row->uniquename . "\n";
2597  // }
2598 
2599  $records = array(
2600  'phenotype' => array(),
2601  'phenotypeprop' => array(),
2602  'stock_phenotype' => array(),
2603  );
2604  $phenotype_count = 0;
2605  }
2606 
2607  $phenotype_count++;
2608  }
2609  $suffix++;
2610 }
2611 
/**
 * Processes a single row of a genotype spreadsheet.
 *
 * Queues feature, genotype, genotype_call and stock_genotype records (and,
 * when SNP associations are supplied, the association records as well) in
 * $options['records']. Once $options['genotype_count'] reaches the
 * 'tpps_record_group' variable, the queued records are written with
 * tpps_chado_insert_multi() and the queue is reset.
 *
 * @param mixed $row
 *   The spreadsheet row, keyed by column id.
 * @param array $options
 *   Iterator options. Read here: type, records, headers, tree_info,
 *   species_codes, genotype_count, project_id, marker, type_cvterm,
 *   seq_var_cvterm, multi_insert, and optionally tree_id, empty,
 *   associations, associations_type, scaffold_cvterm, pub_id and
 *   genotype_total. records, genotype_count and genotype_total are updated.
 */
function tpps_process_genotype_spreadsheet($row, array &$options = array()) {
  global $tpps_job;
  $job = $tpps_job;
  $type = $options['type'];
  $records = &$options['records'];
  $headers = $options['headers'];
  $tree_info = &$options['tree_info'];
  $species_codes = $options['species_codes'];
  $genotype_count = &$options['genotype_count'];
  $project_id = $options['project_id'];
  $marker = $options['marker'];
  $type_cvterm = $options['type_cvterm'];
  $seq_var_cvterm = $options['seq_var_cvterm'];
  $multi_insert_options = $options['multi_insert'];
  $associations = $options['associations'] ?? array();

  $record_group = variable_get('tpps_record_group', 10000);
  $stock_id = NULL;

  // When the tree id column is known up front, resolve the stock from it.
  if (!empty($options['tree_id'])) {
    $val = $row[$options['tree_id']];
    $stock_id = $tree_info[trim($val)]['stock_id'];
    $current_id = $tree_info[trim($val)]['organism_id'];
    $species_code = $species_codes[$current_id];
  }
  foreach ($row as $key => $val) {
    // Columns without a header are not genotype columns.
    if (empty($headers[$key])) {
      continue;
    }

    // Without a known tree id column, the first column that has a header is
    // used as the tree identifier and is not treated as a genotype.
    if (!isset($stock_id)) {
      $stock_id = $tree_info[trim($val)]['stock_id'];
      $current_id = $tree_info[trim($val)]['organism_id'];
      $species_code = $species_codes[$current_id];
      continue;
    }
    $genotype_count++;

    // SSR cells holding the "empty" marker are skipped entirely.
    if ($type == 'ssrs' and !empty($options['empty']) and $val == $options['empty']) {
      continue;
    }

    // SSR cells holding zero are recorded as "NA".
    if ($type == 'ssrs' and ($val === 0 or $val === "0")) {
      $val = "NA";
    }

    $variant_name = $headers[$key];
    $marker_name = $variant_name . $marker;
    $genotype_name = "$marker-$variant_name-$species_code-$val";

    $records['feature'][$marker_name] = array(
      'organism_id' => $current_id,
      'uniquename' => $marker_name,
      'type_id' => $seq_var_cvterm,
    );

    $records['feature'][$variant_name] = array(
      'organism_id' => $current_id,
      'uniquename' => $variant_name,
      'type_id' => $seq_var_cvterm,
    );

    if (!empty($associations) and !empty($associations[$variant_name])) {
      $association = $associations[$variant_name];
      $assoc_feature_name = "{$variant_name}-{$options['associations_type']}-{$association['trait']}";

      $records['feature'][$association['scaffold']] = array(
        'organism_id' => $current_id,
        'uniquename' => $association['scaffold'],
        'type_id' => $options['scaffold_cvterm'],
      );

      $records['feature'][$assoc_feature_name] = array(
        'organism_id' => $current_id,
        'uniquename' => $assoc_feature_name,
        'type_id' => $seq_var_cvterm,
      );

      if (!empty($association['trait_attr'])) {
        $records['feature_cvterm'][$assoc_feature_name] = array(
          'cvterm_id' => $association['trait_attr'],
          'pub_id' => $options['pub_id'],
          '#fk' => array(
            'feature' => $assoc_feature_name,
          ),
        );

        if (!empty($association['trait_obs'])) {
          $records['feature_cvtermprop'][$assoc_feature_name] = array(
            'type_id' => $association['trait_obs'],
            '#fk' => array(
              'feature_cvterm' => $assoc_feature_name,
            ),
          );
        }
      }

      $records['featureprop'][$assoc_feature_name] = array(
        'type_id' => $options['associations_type'],
        '#fk' => array(
          'feature' => $assoc_feature_name,
        ),
      );

      $records['featureloc'][$variant_name] = array(
        'fmin' => $association['start'],
        'fmax' => $association['stop'],
        'residue_info' => $association['allele'],
        '#fk' => array(
          'feature' => $variant_name,
          'srcfeature' => $association['scaffold'],
        ),
      );

      $records['feature_relationship'][$assoc_feature_name] = array(
        'type_id' => $options['associations_type'],
        'value' => $association['confidence'],
        '#fk' => array(
          'subject' => $variant_name,
          'object' => $assoc_feature_name,
        ),
      );
    }

    $records['genotype'][$genotype_name] = array(
      'name' => $genotype_name,
      'uniquename' => $genotype_name,
      'description' => $val,
      'type_id' => $type_cvterm,
    );

    $records['genotype_call']["$stock_id-$genotype_name"] = array(
      'project_id' => $project_id,
      'stock_id' => $stock_id,
      '#fk' => array(
        'genotype' => $genotype_name,
        'variant' => $variant_name,
        'marker' => $marker_name,
      ),
    );

    $records['stock_genotype']["$stock_id-$genotype_name"] = array(
      'stock_id' => $stock_id,
      '#fk' => array(
        'genotype' => $genotype_name,
      ),
    );

    // Flush the queue once enough genotypes have accumulated.
    if ($genotype_count >= $record_group) {
      tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
      $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
      tpps_chado_insert_multi($records, $multi_insert_options);
      tpps_job_logger_write('[INFO] - Done.');
      $job->logMessage('[INFO] - Done.');
      $records = array(
        'feature' => array(),
        'genotype' => array(),
        'genotype_call' => array(),
        'stock_genotype' => array(),
      );
      // NOTE(review): feature_cvterm, feature_cvtermprop and
      // feature_relationship are not re-created here, matching the previous
      // behavior - confirm whether that is intended.
      if (!empty($associations)) {
        $records['featureloc'] = array();
        $records['featureprop'] = array();
      }
      $options['genotype_total'] += $genotype_count;
      // Build the message with the concatenation operator; "+" would attempt
      // numeric addition on a non-numeric string.
      $inserted_message = '[INFO] - Genotypes inserted:' . $options['genotype_total'];
      tpps_job_logger_write($inserted_message);
      $job->logMessage($inserted_message);
      $genotype_count = 0;
    }
  }
}
2796 
/**
 * Processes a single row of a SNP association file.
 *
 * Stores one entry in $options['associations'], keyed by the SNP id, holding
 * the scaffold, start/stop positions, allele, trait, trait cvterm ids and
 * confidence value read from the row.
 *
 * @param mixed $row
 *   The association file row, keyed by column id.
 * @param array $options
 *   Iterator options. Read here: associations_groups (column ids per field)
 *   and phenotype_meta (keyed by lower-cased trait name). The associations
 *   entry is updated by reference.
 */
function tpps_process_snp_association($row, array &$options = array()) {
  $groups = $options['associations_groups'];
  $associations = &$options['associations'];

  $id = $row[$groups['SNP ID'][1]];

  // The position is expected as "<digits>:<digits>". When it does not match,
  // both bounds stay NULL instead of reading undefined match offsets.
  $start = NULL;
  $stop = NULL;
  if (preg_match('/^(\d+):(\d+)$/', $row[$groups['Position'][3]], $matches)) {
    $start = $matches[1];
    $stop = $matches[2];
    // Make sure start is the smaller of the two positions.
    if ($start > $stop) {
      $temp = $start;
      $start = $stop;
      $stop = $temp;
    }
  }

  $trait = $row[$groups['Associated Trait'][5]];
  // Metadata for the trait may be missing; fall back to NULL ids.
  $trait_meta = $options['phenotype_meta'][strtolower($trait)] ?? array();

  $associations[$id] = array(
    'id' => $id,
    'scaffold' => $row[$groups['Scaffold'][2]],
    'start' => $start,
    'stop' => $stop,
    'allele' => $row[$groups['Allele'][4]],
    'trait' => $trait,
    'trait_attr' => $trait_meta['attr_id'] ?? NULL,
    'trait_obs' => $trait_meta['struct_id'] ?? NULL,
    'confidence' => $row[$groups['Confidence Value'][6]],
  );
}
2839 
/**
 * Returns the headers of an SSR file, adjusted for the given ploidy.
 *
 * For 'Haploid' the file headers are returned unchanged. For 'Diploid' the
 * marker headers receive "_A"/"_B" suffixes, and for 'Polyploid' they receive
 * "_1", "_2", ... suffixes. Blank marker headers inherit the name of the most
 * recent non-blank header. The first column is never modified.
 *
 * @param int $fid
 *   The file id passed to tpps_file_headers().
 * @param string $ploidy
 *   One of 'Haploid', 'Diploid' or 'Polyploid'. Any other value leaves the
 *   non-blank headers untouched.
 *
 * @return array
 *   The headers keyed by column id.
 */
function tpps_ssrs_headers($fid, $ploidy) {
  $headers = tpps_file_headers($fid);
  if ($ploidy == 'Haploid') {
    return $headers;
  }
  $row_len = count($headers);

  // Drop every blank header while preserving keys. The loose comparison
  // mirrors the previous array_search(NULL, ...) matching, but no longer
  // stops early when the blank header happens to sit at a falsy key.
  $results = array_filter($headers, function ($header) {
    return $header != NULL;
  });

  $marker_num = 0;
  // Name of the most recent non-blank marker header.
  $last = '';
  $num_headers = count($results);
  $num_unique_headers = count(array_unique($results));
  $keys = array_keys($headers);

  foreach ($keys as $index => $key) {
    // The first column is left as-is (presumably the identifier column).
    if ($index === 0) {
      continue;
    }
    $next_key = $keys[$index + 1] ?? NULL;
    $next_header = ($next_key !== NULL) ? ($results[$next_key] ?? NULL) : NULL;

    switch ($ploidy) {
      case 'Diploid':
        if ($num_headers == ($row_len + 1) / 2) {
          // Every other marker column name is left blank.
          if (array_key_exists($key, $results)) {
            $last = $results[$key];
            $results[$key] .= "_A";
            break;
          }
          $results[$key] = $last . "_B";
          break;
        }

        if ($num_headers == $row_len) {
          // All of the marker column names are filled out.
          if ($num_headers != $num_unique_headers) {
            // The marker column names are duplicates, need to append
            // _A and _B.
            if ($results[$key] == $next_header) {
              $results[$key] .= "_A";
              break;
            }
            $results[$key] .= "_B";
          }
        }
        break;

      case 'Polyploid':
        if ($num_headers == $row_len) {
          // All of the marker column names are filled out.
          if ($num_unique_headers != $num_headers) {
            // The marker column names are duplicates, need to append
            // _1, _2, up to X ploidy. Excluding the first column, the number
            // of headers divided by the number of unique headers is the
            // number of columns per marker. The explicit parentheses matter:
            // "/" binds tighter than "-".
            $copies = max(1, intdiv($num_headers - 1, max(1, $num_unique_headers - 1)));
            $ploidy_suffix = ($marker_num % $copies) + 1;
            $results[$key] .= "_$ploidy_suffix";
          }
          $marker_num++;
          break;
        }
        // Only the first column of each marker is named: the number of
        // columns per marker is the marker column count divided by the
        // number of named markers.
        $copies = max(1, intdiv($row_len - 1, max(1, $num_headers - 1)));
        $ploidy_suffix = ($marker_num % $copies) + 1;
        if (array_key_exists($key, $results)) {
          $last = $results[$key];
          $results[$key] .= "_$ploidy_suffix";
          $marker_num++;
          break;
        }
        $results[$key] = "{$last}_$ploidy_suffix";
        $marker_num++;
        break;

      default:
        break;
    }
  }

  return $results;
}
2941 
/**
 * Returns the headers of the requested columns of an "other" marker file.
 *
 * @param int $fid
 *   The file id passed to tpps_file_headers().
 * @param array $cols
 *   The column ids whose headers should be returned.
 *
 * @return array
 *   The selected headers, keyed by column id, in the order given by $cols.
 */
function tpps_other_marker_headers($fid, array $cols) {
  $all_headers = tpps_file_headers($fid);

  $selected = array();
  foreach ($cols as $column_id) {
    $selected[$column_id] = $all_headers[$column_id];
  }

  return $selected;
}
2965 
/**
 * Processes one tree row to collect environmental layer values.
 *
 * Looks up the stored latitude/longitude stock properties of the tree, then
 * for every selected layer parameter queues a phenotype and stock_phenotype
 * record holding the value returned by tpps_get_environmental_layer_data().
 * Queued records are written with tpps_chado_insert_multi() once
 * $options['env_count'] reaches the 'tpps_record_group' variable.
 *
 * @param mixed $row
 *   The row of the tree file, keyed by column id.
 * @param array $options
 *   Iterator options. Read here: tree_id, records, tree_info, layers_params,
 *   env_count, accession, suffix and env_cvterm. records, env_count and
 *   suffix are updated.
 */
function tpps_process_environment_layers($row, array &$options = array()) {
  global $tpps_job;
  $job = $tpps_job;
  $id_col = $options['tree_id'];
  $records = &$options['records'];
  $tree_info = &$options['tree_info'];
  $layers_params = $options['layers_params'];
  $env_count = &$options['env_count'];
  $accession = $options['accession'];
  $suffix = &$options['suffix'];
  $env_cvterm = $options['env_cvterm'];
  $record_group = variable_get('tpps_record_group', 10000);

  $tree_id = $row[$id_col];
  $stock_id = $tree_info[$tree_id]['stock_id'];

  // Reads the first stockprop value of the given cvterm for this stock.
  $stock_coordinate = function ($term) use ($stock_id) {
    $matches = chado_select_record('stockprop', array('value'), array(
      'stock_id' => $stock_id,
      'type_id' => tpps_load_cvterm($term)->cvterm_id,
    ), array(
      'limit' => 1,
    ));
    return current($matches)->value;
  };

  $lat = $stock_coordinate('gps_latitude');
  $long = $stock_coordinate('gps_longitude');

  foreach ($layers_params as $layer_id => $params) {
    $layer_name = db_select('cartogratree_layers', 'l')
      ->fields('l', array('title'))
      ->condition('layer_id', $layer_id)
      ->execute()
      ->fetchObject()
      ->title;

    foreach (array_keys($params) as $param_id) {
      $param_name = db_select('cartogratree_fields', 'f')
        ->fields('f', array('field_name'))
        ->condition('field_id', $param_id)
        ->execute()
        ->fetchObject()
        ->field_name;
      $phenotype_name = "$accession-$tree_id-$layer_name-$param_name-$suffix";

      $value = tpps_get_environmental_layer_data($layer_id, $lat, $long, $param_name);
      $type = variable_get("tpps_param_{$param_id}_type", 'attr_id');
      $uses_attr = ($type == 'attr_id');

      $phenotype = array(
        'uniquename' => $phenotype_name,
        'name' => "$param_name",
        'value' => "$value",
      );
      // The environment cvterm is attached either directly as the attribute
      // or through a separate phenotype_cvterm record.
      if ($uses_attr) {
        $phenotype['attr_id'] = $env_cvterm;
      }
      $records['phenotype'][$phenotype_name] = $phenotype;

      $records['stock_phenotype'][$phenotype_name] = array(
        'stock_id' => $stock_id,
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );

      if (!$uses_attr) {
        $records['phenotype_cvterm'][$phenotype_name] = array(
          'cvterm_id' => $env_cvterm,
          '#fk' => array(
            'phenotype' => $phenotype_name,
          ),
        );
      }

      $env_count++;
      if ($env_count < $record_group) {
        continue;
      }

      // Enough records queued: write them and start a fresh queue.
      tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
      $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
      tpps_chado_insert_multi($records);
      tpps_job_logger_write('[INFO] - Done.');
      $job->logMessage('[INFO] - Done.');
      $records = array(
        'phenotype' => array(),
        'phenotype_cvterm' => array(),
        'stock_phenotype' => array(),
      );
      $env_count = 0;
    }
  }
  $suffix++;
}
3073 
/**
 * Retrieves one parameter value of an environmental layer at a location.
 *
 * The response of tpps_get_env_response() is split into lines, the first and
 * last two lines are discarded, and each remaining line is split on "=".
 * The value of the first line whose trimmed name equals $param is returned.
 *
 * @param int $layer_id
 *   The layer id passed on to tpps_get_env_response().
 * @param mixed $lat
 *   The latitude passed on to tpps_get_env_response().
 * @param mixed $long
 *   The longitude passed on to tpps_get_env_response().
 * @param string $param
 *   The parameter name to look for.
 *
 * @return string|null
 *   The trimmed parameter value, or NULL when no response is available or
 *   the parameter is not present in it.
 */
function tpps_get_environmental_layer_data($layer_id, $lat, $long, $param) {
  $response = tpps_get_env_response($layer_id, $lat, $long);
  // The response helper may yield NULL or FALSE; there is nothing to parse
  // then, and explode() must not be handed a non-string.
  if (!is_string($response) or $response === '') {
    return NULL;
  }

  $lines = array_slice(explode("\n", $response), 2, -2);
  foreach ($lines as $line) {
    $item = explode("=", $line);
    if (trim($item[0]) == $param) {
      // A line without "=" has no value part; treat it as empty.
      return trim($item[1] ?? '');
    }
  }
  return NULL;
}
3108 
/**
 * Requests feature info of an environmental layer at a location.
 *
 * Looks up the layer name in the cartogratree_layers table and issues a WMS
 * GetFeatureInfo request for a one-pixel bounding box starting at the given
 * coordinates.
 *
 * @param int $layer_id
 *   The layer_id of the layer in cartogratree_layers.
 * @param mixed $lat
 *   The latitude used as the lower bound of the bounding box.
 * @param mixed $long
 *   The longitude used as the lower bound of the bounding box.
 *
 * @return string|false|null
 *   The raw response body, FALSE when file_get_contents() fails, or NULL
 *   when the cartogratree_layers table or the requested layer is missing.
 */
function tpps_get_env_response($layer_id, $lat, $long) {
  if (!db_table_exists('cartogratree_layers')) {
    return NULL;
  }

  $result = db_select('cartogratree_layers', 'l')
    ->fields('l', array('name'))
    ->condition('layer_id', $layer_id)
    ->execute()
    ->fetchObject();

  // fetchObject() yields FALSE when no layer matches; do not send a request
  // with an empty layer name in that case.
  if (empty($result)) {
    return NULL;
  }
  $layers = $result->name;

  $url = "http://treegenesdev.cam.uchc.edu:8080/geoserver/ct/wms?";
  $serv = "WMS";
  $ver = "1.3.0";
  $req = "GetFeatureInfo";
  $srs = "EPSG:4326";
  $format = "application/json";
  // The bounding box is a tiny square anchored at the requested point.
  $bigger_lat = $lat + 0.0000001;
  $bigger_long = $long + 0.0000001;
  $bbox = "$lat,$long,$bigger_lat,$bigger_long";
  $pixels = "width=1&height=1&X=0&Y=0";

  $url .= "service=$serv&version=$ver&request=$req&layers=$layers&srs=$srs&format=$format&query_layers=$layers&bbox=$bbox&$pixels";

  return file_get_contents($url);
}
3148 
/**
 * Submits the summary page data of a submission.
 *
 * For every checked analysis option a projectprop record with the analysis
 * label is inserted; when a file was supplied it is added to the project
 * together with its description. Tree pictures are registered in
 * $form_state['file_info'] and, when the treepictures_metadata table exists,
 * their metadata is inserted there.
 *
 * @param array $form_state
 *   The form state of the submission, updated by reference.
 */
function tpps_submit_summary(array &$form_state) {
  $analysis_labels = array(
    'diversity' => 'Diversity',
    'population_structure' => 'Population Structure',
    'association_genetics' => 'Association Genetics',
    'landscape_genomics' => 'Landscape Genomics',
    'phenotype_environment' => 'Phenotype-Environment',
  );

  foreach ($analysis_labels as $machine_name => $human_label) {
    if (empty($form_state['saved_values']['summarypage']['analysis']["{$machine_name}_check"])) {
      continue;
    }

    tpps_chado_insert_record('projectprop', array(
      'project_id' => $form_state['ids']['project_id'],
      'type_id' => tpps_load_cvterm('analysis_type')->cvterm_id,
      'value' => $human_label,
    ));

    $fid = $form_state['saved_values']['summarypage']['analysis']["{$machine_name}_file"];
    if (empty($fid)) {
      continue;
    }

    tpps_add_project_file($form_state, $fid);

    tpps_chado_insert_record('projectprop', array(
      'project_id' => $form_state['ids']['project_id'],
      'type_id' => tpps_load_cvterm('source_description')->cvterm_id,
      'value' => $form_state['saved_values']['summarypage']['analysis']["{$machine_name}_file_description"],
    ));
  }

  if (empty($form_state['saved_values']['summarypage']['tree_pictures'])) {
    return;
  }

  foreach ($form_state['saved_values']['summarypage']['tree_pictures'] as $name => $fid) {
    // Entries ending in _url, _attribution or _license describe a picture
    // rather than being one.
    $is_metadata = (substr($name, -4) == '_url'
      || substr($name, -12) == '_attribution'
      || substr($name, -8) == '_license');
    if ($is_metadata || empty($fid)) {
      continue;
    }

    $form_state['file_info']['summarypage'][$fid] = str_replace(' ', '_', $name) . '.jpg';
    if (!db_table_exists('treepictures_metadata')) {
      continue;
    }

    db_insert('treepictures_metadata')
      ->fields(array('species', 'source', 'attribution', 'license'))
      ->values(array(
        'species' => $form_state['file_info']['summarypage'][$fid],
        'source' => $form_state['saved_values']['summarypage']['tree_pictures']["{$name}_url"],
        'attribution' => $form_state['saved_values']['summarypage']['tree_pictures']["{$name}_attribution"],
        'license' => $form_state['saved_values']['summarypage']['tree_pictures']["{$name}_license"],
      ))
      ->execute();
  }
}
3207 
/**
 * Processes a single row of a tree accession file.
 *
 * Queues stock, project_stock, stock_relationship and stockprop records for
 * the tree (and its clone, when one is given) in $options['records'], and
 * records organism and location details in $options['tree_info']. The
 * location comes from lat/long columns, from state/country columns, or from
 * the population group mapping; the latter two may be geocoded when a geocode
 * API key is configured. Queued records are written with
 * tpps_chado_insert_multi() once $options['stock_count'] reaches the
 * 'tpps_record_group' variable.
 *
 * @param mixed $row
 *   The row of the accession file, keyed by column id.
 * @param array $options
 *   Iterator options. Read here: cvterms, records, accession, column_ids,
 *   saved_ids, stock_count, multi_insert, tree_info, org_num, org_names,
 *   single_file, empty, locations, pop_group, and optionally exact and
 *   precision.
 * @param mixed $job
 *   Ignored; the global $tpps_job is used instead.
 */
function tpps_process_accession($row, array &$options, $job = NULL) {
  global $tpps_job;
  $job = $tpps_job;
  $cvterm = $options['cvterms'];
  $records = &$options['records'];
  $accession = $options['accession'];
  $cols = $options['column_ids'];
  $saved_ids = &$options['saved_ids'];
  $stock_count = &$options['stock_count'];
  $multi_insert_options = $options['multi_insert'];
  $tree_info = &$options['tree_info'];
  $record_group = variable_get('tpps_record_group', 10000);
  $geo_api_key = variable_get('tpps_geocode_api_key', NULL);
  $site_based = FALSE;
  $exact = $options['exact'] ?? NULL;
  $precision = $options['precision'] ?? NULL;

  // Builds a stockprop record attached to the stock queued under $stock_key.
  $stockprop = function ($type_id, $value, $stock_key) {
    return array(
      'type_id' => $type_id,
      'value' => $value,
      '#fk' => array(
        'stock' => $stock_key,
      ),
    );
  };

  // Sends a location string to the geocoding service and decodes the reply.
  $geocode = function ($place) use ($geo_api_key) {
    $query = urlencode($place);
    $url = "https://api.opencagedata.com/geocode/v1/json?q=$query&key=$geo_api_key";
    return json_decode(file_get_contents($url));
  };

  $tree_id = $row[$cols['id']];
  $id = $saved_ids['organism_ids'][$options['org_num']];
  if ($options['org_names']['number'] != 1 && $options['single_file']) {
    // A single file covering several organisms names the organism per row.
    $org_full_name = $row[$cols['org']] ?? "{$row[$cols['genus']]} {$row[$cols['species']]}";
    $id = $saved_ids['organism_ids'][array_search($org_full_name, $options['org_names'])];
  }

  $records['stock'][$tree_id] = array(
    'uniquename' => "$accession-$tree_id",
    'type_id' => $cvterm['org'],
    'organism_id' => $id,
  );
  $tree_info[$tree_id] = array(
    'organism_id' => $id,
  );

  $records['project_stock'][$tree_id] = array(
    'project_id' => $saved_ids['project_id'],
    '#fk' => array(
      'stock' => $tree_id,
    ),
  );

  if (isset($row[$cols['clone']]) && $row[$cols['clone']] !== $options['empty']) {
    $clone_name = $tree_id . '-' . $row[$cols['clone']];

    $records['stock'][$clone_name] = array(
      'uniquename' => $accession . '-' . $clone_name,
      'type_id' => $cvterm['clone'],
      'organism_id' => $id,
    );
    $tree_info[$clone_name] = array(
      'organism_id' => $id,
    );

    $records['project_stock'][$clone_name] = array(
      'project_id' => $saved_ids['project_id'],
      '#fk' => array(
        'stock' => $clone_name,
      ),
    );
    $job->logMessage('[INFO] CV Terms Data' . print_r($cvterm, TRUE));
    $records['stock_relationship'][$clone_name] = array(
      'type_id' => $cvterm['has_part'],
      '#fk' => array(
        'subject' => $tree_id,
        'object' => $clone_name,
      ),
    );

    // From here on, properties are attached to the clone.
    $tree_id = $clone_name;
  }

  if (!empty($row[$cols['lat']]) && !empty($row[$cols['lng']])) {
    // Coordinates were provided directly.
    $raw_coord = $row[$cols['lat']] . ',' . $row[$cols['lng']];
    $standard_coord = explode(',', tpps_standard_coord($raw_coord));
    $lat = $standard_coord[0];
    $lng = $standard_coord[1];
  }
  elseif (!empty($row[$cols['state']]) && !empty($row[$cols['country']])) {
    // A named place was provided; any coordinates are approximate.
    $exact = FALSE;
    $records['stockprop']["$tree_id-country"] = $stockprop($cvterm['country'], $row[$cols['country']], $tree_id);
    $records['stockprop']["$tree_id-state"] = $stockprop($cvterm['state'], $row[$cols['state']], $tree_id);

    $location = "{$row[$cols['state']]}, {$row[$cols['country']]}";

    if (!empty($row[$cols['county']])) {
      $records['stockprop']["$tree_id-county"] = $stockprop($cvterm['county'], $row[$cols['county']], $tree_id);
      $location = "{$row[$cols['county']]}, $location";
    }

    if (!empty($row[$cols['district']])) {
      $records['stockprop']["$tree_id-district"] = $stockprop($cvterm['district'], $row[$cols['district']], $tree_id);
      $location = "{$row[$cols['district']]}, $location";
    }

    $tree_info[$tree_id]['location'] = $location;

    // Geocode each distinct location only once.
    if (isset($geo_api_key) && !array_key_exists($location, $options['locations'])) {
      $response = $geocode($location);
      $options['locations'][$location] = $response->results[0]->geometry ?? NULL;

      // With several candidates and no county/district column, prefer the
      // first result whose component type is 'state'.
      if ($response->total_results && $response->total_results > 1 && !isset($cols['district']) && !isset($cols['county'])) {
        foreach ($response->results as $item) {
          if ($item->components->_type == 'state') {
            $options['locations'][$location] = $item->geometry;
            break;
          }
        }
      }
    }
    $lat = $options['locations'][$location]->lat ?? NULL;
    $lng = $options['locations'][$location]->lng ?? NULL;
  }
  elseif (!empty($row[$cols['pop_group']])) {
    // The location comes from the population group mapping.
    $site_based = TRUE;
    $location = $options['pop_group'][$row[$cols['pop_group']]];
    $coord = tpps_standard_coord($location);

    if ($coord) {
      $parts = explode(',', $coord);
      $lat = $parts[0];
      $lng = $parts[1];
    }
    else {
      // Not a coordinate: store the location text and try to geocode it.
      $records['stockprop']["$tree_id-location"] = $stockprop($cvterm['loc'], $location, $tree_id);

      $tree_info[$tree_id]['location'] = $location;

      if (isset($geo_api_key)) {
        $result = $options['locations'][$location] ?? NULL;
        if (empty($result)) {
          $response = $geocode($location);
          $result = ($response->total_results) ? $response->results[0]->geometry : NULL;
          $options['locations'][$location] = $result;
        }

        if (!empty($result)) {
          $lat = $result->lat;
          $lng = $result->lng;
        }
      }
    }
  }

  if (!empty($lat) && !empty($lng)) {
    $records['stockprop']["$tree_id-lat"] = $stockprop($cvterm['lat'], $lat, $tree_id);
    $records['stockprop']["$tree_id-long"] = $stockprop($cvterm['lng'], $lng, $tree_id);
    $tree_info[$tree_id]['lat'] = $lat;
    $tree_info[$tree_id]['lng'] = $lng;

    if ($site_based) {
      $gps_type = "Site-based";
    }
    elseif ($exact) {
      $gps_type = "Exact";
    }
    else {
      $gps_type = "Approximate";
    }

    $records['stockprop']["$tree_id-gps-type"] = $stockprop($cvterm['gps_type'], $gps_type, $tree_id);

    if ($gps_type == "Approximate" && !empty($precision)) {
      $records['stockprop']["$tree_id-precision"] = $stockprop($cvterm['precision'], $precision, $tree_id);
    }
  }

  $stock_count++;
  if ($stock_count < $record_group) {
    return;
  }

  // Enough stocks queued: write them and remember the new stock ids.
  tpps_job_logger_write('[INFO] - Inserting data into database using insert_multi...');
  $job->logMessage('[INFO] - Inserting data into database using insert_multi...');
  $new_ids = tpps_chado_insert_multi($records, $multi_insert_options);
  tpps_job_logger_write('[INFO] - Done.');
  $job->logMessage('[INFO] - Done.');
  foreach ($new_ids as $t_id => $stock_id) {
    $tree_info[$t_id]['stock_id'] = $stock_id;
  }

  $records = array(
    'stock' => array(),
    'stockprop' => array(),
    'stock_relationship' => array(),
    'project_stock' => array(),
  );
  $stock_count = 0;
}
3462 
/**
 * Replaces the form state with a rebuilt copy that has no 'ids' entry.
 *
 * The 'ids' entry is removed, then tpps_form_state_info() fills a fresh
 * array from the remaining state, which becomes the new form state.
 *
 * @param array $form_state
 *   The form state to be cleaned, replaced by reference.
 */
function tpps_clean_state(array &$form_state) {
  unset($form_state['ids']);

  $cleaned = array();
  tpps_form_state_info($cleaned, $form_state);

  $form_state = $cleaned;
}
3477 
/**
 * Generates candidate species codes from a genus and species name.
 *
 * Each code is a genus code part followed by a species code part, with the
 * first character upper-cased. Every distinct code is yielded once.
 *
 * @param string $genus
 *   The genus name passed to tpps_get_code_parts().
 * @param string $species
 *   The species name passed to tpps_get_code_parts().
 *
 * @return Generator
 *   Yields each distinct code string.
 */
function tpps_get_species_codes($genus, $species) {
  $seen = array();

  foreach (tpps_get_code_parts($genus) as $genus_part) {
    foreach (tpps_get_code_parts($species) as $species_part) {
      $candidate = ucfirst($genus_part . $species_part);
      if (isset($seen[$candidate])) {
        continue;
      }
      $seen[$candidate] = TRUE;
      yield $candidate;
    }
  }
}
3502 
/**
 * Generates two-character code parts from a name.
 *
 * Every pair of characters is considered in order of position, the first
 * character always preceding the second in the name. Pairs made of the same
 * character are skipped; all others are yielded lower-cased.
 *
 * @param string $part
 *   The name to build code parts from.
 *
 * @return Generator
 *   Yields each two-character code part.
 */
function tpps_get_code_parts($part) {
  $length = strlen($part);

  for ($first = 0; $first < $length - 1; $first++) {
    for ($second = $first + 1; $second < $length; $second++) {
      // Code parts should not repeat letters.
      if ($part[$first] != $part[$second]) {
        yield strtolower($part[$first] . $part[$second]);
      }
    }
  }
}
tpps_submission_get_tags($accession)
tpps_standard_coord($raw_coordinate)
tpps_submission_clear_db($accession)
const TPPS_PAGE_1
Definition: tpps.module:12
tpps_submit_environment(array &$form_state, $i, TripalJob &$job=NULL)
tpps_process_genotype_spreadsheet($row, array &$options=array())
tpps_ssrs_headers($fid, $ploidy)
tpps_refine_phenotype_meta(array &$meta, array $time_options=array(), TripalJob &$job=NULL)
tpps_update_submission(array $state, array $options=array())
$tpps_job
Definition: submit_all.php:14
tpps_submit_all($accession, TripalJob $job=NULL)
Definition: submit_all.php:25
tpps_other_marker_headers($fid, array $cols)
tpps_get_tag_id($name)
tpps_add_project_file(array &$state, &$fid)
tpps_xlsx_translate_date($date)
Definition: file_utils.inc:791
tpps_submit_page_3(array &$form_state, TripalJob &$job=NULL)
Definition: submit_all.php:630
tpps_submit_genotype(array &$form_state, array $species_codes, $i, TripalJob &$job=NULL)
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_get_env_response($layer_id, $lat, $long)
tpps_get_species_codes($genus, $species)
tpps_process_phenotype_meta($row, array &$options=array())
tpps_matching_trees($project_id)
Definition: match_trees.inc:14
tpps_submission_add_tag($accession, $tag)
tpps_submit_page_1(array &$form_state, TripalJob &$job=NULL)
Definition: submit_all.php:189
tpps_submit_page_4(array &$form_state, TripalJob &$job=NULL)
Definition: submit_all.php:854
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_check_organisms($row, array &$options=array())
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
tpps_process_environment_layers($row, array &$options=array())
tpps_form_state_info(array &$new, array &$old)
Definition: form_utils.inc:290
tpps_submission_rename_files($accession)
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:972
tpps_submit_phenotype(array &$form_state, $i, TripalJob &$job=NULL)
Definition: submit_all.php:976
tpps_get_code_parts($part)
tpps_submission_clear_default_tags($accession)
tpps_submit_page_2(array &$form_state, TripalJob &$job=NULL)
Definition: submit_all.php:447
tpps_clean_state(array &$form_state)
tpps_tripal_entity_publish($bundle_name, array $vals, array $options=array())
tpps_load_submission($accession, $state=TRUE)
Definition: submissions.inc:27
tpps_chado_prop_exists($base_table, $id, $name, array $options=array())
tpps_get_taxon($org_name, $rank)
tpps_process_accession($row, array &$options, $job=NULL)
tpps_generate_popstruct($study_accession, $vcf_location)
const TPPS_PAGE_4
Definition: tpps.module:15
tpps_submit_summary(array &$form_state)
tpps_process_snp_association($row, array &$options=array())
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
$tpps_job_logger
Definition: submit_all.php:13
tpps_get_location($location)
Definition: file_utils.inc:640
tpps_get_environmental_layer_data($layer_id, $lat, $long, $param)
const TPPS_PAGE_3
Definition: tpps.module:14
tpps_ols_install_term($info)
const TPPS_PAGE_2
Definition: tpps.module:13
tpps_file_iterator($fid, $function, array &$options=array())
tpps_submit_vcf_render_genotype_combination($raw_value, $ref, $alt)
tpps_process_phenotype_data($row, array &$options=array())