29 $log_path = drupal_realpath(
'public://') .
'/tpps_job_logs/';
35 $tpps_job_logger = [];
36 $tpps_job_logger[
'job_object'] = $job;
37 $tpps_job_logger[
'log_file_path'] = $log_path . $accession .
'_' . $tpps_job_logger[
'job_object']->getJobID() .
'.txt';
38 $tpps_job_logger[
'log_file_handle'] = fopen($tpps_job_logger[
'log_file_path'],
"w+");
41 $job->logMessage(
'[INFO] Setting up...');
44 $form_state[
'status'] =
'Submission Job Running';
46 $transaction = db_transaction();
53 $job->logMessage(
'[INFO] Clearing Database...');
56 $job->logMessage(
'[INFO] Database Cleared.');
57 $project_id = $form_state[
'ids'][
'project_id'] ?? NULL;
62 $firstpage = $form_state[
'saved_values'][
TPPS_PAGE_1];
63 $form_state[
'file_rank'] = 0;
64 $form_state[
'ids'] = array();
67 $job->logMessage(
'[INFO] Creating project record...');
68 $form_state[
'title'] = $firstpage[
'publication'][
'title'];
69 $form_state[
'abstract'] = $firstpage[
'publication'][
'abstract'];
70 $project_record = array(
71 'name' => $firstpage[
'publication'][
'title'],
72 'description' => $firstpage[
'publication'][
'abstract'],
74 if (!empty($project_id)) {
75 $project_record[
'project_id'] = $project_id;
78 tpps_job_logger_write(
"[INFO] Project record created. project_id: @pid\n", array(
'@pid' => $form_state[
'ids'][
'project_id']));
79 $job->logMessage(
"[INFO] Project record created. project_id: @pid\n", array(
'@pid' => $form_state[
'ids'][
'project_id']));
82 $firstpage[
'publication'][
'title'],
83 $form_state[
'ids'][
'project_id'],
87 $job->logMessage(
"[INFO] Submitting Publication/Species information...");
90 $job->logMessage(
"[INFO] Publication/Species information submitted!\n");
93 $job->logMessage(
"[INFO] Submitting Study Details...");
// Report that the study details step finished (log text typo "sumbitted" corrected).
96 $job->logMessage(
"[INFO] Study Details submitted!\n");
99 $job->logMessage(
"[INFO] Submitting Accession information...");
102 $job->logMessage(
"[INFO] Accession information submitted!\n");
105 $job->logMessage(
"[INFO] Submitting Raw data...");
108 $job->logMessage(
"[INFO] Raw data submitted!\n");
111 $job->logMessage(
"[INFO] Submitting Summary information...");
114 $job->logMessage(
"[INFO] Summary information submitted!\n");
119 $job->logMessage(
"[INFO] Renaming files...");
122 $job->logMessage(
"[INFO] Files renamed!\n");
124 $form_state[
'status'] =
'Approved';
125 $form_state[
'loaded'] = time();
127 $job->logMessage(
"[INFO] Finishing up...");
130 $job->logMessage(
"[INFO] Complete!");
132 fclose($tpps_job_logger[
'log_file_handle']);
135 catch (Exception $e) {
136 $transaction->rollback();
138 $form_state[
'status'] =
'Pending Approval';
142 $job->logMessage(
'[ERROR] Job failed', array(), TRIPAL_ERROR);
144 $job->logMessage(
'[ERROR] Error message: @msg', array(
'@msg' => $e->getMessage()), TRIPAL_ERROR);
146 $job->logMessage(
"[ERROR] Trace: \n@trace", array(
'@trace' => $e->getTraceAsString()), TRIPAL_ERROR);
148 fclose($tpps_job_logger[
'log_file_handle']);
149 watchdog_exception(
'tpps', $e);
// Rethrow with the same class and message as before, but chain the caught
// exception as "previous" so the root cause stays attached for callers.
150 throw new Exception(
'Job failed.', 0, $e);
163 foreach ($replacements as $key_string => $replace_string) {
164 $string = str_replace($key_string, $replace_string, $string);
169 $timestamp_now = date(
'm/d/y g:i:s A', $time_now);
171 $string =
"\n" . $timestamp_now .
" " . $string;
173 fwrite($tpps_job_logger[
'log_file_handle'],$string);
174 fflush($tpps_job_logger[
'log_file_handle']);
176 catch (Exception $e) {
177 print_r($e->getMessage());
191 $dbxref_id = $form_state[
'dbxref_id'];
192 $firstpage = $form_state[
'saved_values'][
TPPS_PAGE_1];
193 $thirdpage = $form_state[
'saved_values'][
TPPS_PAGE_3];
194 $seconds = $firstpage[
'publication'][
'secondaryAuthors'];
197 'project_id' => $form_state[
'ids'][
'project_id'],
198 'dbxref_id' => $dbxref_id,
199 'is_current' => TRUE,
202 if (!empty($form_state[
'tpps_type']) and $form_state[
'tpps_type'] ==
'tppsc' and !empty($form_state[
'saved_values'][
TPPS_PAGE_1][
'doi'])) {
203 $dryad_db = chado_get_db(array(
'name' =>
'dryad'));
204 $dryad_dbxref = chado_insert_dbxref(array(
205 'db_id' => $dryad_db->db_id,
206 'accession' => $form_state[
'saved_values'][TPPS_PAGE_1][
'doi'],
209 'project_id' => $form_state[
'ids'][
'project_id'],
210 'dbxref_id' => $dryad_dbxref,
211 'is_current' => TRUE,
215 if (!empty($firstpage[
'photo'])) {
220 'name' => $firstpage[
'primaryAuthor'],
225 'project_id' => $form_state[
'ids'][
'project_id'],
226 'contact_id' => $primary_author_id,
229 $authors = array($firstpage[
'primaryAuthor']);
230 if ($seconds[
'number'] != 0) {
231 for ($i = 1; $i <= $seconds[
'number']; $i++) {
// Accept only entries that exist and are not the empty string. isset() gives
// the same outcome as the former "!empty(...) ||" test for every value, but a
// missing offset no longer raises an undefined-index notice.
232 if (isset($seconds[$i]) && $seconds[$i] != "") {
234 'name' => $seconds[$i],
238 $names = explode(
" ", $seconds[$i]);
239 $first_name = implode(
" ", array_slice($names, 0, -1));
240 $last_name = end($names);
241 $pubauthors[] = array(
243 'surname' => $last_name,
244 'givennames' => $first_name,
246 $authors[] = $seconds[$i];
249 tpps_job_logger_write(
'[INFO] - Secondary publishers error - found an empty secondary publisher name. Ignoring this input.');
250 $job->logMessage(
'[INFO] - Secondary publishers error - found an empty secondary publisher name. Ignoring this input.');
257 'title' => $firstpage[
'publication'][
'title'],
258 'series_name' => $firstpage[
'publication'][
'journal'],
260 'pyear' => $firstpage[
'publication'][
'year'],
261 'uniquename' => implode(
'; ', $authors) .
" {$firstpage['publication']['title']}. {$firstpage['publication']['journal']}; {$firstpage['publication']['year']}",
263 $form_state[
'ids'][
'pub_id'] = $publication_id;
265 $firstpage[
'publication'][
'title'],
268 $form_state[
'pyear'] = $firstpage[
'publication'][
'year'];
269 $form_state[
'journal'] = $firstpage[
'publication'][
'journal'];
271 if (!empty($firstpage[
'publication'][
'abstract'])) {
273 'pub_id' => $publication_id,
275 'value' => $firstpage[
'publication'][
'abstract'],
280 'pub_id' => $publication_id,
282 'value' => implode(
', ', $authors),
284 $form_state[
'authors'] = $authors;
287 'project_id' => $form_state[
'ids'][
'project_id'],
288 'pub_id' => $publication_id,
291 if (!empty($firstpage[
'organization'])) {
293 'name' => $firstpage[
'organization'],
299 'subject_id' => $primary_author_id,
300 'object_id' => $organization_id,
304 $names = explode(
" ", $firstpage[
'primaryAuthor']);
305 $first_name = implode(
" ", array_slice($names, 0, -1));
306 $last_name = end($names);
309 'pub_id' => $publication_id,
311 'surname' => $last_name,
312 'givennames' => $first_name,
315 if (!empty($pubauthors)) {
316 foreach ($pubauthors as $info) {
317 $info[
'pub_id'] = $publication_id;
322 $form_state[
'ids'][
'organism_ids'] = array();
323 $organism_number = $firstpage[
'organism'][
'number'];
325 for ($i = 1; $i <= $organism_number; $i++) {
326 $parts = explode(
" ", $firstpage[
'organism'][$i][
'name']);
328 $species = implode(
" ", array_slice($parts, 1));
330 if (isset($parts[2]) and ($parts[2] ==
'var.' or $parts[2] ==
'subsp.')) {
331 $infra = implode(
" ", array_slice($parts, 2));
336 'species' => $species,
337 'infraspecific_name' => $infra,
340 if (preg_match(
'/ x /', $species)) {
347 'organism_id' => $form_state[
'ids'][
'organism_ids'][$i],
349 'value' => $firstpage[
'organism'][$i][
'is_tree'] ?
'Tree' :
'Non-tree',
353 if ($organism_number != 1) {
354 if (!empty($thirdpage[
'tree-accession'][
'check']) and empty($thirdpage[
'tree-accession'][
"species-$i"][
'file'])) {
358 if (empty($thirdpage[
'tree-accession'][
'check'])) {
361 'search' => $firstpage[
'organism'][$i][
'name'],
364 $tree_accession = $thirdpage[
'tree-accession'][
"species-1"];
365 $groups = $tree_accession[
'file-groups'];
366 if ($groups[
'Genus and Species'][
'#type'] ==
'separate') {
367 $options[
'cols'][
'genus'] = $groups[
'Genus and Species'][
'6'];
368 $options[
'cols'][
'species'] = $groups[
'Genus and Species'][
'7'];
370 if ($groups[
'Genus and Species'][
'#type'] !=
'separate') {
371 $options[
'cols'][
'org'] = $groups[
'Genus and Species'][
'10'];
373 $fid = $tree_accession[
'file'];
375 if (!$options[
'found']) {
381 $code_exists =
tpps_chado_prop_exists(
'organism', $form_state[
'ids'][
'organism_ids'][$i],
'organism 4 letter code');
385 $new_code_query = chado_select_record(
'organismprop', array(
'value'), array(
387 'value' => $trial_code,
390 if (empty($new_code_query)) {
396 'organism_id' => $form_state[
'ids'][
'organism_ids'][$i],
398 'value' => $trial_code,
408 foreach ($ranks as $rank) {
411 $taxon =
tpps_get_taxon($firstpage[
'organism'][$i][
'name'], $rank);
414 'organism_id' => $form_state[
'ids'][
'organism_ids'][$i],
423 'organism_id' => $form_state[
'ids'][
'organism_ids'][$i],
424 'project_id' => $form_state[
'ids'][
'project_id'],
428 'organism_id' => $form_state[
'ids'][
'organism_ids'][$i],
429 'pub_id' => $publication_id,
434 $form_state[
'ids'][
'organism_ids'][$i],
449 $secondpage = $form_state[
'saved_values'][
TPPS_PAGE_2];
451 if (!empty($secondpage[
'StartingDate'])) {
453 'project_id' => $form_state[
'ids'][
'project_id'],
455 'value' => $secondpage[
'StartingDate'][
'month'] .
" " . $secondpage[
'StartingDate'][
'year'],
459 'project_id' => $form_state[
'ids'][
'project_id'],
461 'value' => $secondpage[
'EndingDate'][
'month'] .
" " . $secondpage[
'EndingDate'][
'year'],
466 'project_id' => $form_state[
'ids'][
'project_id'],
468 'value' => $secondpage[
'data_type'],
471 $studytype_options = array(
473 1 =>
'Natural Population (Landscape)',
474 2 =>
'Growth Chamber',
476 4 =>
'Experimental/Common Garden',
481 'project_id' => $form_state[
'ids'][
'project_id'],
483 'value' => $studytype_options[$secondpage[
'study_type']],
486 if (!empty($secondpage[
'study_info'][
'season'])) {
487 $seasons = implode($secondpage[
'study_info'][
'season']);
490 'project_id' => $form_state[
'ids'][
'project_id'],
496 if (!empty($secondpage[
'study_info'][
'assessions'])) {
498 'project_id' => $form_state[
'ids'][
'project_id'],
500 'value' => $secondpage[
'study_info'][
'assessions'],
504 if (!empty($secondpage[
'study_info'][
'temp'])) {
506 'project_id' => $form_state[
'ids'][
'project_id'],
508 'value' => $secondpage[
'study_info'][
'temp'][
'high'],
512 'project_id' => $form_state[
'ids'][
'project_id'],
514 'value' => $secondpage[
'study_info'][
'temp'][
'low'],
525 foreach ($types as $type) {
526 if (!empty($secondpage[
'study_info'][$type])) {
527 $set = $secondpage[
'study_info'][$type];
530 'project_id' => $form_state[
'ids'][
'project_id'],
532 'value' => ($set[
'option'] ==
'1') ?
'True' :
'False',
535 if ($set[
'option'] ==
'1') {
537 'project_id' => $form_state[
'ids'][
'project_id'],
539 'value' => $set[
'controlled'],
542 elseif (!empty($set[
'uncontrolled'])) {
544 'project_id' => $form_state[
'ids'][
'project_id'],
546 'value' => $set[
'uncontrolled'],
552 if (!empty($secondpage[
'study_info'][
'rooting'])) {
553 $root = $secondpage[
'study_info'][
'rooting'];
556 'project_id' => $form_state[
'ids'][
'project_id'],
558 'value' => $root[
'option'],
561 if ($root[
'option'] ==
'Soil') {
563 'project_id' => $form_state[
'ids'][
'project_id'],
565 'value' => ($root[
'soil'][
'type'] ==
'Other') ? $root[
'soil'][
'other'] : $root[
'soil'][
'type'],
569 'project_id' => $form_state[
'ids'][
'project_id'],
571 'value' => $root[
'soil'][
'container'],
575 if (!empty($secondpage[
'study_info'][
'rooting'][
'ph'])) {
576 $set = $secondpage[
'study_info'][
'rooting'][
'ph'];
579 'project_id' => $form_state[
'ids'][
'project_id'],
581 'value' => ($set[
'option'] ==
'1') ?
'True' :
'False',
584 if ($set[
'option'] ==
'1') {
586 'project_id' => $form_state[
'ids'][
'project_id'],
588 'value' => $set[
'controlled'],
591 elseif (!empty($set[
'uncontrolled'])) {
593 'project_id' => $form_state[
'ids'][
'project_id'],
595 'value' => $set[
'uncontrolled'],
600 $description = FALSE;
602 foreach ($root[
'treatment'] as $value) {
604 $record_next = ((bool) $value);
610 'project_id' => $form_state[
'ids'][
'project_id'],
617 $description = FALSE;
631 $firstpage = $form_state[
'saved_values'][
TPPS_PAGE_1];
632 $thirdpage = $form_state[
'saved_values'][
TPPS_PAGE_3];
633 $organism_number = $firstpage[
'organism'][
'number'];
634 $form_state[
'locations'] = array();
635 $form_state[
'tree_info'] = array();
637 $loc_name =
'Location (latitude/longitude or country/state or population group)';
639 if (!empty($thirdpage[
'skip_validation'])) {
643 if (!empty($thirdpage[
'study_location'])) {
644 $type = $thirdpage[
'study_location'][
'type'];
645 $locs = $thirdpage[
'study_location'][
'locations'];
646 $geo_api_key = variable_get(
'tpps_geocode_api_key', NULL);
648 for ($i = 1; $i <= $locs[
'number']; $i++) {
651 $latitude = $standard_coordinate[0];
652 $longitude = $standard_coordinate[1];
655 'project_id' => $form_state[
'ids'][
'project_id'],
657 'value' => $latitude,
662 'project_id' => $form_state[
'ids'][
'project_id'],
664 'value' => $longitude,
671 'project_id' => $form_state[
'ids'][
'project_id'],
677 if (isset($geo_api_key)) {
678 $query = urlencode($loc);
679 $url =
"https://api.opencagedata.com/geocode/v1/json?q=$query&key=$geo_api_key";
// file_get_contents() returns FALSE when the request fails, which
// json_decode() turns into NULL rather than a response object.
680 $response = json_decode(file_get_contents($url));
// empty() skips a failed/undecodable response (or zero results) without
// dereferencing NULL; a positive total_results behaves exactly as before.
682 if (!empty($response->total_results)) {
683 $result = $response->results[0]->geometry;
684 $form_state[
'locations'][$loc] = $result;
687 'project_id' => $form_state[
'ids'][
'project_id'],
689 'value' => $result->lat,
694 'project_id' => $form_state[
'ids'][
'project_id'],
696 'value' => $result->lng,
721 'stockprop' => array(),
722 'stock_relationship' => array(),
723 'project_stock' => array(),
726 'stock_relationship' => array(
730 'subject_id' =>
'stock_id',
736 'object_id' =>
'stock_id',
742 $multi_insert_options = array(
743 'fk_overrides' => $overrides,
748 'prefix' => $form_state[
'accession'] .
'-',
753 for ($i = 1; $i <= $organism_number; $i++) {
754 $names[$i] = $firstpage[
'organism'][$i][
'name'];
756 $names[
'number'] = $firstpage[
'organism'][
'number'];
758 'cvterms' => $cvterms,
759 'records' => $records,
760 'overrides' => $overrides,
761 'locations' => &$form_state[
'locations'],
762 'accession' => $form_state[
'accession'],
763 'single_file' => empty($thirdpage[
'tree-accession'][
'check']),
764 'org_names' => $names,
765 'saved_ids' => &$form_state[
'ids'],
766 'stock_count' => &$stock_count,
767 'multi_insert' => $multi_insert_options,
768 'tree_info' => &$form_state[
'tree_info'],
772 for ($i = 1; $i <= $organism_number; $i++) {
773 $tree_accession = $thirdpage[
'tree-accession'][
"species-$i"];
774 $fid = $tree_accession[
'file'];
778 $column_vals = $tree_accession[
'file-columns'];
779 $groups = $tree_accession[
'file-groups'];
781 $options[
'org_num'] = $i;
782 $options[
'no_header'] = !empty($tree_accession[
'file-no-header']);
783 $options[
'empty'] = $tree_accession[
'file-empty'];
784 $options[
'pop_group'] = $tree_accession[
'pop-group'];
785 $options[
'exact'] = $tree_accession[
'exact_coords'] ?? NULL;
786 $options[
'precision'] = NULL;
787 if (!$options[
'exact']) {
788 $options[
'precision'] = $tree_accession[
'coord_precision'] ?? NULL;
793 $county = array_search(
'8', $column_vals);
794 $district = array_search(
'9', $column_vals);
795 $clone = array_search(
'13', $column_vals);
796 $options[
'column_ids'] = array(
797 'id' => $groups[
'Tree Id'][
'1'],
798 'lat' => $groups[$loc_name][
'4'] ?? NULL,
799 'lng' => $groups[$loc_name][
'5'] ?? NULL,
800 'country' => $groups[$loc_name][
'2'] ?? NULL,
801 'state' => $groups[$loc_name][
'3'] ?? NULL,
802 'county' => ($county !== FALSE) ? $county : NULL,
803 'district' => ($district !== FALSE) ? $district : NULL,
804 'clone' => ($clone !== FALSE) ? $clone : NULL,
805 'pop_group' => $groups[$loc_name][
'12'] ?? NULL,
808 if ($organism_number != 1 and empty($thirdpage[
'tree-accession'][
'check'])) {
809 if ($groups[
'Genus and Species'][
'#type'] ==
'separate') {
810 $options[
'column_ids'][
'genus'] = $groups[
'Genus and Species'][
'6'];
811 $options[
'column_ids'][
'species'] = $groups[
'Genus and Species'][
'7'];
813 if ($groups[
'Genus and Species'][
'#type'] !=
'separate') {
814 $options[
'column_ids'][
'org'] = $groups[
'Genus and Species'][
'10'];
818 $job->logMessage(
'[INFO] - Processing accession file data...');
821 $job->logMessage(
'[INFO] - Done.');
824 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
827 $job->logMessage(
'[INFO] - Done.');
828 foreach ($new_ids as $t_id => $stock_id) {
829 $form_state[
'tree_info'][$t_id][
'stock_id'] = $stock_id;
831 unset($options[
'records']);
833 if (empty($thirdpage[
'tree-accession'][
'check'])) {
838 if (!empty($thirdpage[
'existing_trees'])) {
855 $fourthpage = $form_state[
'saved_values'][
TPPS_PAGE_4];
856 $organism_number = $form_state[
'saved_values'][
TPPS_PAGE_1][
'organism'][
'number'];
857 $species_codes = array();
859 for ($i = 1; $i <= $organism_number; $i++) {
861 $species_codes[$form_state[
'ids'][
'organism_ids'][$i]] = current(chado_select_record(
'organismprop', array(
'value'), array(
863 'organism_id' => $form_state[
'ids'][
'organism_ids'][$i],
869 if (isset($fourthpage[
"organism-$i"][
'genotype'])) {
870 $ref_genome = $fourthpage[
"organism-$i"][
'genotype'][
'ref-genome'];
872 if ($ref_genome ===
'url' or $ref_genome ===
'manual' or $ref_genome ===
'manual2') {
874 $class =
'FASTAImporter';
875 tripal_load_include_importer_class($class);
877 $fasta = $fourthpage[
"organism-$i"][
'genotype'][
'tripal_fasta'];
879 $file_upload = isset($fasta[
'file'][
'file_upload']) ? trim($fasta[
'file'][
'file_upload']) : 0;
880 $file_existing = isset($fasta[
'file'][
'file_upload_existing']) ? trim($fasta[
'file'][
'file_upload_existing']) : 0;
881 $file_remote = isset($fasta[
'file'][
'file_remote']) ? trim($fasta[
'file'][
'file_remote']) : 0;
882 $analysis_id = $fasta[
'analysis_id'];
883 $seqtype = $fasta[
'seqtype'];
884 $organism_id = $form_state[
'ids'][
'organism_ids'][$i];
885 $re_accession = $fasta[
'db'][
're_accession'];
886 $db_id = $fasta[
'db'][
'db_id'];
889 'importer_class' => $class,
890 'file_remote' => $file_remote,
891 'analysis_id' => $analysis_id,
892 'seqtype' => $seqtype,
893 'organism_id' => $organism_id,
898 're_accession' => $re_accession,
905 $file_details = array();
907 if ($file_existing) {
908 $file_details[
'fid'] = $file_existing;
910 elseif ($file_upload) {
911 $file_details[
'fid'] = $file_upload;
913 elseif ($file_remote) {
914 $file_details[
'file_remote'] = $file_remote;
918 $importer =
new $class();
920 $importer->formSubmit($form, $form_state);
922 $importer->create($run_args, $file_details);
924 $importer->submitJob();
927 catch (Exception $ex) {
928 drupal_set_message(t(
'Cannot submit import: @msg', array(
'@msg' => $ex->getMessage())),
'error');
931 elseif ($ref_genome ===
'bio') {
932 $eutils = $fourthpage[
"organism-$i"][
'genotype'][
'tripal_eutils'];
933 $class =
'EutilsImporter';
934 tripal_load_include_importer_class($class);
937 'importer_class' => $class,
938 'db' => $eutils[
'db'],
939 'accession' => $eutils[
'accession'],
940 'linked_records' => $eutils[
'options'][
'linked_records'],
944 $importer =
new $class();
945 $importer->create($run_args);
946 $importer->submitJob();
948 catch (Exception $ex) {
949 drupal_set_message(t(
'Cannot submit BioProject: @msg', array(
'@msg' => $ex->getMessage())),
'error');
955 $form_state[
'data'][
'phenotype'] = array();
956 $form_state[
'data'][
'phenotype_meta'] = array();
959 for ($i = 1; $i <= $organism_number; $i++) {
978 $job->logMessage(
'[INFO] - Submitting phenotype data...');
979 $firstpage = $form_state[
'saved_values'][
TPPS_PAGE_1];
980 $fourthpage = $form_state[
'saved_values'][
TPPS_PAGE_4];
981 $phenotype = $fourthpage[
"organism-$i"][
'phenotype'] ?? NULL;
982 $organism_name = $firstpage[
'organism'][$i][
'name'];
983 if (empty($phenotype)) {
989 $phenotype_cvterms = array(
1000 'phenotype' => array(),
1001 'phenotypeprop' => array(),
1002 'stock_phenotype' => array(),
1003 'phenotype_cvterm' => array(),
1005 $phenotype_count = 0;
1008 'records' => $records,
1009 'cvterms' => $phenotype_cvterms,
1010 'accession' => $form_state[
'accession'],
1011 'tree_info' => $form_state[
'tree_info'],
1013 'phenotype_count' => $phenotype_count,
1014 'data' => &$form_state[
'data'][
'phenotype'],
1018 if (!empty($phenotype[
'normal-check'])) {
1019 $phenotype_number = $phenotype[
'phenotypes-meta'][
'number'];
1020 $phenotypes_meta = array();
1021 $data_fid = $phenotype[
'file'];
1022 $phenos_edit = $form_state[
'phenotypes_edit'] ?? NULL;
1026 $env_phenotypes = FALSE;
1028 for ($j = 1; $j <= $phenotype_number; $j++) {
1029 $name = strtolower($phenotype[
'phenotypes-meta'][$j][
'name']);
1030 if (!empty($phenos_edit[$j])) {
1033 $cvterm_id = $phenotype[
'phenotypes-meta'][$j][
'attribute'];
1034 $result = $phenos_edit[$j] + $phenotype[
'phenotypes-meta'][$j];
1035 $phenotype[
'phenotypes-meta'][$j] = $result;
1037 $phenotype[
'phenotypes-meta'][$j][
'attribute'] = $cvterm_id;
1039 $phenotypes_meta[$name] = array();
1040 $phenotypes_meta[$name][
'attr'] = $phenotype[
'phenotypes-meta'][$j][
'attribute'];
1043 if ($phenotype[
'phenotypes-meta'][$j][
'attribute'] ==
'other') {
1044 $phenotypes_meta[$name][
'attr-other'] = $phenotype[
'phenotypes-meta'][$j][
'attr-other'];
1046 $phenotypes_meta[$name][
'desc'] = $phenotype[
'phenotypes-meta'][$j][
'description'];
1047 $phenotypes_meta[$name][
'unit'] = $phenotype[
'phenotypes-meta'][$j][
'units'];
1048 if ($phenotype[
'phenotypes-meta'][$j][
'units'] ==
'other') {
1049 $phenotypes_meta[$name][
'unit-other'] = $phenotype[
'phenotypes-meta'][$j][
'unit-other'];
1051 $phenotypes_meta[$name][
'struct'] = $phenotype[
'phenotypes-meta'][$j][
'structure'];
1052 if ($phenotype[
'phenotypes-meta'][$j][
'structure'] ==
'other') {
1053 $phenotypes_meta[$name][
'struct-other'] = $phenotype[
'phenotypes-meta'][$j][
'struct-other'];
// Record min/max when a value range or binary check is set, or when the unit
// is the 'boolean' cvterm. Each flag gets its own empty() call: empty() only
// suppresses missing-key notices for a plain offset, not for an "or" expression.
1055 if (!empty($phenotype['phenotypes-meta'][$j]['val-check'])
or !empty($phenotype['phenotypes-meta'][$j]['bin-check'])
or $phenotype['phenotypes-meta'][$j]['units'] == tpps_load_cvterm('boolean')->cvterm_id) {
1056 $phenotypes_meta[$name][
'min'] = $phenotype[
'phenotypes-meta'][$j][
'min'];
1057 $phenotypes_meta[$name][
'max'] = $phenotype[
'phenotypes-meta'][$j][
'max'];
1059 $phenotypes_meta[$name][
'env'] = !empty($phenotype[
'phenotypes-meta'][$j][
'env-check']);
1060 if ($phenotypes_meta[$name][
'env']) {
1061 $env_phenotypes = TRUE;
1064 if ($env_phenotypes) {
1069 if ($phenotype[
'check'] ==
'1' || $phenotype[
'check'] ==
'upload_file') {
1070 $meta_fid = $phenotype[
'metadata'];
1071 print_r(
'META_FID:' . $meta_fid .
"\n");
1073 if ($meta_fid > 0) {
1078 $groups = $phenotype[
'metadata-groups'];
1079 $column_vals = $phenotype[
'metadata-columns'];
1080 $struct = array_search(
'5', $column_vals);
1081 $min = array_search(
'6', $column_vals);
1082 $max = array_search(
'7', $column_vals);
1084 'name' => $groups[
'Phenotype Id'][
'1'],
1085 'attr' => $groups[
'Attribute'][
'2'],
1086 'desc' => $groups[
'Description'][
'3'],
1087 'unit' => $groups[
'Units'][
'4'],
// array_search() returns the matching key, or FALSE when nothing matches.
// Compare against FALSE so that a falsy key (0, "0", "") is still kept.
1088 'struct' => ($struct !== FALSE) ? $struct : NULL,
1089 'min' => ($min !== FALSE) ? $min : NULL,
1090 'max' => ($max !== FALSE) ? $max : NULL,
1093 $meta_options = array(
1094 'no_header' => $phenotype[
'metadata-no-header'],
1095 'meta_columns' => $columns,
1096 'meta' => &$phenotypes_meta,
1100 $job->logMessage(
'[INFO] - Processing phenotype_meta file data...');
1103 $job->logMessage(
'[INFO] - Done.');
1106 tpps_job_logger_write(
'[WARNING] - phenotype_meta file id looks incorrect but the UI checkbox was selected. Need to double check this!');
1110 $time_options = array();
1111 if ($phenotype[
'time'][
'time-check']) {
1112 $time_options = $phenotype[
'time'];
1117 $groups = $phenotype[
'file-groups'];
1118 $column_vals = $phenotype[
'file-columns'];
1119 $time_index = ($phenotype[
'format'] == 0) ?
'2' :
'4';
1120 $clone_index = ($phenotype[
'format'] == 0) ?
'3' :
'5';
1121 $time = array_search($time_index, $column_vals);
1122 $clone = array_search($clone_index, $column_vals);
1123 $meta_headers = array(
1124 'name' => $groups[
'Phenotype Name/Identifier'][
'2'] ?? NULL,
1125 'value' => $groups[
'Phenotype Value(s)'][
'3'] ?? NULL,
// $time and $clone come from array_search(): a key on success, FALSE on no
// match. Test for FALSE explicitly so a falsy key is not mistaken for "absent".
1126 'time' => ($time !== FALSE) ? $time : NULL,
1127 'clone' => ($clone !== FALSE) ? $clone : NULL,
1131 if ($phenotype[
'format'] == 0) {
1133 $data_columns = array();
1134 if(is_array($groups[
'Phenotype Data'][
'0']) && !empty($groups[
'Phenotype Data'][
'0'])) {
1135 foreach ($groups[
'Phenotype Data'][
'0'] as $col) {
1136 $data_columns[$col] = $file_headers[$col];
1140 $col = $groups[
'Phenotype Data'][0];
1141 $data_columns[$col] = $file_headers[$col];
1143 unset($file_headers);
1146 $options[
'no_header'] = $phenotype[
'file-no-header'];
1147 $options[
'tree_id'] = $groups[
'Tree Identifier'][
'1'];
1148 $options[
'meta_headers'] = $meta_headers;
1149 $options[
'data_columns'] = $data_columns ?? NULL;
1150 $options[
'meta'] = $phenotypes_meta;
1151 $options[
'file_empty'] = $phenotype[
'file-empty'];
1152 $options[
'organism_name'] = $organism_name;
1154 print_r(
'DATA_FID:' . $data_fid .
"\n");
1156 $job->logMessage(
'[INFO] - Processing phenotype_data file data...');
1158 $form_state[
'data'][
'phenotype_meta'] += $phenotypes_meta;
1160 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
1164 $job->logMessage(
'[INFO] - Done.');
1167 if (!empty($phenotype[
'iso-check'])) {
1168 $iso_fid = $phenotype[
'iso'];
1171 $options[
'iso'] = TRUE;
1172 $options[
'records'] = $records;
1173 $options[
'cvterms'] = $phenotype_cvterms;
1175 $options[
'organism_name'] = $organism_name;
1176 $options[
'meta'] = array(
1177 'desc' =>
"Mass Spectrometry",
1178 'unit' =>
"intensity (arbitrary units)",
1182 print_r(
'ISO_FID:' . $iso_fid .
"\n");
1184 $job->logMessage(
'[INFO] - Processing phenotype_data file data...');
1187 $job->logMessage(
'[INFO] - Inserting phenotype_data into database using insert_multi...');
1190 $job->logMessage(
'[INFO] - Done.');
1208 $job->logMessage(
'[INFO] - Submitting genotype data...');
1209 $firstpage = $form_state[
'saved_values'][
TPPS_PAGE_1];
1210 $fourthpage = $form_state[
'saved_values'][
TPPS_PAGE_4];
1211 $genotype = $fourthpage[
"organism-$i"][
'genotype'] ?? NULL;
1212 if (empty($genotype)) {
1217 $project_id = $form_state[
'ids'][
'project_id'];
1218 $record_group = variable_get(
'tpps_record_group', 10000);
1220 $genotype_count = 0;
1221 $genotype_total = 0;
1224 'genotype_call' => array(
1226 'table' =>
'feature',
1228 'variant_id' =>
'feature_id',
1232 'table' =>
'feature',
1234 'marker_id' =>
'feature_id',
1241 'feature' => array(),
1242 'genotype' => array(),
1243 'genotype_call' => array(),
1244 'stock_genotype' => array(),
1247 $multi_insert_options = array(
1248 'fk_overrides' => $overrides,
1249 'entities' => array(
1250 'label' =>
'Genotype',
1251 'table' =>
'genotype',
1256 'records' => $records,
1257 'tree_info' => $form_state[
'tree_info'],
1258 'species_codes' => $species_codes,
1259 'genotype_count' => &$genotype_count,
1260 'genotype_total' => &$genotype_total,
1261 'project_id' => $project_id,
1262 'seq_var_cvterm' => $seq_var_cvterm,
1263 'multi_insert' => &$multi_insert_options,
1267 if ($genotype[
'ref-genome'] ==
'manual' or $genotype[
'ref-genome'] ==
'manual2' or $genotype[
'ref-genome'] ==
'url') {
1268 if ($genotype[
'tripal_fasta'][
'file_upload']) {
1270 $assembly_user = $genotype[
'tripal_fasta'][
'file_upload'];
1273 if ($genotype[
'tripal_fasta'][
'file_upload_existing']) {
1275 $assembly_user = $genotype[
'tripal_fasta'][
'file_upload_existing'];
1278 if ($genotype[
'tripal_fasta'][
'file_remote']) {
1280 $assembly_user = $genotype[
'tripal_fasta'][
'file_remote'];
1282 'project_id' => $project_id,
1284 'value' => $assembly_user,
1285 'rank' => $form_state[
'file_rank'],
1287 $form_state[
'file_rank']++;
1290 elseif ($genotype[
'ref-genome'] !=
'none') {
1292 'project_id' => $project_id,
1294 'value' => $genotype[
'ref-genome'],
1298 if (!empty($genotype[
'files'][
'file-type'][
'SNPs Genotype Assay'])) {
1299 $snp_fid = $genotype[
'files'][
'snps-assay'];
1302 $options[
'type'] =
'snp';
1304 $options[
'marker'] =
'SNP';
1307 if (!empty($genotype[
'files'][
'file-type'][
'SNPs Associations'])) {
1308 $assoc_fid = $genotype[
'files'][
'snps-association'];
1311 $options[
'records'][
'featureloc'] = array();
1312 $options[
'records'][
'featureprop'] = array();
1313 $options[
'records'][
'feature_relationship'] = array();
1314 $options[
'records'][
'feature_cvterm'] = array();
1315 $options[
'records'][
'feature_cvtermprop'] = array();
1317 $options[
'associations'] = array();
1318 $options[
'associations_tool'] = $genotype[
'files'][
'snps-association-tool'];
1319 $options[
'associations_groups'] = $genotype[
'files'][
'snps-association-groups'];
1321 $options[
'phenotype_meta'] = $form_state[
'data'][
'phenotype_meta'];
1322 $options[
'pub_id'] = $form_state[
'ids'][
'pub_id'];
1324 switch ($genotype[
'files'][
'snps-association-type']) {
1329 case 'Genomic Inflation Factor (GIF)':
1333 case 'P-adjusted (FDR) / Q value':
1337 case 'P-adjusted (FWE)':
1341 case 'P-adjusted (Bonferroni)':
1349 $job->logMessage(
'[INFO] - Processing snp_association file data...');
1352 $job->logMessage(
'[INFO] - Done.');
1354 $multi_insert_options[
'fk_overrides'][
'featureloc'] = array(
1355 'srcfeature' => array(
1356 'table' =>
'feature',
1358 'srcfeature_id' =>
'feature_id',
1362 $multi_insert_options[
'fk_overrides'][
'feature_relationship'] = array(
1364 'table' =>
'feature',
1366 'subject_id' =>
'feature_id',
1370 'table' =>
'feature',
1372 'object_id' =>
'feature_id',
1377 $pop_struct_fid = $genotype[
'files'][
'snps-pop-struct'];
1380 $kinship_fid = $genotype[
'files'][
'snps-kinship'];
1384 $job->logMessage(
'[INFO] - Processing genotype_spreadsheet file data...');
1387 $job->logMessage(
'[INFO] - Done.');
1389 tpps_job_logger_write(
'[INFO] - Inserting genotype_spreadsheet data into database using insert_multi...');
1390 $job->logMessage(
'[INFO] - Inserting genotype_spreadsheet data into database using insert_multi...');
1393 $job->logMessage(
'[INFO] - Done');
1394 $options[
'records'] = $records;
1395 $genotype_total += $genotype_count;
1397 $job->logMessage(
'[INFO] - Genotype count:' . $genotype_count);
1398 $genotype_count = 0;
1401 if (!empty($genotype[
'files'][
'file-type'][
'Assay Design']) and $genotype[
'marker-type'][
'SNPs']) {
1402 if ($genotype[
'files'][
'assay-load'] ==
'new') {
1403 $design_fid = $genotype[
'files'][
'assay-design'];
1405 if ($genotype[
'files'][
'assay-load'] !=
'new') {
1406 $design_fid = $genotype[
'files'][
'assay-load'];
1411 if (!empty($genotype[
'files'][
'file-type'][
'SSRs/cpSSRs Genotype Spreadsheet'])) {
1412 $ssr_fid = $genotype[
'files'][
'ssrs'];
1415 $options[
'type'] =
'ssrs';
1416 $options[
'headers'] =
tpps_ssrs_headers($ssr_fid, $genotype[
'files'][
'ploidy']);
1417 $options[
'marker'] = $genotype[
'SSRs/cpSSRs'];
1419 $options[
'empty'] = $genotype[
'files'][
'ssrs-empty'];
1421 $job->logMessage(
'[INFO] - Processing genotype_spreadsheet file data...');
1424 $job->logMessage(
'[INFO] - Done.');
1427 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
1430 $job->logMessage(
'[INFO] - Done.');
1431 $options[
'records'] = $records;
1432 $genotype_count = 0;
1434 if (!empty($genotype[
'files'][
'ssr-extra-check'])) {
1435 $extra_fid = $genotype[
'files'][
'ssrs_extra'];
1438 $options[
'marker'] = $genotype[
'files'][
'extra-ssr-type'];
1439 $options[
'headers'] =
tpps_ssrs_headers($extra_fid, $genotype[
'files'][
'extra-ploidy']);
1441 $job->logMessage(
'[INFO] - Processing genotype_spreadsheet file data...');
1444 $job->logMessage(
'[INFO] - Done.');
1447 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
1450 $job->logMessage(
'[INFO] - Done.');
1451 $options[
'records'] = $records;
1452 $genotype_count = 0;
1456 if (!empty($genotype[
'files'][
'file-type'][
'Indel Genotype Spreadsheet'])) {
1457 $indel_fid = $genotype[
'files'][
'indels'];
1460 $options[
'type'] =
'indel';
1462 $options[
'marker'] =
'Indel';
1465 $job->logMessage(
'[INFO] - Processing genotype_spreadsheet file data...');
1468 $job->logMessage(
'[INFO] - Done.');
1471 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
1474 $job->logMessage(
'[INFO] - Done.');
1475 $options[
'records'] = $records;
1476 $genotype_total += $genotype_count;
1478 $job->logMessage(
'[INFO] - Genotype count:' . $genotype_total);
1479 $genotype_count = 0;
1482 if (!empty($genotype[
'files'][
'file-type'][
'Other Marker Genotype Spreadsheet'])) {
1483 $other_fid = $genotype[
'files'][
'other'];
1487 if (!empty($genotype[
'files'][
'other-groups'])) {
1488 $groups = $genotype[
'files'][
'other-groups'];
1490 $options[
'tree_id'] = $groups[
'Tree Id'][1];
1493 $options[
'type'] =
'other';
1494 $options[
'marker'] = $genotype[
'other-marker'];
1497 $job->logMessage(
'[INFO] - Processing genotype_spreadsheet file data...');
1500 $job->logMessage(
'[INFO] - Done.');
1503 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
1506 $job->logMessage(
'[INFO] - Done.');
1507 $options[
'records'] = $records;
1508 $genotype_count = 0;
1512 $disable_vcf_import = 0;
1513 if(isset($firstpage[
'disable_vcf_import'])) {
1514 $disable_vcf_import = $firstpage[
'disable_vcf_import'];
1516 tpps_job_logger_write(
'[INFO] Disable VCF Import is set to ' . $disable_vcf_import .
' (0 means allow vcf import, 1 ignore vcf import)');
1519 if (!empty($genotype[
'files'][
'file-type'][
'VCF'])) {
1520 if($disable_vcf_import == 0) {
1522 $vcf_fid = $genotype[
'files'][
'vcf'];
1527 $records[
'genotypeprop'] = array();
1537 $vcf_file = file_load($vcf_fid);
1539 echo
"VCF location: $location\n";
1541 $vcf_content = gzopen($location,
'r');
1544 $current_id = $form_state[
'ids'][
'organism_ids'][$i];
1545 $species_code = $species_codes[$current_id];
1548 echo
"[INFO] Processing Genotype VCF file\n";
1549 $file_progress_line_count = 0;
1551 while (($vcf_line = gzgets($vcf_content)) !== FALSE) {
1552 $file_progress_line_count++;
1553 if($file_progress_line_count % 10000 == 0 && $file_progress_line_count != 0) {
1554 echo
'[INFO] [VCF PROCESSING STATUS] ' . $file_progress_line_count .
" lines done\n";
1556 if ($vcf_line[0] !=
'#' && stripos($vcf_line,
'.vcf') === FALSE && trim($vcf_line) !=
"" && str_replace(
"\0",
"", $vcf_line) !=
"") {
1557 $line_process_start_time = microtime(
true);
1558 $record_count = $record_count + 1;
1559 print_r(
'Record count:' . $record_count .
"\n");
1560 $genotype_count += count($stocks);
1561 $vcf_line = explode(
"\t", $vcf_line);
1562 $scaffold_id = &$vcf_line[0];
1563 $position = &$vcf_line[1];
1564 $variant_name = &$vcf_line[2];
1565 $ref = &$vcf_line[3];
1566 $alt = &$vcf_line[4];
1567 $qual = &$vcf_line[5];
1568 $filter = &$vcf_line[6];
1569 $info = &$vcf_line[7];
1571 if (empty($variant_name) or $variant_name ==
'.') {
1573 $variant_name = $scaffold_id .
'_' . $position .
'SNP';
1576 $marker_name = $scaffold_id .
'_' . $position;
1577 $description =
"$ref:$alt";
1581 $detected_genotypes = array();
1582 $first_genotypes = array();
1583 $count_columns = count($vcf_line);
1584 for ($j = 9; $j < $count_columns; $j++) {
1588 $detected_genotypes[$marker_name . $genotype_combination] = TRUE;
1593 $first_genotypes[$marker_name . $genotype_combination] = TRUE;
1599 $records[
'feature'][$marker_name] = array(
1600 'organism_id' => $current_id,
1601 'uniquename' => $marker_name,
1602 'type_id' => $seq_var_cvterm,
1606 $records[
'feature'][$variant_name] = array(
1607 'organism_id' => $current_id,
1608 'uniquename' => $variant_name,
1609 'type_id' => $seq_var_cvterm,
1616 $genotype_names = array_keys($detected_genotypes);
1620 echo
"line#$file_progress_line_count ";
1621 print_r(
'genotypes per line: ' . count($genotype_names) .
" ");
1623 $genotype_name_progress_count = 0;
1624 foreach ($genotype_names as $genotype_name) {
1625 $genotype_name_progress_count++;
1626 $genotype_desc =
"$marker-$species_code-$genotype_name-$position-$description";
1630 $records[
'genotype'][$genotype_desc] = array(
1631 'name' => $genotype_name,
1632 'uniquename' => $genotype_desc,
1633 'description' => $description,
1634 'type_id' => $snp_cvterm,
1637 if ($format !=
"") {
1638 $records[
'genotypeprop'][
"$genotype_desc-format"] = array(
1639 'type_id' => $format_cvterm,
1642 'genotype' => $genotype_desc,
1647 $vcf_cols_count = count($vcf_line);
1649 echo
"gen_name_index:$genotype_name_progress_count colcount:$vcf_cols_count ";
1650 for ($j = 9; $j < $vcf_cols_count; $j++) {
1655 if($column_genotype_name == $genotype_name) {
1659 $records[
'genotype_call'][
"{$stocks[$j - 9]}-$genotype_name"] = array(
1660 'project_id' => $project_id,
1661 'stock_id' => $stocks[$j - 9],
1663 'genotype' => $genotype_desc,
1664 'variant' => $variant_name,
1665 'marker' => $marker_name,
1669 $records[
'stock_genotype'][
"{$stocks[$j - 9]}-$genotype_name"] = array(
1670 'stock_id' => $stocks[$j - 9],
1672 'genotype' => $genotype_desc,
1680 $records[
'genotypeprop'][
"$genotype_desc-qual"] = array(
1681 'type_id' => $qual_cvterm,
1684 'genotype' => $genotype_desc,
1689 $records[
'genotypeprop'][
"$genotype_desc-filter"] = array(
1690 'type_id' => $filter_cvterm,
1691 'value' => ($filter ==
'.') ?
"P" :
"NP",
1693 'genotype' => $genotype_desc,
1698 $info_vals = explode(
";", $info);
1699 foreach ($info_vals as $key => $val) {
1700 $parts = explode(
"=", $val);
1701 unset($info_vals[$key]);
1702 $info_vals[$parts[0]] = isset($parts[1]) ? $parts[1] :
'';
1707 if (isset($info_vals[
'AF']) and $info_vals[
'AF'] !=
'') {
1708 $records[
'genotypeprop'][
"$genotype_desc-freq"] = array(
1709 'type_id' => $freq_cvterm,
1710 'value' => $info_vals[
'AF'],
1712 'genotype' => $genotype_desc,
1719 if (isset($info_vals[
'DP']) and $info_vals[
'DP'] !=
'') {
1720 $records[
'genotypeprop'][
"$genotype_desc-depth"] = array(
1721 'type_id' => $depth_cvterm,
1722 'value' => $info_vals[
'DP'],
1724 'genotype' => $genotype_desc,
1731 if (isset($info_vals[
'NS']) and $info_vals[
'NS'] !=
'') {
1732 $records[
'genotypeprop'][
"$genotype_desc-n_sample"] = array(
1733 'type_id' => $n_sample_cvterm,
1734 'value' => $info_vals[
'NS'],
1736 'genotype' => $genotype_desc,
1741 $line_process_end_time = microtime(
true);
1742 $line_process_elapsed_time = $line_process_end_time - $line_process_start_time;
1743 echo
" PHP Proctime: $line_process_elapsed_time seconds\n";
1744 if(!isset($line_process_cumulative_time)) {
1745 $line_process_cumulative_time = 0;
1747 $line_process_cumulative_time += $line_process_elapsed_time;
1748 echo
"Cumulative PHP proctime: " . $line_process_cumulative_time .
" seconds\n";
1749 echo
"\nGenotype call records to insert (LINE:$file_progress_line_count): " . count($records[
'genotype_call']);
1750 echo
"\nrecord group threshold: $record_group ";
1755 if ($genotype_count > $record_group) {
// The timing variables are only assigned after a bulk insert has completed,
// so on the first flush there is no "last insert" to report. Guard the
// report instead of reading undefined variables, and log each message once
// per sink (the elapsed-time pair used to be emitted twice).
if (isset($insert_elapsed_time, $insert_cumulative_time)) {
1756 tpps_job_logger_write(
'[INFO] - Last bulk insert of ' . $record_group .
' took ' . $insert_elapsed_time .
' seconds');
1757 $job->logMessage(
'[INFO] - Last bulk insert of ' . $record_group .
' took ' . $insert_elapsed_time .
' seconds');
1760 tpps_job_logger_write(
'[INFO] - Last insert cumulative time: ' . $insert_cumulative_time .
' seconds');
1761 $job->logMessage(
'[INFO] - Last insert cumulative time: ' . $insert_cumulative_time .
' seconds');
}
1762 $genotype_count = 0;
1763 $insert_start_time = microtime(
true);
1765 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
1768 $job->logMessage(
'[INFO] - Done.');
1769 $insert_end_time = microtime(
true);
1770 $insert_elapsed_time = $insert_end_time - $insert_start_time;
1771 tpps_job_logger_write(
'[INFO] - Bulk insert of ' . $record_group .
' took ' . $insert_elapsed_time .
' seconds');
1772 $job->logMessage(
'[INFO] - Bulk insert of ' . $record_group .
' took ' . $insert_elapsed_time .
' seconds');
1773 tpps_job_logger_write(
'[INFO] - Bulk insert of ' . $record_group .
' took ' . $insert_elapsed_time .
' seconds');
1774 $job->logMessage(
'[INFO] - Bulk insert of ' . $record_group .
' took ' . $insert_elapsed_time .
' seconds');
1775 if(!isset($insert_cumulative_time)) {
1776 $insert_cumulative_time = 0;
1778 $insert_cumulative_time += $insert_elapsed_time;
1780 $job->logMessage(
'[INFO] - Insert cumulative time: ' . $insert_cumulative_time .
' seconds');
1783 'feature' => array(),
1784 'genotype' => array(),
1785 'genotype_call' => array(),
1786 'genotypeprop' => array(),
1787 'stock_genotype' => array(),
1789 $genotype_count = 0;
1792 elseif (preg_match(
'/##FORMAT=/', $vcf_line)) {
1793 $format .= substr($vcf_line, 9, -1);
1795 elseif (preg_match(
'/#CHROM/', $vcf_line)) {
1796 $vcf_line = explode(
"\t", $vcf_line);
1797 for ($j = 9; $j < count($vcf_line); $j++) {
1798 $stocks[] = $form_state[
'tree_info'][trim($vcf_line[$j])][
'stock_id'];
1804 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
1807 $job->logMessage(
'[INFO] - Done.');
1809 $genotype_count = 0;
1823 if ($study_accession ==
"") {
1828 if ($vcf_location ==
"") {
1834 $path =
'public://';
1835 $public_path = drupal_realpath($path);
1837 echo(
'[PUBLIC PATH] ' . $public_path .
"\n");
1840 $module_path = DRUPAL_ROOT .
'/' . drupal_get_path(
'module',
'tpps');
1842 echo(
'[MODULE PATH] ' . $module_path .
"\n");
1845 $tools_path = $module_path .
"/tools";
1847 echo(
'[TOOLS PATH] ' . $tools_path .
"\n");
1850 $popstruct_temp_dir = $public_path .
'/popstruct_temp/' . $study_accession;
// The directory is reused between runs (stale files are removed from it just
// below), so only create it when it is missing; an unconditional mkdir()
// raises a "File exists" warning on every rerun.
1851 if (!is_dir($popstruct_temp_dir)) { mkdir($popstruct_temp_dir, 0755,
true); }
1854 $files = glob($popstruct_temp_dir .
'/*');
1855 foreach($files as $file){
1856 if(is_file($file)) {
1858 echo(
"[CLEAN UP BEFORE BEGIN] Removing $file from the popstruct directory\n");
1860 echo(
"[FILE CLEAN/DELETE] $file\n");
1866 $flag_using_temp_file =
false;
1870 $vcf_location_temp = $vcf_location;
1871 if (stripos($vcf_location,
'.gz') !== FALSE) {
1875 $flag_using_temp_file =
true;
1878 $file_name_without_ext = basename($vcf_location,
".gz");
1881 shell_exec(
"gunzip -c " . $vcf_location .
" > " . $popstruct_temp_dir .
"/" . $file_name_without_ext);
1884 $vcf_location_temp = $popstruct_temp_dir .
"/" . $file_name_without_ext;
1888 echo(
"[VCF_LOCATION_TEMP] $vcf_location_temp");
1896 echo(
"PERFORM PLINK");
1897 echo shell_exec($tools_path .
'/plink/plink --vcf ' . $vcf_location_temp .
" --allow-extra-chr --double-id --make-bed --out " . $popstruct_temp_dir .
'/' . $study_accession.
'_popstruct_plink');
1908 for($i=1; $i <= 10; $i++) {
1910 echo(
"Performing FastStructure for k = $i\n");
1911 $fast_structure_cmd =
'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib; export CFLAGS="-I/usr/local/include"; export LDFLAGS="-L/usr/local/lib"; python ' . $tools_path .
"/fastStructure/structure.py -K " . $i .
" --input=" . $popstruct_temp_dir .
'/' . $study_accession.
'_popstruct_plink' .
" --output=" . $popstruct_temp_dir .
'/' . $study_accession.
'_popstruct_plink' .
' --full;';
1912 echo shell_exec($fast_structure_cmd);
1919 echo(
"[INFO] Perform chooseK...\n");
1920 $chooseK_cmd =
'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib; export CFLAGS="-I/usr/local/include"; export LDFLAGS="-L/usr/local/lib"; python ' . $tools_path .
'/fastStructure/chooseK.py --input=' . $popstruct_temp_dir .
'/' . $study_accession.
'_popstruct_plink';
1921 $chooseK_output = shell_exec($chooseK_cmd);
1922 echo $chooseK_output .
"\n";
1927 $chooseK_lines = explode(
"\n", $chooseK_output);
1928 $chooseK_lines_count = count($chooseK_lines);
1929 $chooseK_optimal = 0;
1930 for ($i=0; $i<$chooseK_lines_count; $i++) {
1931 $line = $chooseK_lines[$i];
1933 $chooseK_parts = explode(
'Model complexity that maximizes marginal likelihood = ', $line);
1936 $chooseK_parts = explode(
'Model components used to explain structure in data = ', $line);
1940 if($chooseK_parts[1] > $chooseK_optimal) {
1941 $chooseK_optimal = $chooseK_parts[1];
1945 echo(
"Optimal K is " . $chooseK_optimal .
"\n");
1952 echo(
"AWK AND SED adjustments");
1953 $cmd_custom_cmds1 =
"awk 'BEGIN { OFS = \"_\" } ;{print $1,$2}' " . $popstruct_temp_dir .
'/' . $study_accession .
'_popstruct_plink.fam > ' . $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_IDPanel.txt;";
1954 $cmd_custom_cmds1 .=
"sed 's/_/\t/g' " . $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_IDPanel.txt > " . $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_IDPaneltab.txt;";
1955 $cmd_custom_cmds1 .=
"awk '{print $1,$2}' " . $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_IDPaneltab.txt > " . $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_IDfamPanel.txt;";
1956 echo shell_exec($cmd_custom_cmds1);
1959 $count_output = shell_exec(
"wc -l " . $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_IDPanel.txt");
1961 echo($count_output .
"\n");
1962 $count_output_parts = explode(
' ', $count_output);
1963 $population_count = $count_output_parts[0];
1965 echo(
"Population count:" . $population_count .
"\n");
1970 echo(
"RScript popstruct_from_panel execution\n");
1971 $cmd_custom_r_code =
"Rscript " . $tools_path .
"/popstruct_from_panel.R ";
1972 $cmd_custom_r_code .= $study_accession .
" ";
1973 $cmd_custom_r_code .= $population_count .
" ";
1974 $cmd_custom_r_code .= $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_plink." . $chooseK_optimal.
".meanQ ";
1975 $cmd_custom_r_code .= $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_IDfamPanel.txt ";
1976 $cmd_custom_r_code .= $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_PopPanel.txt";
1978 echo shell_exec($cmd_custom_r_code);
1982 $cmd_remove_column_code =
"cut -d\\ -f2- " . $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_PopPanel.txt > " . $popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_PopPanel_final.txt";
1983 echo shell_exec($cmd_remove_column_code);
1990 $file_handle = fopen($popstruct_temp_dir .
'/' . $study_accession .
"_popstruct_PopPanel_final.txt",
"r");
1993 while (($line = fgets($file_handle)) !==
false) {
1995 $line_space_parts = explode(
" ", $line);
1996 $tree_id = $study_accession .
'-' . $line_space_parts[0];
1998 'tree_id' => $tree_id,
2002 'study_accession' => $study_accession
2004 if(count($line_space_parts) >= 4) {
2005 $population_group = $line_space_parts[3];
2006 if (strpos($population_group,
'e') !== FALSE) {
2007 $population_group = 1;
2010 $population_group = intval(ceil($population_group)) + 1;
2012 echo $population_group .
',';
2013 $tree_info[
'population'] = $population_group;
2014 $tree_data[$tree_id] = $tree_info;
2018 fclose($file_handle);
2022 echo(
"Removing all popstruct data for accession $study_accession\n");
// Bind the accession as a query argument rather than splicing it into the
// SQL text, matching how the other chado_query() calls in this file pass
// their values.
2023 chado_query(
"DELETE FROM public.cartogratree_popstruct_layer WHERE study_accession = :study_accession", array(
':study_accession' => $study_accession));
2027 $sql_locations =
'SELECT * FROM public.ct_trees WHERE uniquename IN (';
2028 $sql_tree_ids_list =
'';
2030 $sql_tree_ids_list =
'';
2031 foreach($tree_data as $tree_info) {
2032 if($tree_id_count != 0) {
2033 $sql_tree_ids_list .=
',';
2035 $sql_tree_ids_list .=
"'" . $tree_info[
'tree_id'] .
"'";
2036 $tree_id_count = $tree_id_count + 1;
2038 $sql_locations .= $sql_tree_ids_list;
2040 $sql_locations .=
')';
2041 $results = chado_query($sql_locations);
2042 foreach($results as $row) {
2043 $tree_id = $row->uniquename;
2045 $tree_data[$tree_id][
'latitude'] = $row->latitude;
2046 $tree_data[$tree_id][
'longitude'] = $row->longitude;
2047 $insert_sql =
"INSERT INTO public.cartogratree_popstruct_layer (uniquename,population,study_accession,latitude,longitude) ";
2048 $insert_sql .=
"VALUES (";
2049 $insert_sql .=
"'" . $tree_id .
"',". $tree_data[$tree_id][
'population'] .
",";
// The column list ends with (study_accession, latitude, longitude): the last
// value must be the tree's longitude, not a second copy of its latitude.
2050 $insert_sql .=
"'" . $study_accession .
"',". $tree_data[$tree_id][
'latitude'] .
"," . $tree_data[$tree_id][
'longitude'] .
"";
2053 chado_query($insert_sql);
2057 echo(
"POPSTRUCT completed.\n");
2075 $raw_value_colon_parts = explode(
':',$raw_value);
2076 $ref_alt_indices = explode(
'/', $raw_value_colon_parts[0]);
2077 $genotype_combination =
"";
2078 $count_indices = count($ref_alt_indices);
2079 for($k = 0; $k < $count_indices; $k++) {
2080 $index_tmp = $ref_alt_indices[$k];
2082 $genotype_combination .=
':';
2084 if($index_tmp == 0) {
2085 $genotype_combination .= $ref;
2088 $genotype_combination .= $alt;
2091 return $genotype_combination;
2107 $job->logMessage(
'[INFO] - Submitting environment data...');
2108 $fourthpage = $form_state[
'saved_values'][
TPPS_PAGE_4];
2109 $environment = $fourthpage[
"organism-$i"][
'environment'] ?? NULL;
2110 if (empty($environment)) {
2115 $env_layers = isset($environment[
'env_layers']) ? $environment[
'env_layers'] : FALSE;
2116 $env_params = isset($environment[
'env_params']) ? $environment[
'env_params'] : FALSE;
2119 $species_index =
"species-$i";
2120 if (empty($form_state[
'saved_values'][
TPPS_PAGE_3][
'tree-accession'][
'check'])) {
2121 $species_index =
"species-1";
2123 $tree_accession = $form_state[
'saved_values'][
TPPS_PAGE_3][
'tree-accession'][$species_index];
2124 $tree_acc_fid = $tree_accession[
'file'];
2125 if (!empty($form_state[
'revised_files'][$tree_acc_fid]) and (file_load($form_state[
'revised_files'][$tree_acc_fid]))) {
2126 $tree_acc_fid = $form_state[
'revised_files'][$tree_acc_fid];
2131 if (db_table_exists(
'cartogratree_layers') and db_table_exists(
'cartogratree_fields')) {
2132 $layers_params = array();
2134 'phenotype' => array(),
2135 'phenotype_cvterm' => array(),
2136 'stock_phenotype' => array(),
2139 foreach ($env_layers as $layer_name => $layer_id) {
2140 if ($layer_name ==
'other' or $layer_name ==
'other_db' or $layer_name ==
'other_name' or $layer_name ==
'other_params') {
2143 if (!empty($layer_id) and !empty($env_params[$layer_name])) {
2144 $layers_params[$layer_id] = array();
2145 $params = $env_params[$layer_name];
2146 foreach ($params as $param_name => $param_id) {
2147 if (!empty($param_id)) {
2148 $layers_params[$layer_id][$param_id] = $param_name;
2152 elseif (!empty($layer_id) and preg_match(
'/worldclim_subgroup_(.+)/', $layer_id, $matches)) {
2153 $subgroup_id = $matches[1];
2154 $layers = db_select(
'cartogratree_layers',
'l')
2155 ->fields(
'l', array(
'layer_id'))
2156 ->condition(
'subgroup_id', $subgroup_id)
2158 while (($layer = $layers->fetchObject())) {
2159 $params = db_select(
'cartogratree_fields',
'f')
2160 ->fields(
'f', array(
'field_id',
'display_name'))
2161 ->condition(
'layer_id', $layer->layer_id)
2163 while (($param = $params->fetchObject())) {
2164 $layers_params[$layer->layer_id][$param->field_id] = $param->display_name;
2171 'no_header' => !empty($tree_accession[
'file-no-header']),
2172 'records' => $records,
2173 'tree_id' => $tree_accession[
'file-groups'][
'Tree Id'][1],
2174 'accession' => $form_state[
'accession'],
2175 'tree_info' => $form_state[
'tree_info'],
2176 'layers_params' => $layers_params,
2177 'env_count' => &$env_count,
2178 'env_cvterm' => $env_cvterm,
2183 $job->logMessage(
'[INFO] - Processing environmental_layers file data...');
2186 $job->logMessage(
'[INFO] - Done.');
2189 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
2192 $job->logMessage(
'[INFO] - Done.');
2193 unset($options[
'records']);
2207 $cols = $options[
'cols'];
2208 $search = &$options[
'search'];
2209 $org_full_name = $row[$cols[
'org']] ??
"{$row[$cols['genus']]} {$row[$cols['species']]}";
2210 if ($search == $org_full_name) {
2211 $options[
'found'] = TRUE;
2226 $columns = $options[
'meta_columns'];
2227 $meta = &$options[
'meta'];
2229 $name = strtolower($row[$columns[
'name']]);
2230 $meta[$name] = array();
2231 $meta[$name][
'attr'] =
'other';
2232 $meta[$name][
'attr-other'] = $row[$columns[
'attr']];
2233 $meta[$name][
'desc'] = $row[$columns[
'desc']];
2234 $meta[$name][
'unit'] =
'other';
2235 $meta[$name][
'unit-other'] = $row[$columns[
'unit']];
2236 if (!empty($columns[
'struct']) and isset($row[$columns[
'struct']]) and $row[$columns[
'struct']] !=
'') {
2237 $meta[$name][
'struct'] =
'other';
2238 $meta[$name][
'struct-other'] = $row[$columns[
'struct']];
2240 if (!empty($columns[
'min']) and isset($row[$columns[
'min']]) and $row[$columns[
'min']] !=
'') {
2241 $meta[$name][
'min'] = $row[$columns[
'min']];
2243 if (!empty($columns[
'max']) and isset($row[$columns[
'max']]) and $row[$columns[
'max']] !=
'') {
2244 $meta[$name][
'max'] = $row[$columns[
'max']];
2261 $cvt_cache = array();
2262 $local_cv = chado_get_cv(array(
'name' =>
'local'));
2263 $local_db = variable_get(
'tpps_local_db');
2264 $term_types = array(
2266 'label' =>
'Attribute',
2267 'ontology' =>
'pato',
2274 'label' =>
'Structure',
2279 foreach ($meta as $name => $data) {
2280 foreach ($term_types as $type => $info) {
2281 $meta[$name][
"{$type}_id"] = $data[
"{$type}"];
2282 if ($data[
"{$type}"] ==
'other') {
2283 $meta[$name][
"{$type}_id"] = $cvt_cache[$data[
"{$type}-other"]] ?? NULL;
2284 if (empty($meta[$name][
"{$type}_id"])) {
2286 if ($result !== FALSE) {
2287 $meta[$name][
"{$type}_id"] = $result->cvterm_id;
2288 $job->logMessage(
"[INFO] New OLS Term {$info['ontology']}:{$data["{$type}-other
"]} installed");
2291 if (empty($meta[$name][
"{$type}_id"])) {
2292 $term = chado_select_record(
'cvterm', array(
'cvterm_id'), array(
2294 'data' => $data[
"{$type}-other"],
2300 $meta[$name][
"{$type}_id"] = current($term)->cvterm_id ?? NULL;
2303 if (empty($meta[$name][
"{$type}_id"])) {
2304 $meta[$name][
"{$type}_id"] = chado_insert_cvterm(array(
2305 'id' =>
"{$local_db->name}:{$data["{$type}-other
"]}",
2307 'name' => $data[
"{$type}"] .
'-other',
2309 'cv_name' => $local_cv->name,
2311 if (!empty($meta[$name][
"{$type}_id"])) {
2312 $job->logMessage(
"[INFO] New Local {$info['label']} Term {$data["{$type}-other
"]} installed");
2315 $cvt_cache[$data[
"{$type}-other"]] = $meta[$name][
"{$type}_id"];
2320 if (!empty($time_options[
'time_phenotypes'][strtolower($name)])) {
2321 $meta[$name][
'time'] = $time_options[
'time_values'][strtolower($name)];
2322 if (empty($meta[$name][
'time'])) {
2323 $meta[$name][
'time'] = TRUE;
2345 $iso = $options[
'iso'] ?? FALSE;
2346 $records = &$options[
'records'];
2347 $meta_headers = $options[
'meta_headers'] ?? NULL;
2348 $file_headers = $options[
'file_headers'] ?? NULL;
2349 $cvterms = $options[
'cvterms'];
2350 $meta = $options[
'meta'];
2351 $empty = $options[
'file-empty'] ?? NULL;
2352 $accession = $options[
'accession'];
2353 $suffix = &$options[
'suffix'];
2354 $tree_info = &$options[
'tree_info'];
2355 $phenotype_count = &$options[
'phenotype_count'];
2356 $organism_name = &$options[
'organism_name'];
2357 $record_group = variable_get(
'tpps_record_group', 10000);
2361 $organism_name_parts = explode(
' ', $organism_name, 2);
2362 $genus = $organism_name_parts[0];
2363 $species = $organism_name_parts[1];
2366 if ($genus ==
"" || $species ==
"") {
2367 throw new Exception(
'Organism genus and species could not be processed. Please ensure you added an organism that exists within the chado.organism table!');
2371 $organism_id_results = chado_query(
'SELECT * FROM chado.organism WHERE genus = :genus and species = :species ORDER BY organism_id ASC LIMIT 1', array(
2373 ':species' => $species
2378 foreach($organism_id_results as $organism_id_row) {
2379 $organism_id = $organism_id_row->organism_id;
2383 if($organism_id == -1 || $organism_id ==
"") {
2384 throw new Exception(
'Could not find organism id for ' . $organism_name.
'. This organism does not seem to exist in the chado.organism table!');
2387 $cvterm_id_4lettercode = -1;
2389 $cvterm_results = chado_query(
'SELECT * FROM chado.cvterm WHERE name = :name LIMIT 1', array(
2390 ':name' =>
'organism 4 letter code' 2392 foreach($cvterm_results as $cvterm_row) {
2393 $cvterm_id_4lettercode = $cvterm_row->cvterm_id;
2395 if($cvterm_id_4lettercode == -1 || $cvterm_id_4lettercode ==
"") {
2396 throw new Exception(
'Could not find the cvterm id for organism 4 letter code within the chado.cvterm table. This is needed to generate the phenotype name.');
2400 $value_4lettercode =
"";
2401 $organismprop_results = chado_query(
'SELECT * FROM chado.organismprop WHERE type_id = :type_id AND organism_id = :organism_id LIMIT 1', array(
2402 ':type_id' => $cvterm_id_4lettercode,
2403 ':organism_id' => $organism_id
2405 foreach ($organismprop_results as $organismprop_row) {
2406 $value_4lettercode = $organismprop_row->value;
2409 if($value_4lettercode ==
"" || $value_4lettercode == null) {
2410 throw new Exception(
'4 letter code could not be found for ' . $organism_name .
' in the chado.organismprop table. This is needed to create the phenotype_name.');
2414 if (isset($meta_headers[
'name']) and (isset($meta_headers[
'value']))) {
2415 $id = $row[$meta_headers[
'value']];
2416 $values = array($id => $row[$meta_headers[
'name']]);
2419 if (!empty($options[
'data_columns'])) {
2420 $values = $options[
'data_columns'];
2423 $tree_id = $row[$options[
'tree_id']];
2424 $clone_col = $meta_headers[
'clone'] ?? NULL;
2425 if (isset($clone_col) and !empty($row[$clone_col]) and $row[$clone_col] !== $empty) {
2426 $tree_id .=
"-" . $row[$clone_col];
2430 foreach ($row as $id => $value) {
2431 if (empty($tree_id)) {
2435 $values[$id] = $file_headers[$id];
2439 if($tree_id == null || $tree_id ==
"") {
2440 throw new Exception(
'tree_id was null or empty - there might be a problem with the format of the phenotype data file or selected column options for the file via the user information, cannot continue until resolved.');
2446 $phenotype_name_previous =
"<none set>";
2447 foreach ($values as $id => $name) {
2448 if($name == null || $name ==
"") {
2449 throw new Exception(
'Phenotype name was null or empty - there might be a problem with the format of the phenotype data file or selected column options for the file via the user information, cannot continue until resolved.');
2451 $attr_id = $iso ? $meta[
'attr_id'] : $meta[strtolower($name)][
'attr_id'];
2453 if($attr_id == null || $attr_id ==
"") {
2454 print_r(
'$meta[attr_id]:' . $meta[
'attr_id'] .
"\n");
2455 print_r(
'$name:' . $name .
"\n");
2456 print_r(
'$meta[$name]:' . $meta[strtolower($name)][
'attr_id'] .
"\n");
2457 print_r(
'$attr_id:' . $attr_id .
"\n");
2458 throw new Exception(
'Attribute id is null which causes phenotype data to not be added to database correctly.');
2461 $phenotype_name =
"$accession-$tree_id-$name-$suffix";
2462 $phenotype_name .=
'-' . $value_4lettercode;
2463 $options[
'data'][
"$tree_id-$name-$suffix"] = array(
2464 'uniquename' =>
"$tree_id-$name-$suffix",
2466 'stock_id' => $tree_info[$tree_id][
'stock_id'],
2472 $records[
'phenotype'][$phenotype_name] = array(
2473 'uniquename' => $phenotype_name,
2475 'attr_id' => $attr_id,
2476 'observable_id' => $meta[strtolower($name)][
'struct_id'] ?? NULL,
2481 $records[
'stock_phenotype'][$phenotype_name] = array(
2482 'stock_id' => $tree_info[$tree_id][
'stock_id'],
2484 'phenotype' => $phenotype_name,
2489 if (isset($meta[strtolower($name)][
'time'])) {
2490 $records[
'phenotypeprop'][
"$phenotype_name-time"] = array(
2491 'type_id' => $cvterms[
'time'],
2492 'value' => $meta[strtolower($name)][
'time'],
2494 'phenotype' => $phenotype_name,
2498 $options[
'data'][$phenotype_name][
'time'] = $meta[strtolower($name)][
'time'];
2500 elseif (isset($meta_headers[
'time'])) {
2501 $val = $row[$meta_headers[
'time']];
2505 $records[
'phenotypeprop'][
"$phenotype_name-time"] = array(
2506 'type_id' => $cvterms[
'time'],
2509 'phenotype' => $phenotype_name,
2513 $options[
'data'][$phenotype_name][
'time'] = $val;
2517 $records[
'phenotypeprop'][
"$phenotype_name-desc"] = array(
2518 'type_id' => $cvterms[
'desc'],
2519 'value' => $iso ? $meta[
'desc'] : $meta[strtolower($name)][
'desc'],
2521 'phenotype' => $phenotype_name,
2528 $records[
'phenotypeprop'][
"$phenotype_name-unit"] = array(
2529 'type_id' => $cvterms[
'unit'],
2530 'value' => $meta[
'unit'],
2532 'phenotype' => $phenotype_name,
2539 $records[
'phenotype_cvterm'][
"$phenotype_name-unit"] = array(
2540 'cvterm_id' => $meta[strtolower($name)][
'unit_id'],
2542 'phenotype' => $phenotype_name,
2548 if (isset($meta[strtolower($name)][
'min'])) {
2549 $records[
'phenotypeprop'][
"$phenotype_name-min"] = array(
2550 'type_id' => $cvterms[
'min'],
2551 'value' => $meta[strtolower($name)][
'min'],
2553 'phenotype' => $phenotype_name,
2559 if (isset($meta[strtolower($name)][
'max'])) {
2560 $records[
'phenotypeprop'][
"$phenotype_name-max"] = array(
2561 'type_id' => $cvterms[
'max'],
2562 'value' => $meta[strtolower($name)][
'max'],
2564 'phenotype' => $phenotype_name,
2570 if (!empty($meta[strtolower($name)][
'env'])) {
2571 $records[
'phenotype_cvterm'][
"$phenotype_name-env"] = array(
2572 'cvterm_id' => $cvterms[
'environment'],
2574 'phenotype' => $phenotype_name,
2582 if ($phenotype_count > $record_group) {
2586 $job->logMessage(
'[INFO] -- Inserting data into database using insert_multi...');
2590 $job->logMessage(
'[INFO] - Done.');
2600 'phenotype' => array(),
2601 'phenotypeprop' => array(),
2602 'stock_phenotype' => array(),
2604 $phenotype_count = 0;
2628 $type = $options[
'type'];
2629 $records = &$options[
'records'];
2630 $headers = $options[
'headers'];
2631 $tree_info = &$options[
'tree_info'];
2632 $species_codes = $options[
'species_codes'];
2633 $genotype_count = &$options[
'genotype_count'];
2634 $project_id = $options[
'project_id'];
2635 $marker = $options[
'marker'];
2636 $type_cvterm = $options[
'type_cvterm'];
2637 $seq_var_cvterm = $options[
'seq_var_cvterm'];
2638 $multi_insert_options = $options[
'multi_insert'];
2639 $associations = $options[
'associations'] ?? array();
2641 $record_group = variable_get(
'tpps_record_group', 10000);
// Fragment of the genotype spreadsheet row handler; presumably the body of
// tpps_process_genotype_spreadsheet($row, array &$options) - TODO confirm, the
// function header and the unpacking of $options into locals are not shown.
//
// For every cell of $row this fills keyed entries in $records for the
// feature, genotype, genotype_call and stock_genotype tables, plus the
// association tables when $associations has an entry for the variant, and
// runs a batch step once $genotype_count reaches $record_group.

// When a tree id column is configured, resolve the stock id, organism id and
// species code for the whole row from that column's value.
2644 if (!empty($options[
'tree_id'])) {
2645 $val = $row[$options[
'tree_id']];
2646 $stock_id = $tree_info[trim($val)][
'stock_id'];
2647 $current_id = $tree_info[trim($val)][
'organism_id'];
2648 $species_code = $species_codes[$current_id];
// Walk every cell of the row; $key is the column key, $val the cell value.
2650 foreach ($row as $key => $val) {
// Columns without a header entry get special handling (body not shown here).
2651 if (empty($headers[$key])) {
// No stock resolved yet: treat the current cell value as the tree id.
2655 if (!isset($stock_id)) {
2656 $stock_id = $tree_info[trim($val)][
'stock_id'];
2657 $current_id = $tree_info[trim($val)][
'organism_id'];
2658 $species_code = $species_codes[$current_id];
// SSR cell whose value equals the configured "empty" marker.
2663 if ($type ==
'ssrs' and !empty($options[
'empty']) and $val == $options[
'empty']) {
// SSR cell whose value is the integer 0 or the string "0".
2667 if ($type ==
'ssrs' and ($val === 0 or $val ===
"0")) {
// Derive the variant, marker and genotype names from the column header,
// the marker label, the species code and the cell value.
2671 $variant_name = $headers[$key];
2672 $marker_name = $variant_name . $marker;
2673 $genotype_name =
"$marker-$variant_name-$species_code-$val";
// Feature record for the marker, keyed by its name.
2675 $records[
'feature'][$marker_name] = array(
2676 'organism_id' => $current_id,
2677 'uniquename' => $marker_name,
2678 'type_id' => $seq_var_cvterm,
// Feature record for the variant, keyed by its name.
2681 $records[
'feature'][$variant_name] = array(
2682 'organism_id' => $current_id,
2683 'uniquename' => $variant_name,
2684 'type_id' => $seq_var_cvterm,
// Association data exists for this variant: add the scaffold feature, the
// association feature and the records that link them.
2687 if (!empty($associations) and !empty($associations[$variant_name])) {
2688 $association = $associations[$variant_name];
2689 $assoc_feature_name =
"{$variant_name}-{$options['associations_type']}-{$association['trait']}";
// Feature record for the scaffold named by the association.
2691 $records[
'feature'][$association[
'scaffold']] = array(
2692 'organism_id' => $current_id,
2693 'uniquename' => $association[
'scaffold'],
2694 'type_id' => $options[
'scaffold_cvterm'],
// Feature record for the association itself.
2697 $records[
'feature'][$assoc_feature_name] = array(
2698 'organism_id' => $current_id,
2699 'uniquename' => $assoc_feature_name,
2700 'type_id' => $seq_var_cvterm,
// Attach the trait attribute cvterm to the association feature when known.
2703 if (!empty($association[
'trait_attr'])) {
2704 $records[
'feature_cvterm'][$assoc_feature_name] = array(
2705 'cvterm_id' => $association[
'trait_attr'],
2706 'pub_id' => $options[
'pub_id'],
2708 'feature' => $assoc_feature_name,
// Attach the trait observable as a property of that feature_cvterm.
2712 if (!empty($association[
'trait_obs'])) {
2713 $records[
'feature_cvtermprop'][$assoc_feature_name] = array(
2714 'type_id' => $association[
'trait_obs'],
2716 'feature_cvterm' => $assoc_feature_name,
// Property on the association feature typed by the association type.
2722 $records[
'featureprop'][$assoc_feature_name] = array(
2723 'type_id' => $options[
'associations_type'],
2725 'feature' => $assoc_feature_name,
// Location of the variant on the scaffold.
2729 $records[
'featureloc'][$variant_name] = array(
2730 'fmin' => $association[
'start'],
2731 'fmax' => $association[
'stop'],
2732 'residue_info' => $association[
'allele'],
2734 'feature' => $variant_name,
2735 'srcfeature' => $association[
'scaffold'],
// Relationship from the variant (subject) to the association feature
// (object); the confidence value is stored as the relationship value.
2739 $records[
'feature_relationship'][$assoc_feature_name] = array(
2740 'type_id' => $options[
'associations_type'],
2741 'value' => $association[
'confidence'],
2743 'subject' => $variant_name,
2744 'object' => $assoc_feature_name,
// Genotype record; the raw cell value is kept as the description.
2749 $records[
'genotype'][$genotype_name] = array(
2750 'name' => $genotype_name,
2751 'uniquename' => $genotype_name,
2752 'description' => $val,
2753 'type_id' => $type_cvterm,
// Genotype call tying the genotype to the stock, project, variant and marker.
2756 $records[
'genotype_call'][
"$stock_id-$genotype_name"] = array(
2757 'project_id' => $project_id,
2758 'stock_id' => $stock_id,
2760 'genotype' => $genotype_name,
2761 'variant' => $variant_name,
2762 'marker' => $marker_name,
// Direct stock-to-genotype link.
2766 $records[
'stock_genotype'][
"$stock_id-$genotype_name"] = array(
2767 'stock_id' => $stock_id,
2769 'genotype' => $genotype_name,
// Batch step: runs once the number of buffered genotypes reaches the
// configured group size.
2773 if ($genotype_count >= $record_group) {
2775 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
2778 $job->logMessage(
'[INFO] - Done.');
// Empty per-table buffers; presumably $records is re-initialised with these
// keys on a line not shown - TODO confirm.
2780 'feature' => array(),
2781 'genotype' => array(),
2782 'genotype_call' => array(),
2783 'stock_genotype' => array(),
// The location and property buffers are only reset when associations are used.
2785 if (!empty($associations)) {
2786 $records[
'featureloc'] = array();
2787 $records[
'featureprop'] = array();
// Keep a running total of genotypes processed so far.
2789 $options[
'genotype_total'] += $genotype_count;
// Build the message with '.', PHP's string concatenation operator. The
// previous '+' performed arithmetic on a non-numeric string, so the message
// text was never logged (and PHP 8 raises a TypeError for it).
2791 $job->logMessage(
'[INFO] - Genotypes inserted:' . $options[
'genotype_total']);
2792 $genotype_count = 0;
// Fragment of the SNP association row handler; presumably the body of
// tpps_process_snp_association($row, array &$options) - TODO confirm.
//
// Reads one association row using the column mapping in
// $options['associations_groups'] and stores the parsed values in
// $options['associations'], keyed by SNP id.
2811 $groups = $options[
'associations_groups'];
// Taken by reference so entries written below end up in $options.
2812 $associations = &$options[
'associations'];
2814 $id = $row[$groups[
'SNP ID'][1]];
// The position cell is expected in "<digits>:<digits>" form.
// NOTE(review): the preg_match() result is not checked in the visible code;
// a non-matching cell leaves $matches without indexes 1 and 2 - confirm.
2816 preg_match(
'/^(\d+):(\d+)$/', $row[$groups[
'Position'][3]], $matches);
2817 $start = $matches[1];
2818 $stop = $matches[2];
// Handling for a start greater than the stop (body not shown here).
2819 if ($start > $stop) {
2825 $trait = $row[$groups[
'Associated Trait'][5]];
// Record the association for this SNP id.
2827 $associations[$id] = array(
2829 'scaffold' => $row[$groups[
'Scaffold'][2]],
2832 'allele' => $row[$groups[
'Allele'][4]],
// Trait metadata is looked up by the lower-cased trait name.
// NOTE(review): 'attr_id' is read without a ?? fallback, unlike 'struct_id'.
2834 'trait_attr' => $options[
'phenotype_meta'][strtolower($trait)][
'attr_id'],
2835 'trait_obs' => $options[
'phenotype_meta'][strtolower($trait)][
'struct_id'] ?? NULL,
2836 'confidence' => $row[$groups[
'Confidence Value'][6]],
// Fragment of the SSR header builder; presumably the body of
// tpps_ssrs_headers($fid, $ploidy) - TODO confirm. $headers is assigned on a
// line not shown here.
//
// Builds $results, a copy of the file headers in which each marker column
// receives a suffix ("_A"/"_B", or "_<n>") depending on ploidy and on how
// the header count compares to the column count.

// Branch for haploid data (body not shown here).
2858 if ($ploidy ==
'Haploid') {
2861 $row_len = count($headers);
2862 $results = $headers;
// Drop header entries that compare equal to NULL.
// NOTE(review): array_search() is non-strict here and the loop stops when
// the found key is falsy (e.g. 0), so such an entry would be kept - confirm.
2864 while (($k = array_search(NULL, $results))) {
2865 unset($results[$k]);
// Remaining header count, and how many of those are distinct.
2871 $num_headers = count($results);
2872 $num_unique_headers = count(array_unique($results));
2874 foreach (array_keys($headers) as $key) {
// key() reads the internal array pointer of $headers; presumably the
// pointer is advanced on a line not shown - TODO confirm.
2876 $next_key = key($headers);
// Headers appear on every other column: a named column gets "_A" and the
// column after it reuses that name with "_B".
2884 if ($num_headers == ($row_len + 1) / 2) {
2886 if (array_key_exists($key, $results)) {
2887 $last = $results[$key];
2888 $results[$key] .=
"_A";
2891 $results[$key] = $last .
"_B";
// One header per column.
2895 if ($num_headers == $row_len) {
// Repeated header names: equal neighbours are told apart with "_A"/"_B".
2897 if ($num_headers != $num_unique_headers) {
2900 if ($results[$key] == $results[$next_key]) {
2901 $results[$key] .=
"_A";
2904 $results[$key] .=
"_B";
// Same header/column comparison again, suffixing with a number instead.
2910 if ($num_headers == $row_len) {
2912 if ($num_unique_headers != $num_headers) {
// NOTE(review): '/' binds tighter than '-', so the divisor evaluates as
// $num_headers - (1 / $num_unique_headers) - 1. If
// ($num_headers - 1) / ($num_unique_headers - 1) was intended, parentheses
// are missing - confirm before changing.
2917 $ploidy_suffix = ($marker_num % ($num_headers - 1 / $num_unique_headers - 1)) + 1;
2918 $results[$key] .=
"_$ploidy_suffix";
// NOTE(review): same precedence concern as above, with $row_len and
// $num_headers.
2923 $ploidy_suffix = ($marker_num % ($row_len - 1 / $num_headers - 1)) + 1;
2924 if (array_key_exists($key, $results)) {
2925 $last = $results[$key];
2926 $results[$key] .=
"_$ploidy_suffix";
// Columns without their own header reuse the last seen header name.
2930 $results[$key] =
"{$last}_$ploidy_suffix";
// Fragment, presumably from tpps_other_marker_headers($fid, array $cols) -
// TODO confirm. Copies the header of each requested column into $results,
// keyed by the column id.
2960 foreach ($cols as $col) {
2961 $results[$col] = $headers[$col];
// Fragment of the environment layer row handler; presumably the body of
// tpps_process_environment_layers($row, array &$options) - TODO confirm.
//
// For the tree in $row, reads its stored GPS stock properties and then, for
// every configured layer/parameter pair, buffers phenotype,
// stock_phenotype and (conditionally) phenotype_cvterm records in $records.

// Unpack options; the entries taken by reference are written back to the
// caller's $options.
2981 $id_col = $options[
'tree_id'];
2982 $records = &$options[
'records'];
2983 $tree_info = &$options[
'tree_info'];
2984 $layers_params = $options[
'layers_params'];
2985 $env_count = &$options[
'env_count'];
2986 $accession = $options[
'accession'];
2987 $suffix = &$options[
'suffix'];
2988 $env_cvterm = $options[
'env_cvterm'];
// Batch size, configurable through the tpps_record_group variable.
2989 $record_group = variable_get(
'tpps_record_group', 10000);
// Resolve the stock for the tree id found in this row.
2991 $tree_id = $row[$id_col];
2992 $stock_id = $tree_info[$tree_id][
'stock_id'];
// First stockprop lookup for this stock; its value is used as the latitude
// (the remaining query conditions are not shown here).
2994 $gps_query = chado_select_record(
'stockprop', array(
'value'), array(
2995 'stock_id' => $stock_id,
// NOTE(review): current() on an empty result is not an object - confirm the
// property always exists for these stocks.
3000 $lat = current($gps_query)->value;
// Second stockprop lookup; its value is used as the longitude.
3002 $gps_query = chado_select_record(
'stockprop', array(
'value'), array(
3003 'stock_id' => $stock_id,
3008 $long = current($gps_query)->value;
3010 foreach ($layers_params as $layer_id => $params) {
// Look up the layer title for naming.
// NOTE(review): this query runs once per layer for every row.
3011 $layer_query = db_select(
'cartogratree_layers',
'l')
3012 ->fields(
'l', array(
'title'))
3013 ->condition(
'layer_id', $layer_id)
3016 $layer_name = $layer_query->fetchObject()->title;
3018 foreach (array_keys($params) as $param_id) {
// Look up the parameter's field name.
3019 $param_query = db_select(
'cartogratree_fields',
'f')
3020 ->fields(
'f', array(
'field_name'))
3021 ->condition(
'field_id', $param_id)
3024 $param_name = $param_query->fetchObject()->field_name;
3025 $phenotype_name =
"$accession-$tree_id-$layer_name-$param_name-$suffix";
// Per-parameter setting that decides how the environment cvterm is attached
// below; defaults to 'attr_id'.
3028 $type = variable_get(
"tpps_param_{$param_id}_type",
'attr_id');
// Phenotype record; assumes $value was assigned on a line not shown here.
3030 $records[
'phenotype'][$phenotype_name] = array(
3031 'uniquename' => $phenotype_name,
3032 'name' =>
"$param_name",
3033 'value' =>
"$value",
// Link between the stock and the phenotype.
3036 $records[
'stock_phenotype'][$phenotype_name] = array(
3037 'stock_id' => $stock_id,
3039 'phenotype' => $phenotype_name,
// 'attr_id' type: the environment cvterm goes on the phenotype itself.
3043 if ($type ==
'attr_id') {
3044 $records[
'phenotype'][$phenotype_name][
'attr_id'] = $env_cvterm;
// Any other type: the cvterm is attached through phenotype_cvterm instead.
3046 if ($type !=
'attr_id') {
3047 $records[
'phenotype_cvterm'][$phenotype_name] = array(
3048 'cvterm_id' => $env_cvterm,
3050 'phenotype' => $phenotype_name,
// Batch step once the buffered record count reaches the group size.
3056 if ($env_count >= $record_group) {
3058 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
3061 $job->logMessage(
'[INFO] - Done.');
// Empty per-table buffers; presumably $records is re-initialised with these
// keys on a line not shown - TODO confirm.
3063 'phenotype' => array(),
3064 'phenotype_cvterm' => array(),
3065 'stock_phenotype' => array(),
// Fragment, presumably from
// tpps_get_environmental_layer_data($layer_id, $lat, $long, $param) - TODO
// confirm. Scans a line-based "key = value" response for $param and returns
// the trimmed value of the first matching line.
3096 $response = explode(
"\n", $response);
// Ignore the first two and the last two lines of the response.
3098 $response = array_slice($response, 2, -2);
3099 foreach ($response as $line) {
3100 $item = explode(
"=", $line);
// NOTE(review): explode() always returns at least one element, so $item is
// always truthy, and $item[1] is unset for a line without "=" - confirm.
3101 if ($item and trim($item[0]) == $param) {
3102 return trim($item[1]);
// Fragment, presumably from tpps_get_env_response($layer_id, $lat, $long) -
// TODO confirm. Builds a WMS GetFeatureInfo URL for the given layer and
// coordinates and returns the raw response body.

// Only proceeds when the cartogratree_layers table exists.
3123 if (db_table_exists(
'cartogratree_layers')) {
// Look up the layer name for the requested layer id.
3124 $query = db_select(
'cartogratree_layers',
'l')
3125 ->fields(
'l', array(
'name'))
3126 ->condition(
'layer_id', $layer_id)
// NOTE(review): fetchObject() yields no object when no row matches, so
// ->name would fail for an unknown layer id - confirm callers guarantee it.
3129 $result = $query->fetchObject();
3130 $layers = $result->name;
// NOTE(review): hard-coded host, reached over plain http.
3132 $url =
"http://treegenesdev.cam.uchc.edu:8080/geoserver/ct/wms?";
3135 $req =
"GetFeatureInfo";
3137 $format =
"application/json";
// Bounding box starting at the point and extending 0.0000001 in each
// coordinate.
3138 $bigger_lat = $lat + 0.0000001;
3139 $bigger_long = $long + 0.0000001;
3140 $bbox =
"$lat,$long,$bigger_lat,$bigger_long";
// Query the single pixel of a 1x1 image.
3141 $pixels =
"width=1&height=1&X=0&Y=0";
// $serv, $ver and $srs are assigned on lines not shown here. The values
// are interpolated into the query string without URL encoding.
3143 $url .=
"service=$serv&version=$ver&request=$req&layers=$layers&srs=$srs&format=$format&query_layers=$layers&bbox=$bbox&$pixels";
// file_get_contents() returns FALSE on failure; that is passed through.
3145 return file_get_contents($url);
// Fragment, presumably from tpps_submit_summary(array &$form_state) - TODO
// confirm. Handles the analysis options and the tree pictures saved on the
// summary page of the submission.

// Machine name => label for each analysis type offered on the summary page.
3156 $analysis_options = array(
3157 'diversity' =>
'Diversity',
3158 'population_structure' =>
'Population Structure',
3159 'association_genetics' =>
'Association Genetics',
3160 'landscape_genomics' =>
'Landscape Genomics',
3161 'phenotype_environment' =>
'Phenotype-Environment',
3164 foreach ($analysis_options as $option => $label) {
// Only analysis types whose "<option>_check" value was saved non-empty.
3165 if (!empty($form_state[
'saved_values'][
'summarypage'][
'analysis'][
"{$option}_check"])) {
// Record keyed to the project (target table and other fields not shown).
3167 'project_id' => $form_state[
'ids'][
'project_id'],
// File id saved for this analysis type.
3172 $fid = $form_state[
'saved_values'][
'summarypage'][
'analysis'][
"{$option}_file"];
// Second project-keyed record carrying the file description as its value.
3177 'project_id' => $form_state[
'ids'][
'project_id'],
3179 'value' => $form_state[
'saved_values'][
'summarypage'][
'analysis'][
"{$option}_file_description"],
// Tree pictures saved on the summary page.
3185 if (!empty($form_state[
'saved_values'][
'summarypage'][
'tree_pictures'])) {
3186 foreach ($form_state[
'saved_values'][
'summarypage'][
'tree_pictures'] as $name => $fid) {
// Keys ending in _url, _attribution or _license hold metadata rather than a
// file id (branch body not shown here).
3187 if (substr($name, -4) ==
'_url' or substr($name, -12) ==
'_attribution' or substr($name, -8) ==
'_license') {
// File name for the picture: the key with spaces replaced by underscores,
// plus a ".jpg" extension.
3191 $form_state[
'file_info'][
'summarypage'][$fid] = implode(
'_', explode(
' ', $name)) .
'.jpg';
// Store picture metadata when the treepictures_metadata table exists.
3192 if (db_table_exists(
'treepictures_metadata')) {
3193 db_insert(
'treepictures_metadata')
3194 ->fields(array(
'species',
'source',
'attribution',
'license'))
// The 'species' column receives the generated file name.
3196 'species' => $form_state[
'file_info'][
'summarypage'][$fid],
3197 'source' => $form_state[
'saved_values'][
'summarypage'][
'tree_pictures'][
"{$name}_url"],
3198 'attribution' => $form_state[
'saved_values'][
'summarypage'][
'tree_pictures'][
"{$name}_attribution"],
3199 'license' => $form_state[
'saved_values'][
'summarypage'][
'tree_pictures'][
"{$name}_license"],
// Fragment of the accession row handler; presumably the body of
// tpps_process_accession($row, array &$options, $job = NULL) - TODO confirm.
//
// For one row of the accession file this buffers, in $records, the stock
// and project_stock records of the tree (and of its clone, if any), the
// stock properties describing its location, and the GPS type/precision
// properties. A batch step runs once $stock_count reaches $record_group.

// Unpack options; the entries taken by reference are written back to the
// caller's $options.
3221 $cvterm = $options[
'cvterms'];
3222 $records = &$options[
'records'];
3223 $accession = $options[
'accession'];
3224 $cols = $options[
'column_ids'];
3225 $saved_ids = &$options[
'saved_ids'];
3226 $stock_count = &$options[
'stock_count'];
3227 $multi_insert_options = $options[
'multi_insert'];
3228 $tree_info = &$options[
'tree_info'];
3229 $record_group = variable_get(
'tpps_record_group', 10000);
// Geocoding is only attempted when an API key is configured.
3230 $geo_api_key = variable_get(
'tpps_geocode_api_key', NULL);
3231 $site_based = FALSE;
3232 $exact = $options[
'exact'] ?? NULL;
3233 $precision = $options[
'precision'] ?? NULL;
// Tree id from the id column; organism id defaults to the organism this
// file belongs to.
3235 $tree_id = $row[$cols[
'id']];
3236 $id = $saved_ids[
'organism_ids'][$options[
'org_num']];
// Several organisms sharing one file: take the organism from the row,
// either the 'org' column or "<genus> <species>".
3237 if ($options[
'org_names'][
'number'] != 1 and $options[
'single_file']) {
3238 $org_full_name = $row[$cols[
'org']] ??
"{$row[$cols['genus']]} {$row[$cols['species']]}";
// NOTE(review): array_search() returns FALSE when the name is not found,
// which as an array index reads element 0 - confirm names always match.
3239 $id = $saved_ids[
'organism_ids'][array_search($org_full_name, $options[
'org_names'])];
// Stock record for the tree, keyed by tree id.
3242 $records[
'stock'][$tree_id] = array(
3243 'uniquename' =>
"$accession-$tree_id",
3244 'type_id' => $cvterm[
'org'],
3245 'organism_id' => $id,
// Remember the tree's organism for later processing steps.
3247 $tree_info[$tree_id] = array(
3248 'organism_id' => $id,
// Link the stock to the project.
3251 $records[
'project_stock'][$tree_id] = array(
3252 'project_id' => $saved_ids[
'project_id'],
3254 'stock' => $tree_id,
// Clone handling: a clone gets its own stock, its own project link and a
// relationship to the parent tree.
3258 if (isset($row[$cols[
'clone']]) and $row[$cols[
'clone']] !== $options[
'empty']) {
3259 $clone_name = $tree_id .
'-' . $row[$cols[
'clone']];
3261 $records[
'stock'][$clone_name] = array(
3262 'uniquename' => $accession .
'-' . $clone_name,
3263 'type_id' => $cvterm[
'clone'],
3264 'organism_id' => $id,
3266 $tree_info[$clone_name] = array(
3267 'organism_id' => $id,
3270 $records[
'project_stock'][$clone_name] = array(
3271 'project_id' => $saved_ids[
'project_id'],
3273 'stock' => $clone_name,
// NOTE(review): dumps the whole cvterm array to the job log for every clone
// row - confirm this is still wanted.
3276 $job->logMessage(
'[INFO] CV Terms Data' . print_r($cvterm, 1));
// Relationship typed 'has_part' with the tree as subject and the clone as
// object.
3277 $records[
'stock_relationship'][$clone_name] = array(
3278 'type_id' => $cvterm[
'has_part'],
3280 'subject' => $tree_id,
3281 'object' => $clone_name,
// From here on, properties are attached to the clone rather than the tree.
3285 $tree_id = $clone_name;
// Location case 1: explicit latitude/longitude columns.
// $standard_coord is assigned on a line not shown here.
3288 if (!empty($row[$cols[
'lat']]) and !empty($row[$cols[
'lng']])) {
3289 $raw_coord = $row[$cols[
'lat']] .
',' . $row[$cols[
'lng']];
3291 $lat = $standard_coord[0];
3292 $lng = $standard_coord[1];
// Location case 2: state and country, optionally county and district.
3294 elseif (!empty($row[$cols[
'state']]) and !empty($row[$cols[
'country']])) {
3296 $records[
'stockprop'][
"$tree_id-country"] = array(
3297 'type_id' => $cvterm[
'country'],
3298 'value' => $row[$cols[
'country']],
3300 'stock' => $tree_id,
3304 $records[
'stockprop'][
"$tree_id-state"] = array(
3305 'type_id' => $cvterm[
'state'],
3306 'value' => $row[$cols[
'state']],
3308 'stock' => $tree_id,
// Human-readable location string, most specific part first.
3312 $location =
"{$row[$cols['state']]}, {$row[$cols['country']]}";
3314 if (!empty($row[$cols[
'county']])) {
3315 $records[
'stockprop'][
"$tree_id-county"] = array(
3316 'type_id' => $cvterm[
'county'],
3317 'value' => $row[$cols[
'county']],
3319 'stock' => $tree_id,
3322 $location =
"{$row[$cols['county']]}, $location";
3325 if (!empty($row[$cols[
'district']])) {
3326 $records[
'stockprop'][
"$tree_id-district"] = array(
3327 'type_id' => $cvterm[
'district'],
3328 'value' => $row[$cols[
'district']],
3330 'stock' => $tree_id,
3333 $location =
"{$row[$cols['district']]}, $location";
3336 $tree_info[$tree_id][
'location'] = $location;
// Geocode each distinct location string once; results are cached in
// $options['locations'].
3338 if (isset($geo_api_key) and !array_key_exists($location, $options[
'locations'])) {
3339 $query = urlencode($location);
3340 $url =
"https://api.opencagedata.com/geocode/v1/json?q=$query&key=$geo_api_key";
// $response is NULL when the request fails or the body is not valid JSON.
3341 $response = json_decode(file_get_contents($url));
3342 $options[
'locations'][$location] = $response->results[0]->geometry ?? NULL;
// With several results and no county/district columns, prefer the result
// whose component type is 'state'. empty() keeps this from reading a
// property on a NULL response.
3344 if (!empty($response->total_results) and $response->total_results > 1 and !isset($cols[
'district']) and !isset($cols[
'county'])) {
3345 foreach ($response->results as $item) {
3346 if ($item->components->_type ==
'state') {
3347 $options[
'locations'][$location] = $item->geometry;
3353 $lat = $options[
'locations'][$location]->lat ?? NULL;
3354 $lng = $options[
'locations'][$location]->lng ?? NULL;
// Location case 3: population group mapped to a location by the caller.
3356 elseif (!empty($row[$cols[
'pop_group']])) {
3358 $location = $options[
'pop_group'][$row[$cols[
'pop_group']]];
// $coord is assigned on a line not shown here.
3362 $parts = explode(
',', $coord);
3368 $records[
'stockprop'][
"$tree_id-location"] = array(
3369 'type_id' => $cvterm[
'loc'],
3370 'value' => $location,
3372 'stock' => $tree_id,
3376 $tree_info[$tree_id][
'location'] = $location;
// Geocode the group's location, reusing a cached result when available.
3378 if (isset($geo_api_key)) {
3379 $result = $options[
'locations'][$location] ?? NULL;
3380 if (empty($result)) {
3381 $query = urlencode($location);
3382 $url =
"https://api.opencagedata.com/geocode/v1/json?q=$query&key=$geo_api_key";
3383 $response = json_decode(file_get_contents($url));
// empty() tolerates a NULL response (failed request or invalid JSON), which
// then simply yields no geocoding result.
3384 $result = !empty($response->total_results) ? $response->results[0]->geometry : NULL;
3385 $options[
'locations'][$location] = $result;
3388 if (!empty($result)) {
3389 $lat = $result->lat;
3390 $lng = $result->lng;
// Store coordinates and the GPS type when both values are available.
// NOTE(review): !empty() also rejects a coordinate of exactly 0 - confirm.
3396 if (!empty($lat) and !empty($lng)) {
3397 $records[
'stockprop'][
"$tree_id-lat"] = array(
3398 'type_id' => $cvterm[
'lat'],
3401 'stock' => $tree_id,
3405 $records[
'stockprop'][
"$tree_id-long"] = array(
3406 'type_id' => $cvterm[
'lng'],
3409 'stock' => $tree_id,
3412 $tree_info[$tree_id][
'lat'] = $lat;
3413 $tree_info[$tree_id][
'lng'] = $lng;
// One of three GPS type labels is chosen; the conditions that select
// between them are not shown here.
3415 $gps_type =
"Site-based";
3417 $gps_type =
"Exact";
3419 $gps_type =
"Approximate";
3423 $records[
'stockprop'][
"$tree_id-gps-type"] = array(
3424 'type_id' => $cvterm[
'gps_type'],
3425 'value' => $gps_type,
3427 'stock' => $tree_id,
// Approximate coordinates also record the supplied precision.
3431 if ($gps_type ==
"Approximate" and !empty($precision)) {
3432 $records[
'stockprop'][
"$tree_id-precision"] = array(
3433 'type_id' => $cvterm[
'precision'],
3434 'value' => $precision,
3436 'stock' => $tree_id,
// Batch step once the buffered stock count reaches the group size.
3443 if ($stock_count >= $record_group) {
3445 $job->logMessage(
'[INFO] - Inserting data into database using insert_multi...');
3448 $job->logMessage(
'[INFO] - Done.');
// Record the stock id of each tree; $new_ids is assigned on a line not
// shown here.
3449 foreach ($new_ids as $t_id => $stock_id) {
3450 $tree_info[$t_id][
'stock_id'] = $stock_id;
// Empty per-table buffers; presumably $records is re-initialised with these
// keys on a line not shown - TODO confirm.
3455 'stockprop' => array(),
3456 'stock_relationship' => array(),
3457 'project_stock' => array(),
// Fragment, presumably from tpps_clean_state(array &$form_state) - TODO
// confirm. Removes the 'ids' entry from the form state.
3473 unset($form_state[
'ids']);
// Fragment, presumably from tpps_get_species_codes($genus, $species) - TODO
// confirm. Forms a candidate code from a genus part followed by a species
// part, with the first character upper-cased.
3494 $code = ucfirst($genus_part . $species_part);
// Codes already present in $codes are not marked again; the rest of the
// branch body is not shown here.
3495 if (!array_key_exists($code, $codes)) {
3497 $codes[$code] = TRUE;
// Fragment, presumably from the generator tpps_get_code_parts($part) - TODO
// confirm. Yields lower-cased two-character strings made of a character of
// $part and a character at a later position.
// strlen() and the [] offsets are byte-based.
3515 for ($char1 = 0; $char1 <= strlen($part) - 2; $char1++) {
3516 for ($char2 = $char1 + 1; $char2 <= strlen($part) - 1; $char2++) {
// Pairs of two equal characters get separate handling (body not shown).
3518 if ($part[$char1] == $part[$char2]) {
3522 yield strtolower($part[$char1] . $part[$char2]);
tpps_submission_get_tags($accession)
tpps_standard_coord($raw_coordinate)
tpps_submission_clear_db($accession)
tpps_submit_environment(array &$form_state, $i, TripalJob &$job=NULL)
tpps_process_genotype_spreadsheet($row, array &$options=array())
tpps_ssrs_headers($fid, $ploidy)
tpps_refine_phenotype_meta(array &$meta, array $time_options=array(), TripalJob &$job=NULL)
tpps_update_submission(array $state, array $options=array())
tpps_submit_all($accession, TripalJob $job=NULL)
tpps_other_marker_headers($fid, array $cols)
tpps_add_project_file(array &$state, &$fid)
tpps_xlsx_translate_date($date)
tpps_submit_page_3(array &$form_state, TripalJob &$job=NULL)
tpps_submit_genotype(array &$form_state, array $species_codes, $i, TripalJob &$job=NULL)
tpps_chado_insert_record($table, $records, array $options=array())
tpps_get_env_response($layer_id, $lat, $long)
tpps_get_species_codes($genus, $species)
tpps_process_phenotype_meta($row, array &$options=array())
tpps_matching_trees($project_id)
tpps_submission_add_tag($accession, $tag)
tpps_submit_page_1(array &$form_state, TripalJob &$job=NULL)
tpps_submit_page_4(array &$form_state, TripalJob &$job=NULL)
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_check_organisms($row, array &$options=array())
tpps_job_logger_write($string, $replacements=[])
tpps_process_environment_layers($row, array &$options=array())
tpps_submission_rename_files($accession)
tpps_file_headers($fid, $no_header=FALSE)
tpps_submit_phenotype(array &$form_state, $i, TripalJob &$job=NULL)
tpps_get_code_parts($part)
tpps_submission_clear_default_tags($accession)
tpps_submit_page_2(array &$form_state, TripalJob &$job=NULL)
tpps_clean_state(array &$form_state)
tpps_tripal_entity_publish($bundle_name, array $vals, array $options=array())
tpps_load_submission($accession, $state=TRUE)
tpps_chado_prop_exists($base_table, $id, $name, array $options=array())
tpps_get_taxon($org_name, $rank)
tpps_process_accession($row, array &$options, $job=NULL)
tpps_generate_popstruct($study_accession, $vcf_location)
tpps_submit_summary(array &$form_state)
tpps_process_snp_association($row, array &$options=array())
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
tpps_get_location($location)
tpps_get_environmental_layer_data($layer_id, $lat, $long, $param)
tpps_ols_install_term($info)
tpps_file_iterator($fid, $function, array &$options=array())
tpps_submit_vcf_render_genotype_combination($raw_value, $ref, $alt)
tpps_process_phenotype_data($row, array &$options=array())