Tripal Plant PopGen Submit
file_parsing.inc
Go to the documentation of this file.
1 <?php
2 
8 require_once 'get_env_data.inc';
9 
/**
 * Parses the files of a TPPS submission and loads their contents into CHADO.
 *
 * Loads the submission identified by $accession and, for every organism in
 * it, processes whichever of the phenotype, genotype and environment sections
 * are present on the fourth page of the form. Each uploaded file is recorded
 * as a 'url' projectprop of the project; spreadsheet contents are handed to
 * tpps_file_iterator() with the matching row callback, VCF files are parsed
 * inline, and the collected records are written with
 * tpps_chado_insert_multi(). The form state (including the updated file rank)
 * is saved with tpps_update_submission() at the end.
 *
 * @param string $accession
 *   The accession of the submission, as accepted by tpps_load_submission().
 */
function tpps_file_parsing($accession) {
  $form_state = tpps_load_submission($accession);

  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
  $organism_number = $firstpage['organism']['number'];
  // Held by reference so that every rank consumed below is written back into
  // the form state that is saved at the end of this function.
  $file_rank = &$form_state['file_rank'];
  $record_group = variable_get('tpps_record_group', 10000);
  $project_id = $form_state['ids']['project_id'];

  // Map each organism id to its 4 letter species code.
  $species_codes = array();
  for ($i = 1; $i <= $organism_number; $i++) {
    $organism_id = $form_state['ids']['organism_ids'][$i];
    $code_records = chado_select_record('organismprop', array('value'), array(
      'type_id' => array(
        'name' => 'organism 4 letter code',
      ),
      'organism_id' => $organism_id,
    ), array(
      'limit' => 1,
    ));
    // Guard against a failed or empty lookup instead of dereferencing the
    // result of current() blindly.
    $code_record = is_array($code_records) ? current($code_records) : FALSE;
    $species_codes[$organism_id] = $code_record->value ?? NULL;
  }

  for ($i = 1; $i <= $organism_number; $i++) {
    if (isset($fourthpage["organism-$i"]['phenotype'])) {
      $phenotype = $fourthpage["organism-$i"]['phenotype'];

      // Get appropriate cvterms.
      $phenotype_cvterms = array(
        'time' => chado_get_cvterm(array(
          'name' => 'time',
          'is_obsolete' => 0,
        ))->cvterm_id,
        'desc' => chado_get_cvterm(array(
          'name' => 'description',
          'cv_id' => array(
            'name' => 'schema',
          ),
          'is_obsolete' => 0,
        ))->cvterm_id,
        'unit' => chado_get_cvterm(array(
          'name' => 'unit',
          'cv_id' => array(
            'name' => 'uo',
          ),
          'is_obsolete' => 0,
        ))->cvterm_id,
        'min' => chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'ncit',
          ),
          'name' => 'Minimum',
          'is_obsolete' => 0,
        ))->cvterm_id,
        'max' => chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'ncit',
          ),
          'name' => 'Maximum',
          'is_obsolete' => 0,
        ))->cvterm_id,
      );

      $options = array(
        'records' => array(
          'phenotype' => array(),
          'phenotypeprop' => array(),
          'stock_phenotype' => array(),
        ),
        'cvterms' => $phenotype_cvterms,
        'accession' => $form_state['accession'],
        'tree_info' => $form_state['tree_info'],
        'suffix' => 0,
        'phenotype_count' => 0,
      );

      if (empty($phenotype['iso-check'])) {
        $phenotype_number = $phenotype['phenotypes-meta']['number'];
        $phenotypes_meta = array();

        _tpps_file_parsing_add_url($project_id, file_create_url(file_load($phenotype['file'])->uri), $file_rank);

        // Populate $phenotypes_meta with manually entered metadata.
        for ($j = 1; $j <= $phenotype_number; $j++) {
          $manual = $phenotype['phenotypes-meta'][$j];
          $name = strtolower($manual['name']);
          $phenotypes_meta[$name] = array(
            'attr' => $manual['attribute'],
            'desc' => $manual['description'],
            'unit' => $manual['units'],
          );
          if ($manual['struct-check'] == '1') {
            $phenotypes_meta[$name]['struct'] = $manual['structure'];
          }
          if (!empty($manual['val-check']) or !empty($manual['bin-check'])) {
            $phenotypes_meta[$name]['min'] = $manual['min'];
            $phenotypes_meta[$name]['max'] = $manual['max'];
          }
          if ($manual['time-check'] == '1') {
            $phenotypes_meta[$name]['time'] = $manual['time'];
          }
        }

        if ($phenotype['check'] == '1') {
          _tpps_file_parsing_add_url($project_id, file_create_url(file_load($phenotype['metadata'])->uri), $file_rank);

          // Get metadata column values.
          $groups = $phenotype['metadata-groups'];
          $column_vals = $phenotype['metadata-columns'];
          $struct = array_search('5', $column_vals);
          $min = array_search('6', $column_vals);
          $max = array_search('7', $column_vals);
          $columns = array(
            'name' => $groups['Phenotype Id']['1'],
            'attr' => $groups['Attribute']['2'],
            'desc' => $groups['Description']['3'],
            'unit' => $groups['Units']['4'],
            'struct' => !empty($struct) ? $struct : NULL,
            'min' => !empty($min) ? $min : NULL,
            'max' => !empty($max) ? $max : NULL,
          );

          // 'meta' is passed by reference so that rows of the metadata file
          // are merged into the manually entered metadata.
          $meta_options = array(
            'no_header' => $phenotype['metadata-no-header'],
            'meta_columns' => $columns,
            'meta' => &$phenotypes_meta,
          );

          tpps_file_iterator($phenotype['metadata'], 'tpps_process_phenotype_meta', $meta_options);
        }

        tpps_refine_phenotype_meta($phenotypes_meta);

        // Get metadata header values.
        $groups = $phenotype['file-groups'];
        $column_vals = $phenotype['file-columns'];
        $time_index = ($phenotype['format'] == 0) ? '2' : '4';
        $clone_index = ($phenotype['format'] == 0) ? '3' : '5';
        $time = array_search($time_index, $column_vals);
        $clone = array_search($clone_index, $column_vals);
        $meta_headers = array(
          'name' => $groups['Phenotype Name/Identifier']['2'] ?? NULL,
          'value' => $groups['Phenotype Value(s)']['3'] ?? NULL,
          'time' => !empty($time) ? $time : NULL,
          'clone' => !empty($clone) ? $clone : NULL,
        );

        // Get data header values. Reset first so that the columns of a
        // previous organism are never reused for this one.
        $data_columns = NULL;
        if ($phenotype['format'] == 0) {
          $file_headers = tpps_file_headers($phenotype['file'], $phenotype['file-no-header']);
          $data_columns = array();
          foreach ($groups['Phenotype Data']['0'] as $col) {
            $data_columns[$col] = $file_headers[$col];
          }
          unset($file_headers);
        }

        $options['no_header'] = $phenotype['file-no-header'];
        $options['tree_id'] = $groups['Tree Identifier']['1'];
        $options['meta_headers'] = $meta_headers;
        $options['data_columns'] = $data_columns;
        $options['meta'] = $phenotypes_meta;
        // tpps_process_phenotype_data() reads the 'file-empty' key, so the
        // value must be supplied under that name for its empty-value check on
        // the clone column to work. The historical 'file_empty' key is kept
        // as well in case anything else consumes it.
        $options['file-empty'] = $phenotype['file-empty'];
        $options['file_empty'] = $phenotype['file-empty'];

        tpps_file_iterator($phenotype['file'], 'tpps_process_phenotype_data', $options);
      }
      else {
        _tpps_file_parsing_add_url($project_id, file_create_url(file_load($phenotype['iso'])->uri), $file_rank);

        $options['iso'] = TRUE;
        $options['file_headers'] = tpps_file_headers($phenotype['iso']);
        $options['meta'] = array(
          'desc' => "Mass Spectrometry",
          'unit' => "intensity (arbitrary units)",
          'attr_id' => chado_get_cvterm(array(
            'name' => 'intensity',
            'is_obsolete' => 0,
          ))->cvterm_id,
        );

        tpps_file_iterator($phenotype['iso'], 'tpps_process_phenotype_data', $options);
      }
      tpps_chado_insert_multi($options['records']);
      unset($options['records']);
    }

    if (isset($fourthpage["organism-$i"]['genotype'])) {
      $genotype = $fourthpage["organism-$i"]['genotype'];
      $genotype_count = 0;
      $genotype_total = 0;
      $seq_var_cvterm = chado_get_cvterm(array(
        'cv_id' => array(
          'name' => 'sequence',
        ),
        'name' => 'sequence_variant',
        'is_obsolete' => 0,
      ))->cvterm_id;
      // genotype_call references the feature table twice (variant and
      // marker), so the foreign key columns are spelled out explicitly.
      $overrides = array(
        'genotype_call' => array(
          'variant' => array(
            'table' => 'feature',
            'columns' => array(
              'variant_id' => 'feature_id',
            ),
          ),
          'marker' => array(
            'table' => 'feature',
            'columns' => array(
              'marker_id' => 'feature_id',
            ),
          ),
        ),
      );

      $records = array(
        'feature' => array(),
        'genotype' => array(),
        'genotype_call' => array(),
        'stock_genotype' => array(),
      );

      $multi_insert_options = array(
        'fk_overrides' => $overrides,
        'entities' => array(
          'label' => 'Genotype',
          'table' => 'genotype',
        ),
      );

      $options = array(
        'records' => $records,
        'tree_info' => $form_state['tree_info'],
        'species_codes' => $species_codes,
        'genotype_count' => $genotype_count,
        'genotype_total' => &$genotype_total,
        'project_id' => $project_id,
        'seq_var_cvterm' => $seq_var_cvterm,
        'overrides' => $overrides,
        'multi_insert' => $multi_insert_options,
      );

      // NOTE: dedicated handling of NCBI BioProject references (ref-genome
      // value 'bio') is currently disabled; such a value falls through to the
      // generic 'reference_genome' projectprop branch below.
      if ($genotype['ref-genome'] == 'manual' or $genotype['ref-genome'] == 'manual2' or $genotype['ref-genome'] == 'url') {
        if ($genotype['tripal_fasta']['file_upload']) {
          // Uploaded new file.
          _tpps_file_parsing_add_url($project_id, file_create_url(file_load($genotype['tripal_fasta']['file_upload'])->uri), $file_rank);
        }
        if ($genotype['tripal_fasta']['file_upload_existing']) {
          // Uploaded existing file.
          _tpps_file_parsing_add_url($project_id, file_create_url(file_load($genotype['tripal_fasta']['file_upload_existing'])->uri), $file_rank);
        }
        if ($genotype['tripal_fasta']['file_remote']) {
          // Provided url to file.
          _tpps_file_parsing_add_url($project_id, $genotype['tripal_fasta']['file_remote'], $file_rank);
        }
      }
      elseif ($genotype['ref-genome'] != 'none') {
        tpps_chado_insert_record('projectprop', array(
          'project_id' => $project_id,
          'type_id' => array(
            'cv_id' => array(
              'name' => 'sequence',
            ),
            'name' => 'reference_genome',
            'is_obsolete' => 0,
          ),
          'value' => $genotype['ref-genome'],
        ));
      }

      if (!empty($genotype['files']['file-type']['SNPs Genotype Assay'])) {
        _tpps_file_parsing_add_url($project_id, file_create_url(file_load($genotype['files']['snps-assay'])->uri), $file_rank);

        $options['type'] = 'snp';
        $options['headers'] = tpps_file_headers($genotype['files']['snps-assay']);
        $options['marker'] = 'SNP';
        $options['type_cvterm'] = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'sequence',
          ),
          'name' => 'SNP',
          'is_obsolete' => 0,
        ))->cvterm_id;

        tpps_file_iterator($genotype['files']['snps-assay'], 'tpps_process_genotype_spreadsheet', $options);

        tpps_chado_insert_multi($options['records'], $multi_insert_options);
        unset($options['records']);
        $genotype_total += $genotype_count;
        $genotype_count = 0;
      }

      if (!empty($genotype['files']['file-type']['Assay Design']) and $genotype['marker-type']['SNPs']) {
        _tpps_file_parsing_add_url($project_id, file_create_url(file_load($genotype['files']['assay-design'])->uri), $file_rank);
      }

      if (!empty($genotype['files']['file-type']['SSRs/cpSSRs Genotype Spreadsheet'])) {
        _tpps_file_parsing_add_url($project_id, file_create_url(file_load($genotype['files']['ssrs'])->uri), $file_rank);

        $options['type'] = 'ssrs';
        $options['headers'] = tpps_ssrs_headers($genotype['files']['ssrs'], $genotype['files']['ploidy']);
        $options['marker'] = $genotype['SSRs/cpSSRs'];
        $options['type_cvterm'] = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'sequence',
          ),
          'name' => 'microsatellite',
          'is_obsolete' => 0,
        ))->cvterm_id;

        tpps_file_iterator($genotype['files']['ssrs'], 'tpps_process_genotype_spreadsheet', $options);

        tpps_chado_insert_multi($options['records'], $multi_insert_options);
        unset($options['records']);
        $genotype_count = 0;
      }

      if (!empty($genotype['files']['file-type']['Other Marker Genotype Spreadsheet'])) {
        _tpps_file_parsing_add_url($project_id, file_create_url(file_load($genotype['files']['other'])->uri), $file_rank);

        $groups = $genotype['files']['other-groups'];

        $options['type'] = 'other';
        $options['headers'] = tpps_other_marker_headers($genotype['files']['other'], $groups['Genotype Data'][0]);
        $options['marker'] = $genotype['other-marker'];
        $options['type_cvterm'] = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'sequence',
          ),
          'name' => 'genetic_marker',
          'is_obsolete' => 0,
        ))->cvterm_id;
        $options['tree_id'] = $groups['Tree Id'][1];

        tpps_file_iterator($genotype['files']['other'], 'tpps_process_genotype_spreadsheet', $options);

        tpps_chado_insert_multi($options['records'], $multi_insert_options);
        unset($options['records']);
        $genotype_count = 0;
      }

      if (!empty($genotype['files']['file-type']['VCF'])) {
        // TODO: we probably want to use tpps_file_iterator to parse vcf files.
        _tpps_file_parsing_add_url($project_id, file_create_url(file_load($genotype['files']['vcf'])->uri), $file_rank);

        $marker = 'SNP';

        $records['genotypeprop'] = array();

        $snp_cvterm = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'sequence',
          ),
          'name' => 'SNP',
          'is_obsolete' => 0,
        ))->cvterm_id;
        $format_cvterm = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'sep',
          ),
          'name' => 'format',
          'is_obsolete' => 0,
        ))->cvterm_id;
        $qual_cvterm = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'sequence',
          ),
          'name' => 'quality_value',
          'is_obsolete' => 0,
        ))->cvterm_id;
        $filter_cvterm = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'operation',
          ),
          'name' => 'Sequence contamination filtering',
          'is_obsolete' => 0,
        ))->cvterm_id;
        $freq_cvterm = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'sequence',
          ),
          'name' => 'allelic_frequency',
          'is_obsolete' => 0,
        ))->cvterm_id;
        $depth_cvterm = chado_get_cvterm(array(
          'cv_id' => array(
            'name' => 'ncit',
          ),
          'name' => 'Read Depth',
          'is_obsolete' => 0,
        ))->cvterm_id;
        $n_sample_cvterm = chado_get_cvterm(array(
          'name' => 'number_samples',
          'is_obsolete' => 0,
        ))->cvterm_id;

        $vcf_file = file_load($genotype['files']['vcf']);
        $location = drupal_realpath($vcf_file->uri);
        $vcf_content = fopen($location, 'r');
        $stocks = array();
        $format = "";
        $current_id = $form_state['ids']['organism_ids'][$i];
        $species_code = $species_codes[$current_id];

        if ($vcf_content === FALSE) {
          // Nothing can be parsed; report it rather than looping on an
          // invalid handle.
          watchdog('tpps', 'Unable to open VCF file @path for reading.', array('@path' => $location), WATCHDOG_ERROR);
        }
        else {
          while (($vcf_line = fgets($vcf_content)) !== FALSE) {
            // Blank lines carry no data and would otherwise be parsed as a
            // record with missing columns.
            if (trim($vcf_line) === '') {
              continue;
            }

            if ($vcf_line[0] != '#') {
              $genotype_count++;
              // Strip the line ending so it does not end up inside the last
              // column of the record.
              $vcf_line = explode("\t", rtrim($vcf_line, "\r\n"));
              $scaffold_id = $vcf_line[0];
              $position = $vcf_line[1];
              // The ID column names the variant. It has to be read for every
              // line, otherwise the name computed for the first record would
              // be reused for all following records.
              $variant_name = $vcf_line[2];
              $ref = $vcf_line[3];
              $alt = $vcf_line[4];
              $qual = $vcf_line[5];
              $filter = $vcf_line[6];
              $info = $vcf_line[7];

              // Build a name from position and alleles when the file does not
              // provide an id.
              if (empty($variant_name) or $variant_name == '.') {
                $variant_name = "{$scaffold_id}{$position}$ref:$alt";
              }
              $marker_name = $variant_name . $marker;
              $description = "$ref:$alt";
              $genotype_name = "$marker-$species_code-$scaffold_id-$position";
              $genotype_desc = "$marker-$species_code-$scaffold_id-$position-$description";

              $records['feature'][$marker_name] = array(
                'organism_id' => $current_id,
                'uniquename' => $marker_name,
                'type_id' => $seq_var_cvterm,
              );

              $records['feature'][$variant_name] = array(
                'organism_id' => $current_id,
                'uniquename' => $variant_name,
                'type_id' => $seq_var_cvterm,
              );

              $records['genotype'][$genotype_desc] = array(
                'name' => $genotype_name,
                'uniquename' => $genotype_desc,
                'description' => $description,
                'type_id' => $snp_cvterm,
              );

              if ($format != "") {
                $records['genotypeprop']["$genotype_desc-format"] = array(
                  'type_id' => $format_cvterm,
                  'value' => $format,
                  '#fk' => array(
                    'genotype' => $genotype_desc,
                  ),
                );
              }

              // Sample columns start at index 9 and line up with the stock
              // ids collected from the #CHROM header line.
              for ($j = 9; $j < count($vcf_line); $j++) {
                $records['genotype_call']["{$stocks[$j - 9]}-$genotype_name"] = array(
                  'project_id' => $project_id,
                  'stock_id' => $stocks[$j - 9],
                  '#fk' => array(
                    'genotype' => $genotype_desc,
                    'variant' => $variant_name,
                    'marker' => $marker_name,
                  ),
                );

                $records['stock_genotype']["{$stocks[$j - 9]}-$genotype_name"] = array(
                  'stock_id' => $stocks[$j - 9],
                  '#fk' => array(
                    'genotype' => $genotype_desc,
                  ),
                );
              }

              // Quality score.
              $records['genotypeprop']["$genotype_desc-qual"] = array(
                'type_id' => $qual_cvterm,
                'value' => $qual,
                '#fk' => array(
                  'genotype' => $genotype_desc,
                ),
              );

              // filter: pass/fail.
              $records['genotypeprop']["$genotype_desc-filter"] = array(
                'type_id' => $filter_cvterm,
                'value' => ($filter == '.') ? "P" : "NP",
                '#fk' => array(
                  'genotype' => $genotype_desc,
                ),
              );

              // Break up info column into a code => value map.
              $info_vals = explode(";", $info);
              foreach ($info_vals as $key => $val) {
                $parts = explode("=", $val);
                unset($info_vals[$key]);
                $info_vals[$parts[0]] = isset($parts[1]) ? $parts[1] : '';
              }

              // Allele frequency, assuming that the info code for allele
              // frequency is 'AF'.
              if (isset($info_vals['AF']) and $info_vals['AF'] != '') {
                $records['genotypeprop']["$genotype_desc-freq"] = array(
                  'type_id' => $freq_cvterm,
                  'value' => $info_vals['AF'],
                  '#fk' => array(
                    'genotype' => $genotype_desc,
                  ),
                );
              }

              // Depth coverage, assuming that the info code for depth
              // coverage is 'DP'.
              if (isset($info_vals['DP']) and $info_vals['DP'] != '') {
                $records['genotypeprop']["$genotype_desc-depth"] = array(
                  'type_id' => $depth_cvterm,
                  'value' => $info_vals['DP'],
                  '#fk' => array(
                    'genotype' => $genotype_desc,
                  ),
                );
              }

              // Number of samples, assuming that the info code for number of
              // samples is 'NS'.
              if (isset($info_vals['NS']) and $info_vals['NS'] != '') {
                $records['genotypeprop']["$genotype_desc-n_sample"] = array(
                  'type_id' => $n_sample_cvterm,
                  'value' => $info_vals['NS'],
                  '#fk' => array(
                    'genotype' => $genotype_desc,
                  ),
                );
              }

              // Tripal Job has issues when all submissions are made at the
              // same time, so break them up into groups of genotypes (sized
              // by the tpps_record_group variable) along with their relevant
              // genotypeprops.
              if ($genotype_count > $record_group) {
                tpps_chado_insert_multi($records, $multi_insert_options);
                $records = array(
                  'feature' => array(),
                  'genotype' => array(),
                  'genotype_call' => array(),
                  'genotypeprop' => array(),
                  'stock_genotype' => array(),
                );
                $genotype_count = 0;
              }
            }
            elseif (preg_match('/##FORMAT=/', $vcf_line)) {
              // Drop the leading "##FORMAT=" and the trailing character.
              $format .= substr($vcf_line, 9, -1);
            }
            elseif (preg_match('/#CHROM/', $vcf_line)) {
              // Header line: resolve every sample column to its stock id.
              $vcf_line = explode("\t", $vcf_line);
              for ($j = 9; $j < count($vcf_line); $j++) {
                $stocks[] = $form_state['tree_info'][trim($vcf_line[$j])]['stock_id'];
              }
            }
          }
          fclose($vcf_content);
        }

        // Insert the last set of values.
        tpps_chado_insert_multi($records, $multi_insert_options);
        unset($records);
        $genotype_count = 0;
      }
    }

    if (isset($fourthpage["organism-$i"]['environment'])) {
      $environment = $fourthpage["organism-$i"]['environment'];
      $env_layers_check = $environment['use_layers'] ?? FALSE;
      // Default to an empty list so the layer loop below is always iterable.
      $env_layers = $environment['env_layers'] ?? array();
      $env_params = $environment['env_params'] ?? FALSE;
      $env_number = $environment['env_manual']['number'];
      $env_count = 0;

      // Without per-species accession files, every organism shares the file
      // of the first species.
      $species_index = "species-$i";
      if (empty($form_state['saved_values'][TPPS_PAGE_3]['tree-accession']['check'])) {
        $species_index = "species-1";
      }
      $tree_accession = $form_state['saved_values'][TPPS_PAGE_3]['tree-accession'][$species_index];

      $env_cvterm = chado_get_cvterm(array(
        'cv_id' => array(
          'name' => 'biomaterial_property',
        ),
        'name' => 'climate_environment',
        'is_obsolete' => 0,
      ))->cvterm_id;

      if ($env_layers_check and db_table_exists('cartogratree_layers') and db_table_exists('cartogratree_fields')) {
        $layers_params = array();
        $records = array(
          'phenotype' => array(),
          'phenotype_cvterm' => array(),
          'stock_phenotype' => array(),
        );

        foreach ($env_layers as $layer_name => $layer_id) {
          if (!empty($layer_id) and !empty($env_params[$layer_name])) {
            // Layer with individually selected parameters.
            $layers_params[$layer_id] = array();
            $params = $env_params[$layer_name];
            foreach ($params as $param_name => $param_id) {
              if (!empty($param_id)) {
                $layers_params[$layer_id][$param_id] = $param_name;
              }
            }
          }
          elseif (!empty($layer_id) and preg_match('/worldclim_subgroup_(.+)/', $layer_id, $matches)) {
            // WorldClim subgroup: take every field of every layer in it.
            $subgroup_id = $matches[1];
            $layers = db_select('cartogratree_layers', 'l')
              ->fields('l', array('layer_id'))
              ->condition('subgroup_id', $subgroup_id)
              ->execute();
            while (($layer = $layers->fetchObject())) {
              $params = db_select('cartogratree_fields', 'f')
                ->fields('f', array('field_id', 'display_name'))
                ->condition('layer_id', $layer->layer_id)
                ->execute();
              while (($param = $params->fetchObject())) {
                $layers_params[$layer->layer_id][$param->field_id] = $param->display_name;
              }
            }
          }
        }

        $options = array(
          'no_header' => !empty($tree_accession['file-no-header']),
          'records' => $records,
          'tree_id' => $tree_accession['file-groups']['Tree Id'][1],
          'accession' => $form_state['accession'],
          'tree_info' => $form_state['tree_info'],
          'layers_params' => $layers_params,
          'env_count' => &$env_count,
          'env_cvterm' => $env_cvterm,
          'suffix' => 0,
        );

        tpps_file_iterator($tree_accession['file'], 'tpps_process_environment_layers', $options);

        tpps_chado_insert_multi($options['records']);
        unset($options['records']);
        $env_count = 0;
      }

      // Collect the manually entered environmental data.
      $env_meta = array();

      for ($j = 1; $j <= $env_number; $j++) {
        $current_env = $environment['env_manual'][$j];
        $env_meta[] = array(
          'name' => $current_env['name'],
          'desc' => $current_env['description'],
          'unit' => $current_env['units'],
          'val' => $current_env['value'],
        );
      }

      $records = array(
        'phenotype' => array(),
        'stock_phenotype' => array(),
        'phenotypeprop' => array(),
      );

      $options = array(
        'no_header' => !empty($tree_accession['file-no-header']),
        'accession' => $form_state['accession'],
        'records' => $records,
        'env_meta' => $env_meta,
        'env_count' => $env_count,
        'suffix' => 0,
        'tree_info' => $form_state['tree_info'],
        'tree_id' => $tree_accession['file-groups']['Tree Id'][1],
        'env_cvterm' => $env_cvterm,
        'desc_id' => chado_get_cvterm(array(
          'name' => 'description',
          'cv_id' => array(
            'name' => 'schema',
          ),
          'is_obsolete' => 0,
        ))->cvterm_id,
        'unit_id' => chado_get_cvterm(array(
          'name' => 'unit',
          'cv_id' => array(
            'name' => 'uo',
          ),
          'is_obsolete' => 0,
        ))->cvterm_id,
      );

      tpps_file_iterator($tree_accession['file'], 'tpps_process_environment_manual', $options);

      tpps_chado_insert_multi($options['records']);
      unset($options['records']);
    }
  }

  tpps_update_submission($form_state);
}

/**
 * Records a URL as a 'url' (schema cv) projectprop and advances the rank.
 *
 * @param mixed $project_id
 *   The id of the project the property belongs to.
 * @param string $url
 *   The URL to store as the property value.
 * @param mixed $file_rank
 *   The rank to store the property under. Passed by reference and incremented
 *   after the insert so the next property gets the following rank.
 *
 * @return mixed
 *   Whatever tpps_chado_insert_record() returns for the new property.
 */
function _tpps_file_parsing_add_url($project_id, $url, &$file_rank) {
  $inserted = tpps_chado_insert_record('projectprop', array(
    'project_id' => $project_id,
    'type_id' => array(
      'cv_id' => array(
        'name' => 'schema',
      ),
      'name' => 'url',
      'is_obsolete' => 0,
    ),
    'value' => $url,
    'rank' => $file_rank,
  ));
  $file_rank++;
  return $inserted;
}
901 
/**
 * Row callback that reads one line of a phenotype metadata file.
 *
 * The phenotype name found in the row is lower-cased and used as the key of a
 * new entry in $options['meta'], replacing any entry already stored under
 * that key. The attribute, description and unit columns are always copied;
 * structure, minimum and maximum are only copied when a column was configured
 * for them and the row holds a non-empty value in it.
 *
 * @param mixed $row
 *   The current row of the file, indexed by column.
 * @param array $options
 *   Iterator options. 'meta_columns' maps the keys name, attr, desc, unit,
 *   struct, min and max to columns of $row; 'meta' receives the result.
 */
function tpps_process_phenotype_meta($row, array &$options = array()) {
  $cols = $options['meta_columns'];
  $key = strtolower($row[$cols['name']]);

  // Mandatory fields.
  $entry = array(
    'attr' => $row[$cols['attr']],
    'desc' => $row[$cols['desc']],
    'unit' => $row[$cols['unit']],
  );

  // Optional fields: need both a configured column and a non-empty cell.
  foreach (array('struct', 'min', 'max') as $field) {
    if (empty($cols[$field])) {
      continue;
    }
    $col = $cols[$field];
    if (isset($row[$col]) and $row[$col] != '') {
      $entry[$field] = $row[$col];
    }
  }

  $options['meta'][$key] = $entry;
}
929 
/**
 * Resolves the attribute and structure names of phenotype metadata to cvterms.
 *
 * Every entry of $meta gets an 'attr_id' for its 'attr' name and a
 * 'struct_id' for its 'struct' name (NULL when no structure is given). A name
 * is looked up in the cvterm table first; when nothing is found, a new term
 * is inserted into the 'local' cv. Ids are remembered per name for the
 * duration of the call so each name is resolved only once.
 *
 * @param array $meta
 *   Phenotype metadata keyed by phenotype name. Modified in place.
 */
function tpps_refine_phenotype_meta(array &$meta) {
  $known_ids = array();
  $local_cv = chado_get_cv(array('name' => 'local'));
  $local_db = variable_get('tpps_local_db');

  // Returns the cvterm id for a term name: cached id, existing cvterm, or a
  // newly inserted local term, in that order.
  $resolve_id = function ($term) use (&$known_ids, $local_cv, $local_db) {
    if (!empty($known_ids[$term])) {
      return $known_ids[$term];
    }

    $matches = chado_select_record('cvterm', array('cvterm_id'), array(
      'name' => array(
        'data' => $term,
        'op' => 'LIKE',
      ),
    ), array(
      'limit' => 1,
    ));
    $term_id = current($matches)->cvterm_id ?? NULL;

    if (empty($term_id)) {
      $term_id = chado_insert_cvterm(array(
        'id' => "{$local_db->name}:{$term}",
        'name' => $term,
        'definition' => '',
        'cv_name' => $local_cv->name,
      ))->cvterm_id;
    }

    $known_ids[$term] = $term_id;
    return $term_id;
  };

  foreach ($meta as $name => $data) {
    $meta[$name]['attr_id'] = $resolve_id($data['attr']);
    $meta[$name]['struct_id'] = !empty($data['struct']) ? $resolve_id($data['struct']) : NULL;
  }
}
999 
/**
 * Builds phenotype records for one row of a phenotype data file.
 *
 * For every value column of the row this queues, in $options['records']:
 *  - a 'phenotype' record,
 *  - a 'stock_phenotype' record linking the phenotype to the tree's stock,
 *  - 'phenotypeprop' records for description and unit, plus time, min and
 *    max when they are available.
 * When $options['phenotype_count'] reaches the 'tpps_record_group' variable,
 * the queued records are handed to tpps_chado_insert_multi() and both the
 * queue and the counter are reset. $options['suffix'] is incremented once
 * per row.
 *
 * @param mixed $row
 *   The file row, indexed by column key.
 * @param array $options
 *   Processing options, passed by reference. Keys read here: 'iso',
 *   'records', 'meta_headers', 'file_headers', 'cvterms', 'meta',
 *   'file-empty', 'accession', 'suffix', 'tree_info', 'phenotype_count',
 *   'data_columns' and 'tree_id'.
 */
function tpps_process_phenotype_data($row, array &$options = array()) {
  $iso = $options['iso'] ?? FALSE;
  $records = &$options['records'];
  $meta_headers = $options['meta_headers'] ?? NULL;
  $file_headers = $options['file_headers'] ?? NULL;
  $cvterms = $options['cvterms'];
  $meta = $options['meta'];
  $empty = $options['file-empty'] ?? NULL;
  $accession = $options['accession'];
  $suffix = &$options['suffix'];
  $tree_info = &$options['tree_info'];
  $phenotype_count = &$options['phenotype_count'];
  $record_group = variable_get('tpps_record_group', 10000);

  // Column key => phenotype name. Initialised up front so that a row without
  // any value column is simply skipped instead of iterating an undefined
  // variable.
  $values = array();
  $tree_id = NULL;

  if (!$iso) {
    if (isset($meta_headers['name']) and (isset($meta_headers['value']))) {
      $id = $row[$meta_headers['value']];
      $values = array($id => $row[$meta_headers['name']]);
    }

    // Explicit data columns take precedence over the name/value pair.
    if (!empty($options['data_columns'])) {
      $values = $options['data_columns'];
    }

    $tree_id = $row[$options['tree_id']];
    $clone_col = $meta_headers['clone'] ?? NULL;
    if (isset($clone_col) and !empty($row[$clone_col]) and $row[$clone_col] !== $empty) {
      $tree_id .= "-" . $row[$clone_col];
    }
  }
  else {
    // The first non-empty cell is taken as the tree id; every following
    // column is a phenotype named after its file header.
    foreach ($row as $id => $value) {
      if (empty($tree_id)) {
        $tree_id = $value;
        continue;
      }
      $values[$id] = $file_headers[$id];
    }
  }

  foreach ($values as $id => $name) {
    // Metadata is keyed by the lower-cased phenotype name.
    $meta_key = strtolower($name);
    $attr_id = $iso ? $meta['attr_id'] : $meta[$meta_key]['attr_id'];
    $value = $row[$id];
    $phenotype_name = "$accession-$tree_id-$name-$suffix";
    $phenotype_fk = array('phenotype' => $phenotype_name);

    $records['phenotype'][$phenotype_name] = array(
      'uniquename' => $phenotype_name,
      'name' => $name,
      'attr_id' => $attr_id,
      'observable_id' => $meta[$meta_key]['struct_id'] ?? NULL,
      'value' => $value,
    );

    $records['stock_phenotype'][$phenotype_name] = array(
      'stock_id' => $tree_info[$tree_id]['stock_id'],
      '#fk' => $phenotype_fk,
    );

    // A time stored in the metadata wins over a per-row time column.
    if (isset($meta[$meta_key]['time'])) {
      $records['phenotypeprop']["$phenotype_name-time"] = array(
        'type_id' => $cvterms['time'],
        'value' => $meta[$meta_key]['time'],
        '#fk' => $phenotype_fk,
      );
    }
    elseif (isset($meta_headers['time'])) {
      $records['phenotypeprop']["$phenotype_name-time"] = array(
        'type_id' => $cvterms['time'],
        'value' => $row[$meta_headers['time']],
        '#fk' => $phenotype_fk,
      );
    }

    $records['phenotypeprop']["$phenotype_name-desc"] = array(
      'type_id' => $cvterms['desc'],
      'value' => $iso ? $meta['desc'] : $meta[$meta_key]['desc'],
      '#fk' => $phenotype_fk,
    );

    $records['phenotypeprop']["$phenotype_name-unit"] = array(
      'type_id' => $cvterms['unit'],
      'value' => $iso ? $meta['unit'] : $meta[$meta_key]['unit'],
      '#fk' => $phenotype_fk,
    );

    if (isset($meta[$meta_key]['min'])) {
      $records['phenotypeprop']["$phenotype_name-min"] = array(
        'type_id' => $cvterms['min'],
        'value' => $meta[$meta_key]['min'],
        '#fk' => $phenotype_fk,
      );
    }

    if (isset($meta[$meta_key]['max'])) {
      $records['phenotypeprop']["$phenotype_name-max"] = array(
        'type_id' => $cvterms['max'],
        'value' => $meta[$meta_key]['max'],
        '#fk' => $phenotype_fk,
      );
    }

    // Count the record that was just queued, then flush once the batch is
    // full. Counting first keeps the counter equal to the number of queued
    // phenotypes (it is 0 right after a flush) and matches the batching used
    // by the genotype and environment processors.
    $phenotype_count++;
    if ($phenotype_count >= $record_group) {
      tpps_chado_insert_multi($records);
      $records = array(
        'phenotype' => array(),
        'phenotypeprop' => array(),
        'stock_phenotype' => array(),
      );
      $phenotype_count = 0;
    }
  }
  $suffix++;
}
1142 
/**
 * Builds genotype records for one row of a genotype spreadsheet.
 *
 * For every column with a non-empty header this queues two 'feature' records
 * (marker and variant), a 'genotype', a 'genotype_call' and a
 * 'stock_genotype' record in $options['records']. When
 * $options['genotype_count'] reaches the 'tpps_record_group' variable, the
 * queue is passed to tpps_chado_insert_multi(), the count is added to
 * $options['genotype_total'] and the queue and counter are reset.
 *
 * @param mixed $row
 *   The spreadsheet row, indexed by column key.
 * @param array $options
 *   Processing options, passed by reference. Keys read here: 'type',
 *   'records', 'headers', 'tree_info', 'species_codes', 'genotype_count',
 *   'genotype_total', 'project_id', 'marker', 'type_cvterm',
 *   'seq_var_cvterm', 'multi_insert' and, for type 'other', 'tree_id'.
 */
function tpps_process_genotype_spreadsheet($row, array &$options = array()) {
  $records = &$options['records'];
  $tree_info = &$options['tree_info'];
  $genotype_count = &$options['genotype_count'];
  $genotype_total = &$options['genotype_total'];

  $type = $options['type'];
  $column_names = $options['headers'];
  $species_codes = $options['species_codes'];
  $project_id = $options['project_id'];
  $marker = $options['marker'];
  $type_cvterm = $options['type_cvterm'];
  $seq_var_cvterm = $options['seq_var_cvterm'];
  $multi_insert_options = $options['multi_insert'];
  $batch_size = variable_get('tpps_record_group', 10000);

  $stock_id = NULL;
  if ($type == 'other') {
    // For 'other' markers the tree id column is named explicitly.
    $tree_key = trim($row[$options['tree_id']]);
    $stock_id = $tree_info[$tree_key]['stock_id'];
    $current_id = $tree_info[$tree_key]['organism_id'];
    $species_code = $species_codes[$current_id];
  }

  foreach ($row as $column => $cell) {
    if (empty($column_names[$column])) {
      continue;
    }

    if ($stock_id === NULL) {
      // No tree resolved yet: this headed column is read as the tree id.
      $tree_key = trim($cell);
      $stock_id = $tree_info[$tree_key]['stock_id'];
      $current_id = $tree_info[$tree_key]['organism_id'];
      $species_code = $species_codes[$current_id];
      continue;
    }

    $genotype_count++;

    // In SSR files a zero value is recorded as "NA".
    if ($type == 'ssrs' and in_array($cell, array(0, "0"), TRUE)) {
      $cell = "NA";
    }

    $variant_name = $column_names[$column];
    $marker_name = $variant_name . $marker;
    $genotype_name = "$marker-$variant_name-$species_code-$cell";
    $call_key = "$stock_id-$genotype_name";

    // Marker first, then variant; both are stored with the same type.
    foreach (array($marker_name, $variant_name) as $feature_name) {
      $records['feature'][$feature_name] = array(
        'organism_id' => $current_id,
        'uniquename' => $feature_name,
        'type_id' => $seq_var_cvterm,
      );
    }

    $records['genotype'][$genotype_name] = array(
      'name' => $genotype_name,
      'uniquename' => $genotype_name,
      'description' => $cell,
      'type_id' => $type_cvterm,
    );

    $records['genotype_call'][$call_key] = array(
      'project_id' => $project_id,
      'stock_id' => $stock_id,
      '#fk' => array(
        'genotype' => $genotype_name,
        'variant' => $variant_name,
        'marker' => $marker_name,
      ),
    );

    $records['stock_genotype'][$call_key] = array(
      'stock_id' => $stock_id,
      '#fk' => array(
        'genotype' => $genotype_name,
      ),
    );

    if ($genotype_count < $batch_size) {
      continue;
    }

    // Batch is full: write it out and start a fresh queue.
    tpps_chado_insert_multi($records, $multi_insert_options);
    $records = array(
      'feature' => array(),
      'genotype' => array(),
      'genotype_call' => array(),
      'stock_genotype' => array(),
    );
    $genotype_total += $genotype_count;
    $genotype_count = 0;
  }
}
1247 
/**
 * Returns the SSR file headers with per-allele suffixes for the given ploidy.
 *
 * Haploid files are returned unchanged. For diploid files marker columns get
 * "_A"/"_B" suffixes, for polyploid files "_1", "_2", ... suffixes. Two
 * layouts are recognised: marker names repeated on every column, or a marker
 * name followed by blank header cells for the remaining copies. The first
 * column is never renamed.
 *
 * @param int $fid
 *   The file id passed to tpps_file_headers().
 * @param string $ploidy
 *   One of 'Haploid', 'Diploid' or 'Polyploid'. Any other value leaves the
 *   non-blank headers without suffixes.
 *
 * @return array
 *   The headers, keyed like the result of tpps_file_headers(). Blank headers
 *   that could not be resolved are omitted, and keys filled in for blank
 *   headers are appended after the existing ones.
 */
function tpps_ssrs_headers($fid, $ploidy) {
  $headers = tpps_file_headers($fid);
  if ($ploidy == 'Haploid') {
    return $headers;
  }

  $row_len = count($headers);
  $keys = array_keys($headers);

  // Keep the first column plus every non-blank header. This is a single pass
  // that does not stop early when a blank header sits under a falsy key.
  $results = array();
  foreach ($keys as $position => $key) {
    if ($position === 0 or $headers[$key] != NULL) {
      $results[$key] = $headers[$key];
    }
  }

  $num_headers = count($results);
  $num_unique_headers = count(array_unique($results));

  // Copies per marker for the polyploid layouts. The first column is
  // excluded on both sides of the division, and the result is clamped to at
  // least 1 so that the modulo below can never divide by zero.
  $copies_when_repeated = max(1, intdiv($num_headers - 1, max(1, $num_unique_headers - 1)));
  $copies_when_blank = max(1, intdiv($row_len - 1, max(1, $num_headers - 1)));

  $marker_num = 0;
  // Name of the most recent non-blank marker, reused for blank columns.
  $last = '';

  // Start at 1: the first column is left untouched.
  for ($position = 1; $position < $row_len; $position++) {
    $key = $keys[$position];
    $next_key = $keys[$position + 1] ?? NULL;

    switch ($ploidy) {
      case 'Diploid':
        if ($num_headers == ($row_len + 1) / 2) {
          // Every other marker column name is left blank.
          if (array_key_exists($key, $results)) {
            $last = $results[$key];
            $results[$key] .= "_A";
            break;
          }
          $results[$key] = $last . "_B";
          break;
        }

        if ($num_headers == $row_len and $num_headers != $num_unique_headers) {
          // All marker column names are filled out and duplicated: the
          // first of a pair matches its (still unsuffixed) successor.
          if ($next_key !== NULL and $results[$key] == $results[$next_key]) {
            $results[$key] .= "_A";
            break;
          }
          $results[$key] .= "_B";
        }
        break;

      case 'Polyploid':
        if ($num_headers == $row_len) {
          // All of the marker column names are filled out.
          if ($num_unique_headers != $num_headers) {
            // Duplicated marker names: append _1, _2, up to the ploidy.
            $ploidy_suffix = ($marker_num % $copies_when_repeated) + 1;
            $results[$key] .= "_$ploidy_suffix";
          }
          $marker_num++;
          break;
        }

        // Marker name followed by blank cells for the remaining copies.
        $ploidy_suffix = ($marker_num % $copies_when_blank) + 1;
        if (array_key_exists($key, $results)) {
          $last = $results[$key];
          $results[$key] .= "_$ploidy_suffix";
        }
        else {
          $results[$key] = "{$last}_$ploidy_suffix";
        }
        $marker_num++;
        break;

      default:
        break;
    }
  }

  return $results;
}
1349 
/**
 * Returns the headers of the selected columns of an "other" marker file.
 *
 * @param int $fid
 *   The file id passed to tpps_file_headers().
 * @param array $cols
 *   The keys of the columns to keep.
 *
 * @return array
 *   The header of each requested column, keyed by column key and listed in
 *   the order given by $cols.
 */
function tpps_other_marker_headers($fid, array $cols) {
  $all_headers = tpps_file_headers($fid);

  $selected = array();
  foreach ($cols as $column) {
    $selected[$column] = $all_headers[$column];
  }

  return $selected;
}
1373 
/**
 * Builds environmental layer phenotype records for one tree row.
 *
 * Reads the 'gps_latitude' and 'gps_longitude' stockprop values of the tree's
 * stock. For every layer parameter in $options['layers_params'] it then
 * fetches the layer value through tpps_get_environmental_layer_data() and
 * queues 'phenotype' and 'stock_phenotype' records (plus 'phenotype_cvterm'
 * when the parameter type is not 'attr_id') in $options['records']. When
 * $options['env_count'] reaches the 'tpps_record_group' variable, the queue
 * is passed to tpps_chado_insert_multi() and reset. $options['suffix'] is
 * incremented once per row.
 *
 * @param mixed $row
 *   The file row, indexed by column key.
 * @param array $options
 *   Processing options, passed by reference. Keys read here: 'tree_id',
 *   'records', 'tree_info', 'layers_params', 'env_count', 'accession',
 *   'suffix' and 'env_cvterm'.
 */
function tpps_process_environment_layers($row, array &$options = array()) {
  $records = &$options['records'];
  $tree_info = &$options['tree_info'];
  $env_count = &$options['env_count'];
  $suffix = &$options['suffix'];

  $id_col = $options['tree_id'];
  $layers_params = $options['layers_params'];
  $accession = $options['accession'];
  $env_cvterm = $options['env_cvterm'];
  $batch_size = variable_get('tpps_record_group', 10000);

  $tree_id = $row[$id_col];
  $stock_id = $tree_info[$tree_id]['stock_id'];

  // Latitude is looked up first, then longitude.
  $coordinates = array();
  foreach (array('gps_latitude', 'gps_longitude') as $prop_name) {
    $matches = chado_select_record('stockprop', array('value'), array(
      'stock_id' => $stock_id,
      'type_id' => array(
        'name' => $prop_name,
      ),
    ), array(
      'limit' => 1,
    ));
    $coordinates[$prop_name] = current($matches)->value;
  }
  $lat = $coordinates['gps_latitude'];
  $long = $coordinates['gps_longitude'];

  foreach ($layers_params as $layer_id => $params) {
    $layer_name = db_select('cartogratree_layers', 'l')
      ->fields('l', array('title'))
      ->condition('layer_id', $layer_id)
      ->execute()
      ->fetchObject()
      ->title;

    foreach ($params as $param_id => $param) {
      $param_name = db_select('cartogratree_fields', 'f')
        ->fields('f', array('field_name'))
        ->condition('field_id', $param_id)
        ->execute()
        ->fetchObject()
        ->field_name;

      $phenotype_name = "$accession-$tree_id-$layer_name-$param_name-$suffix";

      $value = tpps_get_environmental_layer_data($layer_id, $lat, $long, $param_name);
      $type = variable_get("tpps_param_{$param_id}_type", 'attr_id');
      $stored_as_attr = ($type == 'attr_id');

      if ($stored_as_attr) {
        // The environment cvterm is stored directly on the phenotype.
        $records['phenotype'][$phenotype_name] = array(
          'uniquename' => $phenotype_name,
          'name' => $param_name,
          'attr_id' => $env_cvterm,
          'value' => $value,
        );
      }
      else {
        // Name and value are stored as strings, and the environment cvterm
        // is attached through a separate phenotype_cvterm record.
        $records['phenotype'][$phenotype_name] = array(
          'uniquename' => $phenotype_name,
          'name' => (string) $param_name,
          'value' => (string) $value,
        );

        $records['phenotype_cvterm'][$phenotype_name] = array(
          'cvterm_id' => $env_cvterm,
          '#fk' => array(
            'phenotype' => $phenotype_name,
          ),
        );
      }

      // Both storage types link the phenotype to the stock the same way.
      $records['stock_phenotype'][$phenotype_name] = array(
        'stock_id' => $stock_id,
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );

      $env_count++;
      if ($env_count < $batch_size) {
        continue;
      }

      // Batch is full: write it out and start a fresh queue.
      tpps_chado_insert_multi($records);
      $records = array(
        'phenotype' => array(),
        'phenotype_cvterm' => array(),
        'stock_phenotype' => array(),
      );
      $env_count = 0;
    }
  }
  $suffix++;
}
1491 
/**
 * Builds manually-entered environmental phenotype records for one tree row.
 *
 * For every entry of $options['env_meta'] this queues a 'phenotype' record,
 * a 'stock_phenotype' link and description/unit 'phenotypeprop' records in
 * $options['records']. When $options['env_count'] reaches the
 * 'tpps_record_group' variable, the queue is passed to
 * tpps_chado_insert_multi() and reset. $options['suffix'] is incremented
 * once per row.
 *
 * @param mixed $row
 *   The file row, indexed by column key.
 * @param array $options
 *   Processing options, passed by reference. Keys read here: 'records',
 *   'accession', 'tree_id', 'suffix', 'env_meta', 'env_count', 'desc_id',
 *   'unit_id', 'env_cvterm' and 'tree_info'.
 */
function tpps_process_environment_manual($row, array &$options = array()) {
  $records = &$options['records'];
  $suffix = &$options['suffix'];
  $env_count = &$options['env_count'];
  $tree_info = &$options['tree_info'];

  $accession = $options['accession'];
  $id_col = $options['tree_id'];
  $env_meta = $options['env_meta'];
  $desc_id = $options['desc_id'];
  $unit_id = $options['unit_id'];
  $env_cvterm = $options['env_cvterm'];
  $batch_size = variable_get('tpps_record_group', 10000);

  $tree_id = $row[$id_col];
  foreach ($env_meta as $environment) {
    $name = $environment['name'];
    $desc = $environment['desc'];
    $unit = $environment['unit'];
    $val = $environment['val'];

    $phenotype_name = "$accession-$tree_id-$name-$suffix";
    $phenotype_fk = array('phenotype' => $phenotype_name);

    $records['phenotype'][$phenotype_name] = array(
      'uniquename' => $phenotype_name,
      'name' => $name,
      'attr_id' => $env_cvterm,
      'value' => $val,
    );

    $records['stock_phenotype'][$phenotype_name] = array(
      'stock_id' => $tree_info[$tree_id]['stock_id'],
      '#fk' => $phenotype_fk,
    );

    // Description first, then unit.
    $props = array(
      'desc' => array($desc_id, $desc),
      'unit' => array($unit_id, $unit),
    );
    foreach ($props as $prop_key => $prop) {
      $records['phenotypeprop']["$phenotype_name-$prop_key"] = array(
        'type_id' => $prop[0],
        'value' => $prop[1],
        '#fk' => $phenotype_fk,
      );
    }

    $env_count++;
    if ($env_count < $batch_size) {
      continue;
    }

    // Batch is full: write it out and start a fresh queue.
    tpps_chado_insert_multi($records);
    $records = array(
      'phenotype' => array(),
      'stock_phenotype' => array(),
      'phenotypeprop' => array(),
    );
    $env_count = 0;
  }
  $suffix++;
}
tpps_process_genotype_spreadsheet($row, array &$options=array())
const TPPS_PAGE_1
Definition: tpps.module:10
tpps_ssrs_headers($fid, $ploidy)
tpps_update_submission(array $state, array $options=array())
tpps_process_phenotype_meta($row, array &$options=array())
tpps_process_environment_layers($row, array &$options=array())
tpps_process_phenotype_data($row, array &$options=array())
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:814
tpps_file_parsing($accession)
tpps_process_environment_manual($row, array &$options=array())
tpps_load_submission($accession, $state=TRUE)
Definition: submissions.inc:27
tpps_other_marker_headers($fid, array $cols)
const TPPS_PAGE_4
Definition: tpps.module:13
tpps_get_environmental_layer_data($layer_id, $lat, $long, $param)
const TPPS_PAGE_3
Definition: tpps.module:12
tpps_file_iterator($fid, $function, array &$options=array())
Definition: file_utils.inc:853
tpps_refine_phenotype_meta(array &$meta)