Tripal Plant PopGen Submit
file_parsing.inc File Reference

Go to the source code of this file.

Functions

 tpps_file_parsing ($accession)
 
 tpps_other_marker_headers ($fid, array $cols)
 
 tpps_process_environment_layers ($row, array &$options=array())
 
 tpps_process_environment_manual ($row, array &$options=array())
 
 tpps_process_genotype_spreadsheet ($row, array &$options=array())
 
 tpps_process_phenotype_data ($row, array &$options=array())
 
 tpps_process_phenotype_meta ($row, array &$options=array())
 
 tpps_refine_phenotype_meta (array &$meta)
 
 tpps_ssrs_headers ($fid, $ploidy)
 

Detailed Description

Defines functions to parse genotype and phenotype files from the submission.

Definition in file file_parsing.inc.

Function Documentation

tpps_file_parsing ($accession)

Parses genotype and phenotype files and creates records in the database.

This function is only called from Tripal Jobs because of the size of genotype and phenotype files and the number of records needed to properly store and integrate the data.

Parameters
string $accession: The accession number of the completed submission.

Definition at line 20 of file file_parsing.inc.

20  {
21  $form_state = tpps_load_submission($accession);
22 
23  $fourthpage = $form_state['saved_values'][TPPS_PAGE_4];
24  $firstpage = $form_state['saved_values'][TPPS_PAGE_1];
25  $organism_number = $firstpage['organism']['number'];
26  $file_rank = &$form_state['file_rank'];
27  $record_group = variable_get('tpps_record_group', 10000);
28  $project_id = $form_state['ids']['project_id'];
29  $species_codes = array();
30  for ($i = 1; $i <= $organism_number; $i++) {
31  $species_codes[$form_state['ids']['organism_ids'][$i]] = current(chado_select_record('organismprop', array('value'), array(
32  'type_id' => array(
33  'name' => 'organism 4 letter code',
34  ),
35  'organism_id' => $form_state['ids']['organism_ids'][$i],
36  ), array(
37  'limit' => 1,
38  )))->value;
39  }
40 
41  for ($i = 1; $i <= $organism_number; $i++) {
42  $parts = explode(" ", $firstpage['organism'][$i]);
43  $genus = $parts[0];
44  $species = implode(" ", array_slice($parts, 1));
45 
46  if (isset($parts[2]) and ($parts[2] == 'var.' or $parts[2] == 'subsp.')) {
47  $infra = implode(" ", array_slice($parts, 2));
48  }
49  else {
50  $infra = NULL;
51  }
52 
53  if (isset($fourthpage["organism-$i"]['phenotype'])) {
54  $phenotype = $fourthpage["organism-$i"]['phenotype'];
55 
56  // Get appropriate cvterms.
57  $phenotype_cvterms = array(
58  'time' => chado_get_cvterm(array(
59  'name' => 'time',
60  'is_obsolete' => 0,
61  ))->cvterm_id,
62  'desc' => chado_get_cvterm(array(
63  'name' => 'description',
64  'cv_id' => array(
65  'name' => 'schema',
66  ),
67  'is_obsolete' => 0,
68  ))->cvterm_id,
69  'unit' => chado_get_cvterm(array(
70  'name' => 'unit',
71  'cv_id' => array(
72  'name' => 'uo',
73  ),
74  'is_obsolete' => 0,
75  ))->cvterm_id,
76  'min' => chado_get_cvterm(array(
77  'cv_id' => array(
78  'name' => 'ncit',
79  ),
80  'name' => 'Minimum',
81  'is_obsolete' => 0,
82  ))->cvterm_id,
83  'max' => chado_get_cvterm(array(
84  'cv_id' => array(
85  'name' => 'ncit',
86  ),
87  'name' => 'Maximum',
88  'is_obsolete' => 0,
89  ))->cvterm_id,
90  );
91 
92  $records = array(
93  'phenotype' => array(),
94  'phenotypeprop' => array(),
95  'stock_phenotype' => array(),
96  );
97  $phenotype_count = 0;
98 
99  $options = array(
100  'records' => $records,
101  'cvterms' => $phenotype_cvterms,
102  'accession' => $form_state['accession'],
103  'tree_info' => $form_state['tree_info'],
104  'suffix' => 0,
105  'phenotype_count' => $phenotype_count,
106  );
107 
108  if (empty($phenotype['iso-check'])) {
109  $phenotype_number = $phenotype['phenotypes-meta']['number'];
110  $phenotypes_meta = array();
111 
112  tpps_chado_insert_record('projectprop', array(
113  'project_id' => $project_id,
114  'type_id' => array(
115  'cv_id' => array(
116  'name' => 'schema',
117  ),
118  'name' => 'url',
119  'is_obsolete' => 0,
120  ),
121  'value' => file_create_url(file_load($phenotype['file'])->uri),
122  'rank' => $file_rank,
123  ));
124  $file_rank++;
125 
126  // Populate $phenotypes_meta with manually entered metadata.
127  for ($j = 1; $j <= $phenotype_number; $j++) {
128  $name = strtolower($phenotype['phenotypes-meta'][$j]['name']);
129  $phenotypes_meta[$name] = array();
130  $phenotypes_meta[$name]['attr'] = $phenotype['phenotypes-meta'][$j]['attribute'];
131  $phenotypes_meta[$name]['desc'] = $phenotype['phenotypes-meta'][$j]['description'];
132  $phenotypes_meta[$name]['unit'] = $phenotype['phenotypes-meta'][$j]['units'];
133  if ($phenotype['phenotypes-meta'][$j]['struct-check'] == '1') {
134  $phenotypes_meta[$name]['struct'] = $phenotype['phenotypes-meta'][$j]['structure'];
135  }
136  if (!empty($phenotype['phenotypes-meta'][$j]['val-check']) or !empty($phenotype['phenotypes-meta'][$j]['bin-check'])) {
137  $phenotypes_meta[$name]['min'] = $phenotype['phenotypes-meta'][$j]['min'];
138  $phenotypes_meta[$name]['max'] = $phenotype['phenotypes-meta'][$j]['max'];
139  }
140  if ($phenotype['phenotypes-meta'][$j]['time-check'] == '1') {
141  $phenotypes_meta[$name]['time'] = $phenotype['phenotypes-meta'][$j]['time'];
142  }
143  }
144 
145  if ($phenotype['check'] == '1') {
146  $phenotype_meta_file_id = tpps_chado_insert_record('projectprop', array(
147  'project_id' => $project_id,
148  'type_id' => array(
149  'cv_id' => array(
150  'name' => 'schema',
151  ),
152  'name' => 'url',
153  'is_obsolete' => 0,
154  ),
155  'value' => file_create_url(file_load($phenotype['metadata'])->uri),
156  'rank' => $file_rank,
157  ));
158  $file_rank++;
159 
160  // Get metadata column values.
161  $groups = $phenotype['metadata-groups'];
162  $column_vals = $phenotype['metadata-columns'];
163  $struct = array_search('5', $column_vals);
164  $min = array_search('6', $column_vals);
165  $max = array_search('7', $column_vals);
166  $columns = array(
167  'name' => $groups['Phenotype Id']['1'],
168  'attr' => $groups['Attribute']['2'],
169  'desc' => $groups['Description']['3'],
170  'unit' => $groups['Units']['4'],
171  'struct' => !empty($struct) ? $struct : NULL,
172  'min' => !empty($min) ? $min : NULL,
173  'max' => !empty($max) ? $max : NULL,
174  );
175 
176  $meta_options = array(
177  'no_header' => $phenotype['metadata-no-header'],
178  'meta_columns' => $columns,
179  'meta' => &$phenotypes_meta,
180  );
181 
182  tpps_file_iterator($phenotype['metadata'], 'tpps_process_phenotype_meta', $meta_options);
183  }
184 
185  tpps_refine_phenotype_meta($phenotypes_meta);
186 
187  // Get metadata header values.
188  $groups = $phenotype['file-groups'];
189  $column_vals = $phenotype['file-columns'];
190  $time_index = ($phenotype['format'] == 0) ? '2' : '4';
191  $clone_index = ($phenotype['format'] == 0) ? '3' : '5';
192  $time = array_search($time_index, $column_vals);
193  $clone = array_search($clone_index, $column_vals);
194  $meta_headers = array(
195  'name' => $groups['Phenotype Name/Identifier']['2'] ?? NULL,
196  'value' => $groups['Phenotype Value(s)']['3'] ?? NULL,
197  'time' => !empty($time) ? $time : NULL,
198  'clone' => !empty($clone) ? $clone : NULL,
199  );
200 
201  // Get data header values.
202  if ($phenotype['format'] == 0) {
203  $file_headers = tpps_file_headers($phenotype['file'], $phenotype['file-no-header']);
204  $data_columns = array();
205  foreach ($groups['Phenotype Data']['0'] as $col) {
206  $data_columns[$col] = $file_headers[$col];
207  }
208  unset($file_headers);
209  }
210 
211  $options['no_header'] = $phenotype['file-no-header'];
212  $options['tree_id'] = $groups['Tree Identifier']['1'];
213  $options['meta_headers'] = $meta_headers;
214  $options['data_columns'] = $data_columns ?? NULL;
215  $options['meta'] = $phenotypes_meta;
216  $options['file_empty'] = $phenotype['file-empty'];
217 
218  tpps_file_iterator($phenotype['file'], 'tpps_process_phenotype_data', $options);
219  }
220  else {
221  tpps_chado_insert_record('projectprop', array(
222  'project_id' => $project_id,
223  'type_id' => array(
224  'cv_id' => array(
225  'name' => 'schema',
226  ),
227  'name' => 'url',
228  'is_obsolete' => 0,
229  ),
230  'value' => file_create_url(file_load($phenotype['iso'])->uri),
231  'rank' => $file_rank,
232  ));
233  $file_rank++;
234 
235  $options['iso'] = TRUE;
236  $options['records'] = $records;
237  $options['cvterms'] = $phenotype_cvterms;
238  $options['file_headers'] = tpps_file_headers($phenotype['iso']);
239  $options['meta'] = array(
240  'desc' => "Mass Spectrometry",
241  'unit' => "intensity (arbitrary units)",
242  'attr_id' => chado_get_cvterm(array(
243  'name' => 'intensity',
244  'is_obsolete' => 0,
245  ))->cvterm_id,
246  );
247 
248  tpps_file_iterator($phenotype['iso'], 'tpps_process_phenotype_data', $options);
249  }
250  tpps_chado_insert_multi($options['records']);
251  unset($options['records']);
252  }
253 
254  if (isset($fourthpage["organism-$i"]['genotype'])) {
255  $genotype = $fourthpage["organism-$i"]['genotype'];
256  $genotype_count = 0;
257  $genotype_total = 0;
258  $seq_var_cvterm = chado_get_cvterm(array(
259  'cv_id' => array(
260  'name' => 'sequence',
261  ),
262  'name' => 'sequence_variant',
263  'is_obsolete' => 0,
264  ))->cvterm_id;
265  $overrides = array(
266  'genotype_call' => array(
267  'variant' => array(
268  'table' => 'feature',
269  'columns' => array(
270  'variant_id' => 'feature_id',
271  ),
272  ),
273  'marker' => array(
274  'table' => 'feature',
275  'columns' => array(
276  'marker_id' => 'feature_id',
277  ),
278  ),
279  ),
280  );
281 
282  $records = array(
283  'feature' => array(),
284  'genotype' => array(),
285  'genotype_call' => array(),
286  'stock_genotype' => array(),
287  );
288 
289  $multi_insert_options = array(
290  'fk_overrides' => $overrides,
291  'entities' => array(
292  'label' => 'Genotype',
293  'table' => 'genotype',
294  ),
295  );
296 
297  $options = array(
298  'records' => $records,
299  'tree_info' => $form_state['tree_info'],
300  'species_codes' => $species_codes,
301  'genotype_count' => $genotype_count,
302  'genotype_total' => &$genotype_total,
303  'project_id' => $project_id,
304  'seq_var_cvterm' => $seq_var_cvterm,
305  'overrides' => $overrides,
306  'multi_insert' => $multi_insert_options,
307  );
308 
309  /*if ($genotype['ref-genome'] == 'bio') {
310 
311  $bioproject_id = tpps_chado_insert_record('dbxref', array(
312  'db_id' => array(
313  'name' => 'NCBI BioProject',
314  ),
315  'accession' => $genotype['BioProject-id'],
316  ));
317 
318  $project_dbxref_id = tpps_chado_insert_record('project_dbxref', array(
319  'project_id' => $project_id,
320  'dbxref_id' => $bioproject_id,
321  ));
322 
323  $bioproject_assembly_file_ids = array();
324  foreach ($genotype['assembly-auto'] as $key => $val) {
325  if ($val == '1') {
326  array_push($bioproject_assembly_file_ids, tpps_chado_insert_record('projectprop', array(
327  'project_id' => $project_id,
328  'type_id' => array(
329  'cv_id' => array(
330  'name' => 'schema',
331  ),
332  'name' => 'url',
333  'is_obsolete' => 0,
334  ),
335  'value' => "https://www.ncbi.nlm.nih.gov/nuccore/$key",
336  'rank' => $file_rank,
337  )));
338  $file_rank++;
339  }
340  }
341  }
342  else*/
343  if ($genotype['ref-genome'] == 'manual' or $genotype['ref-genome'] == 'manual2' or $genotype['ref-genome'] == 'url') {
344  if ($genotype['tripal_fasta']['file_upload']) {
345  // Uploaded new file.
346  $assembly_user = $genotype['tripal_fasta']['file_upload'];
347  $assembly_user_id = tpps_chado_insert_record('projectprop', array(
348  'project_id' => $project_id,
349  'type_id' => array(
350  'cv_id' => array(
351  'name' => 'schema',
352  ),
353  'name' => 'url',
354  'is_obsolete' => 0,
355  ),
356  'value' => file_create_url(file_load($assembly_user)->uri),
357  'rank' => $file_rank,
358  ));
359  $file_rank++;
360  }
361  if ($genotype['tripal_fasta']['file_upload_existing']) {
362  // Uploaded existing file.
363  $assembly_user = $genotype['tripal_fasta']['file_upload_existing'];
364  $assembly_user_id = tpps_chado_insert_record('projectprop', array(
365  'project_id' => $project_id,
366  'type_id' => array(
367  'cv_id' => array(
368  'name' => 'schema',
369  ),
370  'name' => 'url',
371  'is_obsolete' => 0,
372  ),
373  'value' => file_create_url(file_load($assembly_user)->uri),
374  'rank' => $file_rank,
375  ));
376  $file_rank++;
377  }
378  if ($genotype['tripal_fasta']['file_remote']) {
379  // Provided url to file.
380  $assembly_user = $genotype['tripal_fasta']['file_remote'];
381  $assembly_user_id = tpps_chado_insert_record('projectprop', array(
382  'project_id' => $project_id,
383  'type_id' => array(
384  'cv_id' => array(
385  'name' => 'schema',
386  ),
387  'name' => 'url',
388  'is_obsolete' => 0,
389  ),
390  'value' => $assembly_user,
391  'rank' => $file_rank,
392  ));
393  $file_rank++;
394  }
395  }
396  elseif ($genotype['ref-genome'] != 'none') {
397  $reference_genome_id = tpps_chado_insert_record('projectprop', array(
398  'project_id' => $project_id,
399  'type_id' => array(
400  'cv_id' => array(
401  'name' => 'sequence',
402  ),
403  'name' => 'reference_genome',
404  'is_obsolete' => 0,
405  ),
406  'value' => $genotype['ref-genome'],
407  ));
408  }
409 
410  if (!empty($genotype['files']['file-type']['SNPs Genotype Assay'])) {
411  tpps_chado_insert_record('projectprop', array(
412  'project_id' => $project_id,
413  'type_id' => array(
414  'cv_id' => array(
415  'name' => 'schema',
416  ),
417  'name' => 'url',
418  'is_obsolete' => 0,
419  ),
420  'value' => file_create_url(file_load($genotype['files']['snps-assay'])->uri),
421  'rank' => $file_rank,
422  ));
423  $file_rank++;
424 
425  $options['type'] = 'snp';
426  $options['headers'] = tpps_file_headers($genotype['files']['snps-assay']);
427  $options['marker'] = 'SNP';
428  $options['type_cvterm'] = chado_get_cvterm(array(
429  'cv_id' => array(
430  'name' => 'sequence',
431  ),
432  'name' => 'SNP',
433  'is_obsolete' => 0,
434  ))->cvterm_id;
435 
436  tpps_file_iterator($genotype['files']['snps-assay'], 'tpps_process_genotype_spreadsheet', $options);
437 
438  tpps_chado_insert_multi($options['records'], $multi_insert_options);
439  unset($options['records']);
440  $genotype_total += $genotype_count;
441  $genotype_count = 0;
442  }
443 
444  if (!empty($genotype['files']['file-type']['Assay Design']) and $genotype['marker-type']['SNPs']) {
445  tpps_chado_insert_record('projectprop', array(
446  'project_id' => $project_id,
447  'type_id' => array(
448  'cv_id' => array(
449  'name' => 'schema',
450  ),
451  'name' => 'url',
452  'is_obsolete' => 0,
453  ),
454  'value' => file_create_url(file_load($genotype['files']['assay-design'])->uri),
455  'rank' => $file_rank,
456  ));
457  $file_rank++;
458  }
459 
460  if (!empty($genotype['files']['file-type']['SSRs/cpSSRs Genotype Spreadsheet'])) {
461  tpps_chado_insert_record('projectprop', array(
462  'project_id' => $project_id,
463  'type_id' => array(
464  'cv_id' => array(
465  'name' => 'schema',
466  ),
467  'name' => 'url',
468  'is_obsolete' => 0,
469  ),
470  'value' => file_create_url(file_load($genotype['files']['ssrs'])->uri),
471  'rank' => $file_rank,
472  ));
473  $file_rank++;
474 
475  $options['type'] = 'ssrs';
476  $options['headers'] = tpps_ssrs_headers($genotype['files']['ssrs'], $genotype['files']['ploidy']);
477  $options['marker'] = $genotype['SSRs/cpSSRs'];
478  $options['type_cvterm'] = chado_get_cvterm(array(
479  'cv_id' => array(
480  'name' => 'sequence',
481  ),
482  'name' => 'microsatellite',
483  'is_obsolete' => 0,
484  ))->cvterm_id;
485 
486  tpps_file_iterator($genotype['files']['ssrs'], 'tpps_process_genotype_spreadsheet', $options);
487 
488  tpps_chado_insert_multi($options['records'], $multi_insert_options);
489  unset($options['records']);
490  $genotype_count = 0;
491  }
492 
493  if (!empty($genotype['files']['file-type']['Other Marker Genotype Spreadsheet'])) {
494  tpps_chado_insert_record('projectprop', array(
495  'project_id' => $project_id,
496  'type_id' => array(
497  'cv_id' => array(
498  'name' => 'schema',
499  ),
500  'name' => 'url',
501  'is_obsolete' => 0,
502  ),
503  'value' => file_create_url(file_load($genotype['files']['other'])->uri),
504  'rank' => $file_rank,
505  ));
506  $file_rank++;
507 
508  $groups = $genotype['files']['other-groups'];
509 
510  $options['type'] = 'other';
511  $options['headers'] = tpps_other_marker_headers($genotype['files']['other'], $groups['Genotype Data'][0]);
512  $options['marker'] = $genotype['other-marker'];
513  $options['type_cvterm'] = chado_get_cvterm(array(
514  'cv_id' => array(
515  'name' => 'sequence',
516  ),
517  'name' => 'genetic_marker',
518  'is_obsolete' => 0,
519  ))->cvterm_id;
520  $options['tree_id'] = $groups['Tree Id'][1];
521 
522  tpps_file_iterator($genotype['files']['other'], 'tpps_process_genotype_spreadsheet', $options);
523 
524  tpps_chado_insert_multi($options['records'], $multi_insert_options);
525  unset($options['records']);
526  $genotype_count = 0;
527  }
528 
529  if (!empty($genotype['files']['file-type']['VCF'])) {
530  // TODO: we probably want to use tpps_file_iterator to parse vcf files.
531 
532  $genotype_vcf_id = tpps_chado_insert_record('projectprop', array(
533  'project_id' => $project_id,
534  'type_id' => array(
535  'cv_id' => array(
536  'name' => 'schema',
537  ),
538  'name' => 'url',
539  'is_obsolete' => 0,
540  ),
541  'value' => file_create_url(file_load($genotype['files']['vcf'])->uri),
542  'rank' => $file_rank,
543  ));
544  $file_rank++;
545 
546  $marker = 'SNP';
547 
548  $records['genotypeprop'] = array();
549 
550  $snp_cvterm = chado_get_cvterm(array(
551  'cv_id' => array(
552  'name' => 'sequence',
553  ),
554  'name' => 'SNP',
555  'is_obsolete' => 0,
556  ))->cvterm_id;
557  $format_cvterm = chado_get_cvterm(array(
558  'cv_id' => array(
559  'name' => 'sep',
560  ),
561  'name' => 'format',
562  'is_obsolete' => 0,
563  ))->cvterm_id;
564  $qual_cvterm = chado_get_cvterm(array(
565  'cv_id' => array(
566  'name' => 'sequence',
567  ),
568  'name' => 'quality_value',
569  'is_obsolete' => 0,
570  ))->cvterm_id;
571  $filter_cvterm = chado_get_cvterm(array(
572  'cv_id' => array(
573  'name' => 'operation',
574  ),
575  'name' => 'Sequence contamination filtering',
576  'is_obsolete' => 0,
577  ))->cvterm_id;
578  $freq_cvterm = chado_get_cvterm(array(
579  'cv_id' => array(
580  'name' => 'sequence',
581  ),
582  'name' => 'allelic_frequency',
583  'is_obsolete' => 0,
584  ))->cvterm_id;
585  $depth_cvterm = chado_get_cvterm(array(
586  'cv_id' => array(
587  'name' => 'ncit',
588  ),
589  'name' => 'Read Depth',
590  'is_obsolete' => 0,
591  ))->cvterm_id;
592  $n_sample_cvterm = chado_get_cvterm(array(
593  'name' => 'number_samples',
594  'is_obsolete' => 0,
595  ))->cvterm_id;
596 
597  $vcf_file = file_load($genotype['files']['vcf']);
598  $location = drupal_realpath($vcf_file->uri);
599  $vcf_content = fopen($location, 'r');
600  $stocks = array();
601  $format = "";
602  $current_id = $form_state['ids']['organism_ids'][$i];
603  $species_code = $species_codes[$current_id];
604 
605  // dpm('start: ' . date('r'));.
606  while (($vcf_line = fgets($vcf_content)) !== FALSE) {
607  if ($vcf_line[0] != '#') {
608  $genotype_count++;
609  $vcf_line = explode("\t", $vcf_line);
610  $scaffold_id = &$vcf_line[0];
611  $position = &$vcf_line[1];
612  $marker_name = &$vcf_line[2];
613  $ref = &$vcf_line[3];
614  $alt = &$vcf_line[4];
615  $qual = &$vcf_line[5];
616  $filter = &$vcf_line[6];
617  $info = &$vcf_line[7];
618 
619  if (empty($variant_name) or $variant_name == '.') {
620  $variant_name = "{$scaffold_id}{$position}$ref:$alt";
621  }
622  $marker_name = $variant_name . $marker;
623  $description = "$ref:$alt";
624  $genotype_name = "$marker-$species_code-$scaffold_id-$position";
625  $genotype_desc = "$marker-$species_code-$scaffold_id-$position-$description";
626 
627  $records['feature'][$marker_name] = array(
628  'organism_id' => $current_id,
629  'uniquename' => $marker_name,
630  'type_id' => $seq_var_cvterm,
631  );
632 
633  $records['feature'][$variant_name] = array(
634  'organism_id' => $current_id,
635  'uniquename' => $variant_name,
636  'type_id' => $seq_var_cvterm,
637  );
638 
639  $records['genotype'][$genotype_desc] = array(
640  'name' => $genotype_name,
641  'uniquename' => $genotype_desc,
642  'description' => $description,
643  'type_id' => $snp_cvterm,
644  );
645 
646  if ($format != "") {
647  $records['genotypeprop']["$genotype_desc-format"] = array(
648  'type_id' => $format_cvterm,
649  'value' => $format,
650  '#fk' => array(
651  'genotype' => $genotype_desc,
652  ),
653  );
654  }
655 
656  for ($j = 9; $j < count($vcf_line); $j++) {
657  $records['genotype_call']["{$stocks[$j - 9]}-$genotype_name"] = array(
658  'project_id' => $project_id,
659  'stock_id' => $stocks[$j - 9],
660  '#fk' => array(
661  'genotype' => $genotype_desc,
662  'variant' => $variant_name,
663  'marker' => $marker_name,
664  ),
665  );
666 
667  $records['stock_genotype']["{$stocks[$j - 9]}-$genotype_name"] = array(
668  'stock_id' => $stocks[$j - 9],
669  '#fk' => array(
670  'genotype' => $genotype_desc,
671  ),
672  );
673  }
674 
675  // Quality score.
676  $records['genotypeprop']["$genotype_desc-qual"] = array(
677  'type_id' => $qual_cvterm,
678  'value' => $qual,
679  '#fk' => array(
680  'genotype' => $genotype_desc,
681  ),
682  );
683 
684  // filter: pass/fail.
685  $records['genotypeprop']["$genotype_desc-filter"] = array(
686  'type_id' => $filter_cvterm,
687  'value' => ($filter == '.') ? "P" : "NP",
688  '#fk' => array(
689  'genotype' => $genotype_desc,
690  ),
691  );
692 
693  // Break up info column.
694  $info_vals = explode(";", $info);
695  foreach ($info_vals as $key => $val) {
696  $parts = explode("=", $val);
697  unset($info_vals[$key]);
698  $info_vals[$parts[0]] = isset($parts[1]) ? $parts[1] : '';
699  }
700 
701  // Allele frequency, assuming that the info code for allele
702  // frequency is 'AF'.
703  if (isset($info_vals['AF']) and $info_vals['AF'] != '') {
704  $records['genotypeprop']["$genotype_desc-freq"] = array(
705  'type_id' => $freq_cvterm,
706  'value' => $info_vals['AF'],
707  '#fk' => array(
708  'genotype' => $genotype_desc,
709  ),
710  );
711  }
712 
713  // Depth coverage, assuming that the info code for depth coverage is
714  // 'DP'.
715  if (isset($info_vals['DP']) and $info_vals['DP'] != '') {
716  $records['genotypeprop']["$genotype_desc-depth"] = array(
717  'type_id' => $depth_cvterm,
718  'value' => $info_vals['DP'],
719  '#fk' => array(
720  'genotype' => $genotype_desc,
721  ),
722  );
723  }
724 
725  // Number of samples, assuming that the info code for number of
726  // samples is 'NS'.
727  if (isset($info_vals['NS']) and $info_vals['NS'] != '') {
728  $records['genotypeprop']["$genotype_desc-n_sample"] = array(
729  'type_id' => $n_sample_cvterm,
730  'value' => $info_vals['NS'],
731  '#fk' => array(
732  'genotype' => $genotype_desc,
733  ),
734  );
735  }
736  // Tripal Job has issues when all submissions are made at the same
737  // time, so break them up into groups of 10,000 genotypes along with
738  // their relevant genotypeprops.
739  if ($genotype_count > $record_group) {
740  $genotype_count = 0;
741  tpps_chado_insert_multi($records, $multi_insert_options);
742  $records = array(
743  'feature' => array(),
744  'genotype' => array(),
745  'genotype_call' => array(),
746  'genotypeprop' => array(),
747  'stock_genotype' => array(),
748  );
749  $genotype_count = 0;
750  }
751  }
752  elseif (preg_match('/##FORMAT=/', $vcf_line)) {
753  $format .= substr($vcf_line, 9, -1);
754  }
755  elseif (preg_match('/#CHROM/', $vcf_line)) {
756  $vcf_line = explode("\t", $vcf_line);
757  for ($j = 9; $j < count($vcf_line); $j++) {
758  $stocks[] = $form_state['tree_info'][trim($vcf_line[$j])]['stock_id'];
759  }
760  }
761  }
762  // Insert the last set of values.
763  tpps_chado_insert_multi($records, $multi_insert_options);
764  unset($records);
765  $genotype_count = 0;
766  // dpm('done: ' . date('r'));.
767  }
768  }
769 
770  if (isset($fourthpage["organism-$i"]['environment'])) {
771  $environment = $fourthpage["organism-$i"]['environment'];
772  $env_layers_check = isset($environment['use_layers']) ? $environment['use_layers'] : FALSE;
773  $env_layers = isset($environment['env_layers']) ? $environment['env_layers'] : FALSE;
774  $env_params = isset($environment['env_params']) ? $environment['env_params'] : FALSE;
775  $env_number = $environment['env_manual']['number'];
776  $env_count = 0;
777 
778  $species_index = "species-$i";
779  if (empty($form_state['saved_values'][TPPS_PAGE_3]['tree-accession']['check'])) {
780  $species_index = "species-1";
781  }
782  $tree_accession = $form_state['saved_values'][TPPS_PAGE_3]['tree-accession'][$species_index];
783  $id_col = $tree_accession['file-groups']['Tree Id'][1];
784 
785  $env_cvterm = chado_get_cvterm(array(
786  'cv_id' => array(
787  'name' => 'biomaterial_property',
788  ),
789  'name' => 'climate_environment',
790  'is_obsolete' => 0,
791  ))->cvterm_id;
792 
793  if ($env_layers_check and db_table_exists('cartogratree_layers') and db_table_exists('cartogratree_fields')) {
794  $layers_params = array();
795  $records = array(
796  'phenotype' => array(),
797  'phenotype_cvterm' => array(),
798  'stock_phenotype' => array(),
799  );
800 
801  foreach ($env_layers as $layer_name => $layer_id) {
802  if (!empty($layer_id) and !empty($env_params[$layer_name])) {
803  $layers_params[$layer_id] = array();
804  $params = $env_params[$layer_name];
805  foreach ($params as $param_name => $param_id) {
806  if (!empty($param_id)) {
807  $layers_params[$layer_id][$param_id] = $param_name;
808  }
809  }
810  }
811  elseif (!empty($layer_id) and preg_match('/worldclim_subgroup_(.+)/', $layer_id, $matches)) {
812  $subgroup_id = $matches[1];
813  $layers = db_select('cartogratree_layers', 'l')
814  ->fields('l', array('layer_id'))
815  ->condition('subgroup_id', $subgroup_id)
816  ->execute();
817  while (($layer = $layers->fetchObject())) {
818  $params = db_select('cartogratree_fields', 'f')
819  ->fields('f', array('field_id', 'display_name'))
820  ->condition('layer_id', $layer->layer_id)
821  ->execute();
822  while (($param = $params->fetchObject())) {
823  $layers_params[$layer->layer_id][$param->field_id] = $param->display_name;
824  }
825  }
826  }
827  }
828 
829  $options = array(
830  'no_header' => !empty($tree_accession['file-no-header']),
831  'records' => $records,
832  'tree_id' => $tree_accession['file-groups']['Tree Id'][1],
833  'accession' => $form_state['accession'],
834  'tree_info' => $form_state['tree_info'],
835  'layers_params' => $layers_params,
836  'env_count' => &$env_count,
837  'env_cvterm' => $env_cvterm,
838  'suffix' => 0,
839  );
840 
841  tpps_file_iterator($tree_accession['file'], 'tpps_process_environment_layers', $options);
842 
843  tpps_chado_insert_multi($options['records']);
844  unset($options['records']);
845  $env_count = 0;
846  }
847 
848  $env_meta = array();
849 
850  for ($j = 1; $j <= $env_number; $j++) {
851  $current_env = $environment['env_manual'][$j];
852  $env_meta[] = array(
853  'name' => $current_env['name'],
854  'desc' => $current_env['description'],
855  'unit' => $current_env['units'],
856  'val' => $current_env['value'],
857  );
858  }
859 
860  $records = array(
861  'phenotype' => array(),
862  'stock_phenotype' => array(),
863  'phenotypeprop' => array(),
864  );
865 
866  $options = array(
867  'no_header' => !empty($tree_accession['file-no-header']),
868  'accession' => $form_state['accession'],
869  'records' => $records,
870  'env_meta' => $env_meta,
871  'env_count' => $env_count,
872  'suffix' => 0,
873  'tree_info' => $form_state['tree_info'],
874  'tree_id' => $tree_accession['file-groups']['Tree Id'][1],
875  'env_cvterm' => $env_cvterm,
876  'desc_id' => chado_get_cvterm(array(
877  'name' => 'description',
878  'cv_id' => array(
879  'name' => 'schema',
880  ),
881  'is_obsolete' => 0,
882  ))->cvterm_id,
883  'unit_id' => chado_get_cvterm(array(
884  'name' => 'unit',
885  'cv_id' => array(
886  'name' => 'uo',
887  ),
888  'is_obsolete' => 0,
889  ))->cvterm_id,
890  );
891 
892  tpps_file_iterator($tree_accession['file'], 'tpps_process_environment_manual', $options);
893 
894  tpps_chado_insert_multi($options['records']);
895  unset($options['records']);
896  }
897  }
898 
899  tpps_update_submission($form_state);
900 }
const TPPS_PAGE_1
Definition: tpps.module:10
tpps_ssrs_headers($fid, $ploidy)
tpps_update_submission(array $state, array $options=array())
tpps_chado_insert_record($table, $records, array $options=array())
Definition: chado_utils.inc:27
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:814
tpps_load_submission($accession, $state=TRUE)
Definition: submissions.inc:27
tpps_other_marker_headers($fid, array $cols)
const TPPS_PAGE_4
Definition: tpps.module:13
const TPPS_PAGE_3
Definition: tpps.module:12
tpps_file_iterator($fid, $function, array &$options=array())
Definition: file_utils.inc:853
tpps_refine_phenotype_meta(array &$meta)
tpps_other_marker_headers (   $fid,
array  $cols 
)

This function formats headers for the "other" type genotype markers.

The headers for the "other" genotype marker types are set by the users, so we need to return the names of the headers they have indicated, rather than the values provided in the file-groups array.

Parameters
int $fid: The Drupal managed file id of the file.
array $cols: An array of columns indicating which of the columns contain genotype data.
Returns
array The array of standardized headers for the spreadsheet.

Definition at line 1365 of file file_parsing.inc.

/**
 * Formats headers for the "other" type genotype markers.
 *
 * Reads the header row of the file and picks out the header text of each
 * requested column.
 *
 * @param int $fid
 *   The Drupal managed file id of the file.
 * @param array $cols
 *   The column keys that contain genotype data.
 *
 * @return array
 *   The header values, keyed by the column keys supplied in $cols.
 */
function tpps_other_marker_headers($fid, array $cols) {
  $file_headers = tpps_file_headers($fid);

  $selected = array();
  foreach ($cols as $column_key) {
    $selected[$column_key] = $file_headers[$column_key];
  }

  return $selected;
}
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:814
tpps_process_environment_layers (   $row,
array &  $options = array() 
)

This function processes a single row of a tree accession file.

This function populates the db with environmental data provided through CartograTree layers. This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 1386 of file file_parsing.inc.

/**
 * Processes a single row of a tree accession file for CartograTree layers.
 *
 * Reads the latitude and longitude stock properties of the tree named in the
 * row, fetches the value of every selected layer parameter at that location
 * and queues phenotype records for them. Queued records are inserted whenever
 * the running count reaches the 'tpps_record_group' batch size. Meant to be
 * used with tpps_file_iterator().
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator().
 */
function tpps_process_environment_layers($row, array &$options = array()) {
  $records = &$options['records'];
  $tree_info = &$options['tree_info'];
  $env_count = &$options['env_count'];
  $suffix = &$options['suffix'];
  $accession = $options['accession'];
  $env_cvterm = $options['env_cvterm'];
  $batch_size = variable_get('tpps_record_group', 10000);

  $tree_id = $row[$options['tree_id']];
  $stock_id = $tree_info[$tree_id]['stock_id'];

  // Both coordinates are stored as stock properties; only the property name
  // differs between the two lookups.
  $coordinate = function ($prop_name) use ($stock_id) {
    $matches = chado_select_record('stockprop', array('value'), array(
      'stock_id' => $stock_id,
      'type_id' => array(
        'name' => $prop_name,
      ),
    ), array(
      'limit' => 1,
    ));
    return current($matches)->value;
  };
  $lat = $coordinate('gps_latitude');
  $long = $coordinate('gps_longitude');

  foreach ($options['layers_params'] as $layer_id => $layer_fields) {
    $layer_name = db_select('cartogratree_layers', 'l')
      ->fields('l', array('title'))
      ->condition('layer_id', $layer_id)
      ->execute()
      ->fetchObject()
      ->title;

    foreach ($layer_fields as $param_id => $field_settings) {
      $param_name = db_select('cartogratree_fields', 'f')
        ->fields('f', array('field_name'))
        ->condition('field_id', $param_id)
        ->execute()
        ->fetchObject()
        ->field_name;
      $phenotype_name = "$accession-$tree_id-$layer_name-$param_name-$suffix";

      $value = tpps_get_environmental_layer_data($layer_id, $lat, $long, $param_name);
      $storage = variable_get("tpps_param_{$param_id}_type", 'attr_id');

      if ($storage == 'attr_id') {
        // The environment cvterm is stored directly on the phenotype.
        $records['phenotype'][$phenotype_name] = array(
          'uniquename' => $phenotype_name,
          'name' => $param_name,
          'attr_id' => $env_cvterm,
          'value' => $value,
        );
      }
      else {
        // The environment cvterm is attached through phenotype_cvterm.
        $records['phenotype'][$phenotype_name] = array(
          'uniquename' => $phenotype_name,
          'name' => (string) $param_name,
          'value' => (string) $value,
        );

        $records['phenotype_cvterm'][$phenotype_name] = array(
          'cvterm_id' => $env_cvterm,
          '#fk' => array(
            'phenotype' => $phenotype_name,
          ),
        );
      }

      // Either way the phenotype is linked to the tree's stock.
      $records['stock_phenotype'][$phenotype_name] = array(
        'stock_id' => $stock_id,
        '#fk' => array(
          'phenotype' => $phenotype_name,
        ),
      );

      $env_count++;
      if ($env_count < $batch_size) {
        continue;
      }

      // Batch is full: write it out and start a fresh one.
      tpps_chado_insert_multi($records);
      $records = array(
        'phenotype' => array(),
        'phenotype_cvterm' => array(),
        'stock_phenotype' => array(),
      );
      $env_count = 0;
    }
  }
  $suffix++;
}
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_get_environmental_layer_data($layer_id, $lat, $long, $param)
tpps_process_environment_manual (   $row,
array &  $options = array() 
)

This function processes a single row of a tree accession file.

This function populates the db with environmental data provided manually by the user (as opposed to CartograTree layers). This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 1504 of file file_parsing.inc.

/**
 * Processes a single row of a tree accession file for manual environments.
 *
 * For every user-supplied environment entry in $options['env_meta'], queues a
 * phenotype, its link to the tree's stock, and description/unit properties.
 * Queued records are inserted whenever the running count reaches the
 * 'tpps_record_group' batch size. Meant to be used with tpps_file_iterator().
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator().
 */
function tpps_process_environment_manual($row, array &$options = array()) {
  $records = &$options['records'];
  $accession = $options['accession'];
  $suffix = &$options['suffix'];
  $environments = $options['env_meta'];
  $env_count = &$options['env_count'];
  $env_cvterm = $options['env_cvterm'];
  $tree_info = &$options['tree_info'];
  $batch_size = variable_get('tpps_record_group', 10000);

  // Property suffix => cvterm id used as the phenotypeprop type.
  $prop_types = array(
    'desc' => $options['desc_id'],
    'unit' => $options['unit_id'],
  );

  $tree_id = $row[$options['tree_id']];
  foreach ($environments as $env) {
    $env_name = $env['name'];
    $phenotype_name = "$accession-$tree_id-$env_name-$suffix";
    $phenotype_fk = array('phenotype' => $phenotype_name);

    $records['phenotype'][$phenotype_name] = array(
      'uniquename' => $phenotype_name,
      'name' => $env_name,
      'attr_id' => $env_cvterm,
      'value' => $env['val'],
    );

    $records['stock_phenotype'][$phenotype_name] = array(
      'stock_id' => $tree_info[$tree_id]['stock_id'],
      '#fk' => $phenotype_fk,
    );

    foreach ($prop_types as $prop => $type_id) {
      $records['phenotypeprop']["$phenotype_name-$prop"] = array(
        'type_id' => $type_id,
        'value' => $env[$prop],
        '#fk' => $phenotype_fk,
      );
    }

    $env_count++;
    if ($env_count < $batch_size) {
      continue;
    }

    // Batch is full: write it out and start a fresh one.
    tpps_chado_insert_multi($records);
    $records = array(
      'phenotype' => array(),
      'stock_phenotype' => array(),
      'phenotypeprop' => array(),
    );
    $env_count = 0;
  }
  $suffix++;
}
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_process_genotype_spreadsheet (   $row,
array &  $options = array() 
)

This function processes a single row of a genotype spreadsheet.

This function is used for SNP assay files, SSR spreadsheets, and other marker type spreadsheets. The functionality is slightly different based on the type of marker being processed (this is set in the options array). This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 1156 of file file_parsing.inc.

/**
 * Processes a single row of a genotype spreadsheet.
 *
 * Used for SNP assay files, SSR spreadsheets and other marker spreadsheets;
 * $options['type'] selects the variant. For the 'other' type the tree id is
 * read from the column named in $options['tree_id']; otherwise the first
 * cell that has a header is taken as the tree id. Every remaining cell with a
 * header queues feature, genotype, genotype_call and stock_genotype records.
 * Queued records are inserted whenever the running count reaches the
 * 'tpps_record_group' batch size. Meant to be used with tpps_file_iterator().
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator().
 */
function tpps_process_genotype_spreadsheet($row, array &$options = array()) {
  $type = $options['type'];
  $records = &$options['records'];
  $headers = $options['headers'];
  $tree_info = &$options['tree_info'];
  $species_codes = $options['species_codes'];
  $genotype_count = &$options['genotype_count'];
  $genotype_total = &$options['genotype_total'];
  $project_id = $options['project_id'];
  $marker = $options['marker'];
  $type_cvterm = $options['type_cvterm'];
  $seq_var_cvterm = $options['seq_var_cvterm'];
  $insert_options = $options['multi_insert'];
  $batch_size = variable_get('tpps_record_group', 10000);

  $stock_id = NULL;
  if ($type == 'other') {
    // The tree id column is designated explicitly for this marker type.
    $tree = trim($row[$options['tree_id']]);
    $stock_id = $tree_info[$tree]['stock_id'];
    $current_id = $tree_info[$tree]['organism_id'];
    $species_code = $species_codes[$current_id];
  }

  foreach ($row as $column => $cell) {
    if (empty($headers[$column])) {
      continue;
    }

    if (!isset($stock_id)) {
      // No tree resolved yet: this cell is treated as the tree id.
      $tree = trim($cell);
      $stock_id = $tree_info[$tree]['stock_id'];
      $current_id = $tree_info[$tree]['organism_id'];
      $species_code = $species_codes[$current_id];
      continue;
    }
    $genotype_count++;

    // A zero in an SSR spreadsheet is recorded as "NA".
    if ($type == 'ssrs' and in_array($cell, array(0, "0"), TRUE)) {
      $cell = "NA";
    }

    $variant_name = $headers[$column];
    $marker_name = $variant_name . $marker;
    $genotype_name = "{$marker}-{$variant_name}-{$species_code}-{$cell}";
    $call_key = "{$stock_id}-{$genotype_name}";

    foreach (array($marker_name, $variant_name) as $feature_name) {
      $records['feature'][$feature_name] = array(
        'organism_id' => $current_id,
        'uniquename' => $feature_name,
        'type_id' => $seq_var_cvterm,
      );
    }

    $records['genotype'][$genotype_name] = array(
      'name' => $genotype_name,
      'uniquename' => $genotype_name,
      'description' => $cell,
      'type_id' => $type_cvterm,
    );

    $records['genotype_call'][$call_key] = array(
      'project_id' => $project_id,
      'stock_id' => $stock_id,
      '#fk' => array(
        'genotype' => $genotype_name,
        'variant' => $variant_name,
        'marker' => $marker_name,
      ),
    );

    $records['stock_genotype'][$call_key] = array(
      'stock_id' => $stock_id,
      '#fk' => array(
        'genotype' => $genotype_name,
      ),
    );

    if ($genotype_count < $batch_size) {
      continue;
    }

    // Batch is full: write it out, start a fresh one and roll the batch count
    // into the running total.
    tpps_chado_insert_multi($records, $insert_options);
    $records = array(
      'feature' => array(),
      'genotype' => array(),
      'genotype_call' => array(),
      'stock_genotype' => array(),
    );
    $genotype_total += $genotype_count;
    $genotype_count = 0;
  }
}
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_process_phenotype_data (   $row,
array &  $options = array() 
)

This function will process a row from a phenotype data file.

This function is used for standard phenotypes of both phenotype formats, as well as phenotype isotope files. The functionality is slightly different based on the type of phenotype file being processed (set in the options array). This function is meant to be used with tpps_file_iterator().

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 1013 of file file_parsing.inc.

/**
 * Processes a row from a phenotype data file.
 *
 * Used for standard phenotypes of both phenotype formats as well as phenotype
 * isotope files; $options['iso'] selects the isotope behaviour. For every
 * phenotype value found in the row this queues a phenotype record, its link
 * to the tree's stock, and time/description/unit/min/max properties where
 * available. Queued records are inserted whenever the running count reaches
 * the 'tpps_record_group' batch size. Meant to be used with
 * tpps_file_iterator().
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator().
 */
function tpps_process_phenotype_data($row, array &$options = array()) {
  $iso = $options['iso'] ?? FALSE;
  $records = &$options['records'];
  $meta_headers = $options['meta_headers'] ?? NULL;
  $file_headers = $options['file_headers'] ?? NULL;
  $cvterms = $options['cvterms'];
  $meta = $options['meta'];
  $empty = $options['file-empty'] ?? NULL;
  $accession = $options['accession'];
  $suffix = &$options['suffix'];
  $tree_info = &$options['tree_info'];
  $phenotype_count = &$options['phenotype_count'];
  $record_group = variable_get('tpps_record_group', 10000);

  // Column key => phenotype name for every value to store from this row.
  // Initialised up front so a row with nothing configured is a no-op rather
  // than a foreach over an undefined variable.
  $values = array();
  $tree_id = NULL;

  if (!$iso) {
    if (isset($meta_headers['name']) and isset($meta_headers['value'])) {
      // One phenotype per row: the map must be keyed by the value COLUMN so
      // that the lookup of $row[$id] below returns the cell in that column.
      // Keying by the cell's content (as before) indexed the row by the
      // measurement itself.
      $values = array($meta_headers['value'] => $row[$meta_headers['name']]);
    }

    if (!empty($options['data_columns'])) {
      $values = $options['data_columns'];
    }

    $tree_id = $row[$options['tree_id']];
    $clone_col = $meta_headers['clone'] ?? NULL;
    if (isset($clone_col) and !empty($row[$clone_col]) and $row[$clone_col] !== $empty) {
      $tree_id .= "-" . $row[$clone_col];
    }
  }
  else {
    // Isotope files: the first non-empty cell is the tree id, every later
    // column is a phenotype named by the file header.
    foreach ($row as $id => $value) {
      if (empty($tree_id)) {
        $tree_id = $value;
        continue;
      }
      $values[$id] = $file_headers[$id];
    }
  }

  foreach ($values as $id => $name) {
    // Per-phenotype metadata, looked up by lower-cased name.
    $trait_meta = $meta[strtolower((string) $name)] ?? array();
    if (!is_array($trait_meta)) {
      $trait_meta = array();
    }
    // Isotope files share one flat metadata array for attr_id/desc/unit.
    $common_meta = $iso ? $meta : $trait_meta;

    $phenotype_name = "$accession-$tree_id-$name-$suffix";
    $phenotype_fk = array('phenotype' => $phenotype_name);

    $records['phenotype'][$phenotype_name] = array(
      'uniquename' => $phenotype_name,
      'name' => $name,
      'attr_id' => $common_meta['attr_id'] ?? NULL,
      'observable_id' => $trait_meta['struct_id'] ?? NULL,
      'value' => $row[$id] ?? NULL,
    );

    $records['stock_phenotype'][$phenotype_name] = array(
      'stock_id' => $tree_info[$tree_id]['stock_id'] ?? NULL,
      '#fk' => $phenotype_fk,
    );

    // Property suffix => value, in the order they are queued.
    $props = array();
    if (isset($trait_meta['time'])) {
      $props['time'] = $trait_meta['time'];
    }
    elseif (isset($meta_headers['time'])) {
      // No time in the metadata: fall back to the row's time column.
      $props['time'] = $row[$meta_headers['time']];
    }
    $props['desc'] = $common_meta['desc'] ?? NULL;
    $props['unit'] = $common_meta['unit'] ?? NULL;
    foreach (array('min', 'max') as $bound) {
      if (isset($trait_meta[$bound])) {
        $props[$bound] = $trait_meta[$bound];
      }
    }

    foreach ($props as $prop => $prop_value) {
      $records['phenotypeprop']["$phenotype_name-$prop"] = array(
        'type_id' => $cvterms[$prop],
        'value' => $prop_value,
        '#fk' => $phenotype_fk,
      );
    }

    // Count first, then flush, so the counter always equals the number of
    // pending phenotypes (same scheme as the other row processors).
    $phenotype_count++;
    if ($phenotype_count >= $record_group) {
      tpps_chado_insert_multi($records);
      $records = array(
        'phenotype' => array(),
        'phenotypeprop' => array(),
        'stock_phenotype' => array(),
      );
      $phenotype_count = 0;
    }
  }
  $suffix++;
}
tpps_chado_insert_multi(array $record_groups, array $options=array())
tpps_process_phenotype_meta (   $row,
array &  $options = array() 
)

This function will process a row from a phenotype metadata file.

Parameters
mixed $row: The item yielded by the TPPS file generator.
array $options: Additional options set when calling tpps_file_iterator().

Definition at line 910 of file file_parsing.inc.

/**
 * Processes a row from a phenotype metadata file.
 *
 * Stores the row's attribute, description and unit in $options['meta'] under
 * the lower-cased phenotype name, together with the structure, minimum and
 * maximum when those columns are configured and the cell is not blank.
 *
 * @param mixed $row
 *   The item yielded by the TPPS file generator.
 * @param array $options
 *   Additional options set when calling tpps_file_iterator().
 */
function tpps_process_phenotype_meta($row, array &$options = array()) {
  $columns = $options['meta_columns'];
  $meta = &$options['meta'];

  $name = strtolower($row[$columns['name']]);

  // Required fields are always copied.
  $entry = array(
    'attr' => $row[$columns['attr']],
    'desc' => $row[$columns['desc']],
    'unit' => $row[$columns['unit']],
  );

  // Optional fields are copied only when configured and filled in.
  foreach (array('struct', 'min', 'max') as $field) {
    if (empty($columns[$field])) {
      continue;
    }
    $column = $columns[$field];
    if (isset($row[$column]) and $row[$column] != '') {
      $entry[$field] = $row[$column];
    }
  }

  $meta[$name] = $entry;
}
tpps_refine_phenotype_meta ( array &  $meta)

This function will further refine existing phenotype metadata.

The function mostly just adds cvterm ids where applicable.

Parameters
array $meta: The existing metadata array.

Definition at line 938 of file file_parsing.inc.

/**
 * Further refines existing phenotype metadata.
 *
 * Adds an 'attr_id' to every entry and a 'struct_id' (NULL when the entry has
 * no structure) by looking each term name up in the cvterm table. A term that
 * is not found is inserted into the 'local' cv. Ids resolved during the call
 * are cached by term name so each name is looked up once.
 *
 * @param array $meta
 *   The existing metadata array.
 */
function tpps_refine_phenotype_meta(array &$meta) {
  $cvt_cache = array();
  $local_cv = chado_get_cv(array('name' => 'local'));
  $local_db = variable_get('tpps_local_db');

  // Resolves a term name to a cvterm id: cache, then lookup, then insert.
  $resolve = function ($term) use (&$cvt_cache, $local_cv, $local_db) {
    if (!empty($cvt_cache[$term])) {
      return $cvt_cache[$term];
    }

    $matches = chado_select_record('cvterm', array('cvterm_id'), array(
      'name' => array(
        'data' => $term,
        'op' => 'LIKE',
      ),
    ), array(
      'limit' => 1,
    ));
    $cvterm_id = current($matches)->cvterm_id ?? NULL;

    if (empty($cvterm_id)) {
      $cvterm_id = chado_insert_cvterm(array(
        'id' => "{$local_db->name}:{$term}",
        'name' => $term,
        'definition' => '',
        'cv_name' => $local_cv->name,
      ))->cvterm_id;
    }

    $cvt_cache[$term] = $cvterm_id;
    return $cvterm_id;
  };

  foreach ($meta as $name => $data) {
    $meta[$name]['attr_id'] = $resolve($data['attr']);
    $meta[$name]['struct_id'] = !empty($data['struct']) ? $resolve($data['struct']) : NULL;
  }
}
tpps_ssrs_headers (   $fid,
  $ploidy 
)

This function formats headers for a microsatellite spreadsheet.

SSR/cpSSR spreadsheets will often have blank or duplicate headers, depending on the ploidy of the organism they are meant for. This function standardizes the headers for the spreadsheet so that they can be used with the tpps_process_genotype_spreadsheet() function.

Parameters
int $fid: The Drupal managed file id of the file.
string $ploidy: The ploidy of the organism, as indicated by the user.
Returns
array The array of standardized headers for the spreadsheet.

Definition at line 1264 of file file_parsing.inc.

/**
 * Formats headers for a microsatellite spreadsheet.
 *
 * SSR/cpSSR spreadsheets often have blank or duplicate marker headers,
 * depending on the ploidy of the organism. This function gives every marker
 * column a distinct header ("_A"/"_B" for diploids, "_1".."_N" for
 * polyploids) so the result can be used with
 * tpps_process_genotype_spreadsheet(). The first column is treated as the
 * tree id column and is never renamed.
 *
 * @param int $fid
 *   The Drupal managed file id of the file.
 * @param string $ploidy
 *   The ploidy of the organism, as indicated by the user.
 *
 * @return array
 *   The standardized headers, keyed by column. Headers generated for blank
 *   columns are appended after the named ones, so rely on the keys rather
 *   than on the array order.
 */
function tpps_ssrs_headers($fid, $ploidy) {
  $headers = tpps_file_headers($fid);
  if ($ploidy == 'Haploid') {
    return $headers;
  }
  $row_len = count($headers);

  // Drop blank headers while keeping the column keys. The loose comparison
  // matches what array_search(NULL, ...) matched, but does not stop early
  // when the blank header sits under a falsy key such as 0.
  $results = array_filter($headers, function ($header) {
    return $header != NULL;
  });

  $num_headers = count($results);
  $num_unique_headers = count(array_unique($results));
  $all_named = ($num_headers == $row_len);
  $has_duplicates = ($num_headers != $num_unique_headers);

  // Allele columns per marker. The tree id column is excluded from both
  // sides of the ratio, hence the "- 1" on numerator AND denominator; without
  // the parentheses the division bound tighter than the subtractions.
  $copies_if_duplicated = $num_unique_headers > 1
    ? max(1, intdiv($num_headers - 1, $num_unique_headers - 1))
    : 1;
  $copies_if_blank = $num_headers > 1
    ? max(1, intdiv($row_len - 1, $num_headers - 1))
    : 1;

  $keys = array_keys($headers);
  $marker_num = 0;
  // Name of the most recent named marker, reused for the blank columns that
  // follow it.
  $last = '';

  foreach ($keys as $position => $key) {
    if ($position === 0) {
      // Tree id column.
      continue;
    }
    $next_key = $keys[$position + 1] ?? NULL;

    switch ($ploidy) {
      case 'Diploid':
        if ($num_headers == ($row_len + 1) / 2) {
          // Every other marker column name is left blank.
          if (array_key_exists($key, $results)) {
            $last = $results[$key];
            $results[$key] .= "_A";
            break;
          }
          $results[$key] = $last . "_B";
          break;
        }

        if ($all_named and $has_duplicates) {
          // The marker column names are duplicates, need to append _A and
          // _B: the first of a pair equals the header that follows it.
          $same_as_next = $next_key !== NULL && $results[$key] == $results[$next_key];
          $results[$key] .= $same_as_next ? "_A" : "_B";
        }
        break;

      case 'Polyploid':
        if ($all_named) {
          // All of the marker column names are filled out.
          if ($has_duplicates) {
            // The marker column names are duplicates, need to append
            // _1, _2, up to X ploidy.
            $ploidy_suffix = ($marker_num % $copies_if_duplicated) + 1;
            $results[$key] .= "_$ploidy_suffix";
          }
          $marker_num++;
          break;
        }

        // Only the first column of each marker is named.
        $ploidy_suffix = ($marker_num % $copies_if_blank) + 1;
        if (array_key_exists($key, $results)) {
          $last = $results[$key];
          $results[$key] .= "_$ploidy_suffix";
        }
        else {
          $results[$key] = "{$last}_$ploidy_suffix";
        }
        $marker_num++;
        break;

      default:
        break;
    }
  }

  return $results;
}
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:814