Tripal Plant PopGen Submit
cvterm_utils.inc
Go to the documentation of this file.
1 <?php
2 
/**
 * Searches the Ontology Lookup Service (OLS) for a term by its label.
 *
 * Builds the query-string arguments and delegates the request to
 * tpps_ols_call() with the 'search' endpoint.
 *
 * @param string $query
 *   The text to search for. Only the "label" field is queried.
 * @param string $ontology
 *   Optional ontology identifier used to restrict the search. When empty, no
 *   ontology filter is sent.
 * @param bool $exact
 *   Whether to request exact matches. Sent as the literal 'true' or 'false'.
 *
 * @return mixed
 *   Whatever tpps_ols_call() returns for the search request.
 */
function tpps_ols_search($query, $ontology = NULL, $exact = TRUE) {
  $args = array(
    'q=' . urlencode($query),
    'exact=' . ($exact ? 'true' : 'false'),
    'queryFields=label',
  );
  if (!empty($ontology)) {
    // Encode the ontology the same way as the query text so identifiers that
    // contain spaces or reserved characters cannot corrupt the query string.
    $args[] = 'ontology=' . urlencode($ontology);
  }
  return tpps_ols_call('search', $args);
}
32 
/**
 * Retrieves summary information about an ontology from OLS.
 *
 * @param string $id
 *   The OLS ontology identifier, used as the path segment after 'ontologies'.
 *
 * @return array|null
 *   NULL when the OLS call yields an empty response. Otherwise an array with
 *   the keys 'id', 'name', 'description', 'url', 'urlprefix', 'cvname' and
 *   'namespace'. 'urlprefix' is NULL unless the first base URI ends with
 *   "_<preferredPrefix>"; 'namespace' is NULL when the response carries no
 *   'default-namespace' annotation.
 */
function tpps_ols_ontology_info($id) {
  // Warnings from the request are suppressed on purpose: a failed lookup is
  // reported to the caller as NULL.
  $response = @tpps_ols_call(array(
    'ontologies',
    $id,
  ));
  if (empty($response)) {
    return NULL;
  }

  $config = $response->config;
  $name = $config->preferredPrefix;
  $base_uri = $config->baseUris[0] ?? NULL;

  // Derive the URL prefix by replacing the trailing "_<prefix>" of the base
  // URI with placeholders. The number of characters removed is the length of
  // the suffix that was actually matched, so prefixes of any length work.
  // NOTE(review): confirm "_<prefix>" (rather than "<prefix>_") is the shape
  // OLS base URIs are expected to end with.
  $url_prefix = NULL;
  $suffix = "_$name";
  $suffix_length = strlen($suffix);
  if (is_string($base_uri) && substr($base_uri, -$suffix_length) === $suffix) {
    $url_prefix = substr($base_uri, 0, -$suffix_length) . '{db}_{accession}';
  }

  return array(
    'id' => $response->ontologyId,
    'name' => $name,
    'description' => $config->title,
    'url' => $config->homepage,
    'urlprefix' => $url_prefix,
    'cvname' => $name,
    // Not every response has this annotation; fall back to NULL quietly.
    'namespace' => $config->annotations->{'default-namespace'}[0] ?? NULL,
  );
}
66 
/**
 * Performs a request against the OLS API and decodes the JSON response.
 *
 * @param string|array $type
 *   The endpoint path segment(s), passed through to tpps_ols_url().
 * @param array $args
 *   Optional list of already formatted "key=value" query arguments.
 *
 * @return mixed
 *   The decoded JSON document, or NULL when the request fails or the body
 *   cannot be decoded.
 */
function tpps_ols_call($type, array $args = array()) {
  $url = tpps_ols_url($type, $args);
  $data = file_get_contents($url);
  if ($data === FALSE) {
    // The request failed. Report it as "no data" explicitly instead of
    // relying on json_decode() coercing FALSE into an undecodable string.
    return NULL;
  }
  return json_decode($data);
}
83 
/**
 * Builds the URL for an OLS API request.
 *
 * Starts from TPPS_OLS_BASE, appends each path segment followed by a slash
 * and, when arguments are given, a query string joined with '&'. The
 * resulting URL is printed to the output and, when tpps_job_logger_write()
 * is available, also written to the job log.
 *
 * @param string|array $types
 *   A single path segment, or an array of segments appended in order.
 * @param array $args
 *   Optional list of already formatted "key=value" query arguments.
 *
 * @return string
 *   The assembled URL.
 */
function tpps_ols_url($types, array $args = array()) {
  // Treat a single segment as a one-element list so both input shapes share
  // the same loop.
  $segments = is_array($types) ? $types : array($types);

  $url = TPPS_OLS_BASE;
  foreach ($segments as $segment) {
    $url .= "$segment/";
  }
  if (!empty($args)) {
    $url .= '?' . implode('&', $args);
  }

  print_r('OLS URL:' . $url . "\n");
  try {
    if (function_exists('tpps_job_logger_write')) {
      tpps_job_logger_write('[INFO] -- OLS URL:' . $url);
    }
  }
  catch (Exception $ex) {
    // Logging is best-effort only; a logger failure must not prevent the URL
    // from being returned.
  }

  return $url;
}
122 
/**
 * Loads a Chado controlled vocabulary (cv) record.
 *
 * The 'tpps_cv' cache entry is consulted first. On a miss the vocabulary is
 * looked up by name with chado_generate_var(), trying the 'cvname' and then
 * the 'namespace' of the ontology info, and the resulting cv_id is stored in
 * the cache.
 *
 * @param string|array $info
 *   Either an ontology identifier string, in which case the ontology info is
 *   requested from tpps_ols_ontology_info(), or an ontology info array that
 *   has at least the keys 'id' and 'cvname'.
 *
 * @return object|null
 *   The cv record, or NULL when none could be found.
 */
function tpps_load_cv($info) {
  $by_name = is_string($info);
  $cache_key = strtolower($by_name ? $info : $info['id']);

  // Vocabulary ids resolved earlier are kept under the 'tpps_cv' cache id.
  $known_ids = cache_get('tpps_cv')->data ?? array();
  if (!empty($known_ids[$cache_key])) {
    return chado_get_cv(array(
      'cv_id' => $known_ids[$cache_key],
    ));
  }

  // Resolve the ontology info. When nothing is available, fall back to using
  // the given value as both vocabulary name and namespace.
  $cv_info = $by_name ? tpps_ols_ontology_info($info) : $info;
  if (empty($cv_info)) {
    $cv_info = array(
      'cvname' => $info,
      'namespace' => $info,
    );
  }

  $conditions = array(
    'name' => array(
      'op' => 'ILIKE',
      'data' => $cv_info['cvname'],
    ),
  );
  $cv = chado_generate_var('cv', $conditions);

  // Second attempt: look the vocabulary up by its namespace.
  if (!$cv && !empty($cv_info['namespace'])) {
    $conditions['name']['data'] = $cv_info['namespace'];
    $cv = chado_generate_var('cv', $conditions);
  }

  // An array result presumably means several records matched (verify against
  // chado_generate_var()); retry with a LIKE comparison instead of ILIKE.
  if (is_array($cv)) {
    // Critical temporary patch (pending confirmation, slack 10/19/2021): the
    // lowercase spelling of OBI is the one to use.
    if ($conditions['name']['data'] == 'OBI') {
      $conditions['name']['data'] = strtolower($conditions['name']['data']);
    }
    $conditions['name']['op'] = 'LIKE';
    $cv = chado_generate_var('cv', $conditions);
  }

  if (!$cv) {
    return NULL;
  }

  $known_ids[$cache_key] = $cv->cv_id;
  cache_set('tpps_cv', $known_ids);
  return $cv;
}
191 
/**
 * Loads a cvterm record for a TPPS term key.
 *
 * The term key is translated through the versioned term list
 * (tpps_get_cvterms_v<version>()) into an "ontology:name[:alt_ontology]"
 * description and then searched for with chado_get_cvterm() in this order:
 * the primary ontology, an ontology whose name itself contains a colon (for
 * example "GO: Biological Process"), the local vocabulary by name, the
 * alternative ontology, and finally the "TreeGenes Internal Ontology".
 * Successful results are stored serialized in a Drupal variable named after
 * the md5 hash of the term key and reused on later calls.
 *
 * @param string $term
 *   The TPPS term key.
 * @param array $options
 *   Options passed on to chado_get_cvterm(). When $options['job'] is set, it
 *   is used to log a warning if the term cannot be loaded.
 * @param string $version
 *   Optional term list version such as '1_2'. Defaults to the value of the
 *   'tpps_cvterms_version' variable.
 * @param bool $refresh_cache
 *   TRUE to ignore the stored result and search again. The same happens when
 *   the request carries refresh_cache=true in its query string.
 *
 * @return mixed
 *   The cvterm record.
 *
 * @throws Exception
 *   When the term cannot be found anywhere.
 */
function tpps_load_cvterm($term, array $options = array(), $version = NULL, $refresh_cache = FALSE) {
  $cache_key = 'tpps_load_cvterm_' . md5($term);
  $cached = variable_get($cache_key, NULL);
  // $_GET['refresh_cache'] is absent for most requests and on the command
  // line, so it is read with a default instead of being accessed directly.
  if ($refresh_cache == TRUE || ($_GET['refresh_cache'] ?? NULL) == 'true') {
    $cached = NULL;
  }
  if ($cached != NULL) {
    return unserialize($cached);
  }

  $cvt_v = variable_get('tpps_cvterms_version', '1_1');
  $get_cvt = 'tpps_get_cvterms_v' . ($version ?? $cvt_v);
  $terms = $get_cvt();
  $info = $terms[$term] ?? NULL;
  if (empty($info)) {
    // Unknown or abandoned key: search by the key itself, without ontology.
    $info = ":$term";
  }

  // Split "ontology:name[:alt_ontology]". The groups stay NULL when the
  // description does not have that shape.
  $matches = array();
  preg_match('/^([^:]*):([^:]+):?([^:]+)?$/', $info, $matches);
  $ontology = $matches[1] ?? NULL;
  $name = $matches[2] ?? NULL;
  $alt_ontology = $matches[3] ?? NULL;

  $identifiers = array(
    'name' => $term,
    'is_obsolete' => 0,
  );

  if (!empty($ontology)) {
    if ($ontology == 'local') {
      // Local terms are named after the key and identified by definition.
      $identifiers['definition'] = $name;
    }
    else {
      $identifiers['name'] = $name;
      // tpps_load_cv() returns NULL for unknown vocabularies.
      $identifiers['cv_id'] = tpps_load_cv($ontology)->cv_id ?? NULL;
    }
  }
  $result = chado_get_cvterm($identifiers, $options);

  // Cater for CV names that contain a colon, like "GO: Biological Process".
  if (empty($result)) {
    $matches2 = array();
    preg_match('/^([\w]*):([\s]+[\w\s]*):([\s\w]*):?([\s\w]*)?$/', $info, $matches2);
    if (count($matches2) > 0) {
      // The recombined name always contains a colon, so it is never empty
      // and never equal to 'local'.
      $ontology = $matches2[1] . ":" . $matches2[2];
      $name = $matches2[3];
      $alt_ontology = $matches2[4] ?? NULL;
      $identifiers['name'] = $name;
      $identifiers['cv_id'] = tpps_load_cv($ontology)->cv_id ?? NULL;
      $result = chado_get_cvterm($identifiers, $options);
    }
  }

  // Local terms that could not be found by definition are searched by name
  // in the local vocabulary (added 1/5/2022).
  if (empty($result) && $ontology == 'local') {
    unset($identifiers['definition']);
    $identifiers['name'] = $name;
    $identifiers['cv_id'] = tpps_load_cv($ontology)->cv_id ?? NULL;
    $result = chado_get_cvterm($identifiers, $options);
  }

  if (empty($result) and $ontology != 'local' and !empty($alt_ontology)) {
    $identifiers['cv_id'] = tpps_load_cv($alt_ontology)->cv_id ?? NULL;
    $result = chado_get_cvterm($identifiers, $options);
  }

  if (empty($result)) {
    // Last resort: the TreeGenes Internal Ontology (for example study_type).
    $identifiers['cv_id'] = tpps_load_cv('TreeGenes Internal Ontology')->cv_id ?? NULL;
    $result = chado_get_cvterm($identifiers, $options);
  }

  if (empty($result)) {
    $job = $options['job'] ?? NULL;
    if (empty($job)) {
      // No job was passed explicitly: look for a TripalJob among the
      // arguments of the calling function so the failure still gets logged.
      $trace = debug_backtrace()[1] ?? array();
      if (!empty($trace['args']) and is_array($trace['args'])) {
        foreach ($trace['args'] as $arg) {
          if (is_object($arg) and get_class($arg) == 'TripalJob') {
            $job = $arg;
          }
        }
      }
    }
    if (!empty($job)) {
      $job->logMessage('[WARNING] Failed to load cvterm: @term', array('@term' => $term), TRIPAL_WARNING);
    }
    throw new Exception('FATAL - after extensive searching, cvterm ' . $term . ' could not be found. This will cause insertion errors later on so it needs to be fixed');
  }

  variable_set($cache_key, serialize($result));
  return $result;
}
328 
/**
 * Rebuilds the cached cv ids and cvterm records.
 *
 * Resets the 'tpps_cv' cache entry and reloads every term of the currently
 * configured term list through tpps_load_cvterm() with its refresh flag set.
 *
 * @param TripalJob $job
 *   Optional job used to log the start and the end of the refresh.
 */
function tpps_cvterms_clear_cache(TripalJob $job = NULL) {
  if ($job) {
    $job->logMessage('[INFO] Initializing cvterms cache refresh...');
  }

  // Reset the cv ids.
  cache_set('tpps_cv', NULL);

  // This function takes no version argument, so the configured term list
  // version is always the one being refreshed.
  $cvt_v = variable_get('tpps_cvterms_version', '1_1');
  $get_cvt = 'tpps_get_cvterms_v' . $cvt_v;
  foreach (array_keys($get_cvt()) as $term) {
    tpps_load_cvterm($term, array(), $cvt_v, TRUE);
  }

  if ($job) {
    $job->logMessage('[INFO] Complete!');
  }
}
356 
369  return array(
370  // Local terms.
371  'assession_number' => 'local:Number of times the trees were assessed, on average.',
372  'temperature_high' => 'local:Average high temperature of the environment.',
373  'temperature_low' => 'local:Average low temperature of the environment.',
374  'rooting_type' => 'local:Type of rooting. Aeroponics, Hydroponics, or Soil.',
375  'organism 4 letter code' => 'local:4 letter abbreviation of species. Usually first two letters of genus + first two letters of species, may be different if that code already exists (use next letters from species if possible).',
376  'source_description' => 'local:A textual description of study type and comments for a source.',
377  'email' => 'local:Email address',
378  'number_samples' => 'local:Number of samples',
379  'gps_type' => 'local:Type of GPS coordinates. Exact, Approximate, or Site-based.',
380  'gps_precision' => 'local:Precision of GPS coordinates.',
381 
382  // PAG 2022 request by Emily
383  'precipitation' => 'local:precipitation',
384  'absorbance' => 'local:absorbance',
385  'water use efficiency' => 'local:water use efficiency',
386  'delta' => 'ncit:Delta',
387  'carbon-13 atom' => 'chebi_ontology:carbon-13 atom',
388  'rate' => 'pato:rate',
389  'chlorophyll' => 'chebi:chlorophyll',
390  'transpiration' => 'GO: Biological Process:transpiration',
391  'photosynthesis' => 'GO: Biological Process:photosynthesis',
392  'degrees celsius per millimeter' => 'local:degrees celsius per millimeter',
393  'centimeters per day' => 'local:centimeters per day',
394  'kilograms per meter cubed' => 'local:kilograms per meter cubed',
395  'no unit' => 'local:no unit',
396  'log(centimeters per day)' => 'local:log(centimeters per day)',
397  'log(centimeters cubed per day)' => 'local:log(centimeters cubed per day)',
398  'micromoles carbon dioxide per meter squared per second' => 'local:micromoles carbon dioxide per meter squared per second',
399  'micromoles carbon dioxide per gram per second' => 'local:micromoles carbon dioxide per gram per second',
400  'absorbance unit' => 'local:absorbance unit',
401  'number' => 'local:number',
402  'milligrams per millimeter' => 'local:milligrams per millimeter',
403  'milligrams per millimeter squared' => 'local:milligrams per millimeter squared',
404  'milligrams per milligram' => 'local:milligrams per milligram',
405  'micromoles carbon dioxide per gram Nitrogen per second' => 'local:micromoles carbon dioxide per gram Nitrogen per second',
406  'micromoles carbon dioxide per millimole water' => 'local:micromoles carbon dioxide per millimole water',
407  'moles water per meter squared per second' => 'local:moles water per meter squared per second',
408 
409  // Tripal terms.
410  'article' => 'tripal_pub:Journal Article',
411  'abstract' => 'tripal_pub:Abstract',
412  'authors' => 'tripal_pub:Authors',
413  'person' => 'tripal_contact:Person',
414  'contact_part_of' => 'tripal_contact:part of',
415  'organization' => 'tripal_contact:Organization',
416  'country' => 'tripal_contact:Country',
417  'state' => 'tripal_contact:State',
418  'analysis_type' => 'analysis_property:Analysis Type',
419 
420 
421  // TREEGENES TO specific
422  'gravity' => 'tree_genes_to:specific gravity',
423 
424  // PPEO terms.
425  'experiment_location' => 'ppeo:location',
426  'file_path' => 'ppeo:data file',
427 
428  // NCIT terms.
429  'study_start' => 'ncit:Study Start Date',
430  'district' => 'ncit:Locality',
431  'study_end' => 'ncit:Study Completion Date',
432  'assession_season' => 'ncit:Season',
433  'soil_container' => 'ncit:Container',
434  'gps_latitude' => 'ncit:Latitude',
435  'gps_longitude' => 'ncit:Longitude',
436  'location' => 'ncit:Location',
437  'association_results_type' => 'ncit:Data Type',
438  'county' => 'ncit:County',
439  'read_depth' => 'ncit:Read Depth',
440  'format' => 'ncit:Format',
441  'minimum' => 'ncit:Minimum',
442  'maximum' => 'ncit:Maximum',
443  'indel' => 'ncit:Indel Mutation',
444  'p_value' => 'ncit:P-Value',
445  'lambda' => 'ncit:Lambda',
446  'bonferroni' => 'ncit:Adjusted Bonferroni Correction',
447  'boolean' => 'ncit:Boolean',
448 
449  // PATO terms.
450  'age' => 'pato:age',
451  'alive' => 'pato:alive',
452  'amount' => 'pato:amount',
453  'angle' => 'pato:angle',
454  'area' => 'pato:area',
455  'bent' => 'pato:bent',
456  'circumference' => 'pato:circumference',
457  'color' => 'pato:color',
458  'composition' => 'pato:composition',
459  'concentration_of' => 'pato:concentration of',
460  'damage' => 'pato:damage',
461  'description' => 'pato:description',
462  'diameter' => 'pato:diameter',
463  'distance' => 'pato:distance',
464  'growth_quality_of_occurrent' => 'pato:growth quality of occurrent',
465  'growth_rate' => 'pato:growth rate',
466  'has_number_of' => 'pato:has number of',
467  'height' => 'pato:height',
468  'humidity_level' => 'pato:humidity',
469  'intensity' => 'pato:intensity',
470  'length' => 'pato:length',
471  'lesioned' => 'pato:lesioned',
472  'maturity' => 'pato:maturity',
473  'pH_level' => 'pato:acidity',
474  'position' => 'pato:position',
475  'pressure' => 'pato:pressure',
476  'proportionality_to' => 'pato:proportionality to',
477  'qualitative' => 'pato:qualitative',
478  'rate' => 'pato:rate',
479  'rough' => 'pato:rough',
480  'shape' => 'pato:shape',
481  'size' => 'pato:size',
482  'temperature' => 'pato:temperature',
483  'texture' => 'pato:texture',
484  'thickness' => 'pato:thickness',
485  'time' => 'pato:time',
486  'volume' => 'pato:volume',
487  'weight' => 'pato:weight',
488  'width' => 'pato:width',
489  'sex' => 'pato:phenotypic sex',
490 
491  // PO terms.
492  'whole plant' => 'po:whole plant:plant_ontology',
493  'bark' => 'po:bark:plant_anatomy',
494  'branch' => 'po:branch:plant_anatomy',
495  'bud' => 'po:bud:plant_anatomy',
496  'catkin_inflorescence' => 'po:catkin inflorescence:plant_anatomy',
497  'endocarp' => 'po:endocarp:plant_anatomy',
498  'floral_organ' => 'po:floral organ:plant_anatomy',
499  'flower' => 'po:flower:plant_anatomy',
500  'flower_bud' => 'po:flower bud:plant_anatomy',
501  'flower_fascicle' => 'po:flower fascicle:plant_anatomy',
502  'fruit' => 'po:fruit:plant_anatomy',
503  'leaf' => 'po:leaf:plant_anatomy',
504  'leaf_rachis' => 'po:leaf rachis:plant_anatomy',
505  'leaflet' => 'po:leaflet:plant_anatomy',
506  'nut_fruit' => 'po:nut fruit:plant_anatomy',
507  'petal' => 'po:petal:plant_anatomy',
508  'petiole' => 'po:petiole:plant_anatomy',
509  'phloem' => 'po:phloem:plant_anatomy',
510  'plant_callus' => 'po:plant callus:plant_anatomy',
511  'primary_thickening_meristem' => 'po:primary thickening meristem:plant_anatomy',
512  // 'root' => 'po:root:plant_anatomy',
513  'root' => 'plant_anatomy:root',
514  'secondary_xylem' => 'po:secondary xylem:plant_anatomy',
515  'seed' => 'po:seed:plant_anatomy',
516  'shoot_system' => 'po:shoot system:plant_anatomy',
517  'stem' => 'po:stem:plant_anatomy',
518  'stomatal_complex' => 'po:stomatal complex:plant_anatomy',
519  'strobilus' => 'po:strobilus:plant_anatomy',
520  'terminal_bud' => 'po:terminal bud:plant_anatomy',
521  'vascular_leaf' => 'po:vascular leaf:plant_anatomy',
522 
523  // PECO terms.
524  'co2_control' => 'plant_experimental_conditions_ontology:carbon dioxide exposure',
525  'humidity_control' => 'plant_experimental_conditions_ontology:humidity exposure',
526  'light_control' => 'plant_experimental_conditions_ontology:light intensity exposure',
527  'pH_control' => 'plant_experimental_conditions_ontology:pH exposure',
528  'treatment' => 'plant_experimental_conditions_ontology:plant exposure',
529  'salinity_control' => 'plant_experimental_conditions_ontology:salt exposure',
530  'biotic_environment' => 'plant_experimental_conditions_ontology:biotic plant exposure',
531  'study_type' => 'plant_experimental_conditions_ontology:study type',
532 
533  // ENVO terms.
534  'co2_level' => 'envo:atmospheric carbon dioxide',
535  'light_level' => 'envo:visible spectrum radiation',
536  'soil_type' => 'envo:soil',
537  'environment' => 'envo:environmental system',
538  'atmosphere' => 'envo:atmosphere',
539 
540  // CHEBI terms.
541  'salinity_level' => 'chebi:salt',
542 
543  // AGRO terms.
544  'irrigation_type' => 'agro:irrigation process',
545 
546  // SO terms.
547  'reference_genome' => 'sequence:reference_genome',
548  'genotype' => 'sequence:genotype',
549  'sequence_variant' => 'sequence:sequence_variant',
550  'snp' => 'sequence:SNP',
551  'ssr' => 'sequence:microsatellite',
552  'genetic_marker' => 'sequence:genetic_marker',
553  'quality_value' => 'sequence:quality_value',
554  'allelic_frequency' => 'sequence:allelic_frequency',
555  'synonymous' => 'sequence:synonymous',
556  'clone' => 'sequence:clone',
557  'supercontig' => 'sequence:supercontig',
558  'scaffold' => 'sequence:chromosome',
559  'has_part' => 'sequence:has_part',
560 
561  // EDAM terms.
562  'filter' => 'edam:Sequence contamination filtering:operation',
563 
564  // UO terms.
565  'unit' => 'uo:unit',
566  'centimeter' => 'uo:centimeter',
567  'cubic_centimeter' => 'uo:cubic centimeter',
568  'day' => 'uo:day',
569  'degrees_celsius' => 'uo:degree Celsius',
570  'degrees_fahrenheit' => 'uo:degree Fahrenheit',
571  'grams_per_square_meter' => 'uo:gram per square meter',
572  'gram' => 'uo:gram',
573  'luminous_intensity_unit' => 'uo:luminous intensity unit',
574  'kilogram' => 'uo:kilogram',
575  'kilogram_per_cubic_meter' => 'uo:kilogram per cubic meter',
576  'liter' => 'uo:liter',
577  'cubic_meter' => 'uo:cubic meter',
578  'pascal' => 'uo:pascal',
579  'meter' => 'uo:meter',
580  'milligram' => 'uo:milligram',
581  'milliliter' => 'uo:milliliter',
582  'millimeter' => 'uo:millimeter',
583  'micrometer' => 'uo:micrometer',
584  'percent' => 'uo:percent',
585  // 'ratio' => 'uo:ratio',
586  'square_micrometer' => 'uo:square micrometer',
587  'square_millimeter' => 'uo:square millimeter',
588  'watt_per_square_meter' => 'uo:watt per square meter',
589  'year' => 'uo:year',
590 
591  // SBO terms.
592  'phenotype' => 'sbo:phenotype',
593 
594  // OBI terms.
595  'organism' => 'obi:organism',
596  'q_value' => 'obi:q-value',
597  'p_adj_fwe' => 'obi:FWER adjusted p-value',
598 
599  // TAXRANK terms.
600  'family' => 'taxonomic_rank:family',
601  'subkingdom' => 'taxonomic_rank:subkingdom',
602  'order' => 'taxonomic_rank:order',
603  'speciesaggregate' => 'taxonomic_rank:speciesaggregate',
604 
605  // NCBI terms.
606  'common name' => 'ncbitaxon:common name',
607 
608  // Abandoned terms.
609  'phenotype_binary_type' => NULL,
610  'contact photo' => NULL,
611  'cpSSR' => NULL,
612  'SSR' => NULL,
613  );
614 }
615 
628  return array(
629  'study_start' => 'local:The month and year that the study began.',
630  'study_end' => 'local:The month and year that the study ended.',
631  'experiment_location' => 'local:Geographic location of the experiment.',
632  'assession_season' => 'local:Season the trees were assessed.',
633  'assession_number' => 'local:Number of times the trees were assessed, on average.',
634  'co2_control' => 'local:Whether or not the co2 level of the environment was controlled. True or False.',
635  'co2_level' => 'local:Must have an associated co2_control property. If co2_control is True, this term describes the co2 level the environment was kept at. If co2_control is False, this term describes the average measured co2 value in the environment.',
636  'humidity_control' => 'local:Whether or not the air humidity level of the environment was controlled. True or False.',
637  'humidity_level' => 'local:Must have an associated humidity_control property. If humidity_control is True, this term describes the air humidity level the environment was kept at. If humidity_control is False, this term describes the average measured air humidity value in the environment.',
638  'light_control' => 'local:Whether or not the light intensity level of the environment was controlled. True or False.',
639  'light_level' => 'local:Must have an associated light_control property. If light_control is True, this term describes the light intensity level the environment was kept at. If light_control is False, this term describes the average measured light intensity value in the environment.',
640  'temperature_high' => 'local:Average high temperature of the environment.',
641  'temperature_low' => 'local:Average low temperature of the environment.',
642  'rooting_type' => 'local:Type of rooting. Aeroponics, Hydroponics, or Soil.',
643  'soil_type' => 'local:Type of soil. For example: Sand, Peat, Mixed, etc.',
644  'soil_container' => 'local:Type of soil container.',
645  'pH_control' => 'local:Whether or not the pH level of the environment was controlled. True or False.',
646  'pH_level' => 'local:Must have an associated pH_control property. If pH_control is True, this term describes the pH level the environment was kept at. If pH_control is False, this term describes the average measured pH value in the environment.',
647  'treatment' => 'local:Describes the treatment of the trees during the study. For example, air or soil temperature regimes, chemical administration, fertilizer, non-mineral nutrient, or rainfall regimes, disease status, etc.',
648  'irrigation_type' => 'local:Describes the type of irrigation. For example, drip irrigation, irrigation from the top, no irrigation, etc.',
649  'salinity_control' => 'local:Whether or not the salinity level of the environment was controlled. True or False.',
650  'salinity_level' => 'local:Must have an associated salinity_control property. If salinity_control is True, this term describes the salinity level the environment was kept at. If salinity_control is False, this term describes the average measured salinity value in the environment.',
651  'biotic_environment' => 'local:Describes the biotic environment.',
652  'study_type' => 'local:Describes the type of environment the trees were in during the study. Possible values are Natural Population (Landscape), Growth Chamber, Greenhouse, Experimental/Common Garden, Plantation.',
653  'phenotype_binary_type' => 'local:Describes one type of a binary phenotype.',
654  'file_path' => 'schema:url',
655  'contact photo' => 'local:A profile photo for a contact record',
656  'gps_latitude' => 'local:Coordinate that specifies north-south position on Earth\'s surface',
657  'gps_longitude' => 'local:Coordinate that specifies east-west position on Earth\'s surface',
658  'association_results_type' => 'local:The type of the association results.',
659  'county' => 'local:A political and administrative division of a state',
660  'district' => 'local:A distinct area of a geographic entity, such as a country or city',
661  'organism 4 letter code' => 'local:4 letter abbreviation of species. Usually first two letters of genus + first two letters of species, may be different if that code already exists (use next letters from species if possible).',
662  'cpSSR' => 'local:Chloroplast simple sequence repeats.',
663  'SSR' => 'local:Simple sequence repeats. Microsatellite.',
664  'time' => 'local:A quality in which events occur in sequence.',
665  'source_description' => 'local:A textual description of study type and comments for a source.',
666  'location' => 'nd_geolocation_property:Location',
667  'format' => 'sep:format',
668  'environment' => 'biomaterial_property:climate_environment',
669  'description' => 'schema:description',
670  );
671 }
672 
/**
 * Adds the database and vocabulary records for an OLS ontology to Chado.
 *
 * A db record named after the ontology is inserted when chado_get_db() does
 * not find one, and a cv record is inserted when tpps_load_cv() does not
 * find one. Progress and errors are reported with drupal_set_message().
 *
 * @param string $ontology_id
 *   The OLS ontology identifier.
 *
 * @return mixed
 *   The cv record, or a falsy value when the ontology info could not be
 *   retrieved or the vocabulary could not be added.
 */
function tpps_ols_add_cv($ontology_id) {
  $cv_info = tpps_ols_ontology_info($ontology_id);
  if (empty($cv_info)) {
    // Without ontology info there is no name to look up or insert; stop here
    // rather than creating db/cv records from NULL values.
    drupal_set_message("There was an error adding vocabulary $ontology_id", 'error');
    return FALSE;
  }

  $query = array(
    'name' => array(
      'op' => 'ILIKE',
      'data' => $cv_info['cvname'],
    ),
  );

  $db = chado_get_db($query);
  if (!$db) {
    drupal_set_message("Adding db $ontology_id", 'status');
    chado_insert_db(array(
      'name' => $cv_info['cvname'],
      'description' => $cv_info['description'],
      'url' => $cv_info['url'],
      'urlprefix' => $cv_info['urlprefix'],
    ));
  }

  $cv = tpps_load_cv($cv_info);
  if (!$cv) {
    drupal_set_message("Adding vocabulary $ontology_id", 'status');
    $cv = chado_insert_cv($cv_info['name'], $cv_info['description']);
  }
  if (!$cv) {
    drupal_set_message("There was an error adding vocabulary $ontology_id", 'error');
  }
  return $cv;
}
713 
/**
 * Adds a term found through the OLS search to Chado.
 *
 * The term is looked up by label in the vocabulary loaded for its
 * ontology_name and inserted with chado_insert_cvterm() when it does not
 * exist yet. Progress and errors are reported with drupal_set_message().
 *
 * @param object $term
 *   An OLS search result document. The properties label, obo_id,
 *   ontology_name and ontology_prefix are read, plus the first entry of
 *   description when there is one.
 *
 * @return mixed
 *   The cvterm record, or a falsy value when it could not be added.
 */
function tpps_ols_add_cvterm($term) {
  // tpps_load_cv() returns NULL for unknown vocabularies, so its properties
  // are read with a default.
  $cv = tpps_load_cv($term->ontology_name);
  // Search documents do not necessarily carry a description.
  $definition = $term->description[0] ?? NULL;

  $cvt = chado_get_cvterm(array(
    'name' => $term->label,
    'cv_id' => $cv->cv_id ?? NULL,
  ));

  if (!$cvt) {
    drupal_set_message("Adding cvterm {$term->label}, description: {$definition} to ontology {$term->ontology_prefix}", 'status');
    $cvt = chado_insert_cvterm(array(
      'id' => $term->obo_id,
      'name' => $term->label,
      'definition' => $definition,
      'cv_name' => $cv->name ?? NULL,
    ));
  }

  if (!$cvt) {
    drupal_set_message("There was an error adding cvterm {$term->label} to ontology {$term->ontology_prefix}", 'error');
  }

  return $cvt;
}
749 
/**
 * Makes sure the term described by a term-list entry exists in Chado.
 *
 * The entry has the form "ontology:name[:alt_ontology]". Existing records
 * are searched first (primary vocabulary, alternative vocabulary, then
 * tpps_load_cvterm()); only when none is found is the OLS search used and
 * the single matching term added through tpps_ols_add_cvterm().
 *
 * @param string $info
 *   The term description, for example 'po:leaf:plant_anatomy'.
 *
 * @return mixed
 *   The string 'local' when the entry belongs to the local vocabulary (the
 *   caller is expected to install it), FALSE when OLS finds no such term,
 *   otherwise the result of the lookup or insert.
 *
 * @throws \Exception
 *   When OLS returns several distinct terms for the name.
 */
function tpps_ols_install_term($info) {
  $parts = explode(':', $info);
  $ontology = $parts[0];
  if ($ontology == 'local') {
    return 'local';
  }
  $term = $parts[1] ?? NULL;
  $alt_ontology = $parts[2] ?? NULL;

  $cv = tpps_load_cv($ontology);
  if (!$cv) {
    $cv = tpps_ols_add_cv($ontology);
  }

  // Adding the vocabulary can fail, in which case there is no cv_id to read.
  $cvt = chado_get_cvterm(array(
    'name' => $term,
    'cv_id' => $cv->cv_id ?? NULL,
  ));

  if (!$cvt and !empty($alt_ontology)) {
    $alt_cv = tpps_load_cv($alt_ontology);
    if ($alt_cv) {
      $cvt = chado_get_cvterm(array(
        'name' => $term,
        'cv_id' => $alt_cv->cv_id,
      ));
    }
  }

  if (!$cvt) {
    // Double check the original term. tpps_load_cvterm() throws when a term
    // cannot be found, which is an expected outcome here.
    try {
      $cvt = tpps_load_cvterm($term);
    }
    catch (Exception $ex) {
      // Not found locally; fall through to the OLS search.
    }
  }

  if ($cvt) {
    return $cvt;
  }

  print_r("Searching OLS search for term: $term with ontology: $ontology\n");
  // The job logger lives in another file and may not be loaded. Calling an
  // undefined function raises an \Error that catch (Exception) would not
  // intercept, hence the explicit existence check.
  if (function_exists('tpps_job_logger_write')) {
    try {
      tpps_job_logger_write("[INFO] -- Searching OLS search for term: $term with ontology: $ontology");
    }
    catch (Exception $ex) {
      // Logging is best-effort only.
    }
  }

  // A failed OLS request yields no response object; treat it as "not found".
  $response = tpps_ols_search($term, $ontology)->response ?? NULL;
  if ($response === NULL || $response->numFound == 0) {
    return FALSE;
  }

  if ($response->numFound != 1) {
    // Several hits may describe the same term; collapse them by IRI.
    $docs = array();
    foreach ($response->docs as $doc) {
      $docs[$doc->iri] = $doc;
    }
    print_r($response->docs);
    if (count($docs) != 1) {
      // Picking the hit flagged as defining ontology was tried for po:root
      // (10/06/2022), but making sure the term exists in cvterms and in
      // tpps_get_cvterms_v1_2() was judged the better approach, so a
      // conflict remains an error.
      throw new \Exception("conflict found for cvterm $term from ontology $ontology: $response->numFound terms found");
    }
    return tpps_ols_add_cvterm(current($docs));
  }

  return tpps_ols_add_cvterm($response->docs[0]);
}
856 
/**
 * Makes sure a local TPPS term exists in Chado.
 *
 * The definition is taken from the versioned term list. A non-obsolete term
 * with the given name and definition is looked up and, when missing,
 * inserted into the 'local' vocabulary with an accession built from the
 * name of the database stored in the 'tpps_local_db' variable.
 *
 * @param string $key
 *   The TPPS term key, which is also used as the cvterm name.
 * @param string $version
 *   Optional term list version such as '1_2'. Defaults to the current TPPS
 *   major and minor version.
 *
 * @return mixed
 *   The existing or newly inserted cvterm, or a falsy value when the insert
 *   failed.
 *
 * @throws \Exception
 *   When the term list of that version has no description for the key.
 */
function tpps_local_install_term($key, $version = NULL) {
  if (empty($version)) {
    $version = TPPS_MAJOR_VERSION . '_' . TPPS_MINOR_VERSION;
  }

  $terms = ('tpps_get_cvterms_v' . $version)();
  if (empty($terms[$key])) {
    throw new \Exception("Error installing term $key: Term info could not be found for TPPS cvterm version $version.");
  }

  $local_db = variable_get('tpps_local_db');
  // The description reads "<ontology>:<definition>"; the second
  // colon-separated part is used as the definition.
  $definition = explode(':', $terms[$key])[1];

  $existing = chado_get_cvterm(array(
    'name' => $key,
    'definition' => $definition,
    'is_obsolete' => 0,
  ));
  if ($existing) {
    return $existing;
  }

  drupal_set_message("Creating local cvterm $key", 'status');
  $created = chado_insert_cvterm(array(
    'id' => "{$local_db->name}:$key",
    'name' => $key,
    'definition' => $definition,
    'cv_name' => 'local',
  ));
  if (!$created) {
    drupal_set_message("Error creating local cvterm $key", 'error');
  }

  return $created;
}
904 
/**
 * Links an old cvterm to its replacement through synonyms.
 *
 * The name of the old term is recorded as an 'exact' synonym of the new
 * term, and every synonym of the old term is copied to the new term with its
 * original type. Synonyms the new term already has are left alone.
 *
 * @param object $old_cvt
 *   The cvterm being replaced. Nothing happens when it has no name.
 * @param object $new_cvt
 *   The replacement cvterm. Nothing happens when it has no cvterm_id, which
 *   covers values that are not cvterm records at all.
 */
function tpps_match_cvterms($old_cvt, $new_cvt) {
  // Both sides must be usable records; otherwise rows with a NULL cvterm_id
  // would be written below.
  if (empty($old_cvt->name) || empty($new_cvt->cvterm_id)) {
    return;
  }

  // Tells whether the new term already carries the given synonym.
  $has_synonym = function ($synonym) use ($new_cvt) {
    $row = db_select('chado.cvtermsynonym', 'cs')
      ->fields('cs', array('cvtermsynonym_id'))
      ->condition('cvterm_id', $new_cvt->cvterm_id)
      ->condition('synonym', $synonym)
      ->range(0, 1)
      ->execute()
      ->fetchObject();
    return !empty($row);
  };

  if (!$has_synonym($old_cvt->name)) {
    db_insert('chado.cvtermsynonym')
      ->fields(array(
        'cvterm_id' => $new_cvt->cvterm_id,
        'synonym' => $old_cvt->name,
        'type_id' => chado_get_cvterm(array(
          'name' => 'exact',
          'cv_id' => array(
            'synonym_type',
          ),
        ))->cvterm_id,
      ))
      ->execute();
  }

  $old_synonyms = db_select('chado.cvtermsynonym', 'c')
    ->fields('c', array('synonym', 'type_id'))
    ->condition('cvterm_id', $old_cvt->cvterm_id)
    ->execute();

  while (($syn = $old_synonyms->fetchObject())) {
    if ($has_synonym($syn->synonym)) {
      continue;
    }
    db_insert('chado.cvtermsynonym')
      ->fields(array(
        'cvterm_id' => $new_cvt->cvterm_id,
        'synonym' => $syn->synonym,
        'type_id' => $syn->type_id,
      ))
      ->execute();
  }
}
965 
/**
 * Migrates the TPPS cvterms from one term list version to another.
 *
 * For every key whose description differs between the two versions, the new
 * term is installed and linked to the old one through synonyms (see
 * tpps_match_cvterms()). All remaining terms of the new version are
 * installed afterwards. On success the 'tpps_cvterms_version' variable is
 * set to the new version; on failure the database transaction is rolled
 * back and the exception is passed on.
 *
 * @param mixed $old_major
 *   Major version to migrate from. Together with $old_minor it overrides the
 *   'tpps_cvterms_version' variable when both are non-empty.
 * @param mixed $old_minor
 *   Minor version to migrate from.
 * @param mixed $new_major
 *   Major version to migrate to.
 * @param mixed $new_minor
 *   Minor version to migrate to.
 *
 * @throws \Exception
 *   When a term cannot be installed.
 */
function tpps_migrate_cvterms($old_major = NULL, $old_minor = NULL, $new_major = TPPS_MAJOR_VERSION, $new_minor = TPPS_MINOR_VERSION) {
  $old_v = variable_get('tpps_cvterms_version', '1_1');
  if (!empty($old_major) and !empty($old_minor)) {
    $old_v = $old_major . '_' . $old_minor;
  }
  $new_v = $new_major . '_' . $new_minor;
  if ($new_v === $old_v) {
    return;
  }

  $old_term_func = 'tpps_get_cvterms_v' . $old_v;
  $new_term_func = 'tpps_get_cvterms_v' . $new_v;
  $old_terms = $old_term_func();
  $new_terms = $new_term_func();
  $transaction = db_transaction();

  try {
    // First pass: terms present in both versions with a changed description.
    foreach ($old_terms as $key => $old_info) {
      $new_info = $new_terms[$key] ?? NULL;
      if (!isset($new_info) or !isset($old_info)) {
        continue;
      }
      if ($old_info == $new_info) {
        continue;
      }

      // Locate the old record. It stays NULL when the old description cannot
      // be split into "ontology:term"; tpps_match_cvterms() then does nothing.
      $old_cvt = NULL;
      $matches = array();
      if (preg_match('/^([^:]*):(.*)$/', $old_info, $matches)) {
        $old_ont = $matches[1];
        $old_term = $matches[2];
        if ($old_ont == 'local') {
          $old_cvt = chado_get_cvterm(array(
            'name' => $key,
            'definition' => $old_term,
          ));
        }
        else {
          $old_cvt = chado_get_cvterm(array(
            'name' => $old_term,
            // tpps_load_cv() returns NULL for unknown vocabularies.
            'cv_id' => tpps_load_cv($old_ont)->cv_id ?? NULL,
          ));
        }
      }

      $new_cvt = tpps_ols_install_term($new_info);
      if ($new_cvt === FALSE) {
        throw new \Exception("No OLS terms found for term $new_info");
      }
      if ($new_cvt === 'local') {
        // tpps_ols_install_term() only signals that the term is local; the
        // record itself has to be created here, exactly as in the second
        // pass, before it can be linked to the old term.
        $new_cvt = tpps_local_install_term($key, $new_v);
      }
      unset($new_terms[$key]);
      tpps_match_cvterms($old_cvt, $new_cvt);
    }

    // Second pass: every remaining term of the new version.
    foreach ($new_terms as $key => $info) {
      if (empty($info)) {
        continue;
      }
      $result = tpps_ols_install_term($info);
      if ($result === 'local') {
        tpps_local_install_term($key, $new_v);
      }
    }

    variable_set('tpps_cvterms_version', $new_v);
  }
  catch (\Exception $e) {
    $transaction->rollback();
    throw $e;
  }
}
tpps_ols_ontology_info($id)
tpps_ols_url($types, array $args=array())
tpps_get_cvterms_v1_1()
tpps_load_cv($info)
tpps_migrate_cvterms($old_major=NULL, $old_minor=NULL, $new_major=TPPS_MAJOR_VERSION, $new_minor=TPPS_MINOR_VERSION)
tpps_cvterms_clear_cache(TripalJob $job=NULL)
tpps_ols_search($query, $ontology=NULL, $exact=TRUE)
tpps_job_logger_write($string, $replacements=[])
Definition: submit_all.php:160
tpps_match_cvterms($old_cvt, $new_cvt)
tpps_ols_call($type, array $args=array())
tpps_local_install_term($key, $version=NULL)
const TPPS_OLS_BASE
Definition: tpps.module:18
tpps_ols_add_cvterm($term)
tpps_ols_add_cv($ontology_id)
const TPPS_MAJOR_VERSION
Definition: tpps.module:8
tpps_load_cvterm($term, array $options=array(), $version=NULL, $refresh_cache=FALSE)
tpps_get_cvterms_v1_2()
tpps_ols_install_term($info)
const TPPS_MINOR_VERSION
Definition: tpps.module:9