Tripal Plant PopGen Submit
file_utils.inc File Reference

Go to the source code of this file.

Functions

 tpps_compare_files ($fid_1, $fid_2, $file_1_id_name, $file_2_id_name, $file_1_no_header=FALSE, $file_2_no_header=FALSE)
 
 tpps_convert_colname ($column)
 
 tpps_file_generator ($fid, array $options=array())
 
 tpps_file_headers ($fid, $no_header=FALSE)
 
 tpps_file_iterator ($fid, $function, array &$options=array())
 
 tpps_file_len ($fid)
 
 tpps_file_len_helper ($row, array &$options=array())
 
 tpps_file_validate_columns (array &$form_state, array $required_groups, array $file_element)
 
 tpps_file_width ($fid)
 
 tpps_flat_generator ($location, array $options=array())
 
 tpps_flat_width ($location, $delim=',')
 
 tpps_get_archive_files ($archive)
 
 tpps_get_location ($location)
 
 tpps_get_path_extension ($path)
 
 tpps_increment_hex ($hex)
 
 tpps_parse_file ($fid, $max_rows=0, $no_header=FALSE, $columns=NULL)
 
 tpps_parse_file_column ($fid, $column, $no_header=FALSE)
 
 tpps_parse_file_column_helper ($row, array &$options)
 
 tpps_parse_file_helper ($row, array &$options)
 
 tpps_rename_file ($fid, $new_name, array $options=array())
 
 tpps_rmdir ($dir)
 
 tpps_save_file_columns (array &$form, array &$form_state)
 
 tpps_xlsx_generator ($location, array $options=array())
 
 tpps_xlsx_get_cell_value (&$reader, $cell_type, array &$strings=array())
 
 tpps_xlsx_get_dimension ($location)
 
 tpps_xlsx_get_row (&$reader, array &$strings=array(), $plain=TRUE, $columns=NULL)
 
 tpps_xlsx_get_rows (array &$readers, array &$strings=array(), $plain=TRUE, $columns=NULL)
 
 tpps_xlsx_get_strings ($strings_location)
 
 tpps_xlsx_translate_date ($date)
 
 tpps_xlsx_width ($location)
 

Detailed Description

Defines useful functions for file management.

Definition in file file_utils.inc.

Function Documentation

◆ tpps_compare_files()

tpps_compare_files (   $fid_1,
  $fid_2,
  $file_1_id_name,
  $file_2_id_name,
  $file_1_no_header = FALSE,
  $file_2_no_header = FALSE 
)

Compares two file columns.

Returns the array of items that are found in file 1, column file_1_id_name, and are not found in file 2, column file_2_id_name.

Parameters
int$fid_1File 1 identifier.
int$fid_2File 2 identifier.
mixed$file_1_id_nameName of column to check in file 1. String or integer.
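mixed$file_2_id_nameName of column to check in file 2. String or integer.
bool$file_1_no_headerWhether or not the no_header option has been set for file 1. Defaults to FALSE.
bool$file_2_no_headerWhether or not the no_header option has been set for file 2. Defaults to FALSE.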
Returns
array The array of items found in file 1 that were not found in file 2.

Definition at line 379 of file file_utils.inc.

379  {
380  $missing = array();
381  $file_1_content = array_unique(tpps_parse_file_column($fid_1, $file_1_id_name, $file_1_no_header));
382  $file_2_content = array_unique(tpps_parse_file_column($fid_2, $file_2_id_name, $file_2_no_header));
383  asort($file_1_content);
384  asort($file_2_content);
385  reset($file_1_content);
386  reset($file_2_content);
387 
388  while (current($file_1_content) !== FALSE and current($file_2_content) !== FALSE) {
389  if (current($file_1_content) < current($file_2_content)) {
390  $missing[] = current($file_1_content);
391  next($file_1_content);
392  continue;
393  }
394  elseif (current($file_1_content) > current($file_2_content)) {
395  next($file_2_content);
396  continue;
397  }
398  next($file_1_content);
399  next($file_2_content);
400  continue;
401  }
402 
403  while (current($file_1_content) !== FALSE) {
404  $missing[] = current($file_1_content);
405  next($file_1_content);
406  }
407  return $missing;
408 }
tpps_parse_file_column($fid, $column, $no_header=FALSE)
Definition: file_utils.inc:334

◆ tpps_convert_colname()

tpps_convert_colname (   $column)

This function will convert a capital alphabetical column key to a zero-based integer index.

Here are a few examples of column keys and their converted integers:
  echo tpps_convert_colname('A');  // 0
  echo tpps_convert_colname('B');  // 1
  echo tpps_convert_colname('AA'); // 26
  echo tpps_convert_colname('AB'); // 27
  echo tpps_convert_colname('BA'); // 52

Parameters
string$columnThe capital alphabetical key for the column.
Returns
int The integer conversion of the column key.

Definition at line 251 of file file_utils.inc.

251  {
252  $total = 0;
253  $base_16 = unpack('H*', 'A')[1];
254  $base = base_convert($base_16, 16, 10) - 1;
255  for ($i = 0; $i < strlen($column); $i++) {
256  $total *= 26;
257  $num_16 = unpack('H*', $column[$i])[1];
258  $total += base_convert($num_16, 16, 10) - $base;
259  }
260  return $total - 1;
261 }

◆ tpps_file_generator()

tpps_file_generator (   $fid,
array  $options = array() 
)

This function selects the appropriate generator for a file.

The generator is selected based on the file extension. If the extension is not recognized, throws an exception.

Parameters
int$fidThe Drupal managed file id for the file.
array$optionsAdditional options to be passed to the generator.
Returns
Generator|array The generator for the file.

Definition at line 1074 of file file_utils.inc.

1074  {
1075  $file = file_load($fid);
1076  $location = tpps_get_location($file->uri);
1077  $extension = tpps_get_path_extension($location);
1078  switch ($extension) {
1079  case 'xlsx':
1080  return tpps_xlsx_generator($location, $options);
1081 
1082  case 'txt':
1083  case 'csv':
1084  case 'vcf':
1085  case 'gz':
1086  return tpps_flat_generator($location, $options);
1087 
1088  default:
1089  if (empty($extension)) {
1090  $extension = "NULL";
1091  }
1092  $msg = "Unrecognized file type: $extension";
1093  throw new Exception($msg);
1094  }
1095 }
tpps_flat_generator($location, array $options=array())
tpps_get_path_extension($path)
Definition: file_utils.inc:661
tpps_xlsx_generator($location, array $options=array())
tpps_get_location($location)
Definition: file_utils.inc:640

◆ tpps_file_headers()

tpps_file_headers (   $fid,
  $no_header = FALSE 
)

Returns the headers of a TPPS file.

Parameters
int$fidThe Drupal managed file id of the file.
bool$no_headerWhether or not the no_header option has been set for the file.
Returns
array An array of headers for the file.

Definition at line 972 of file file_utils.inc.

972  {
973  $headers = array();
974  if ($no_header) {
975  $hex = unpack('H*', 'A')[1];
976  $width = tpps_file_width($fid);
977  for ($i = 0; $i < $width; $i++) {
978  $key = pack('H*', $hex);
979  $headers[$key] = $i;
980  $hex = tpps_increment_hex($hex);
981  }
982  return $headers;
983  }
984 
985  $content = array();
986  $options = array(
987  'no_header' => TRUE,
988  'max_rows' => 1,
989  'content' => &$content,
990  );
991  tpps_file_iterator($fid, 'tpps_parse_file_helper', $options);
992  return current($content);
993 }
tpps_file_width($fid)
Definition: file_utils.inc:115
tpps_increment_hex($hex)
Definition: file_utils.inc:216
tpps_file_iterator($fid, $function, array &$options=array())

◆ tpps_file_iterator()

tpps_file_iterator (   $fid,
  $function,
array &  $options = array() 
)

Iterates over a file and applies a function to each generator result.

This function should be used in place of tpps_parse_file() wherever possible, as it uses much less memory and is much faster.

Parameters
int$fidThe Drupal managed file id of the file.
string$functionThe function to be applied to each item returned by the generator.
array$optionsOptions that will be passed to both $function and the generator.
Returns
bool Returns TRUE on success, otherwise FALSE.

Definition at line 1011 of file file_utils.inc.

1011  {
1012  if (!function_exists($function)) {
1013  return FALSE;
1014  }
1015 
1016  if (!empty($options['job'])) {
1017  $options['job']->setTotalItems(tpps_file_len($fid));
1018  $options['job']->setItemsHandled(0);
1019  }
1020 
1021  $file = file_load($fid);
1022  $file_location = tpps_get_location($file->uri);
1023 
1024  foreach (tpps_file_generator($fid, $options) as $item) {
1025  // Assume the item IS NOT EMPTY
1026  $item_is_not_empty = true;
1027  if(is_array($item)) {
1028  // Check each key value
1029  $values_count = count($item);
1030  $values_empty_count = 0;
1031  foreach($item as $key => $value) {
1032  if($value == '') {
1033  $values_empty_count = $values_empty_count + 1;
1034  }
1035  }
1036  // Check if $values_count == $values_empty_count (this means item is empty)
1037  if ($values_count == $values_empty_count) {
1038  $item_is_not_empty = false;
1039  }
1040  }
1041 
1042  // if the item is not empty, proceed to process the line
1043  if ($item_is_not_empty) {
1044  $function($item, $options);
1045  if (!empty($options['job'])) {
1046  $options['job']->addItemsHandled(1);
1047  }
1048  }
1049  else {
1050  // print_r($item);
1051  echo json_encode($item) . "\n";
1052  echo "[TPPS FILE ITERATOR] Found a line/item that was empty, ignoring this item\n";
1053  echo "[TPPS FILE ITERATOR] File location: " . $file_location . "\n";
1054  // throw new Exception('DEBUG STOP - FOUND AN EMPTY LINE');
1055  }
1056  }
1057  return TRUE;
1058 }
tpps_file_len($fid)
Definition: file_utils.inc:65
tpps_get_location($location)
Definition: file_utils.inc:640
tpps_file_generator($fid, array $options=array())

◆ tpps_file_len()

tpps_file_len (   $fid)

Returns the number of lines or rows in a file.

Parameters
int$fidThe Drupal managed file identifier of the file.
Returns
int The number of lines or rows in the file.

Definition at line 65 of file file_utils.inc.

65  {
66  $file = file_load($fid);
67  $location = tpps_get_location($file->uri);
68  $extension = tpps_get_path_extension($location);
69  $count = 0;
70  $options = array(
71  'count' => &$count,
72  );
73  if ($extension == 'vcf') {
74  $options['skip_prefix'] = '#';
75  }
76  tpps_file_iterator($fid, 'tpps_file_len_helper', $options);
77  return $count;
78 }
tpps_get_path_extension($path)
Definition: file_utils.inc:661
tpps_get_location($location)
Definition: file_utils.inc:640
tpps_file_iterator($fid, $function, array &$options=array())

◆ tpps_file_len_helper()

tpps_file_len_helper (   $row,
array &  $options = array() 
)

This is the helper function for tpps_file_len().

This function is passed to the tpps_file_iterator during tpps_file_len, and increments the count attribute of the options array. It also checks whether a skip_prefix was provided, and skips lines if necessary.

Parameters
mixed$rowThe item yielded by the TPPS file generator.
array$optionsAdditional options set when calling tpps_file_iterator().

Definition at line 92 of file file_utils.inc.

92  {
93  if (empty($options['skip_prefix'])) {
94  $options['count']++;
95  }
96  else {
97  if (substr(current($row), 0, strlen($options['skip_prefix'])) != $options['skip_prefix']) {
98  $options['count']++;
99  }
100  }
101 }

◆ tpps_file_validate_columns()

tpps_file_validate_columns ( array &  $form_state,
array  $required_groups,
array  $file_element 
)

This function validates that the file contains all of the required groups.

A "required group" is a column type that must be present within the file for the file to be considered valid. For instance, a required group for a plant accession file would be the "Location" group. The required groups array should satisfy the structure:
  array(
    '<group name>' => array(
      '<type>' => array(<column option index>[, <column option index>...])[,
      '<type>' => array(<column option index>[, <column option index>...])...]
    )[,
    '<group name>' => array(
      '<type>' => array(<column option index>[, <column option index>...])[,
      '<type>' => array(<column option index>[, <column option index>...])...]
    )...]
  )
where <group name> is the name of the required group, <type> is the name of the set of options that could satisfy the required group, and each <column option index> is a key within the file element['column-options'] field.

Parameters
array$form_stateThe state of the form that the file is part of.
array$required_groupsThe required column option groups for the file.
array$file_elementThe managed_file element of the file within the form.
Returns
array The groups and the column option types that the file was valid for.

Definition at line 442 of file file_utils.inc.

442  {
443  $cols = $file_element['#value']['columns'];
444 
445  $parents = $file_element['#parents'];
446  $new_end_columns = end($parents) . "-columns";
447  $new_end_no_header = end($parents) . "-no-header";
448  $new_end_empty = end($parents) . "-empty";
449  $group_path = array_pop($parents) . "-groups";
450  $values = drupal_array_get_nested_value($form_state['values'], $parents);
451  // Initialize form column values in form state.
452  $values[$new_end_columns] = array();
453  // Hold onto the location of the columns in form state.
454  $state_column_values = &$values[$new_end_columns];
455  $values[$new_end_no_header] = isset($file_element['#value']['no-header']) ? $file_element['#value']['no-header'] : NULL;
456  $values[$new_end_empty] = isset($file_element['#value']['empty']) ? $file_element['#value']['empty'] : NULL;
457 
458  $title_parts = explode(':', $file_element['#title']);
459  $error_prompt = $title_parts[0];
460 
461  $groups = array();
462  $required_groups_flat = array();
463  foreach ($required_groups as $group => $combinations) {
464  $groups[$group] = array();
465  $required_groups_flat[$group] = array();
466  foreach ($combinations as $name => $combination) {
467  $required_groups_flat[$group] = array_merge($required_groups_flat[$group], $combination);
468  }
469  }
470 
471  foreach ($cols as $name => $type) {
472  $state_column_values[$name] = $type;
473  foreach ($required_groups_flat as $group => $types) {
474  if (in_array($type, $types)) {
475  if (!isset($groups[$group][$type])) {
476  $groups[$group][$type] = array($name);
477  }
478  else {
479  $groups[$group][$type][] = $name;
480  }
481  break;
482  }
483  }
484  }
485  // dpm($required_groups);
486  foreach ($required_groups as $group => $combinations) {
487  $group_valid = FALSE;
488  $groups[$group]['#type'] = array();
489  foreach ($combinations as $name => $combination) {
490  $combination_valid = TRUE;
491  foreach ($combination as $type) {
492  if (!isset($groups[$group][$type])) {
493  $combination_valid = FALSE;
494  break;
495  }
496  }
497  if ($combination_valid) {
498  $groups[$group]['#type'][] = $name;
499  $group_valid = TRUE;
500  }
501  }
502 
503  if (!$group_valid) {
504  form_set_error($file_element['#name'] . "[columns][$group", "$error_prompt: Please specify a column or columns that hold $group.");
505  }
506  }
507 
508  foreach ($groups as $key => $group) {
509  foreach ($group as $opt_num => $col_names) {
510  if (count($col_names) == 1) {
511  $groups[$key][$opt_num] = $col_names[0];
512  }
513  }
514  }
515 
516  $values[$group_path] = $groups;
517  drupal_array_set_nested_value($form_state['values'], $parents, $values);
518 
519  return $groups;
520 }

◆ tpps_file_width()

tpps_file_width (   $fid)

This function returns the width of a file.

The process of finding the width of the file is done with the use of helper functions, and this function returns the results.

Parameters
int$fidThe Drupal managed file identifier of the file to be parsed.
Returns
int The length of the longest line in the file.

Definition at line 115 of file file_utils.inc.

115  {
116  $file = file_load($fid);
117  $location = tpps_get_location($file->uri);
118  $extension = tpps_get_path_extension($location);
119  switch ($extension) {
120  case 'xlsx':
121  return tpps_xlsx_width($location);
122 
123  case 'txt':
124  case 'csv':
125  return tpps_flat_width($location);
126 
127  case 'vcf':
128  case 'gz':
129  return tpps_flat_width($location, "\t");
130 
131  default:
132  throw new Exception('Unrecognized file type');
133  }
134 }
tpps_flat_width($location, $delim=',')
Definition: file_utils.inc:179
tpps_get_path_extension($path)
Definition: file_utils.inc:661
tpps_xlsx_width($location)
Definition: file_utils.inc:145
tpps_get_location($location)
Definition: file_utils.inc:640

◆ tpps_flat_generator()

tpps_flat_generator (   $location,
array  $options = array() 
)

This function is a generator for flat files.

If the 'max_rows' option is set, the generator stops after max_rows items are yielded. If the 'columns' option is set, then each item yielded by the generator will contain only the specified columns from the row.

Parameters
string$locationThe location of the file on the server.
array$optionsAdditional options detailing how to parse the file.
Returns
Generator|array Yields each row of the file (or columns of a row, if applicable).

Definition at line 1234 of file file_utils.inc.

1234  {
1235  $original = ini_get('auto_detect_line_endings');
1236  if (!$original) {
1237  ini_set('auto_detect_line_endings', TRUE);
1238  }
1239 
1240  $no_header = $options['no_header'] ?? FALSE;
1241  $columns = $options['columns'] ?? NULL;
1242  $max_rows = $options['max_rows'] ?? NULL;
1243 
1244  $delim = ',';
1245  $longest = tpps_flat_width($location, $delim);
1246  $handle = gzopen($location, 'r');
1247 
1248  if (!$no_header) {
1249  gzgets($handle);
1250  }
1251  $count = 0;
1252 
1253  while (($line = gzgets($handle))) {
1254  $vals = str_getcsv($line, $delim);
1255  if (!empty($max_rows) and $count >= $max_rows) {
1256  break;
1257  }
1258  $count++;
1259 
1260  $values = array();
1261 
1262  if (empty($columns)) {
1263  $hex = unpack('H*', 'A')[1];
1264  for ($i = 0; $i < $longest; $i++) {
1265  $key = pack('H*', $hex);
1266  $values[$key] = isset($vals[$i]) ? trim($vals[$i]) : NULL;
1267  if (isset($values[$key]) and !check_plain($values[$key])) {
1268  $values[$key] = trim(mb_convert_encoding($values[$key], "UTF-8", "Windows-1252"));
1269  }
1270  $hex = tpps_increment_hex($hex);
1271  }
1272  yield $values;
1273  continue;
1274  }
1275 
1276  foreach ($columns as $column) {
1277  $values[$column] = $vals[tpps_convert_colname($column)] ?? NULL;
1278  }
1279 
1280  yield $values;
1281  }
1282 
1283  fclose($handle);
1284  ini_set('auto_detect_line_endings', $original);
1285 }
tpps_increment_hex($hex)
Definition: file_utils.inc:216
tpps_flat_width($location, $delim=',')
Definition: file_utils.inc:179
tpps_convert_colname($column)
Definition: file_utils.inc:251

◆ tpps_flat_width()

tpps_flat_width (   $location,
  $delim = ',' 
)

This function gets the width of a flat file, i.e. the largest number of delimited fields found on any line.

Parameters
string$locationThe location of the file.
string$delimThe delimiter for each line in the file. Defaults to ','.
Returns
int The number of fields in the widest line of the file.

Definition at line 179 of file file_utils.inc.

179  {
180  $original = ini_get('auto_detect_line_endings');
181  if (!$original) {
182  ini_set('auto_detect_line_endings', TRUE);
183  }
184 
185  $longest = 0;
186  $handle = gzopen($location, 'r');
187 
188  while (($line = gzgets($handle))) {
189  $line = str_getcsv($line, $delim);
190  $longest = max($longest, count($line));
191  }
192 
193  fclose($handle);
194  ini_set('auto_detect_line_endings', $original);
195  return $longest;
196 }

◆ tpps_get_archive_files()

tpps_get_archive_files (   $archive)

Extracts an archive from a path or Drupal file object.

Parameters
mixed$archiveThe archive to be extracted. Can be a path or Drupal file object.
Returns
array Array of file paths found in the extraction directory. An empty array if the extension is not a supported archive extension (zip, gz or tar).

Definition at line 911 of file file_utils.inc.

911  {
912  $files = array();
913  if (is_object($archive) and !empty($archive->uri)) {
914  $archive = $archive->uri;
915  }
916  $loc = drupal_realpath($archive);
917 
918  $ext = tpps_get_path_extension($loc);
919  $unzip_dir = drupal_realpath('temporary://tpps_tmp');
920  if (is_dir($unzip_dir)) {
921  tpps_rmdir($unzip_dir);
922  }
923 
924  if (is_dir($unzip_dir) or mkdir($unzip_dir)) {
925  switch ($ext) {
926  case 'zip':
927  $zip = new \ZipArchive();
928  $zip->open($loc);
929  $zip->extractTo($unzip_dir);
930  break;
931 
932  case 'gz':
933  $zip = new \Archive_Tar($loc, 'gz');
934  $zip->extract($unzip_dir);
935  break;
936 
937  case 'tar':
938  $zip = new \Archive_Tar($loc);
939  $zip->extract($unzip_dir);
940  break;
941 
942  default:
943  return array();
944  }
945 
946  $dir = $unzip_dir;
947  $files = scandir($dir);
948  if ($files and count($files) == 3 and is_dir($dir . '/' . $files[2])) {
949  $dir .= '/' . $files[2];
950  $files = scandir($dir);
951  }
952  }
953 
954  foreach ($files as $key => $name) {
955  $files[$key] = $dir . '/' . $name;
956  }
957 
958  return $files;
959 }
tpps_get_path_extension($path)
Definition: file_utils.inc:661
tpps_rmdir($dir)
Definition: file_utils.inc:884

◆ tpps_get_location()

tpps_get_location (   $location)

Gets true file path from a uri or Drupal file stream.

If the file is a zip or tar archive that contains a single file, extracts the archive and returns the path of the extracted file. Otherwise the real path of the original file is returned.

Parameters
string$locationThe uri or file stream of the file we want the location of.
Returns
string The true path of the file.

Definition at line 640 of file file_utils.inc.

640  {
641  $location = drupal_realpath($location);
642  $extension = tpps_get_path_extension($location);
643  if ($extension == 'zip' or $extension == 'tar') {
644  $files = tpps_get_archive_files($location);
645  if ($files and (count($files) == 3 or (count($files) == 4 and substr($files[3], -9) === '/__MACOSX')) and !is_dir($files[2])) {
646  $location = $files[2];
647  }
648  }
649  return $location;
650 }
tpps_get_archive_files($archive)
Definition: file_utils.inc:911
tpps_get_path_extension($path)
Definition: file_utils.inc:661

◆ tpps_get_path_extension()

tpps_get_path_extension (   $path)

Gets the file extension from a path.

Parameters
string$pathThe path of the file we want the extension of.
Returns
string The extension of that file.

Definition at line 661 of file file_utils.inc.

661  {
662  preg_match('/\.([a-zA-Z0-9]*)$/', $path, $matches);
663 
664  if(count($matches) == 0) {
665  return ""; // Added by Rish during debugging process. XLSX extracting creates files without paths
666  }
667  return strtolower($matches[1]);
668 
669 }

◆ tpps_increment_hex()

tpps_increment_hex (   $hex)

This function increments a hexadecimal key.

The function expects a hexadecimal encoding of a capital alphabetical key. For example, you might pass "$hex = unpack('H*', 'A')[1];". This function is intended to be used for parsed file keys. Here are some examples:
  echo pack('H*', tpps_increment_hex(unpack('H*', 'A')[1]));   // B
  echo pack('H*', tpps_increment_hex(unpack('H*', 'AA')[1]));  // AB
  echo pack('H*', tpps_increment_hex(unpack('H*', 'Z')[1]));   // AA
  echo pack('H*', tpps_increment_hex(unpack('H*', 'AZ')[1]));  // BA
  echo pack('H*', tpps_increment_hex(unpack('H*', 'ZZZ')[1])); // AAAA

Parameters
string$hexThe hexadecimal encoding of a capital alphabetical key.
Returns
string The encoding of the next capital alphabetical key.

Definition at line 216 of file file_utils.inc.

216  {
217  $add = 1;
218  $new = "";
219  for ($i = strlen(pack('H*', $hex)) - 1; $i >= 0; $i--) {
220  $num = base_convert(substr($hex, (2 * $i), 2), 16, 10) + $add;
221  if ($num > 90) {
222  $num = 65;
223  }
224  else {
225  $add = 0;
226  }
227  $new = base_convert($num, 10, 16) . $new;
228  if ($i == 0 and $add) {
229  $new = '41' . $new;
230  }
231  }
232  return $new;
233 }

◆ tpps_parse_file()

tpps_parse_file (   $fid,
  $max_rows = 0,
  $no_header = FALSE,
  $columns = NULL 
)

This function parses a file and returns an array of its content.

The process of actually parsing the file is done with the use of helper functions, and this function returns the results.

Parameters
int$fidThe Drupal managed file identifier of the file to be parsed.
int$max_rowsThe maximum number of rows to read from the file.
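bool$no_headerWhether or not the file has a "no_header" flag set.
array$columnsThe alphabetical keys of the columns to read from each row, or NULL to read every column.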
Returns
array An array containing the content headers and content body.

Definition at line 25 of file file_utils.inc.

25  {
26  $content = array();
27  $options = array(
28  'no_header' => $no_header,
29  'columns' => $columns,
30  'max_rows' => $max_rows,
31  'content' => &$content,
32  );
33 
34  tpps_file_iterator($fid, 'tpps_parse_file_helper', $options);
35  $content['headers'] = tpps_file_headers($fid, $no_header);
36  return $content;
37 }
tpps_file_headers($fid, $no_header=FALSE)
Definition: file_utils.inc:972
tpps_file_iterator($fid, $function, array &$options=array())

◆ tpps_parse_file_column()

tpps_parse_file_column (   $fid,
  $column,
  $no_header = FALSE 
)

Parses a single column from a file.

Parameters
int$fidThe Drupal managed file identifier of the file to be parsed.
string$columnThe alphabetical key of the column to be parsed.
bool$no_headerWhether or not the no_header option has been set to true for the file.
Returns
array An array of values found in the parsed column.

Definition at line 334 of file file_utils.inc.

334  {
335  $content = array();
336  $options = array(
337  'no_header' => $no_header,
338  'columns' => array($column),
339  'content' => &$content,
340  );
341  tpps_file_iterator($fid, 'tpps_parse_file_column_helper', $options);
342  return $content;
343 }
tpps_file_iterator($fid, $function, array &$options=array())

◆ tpps_parse_file_column_helper()

tpps_parse_file_column_helper (   $row,
array &  $options 
)

This function processes a single row of a file.

This function populates the content attribute of the options array with the data in a single column. This function is meant to be used with tpps_file_iterator().

Parameters
mixed$rowThe item yielded by the TPPS file generator.
array$optionsAdditional options set when calling tpps_file_iterator().

Definition at line 357 of file file_utils.inc.

357  {
358  $options['content'][] = $row[current($options['columns'])];
359 }

◆ tpps_parse_file_helper()

tpps_parse_file_helper (   $row,
array &  $options 
)

This function processes a single row of a file.

This function populates the content attribute of the options array with content returned by the TPPS file generator. It is used by both the tpps_parse_file and the tpps_file_headers functions. This function is meant to be used with tpps_file_iterator().

Parameters
mixed$rowThe item yielded by the TPPS file generator.
array$optionsAdditional options set when calling tpps_file_iterator().

Definition at line 52 of file file_utils.inc.

52  {
53  $options['content'][] = $row;
54 }

◆ tpps_rename_file()

tpps_rename_file (   $fid,
  $new_name,
array  $options = array() 
)

Renames a Drupal managed file.

If the "preserve" option is set to true, then the new file will be created as a copy of the old one. Otherwise, the old file will be removed.

Parameters
int$fidThe Drupal managed file identifier of the file to be renamed.
string$new_nameThe new name of the file.
array$optionsAdditional options to be used when renaming the file.
Returns
object The new Drupal managed file object.

Definition at line 279 of file file_utils.inc.

279  {
280  if (!array_key_exists('preserve', $options)) {
281  $options['preserve'] = FALSE;
282  }
283 
284  if (!empty($fid) and ($file = file_load($fid))) {
285  $new_filename = $new_name . "." . tpps_get_path_extension($file->uri);
286  if (!preg_match('/^(.*\/)(.*)$/', $new_name, $matches)) {
287  preg_match('/^(.*\/).*$/', $file->uri, $matches);
288  $new_filename = $matches[1] . $new_filename;
289  }
290 
291  if ($options['preserve']) {
292  $file->status = FILE_STATUS_PERMANENT;
293  file_save($file);
294  // Check if file already exists then return that file.
295  $result = db_query('SELECT f.fid
296  FROM {file_managed} f WHERE f.uri = :uri', array(':uri' => $new_filename));
297  $record = $result->fetchObject();
298  if(isset($record)) {
299  return file_load($record->fid);
300  }
301  return file_copy($file, $new_filename, FILE_EXISTS_RENAME);
302  }
303  // Check if file already exists then return that file.
304  $result = db_query('SELECT f.fid
305  FROM {file_managed} f WHERE f.uri = :uri', array(':uri' => $new_filename));
306  $record = $result->fetchObject();
307 
308  if(isset($record->fid)) {
309  // print_r("FID:" . $record->fid . "\n");
310  return file_load($record->fid);
311  }
312  $file_object = file_move($file, $new_filename, FILE_EXISTS_RENAME);
313  // print_r("FILE OBJECT:\n");
314  // print_r($file_object);
315  // print_r("\n");
316  return $file_object;
317  }
318  throw new Exception("Error: could not open file");
319 }
tpps_get_path_extension($path)
Definition: file_utils.inc:661

◆ tpps_rmdir()

tpps_rmdir (   $dir)

Recursively removes a directory.

We want to clean out the temporary tpps_xlsx directory when we are done with an excel file, so we need to recursively remove all of the links and subdirectories. Use this function with caution.

Parameters
string$dirThe directory to be removed.

Definition at line 884 of file file_utils.inc.

884  {
885  if (is_dir($dir)) {
886  $children = scandir($dir);
887  foreach ($children as $child) {
888  if ($child != '.' and $child != '..') {
889  if (is_dir($dir . '/' . $child) and !is_link($dir . '/' . $child)) {
890  tpps_rmdir($dir . '/' . $child);
891  }
892  else {
893  unlink($dir . '/' . $child);
894  }
895  }
896  }
897  rmdir($dir);
898  }
899 }
tpps_rmdir($dir)
Definition: file_utils.inc:884

◆ tpps_save_file_columns()

tpps_save_file_columns ( array &  $form,
array &  $form_state 
)

Saves file column selections to the values of the form_state.

This function is called before the form is validated so that the column options can be saved whenever the user makes a selection.

Parameters
array$formThe form being validated.
array$form_stateThe state of the form being validated.

Definition at line 533 of file file_utils.inc.

533  {
534 
535  if ($form_state['stage'] == TPPS_PAGE_3) {
536  for ($i = 1; $i <= $form_state['stats']['species_count']; $i++) {
537  if (($file = file_load($form_state['values']['tree-accession']["species-$i"]['file'])) and $form_state['values']['tree-accession']["species-$i"]['file'] != 0) {
538  file_usage_add($file, 'tpps', 'tpps_project', substr($form_state['accession'], 4));
539  }
540 
541  $form_state['values']['tree-accession']["species-$i"]['file-columns'] = array();
542 
543  if (isset($form['tree-accession']["species-$i"]['file']['#value']['columns'])) {
544  foreach ($form['tree-accession']["species-$i"]['file']['#value']['columns'] as $col => $val) {
545  if ($col[0] != '#') {
546  $form_state['values']['tree-accession']["species-$i"]['file-columns'][$col] = $form['tree-accession']["species-$i"]['file']['#value']['columns'][$col];
547  }
548  }
549  }
550 
551  if (empty($form_state['values']['tree-accession']['check'])) {
552  break;
553  }
554  }
555  }
556  elseif ($form_state['stage'] == TPPS_PAGE_4) {
557  if (isset($form['organism-1']['genotype'])) {
558  for ($i = 1; $i <= $form_state['saved_values'][TPPS_PAGE_1]['organism']['number']; $i++) {
559 
560  if (!empty($form_state['values']["organism-$i"]['genotype']['files']['snps-assay']) and ($file = file_load($form_state['values']["organism-$i"]['genotype']['files']['snps-assay']))) {
561  file_usage_add($file, 'tpps', 'tpps_project', substr($form_state['accession'], 4));
562 
563  $form_state['values']["organism-$i"]['genotype']['files']['snps-assay-columns'] = array();
564 
565  if (isset($form["organism-$i"]['genotype']['files']['snps-assay']['#value']['columns'])) {
566  foreach ($form["organism-$i"]['genotype']['files']['snps-assay']['#value']['columns'] as $col => $val) {
567  if ($col[0] != '#') {
568  $form_state['values']["organism-$i"]['genotype']['files']['snps-assay-columns'][$col] = $form["organism-$i"]['genotype']['files']['snps-assay']['#value']['columns'][$col];
569  }
570  }
571  }
572  }
573 
574  if (!empty($form_state['values']["organism-$i"]['genotype']['files']['other']) and ($file = file_load($form_state['values']["organism-$i"]['genotype']['files']['other']))) {
575  file_usage_add($file, 'tpps', 'tpps_project', substr($form_state['accession'], 4));
576 
577  $form_state['values']["organism-$i"]['genotype']['files']['other-columns'] = array();
578 
579  if (isset($form["organism-$i"]['genotype']['files']['other']['#value']['columns'])) {
580  foreach ($form["organism-$i"]['genotype']['files']['other']['#value']['columns'] as $col => $val) {
581  if ($col[0] != '#') {
582  $form_state['values']["organism-$i"]['genotype']['files']['other-columns'][$col] = $form["organism-$i"]['genotype']['files']['other']['#value']['columns'][$col];
583  }
584  }
585  }
586  }
587  }
588  }
589 
590  if (isset($form['organism-1']['phenotype'])) {
591  for ($i = 1; $i <= $form_state['saved_values'][TPPS_PAGE_1]['organism']['number']; $i++) {
592  if (!empty($form_state['values']["organism-$i"]['phenotype']['normal-check'])) {
593  if (($file = file_load($form_state['values']["organism-$i"]['phenotype']['file'])) and $form_state['values']["organism-$i"]['phenotype']['file'] != 0) {
594  file_usage_add($file, 'tpps', 'tpps_project', substr($form_state['accession'], 4));
595  }
596 
597  $form_state['values']["organism-$i"]['phenotype']['file-columns'] = array();
598 
599  if (isset($form["organism-$i"]['phenotype']['file']['#value']['columns'])) {
600  foreach ($form["organism-$i"]['phenotype']['file']['#value']['columns'] as $col => $val) {
601  if ($col[0] != '#') {
602  $form_state['values']["organism-$i"]['phenotype']['file-columns'][$col] = $form["organism-$i"]['phenotype']['file']['#value']['columns'][$col];
603  }
604  }
605  }
606 
607  if ($form_state['values']["organism-$i"]['phenotype']['check'] != '0') {
608  if (($file = file_load($form_state['values']["organism-$i"]['phenotype']['metadata'])) and $form_state['values']["organism-$i"]['phenotype']['metadata'] != 0) {
609  file_usage_add($file, 'tpps', 'tpps_project', substr($form_state['accession'], 4));
610  }
611 
612  $form_state['values']["organism-$i"]['phenotype']['metadata-columns'] = array();
613 
614  if (isset($form["organism-$i"]['phenotype']['metadata']['#value']['columns'])) {
615  foreach ($form["organism-$i"]['phenotype']['metadata']['#value']['columns'] as $col => $val) {
616  if ($col[0] != '#') {
617  $form_state['values']["organism-$i"]['phenotype']['metadata-columns'][$col] = $form["organism-$i"]['phenotype']['metadata']['#value']['columns'][$col];
618  }
619  }
620  }
621  }
622  }
623  }
624  }
625  }
626 }
const TPPS_PAGE_1
Definition: tpps.module:12
const TPPS_PAGE_4
Definition: tpps.module:15
const TPPS_PAGE_3
Definition: tpps.module:14

◆ tpps_xlsx_generator()

tpps_xlsx_generator (   $location,
array  $options = array() 
)

This function is a generator for xlsx files.

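If the 'max_rows' option is set, the generator stops after max_rows items have been yielded. If the 'columns' option is set, then each item yielded by the generator will contain only the specified columns from the row. If the 'no_header' option is not set (or is FALSE), the first row of the file is treated as a header and is skipped.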

Parameters
string$locationThe location of the file on the server.
array$optionsAdditional options detailing how to parse the file.
Returns
Generator|array Yields each row of the file (or columns of a row, if applicable).

Definition at line 1112 of file file_utils.inc.

/**
 * This function is a generator for xlsx files.
 *
 * The archive at $location is extracted into the TPPS_TEMP_XLSX directory,
 * every worksheet with a usable dimension gets its own XMLReader, and the
 * worksheets are read in lock-step: each yielded item is one "row" made of
 * the cells of all worksheets laid side by side, keyed by column letter
 * counted from the left-most column of the first worksheet.
 *
 * Recognized options:
 *   - 'no_header': if empty, the first row is treated as a header and skipped.
 *   - 'columns': if set, only these column keys are included in each item.
 *   - 'max_rows': if set, the generator stops after this many items.
 *
 * The readers are closed and the temporary directory is removed in a
 * finally block, so cleanup also happens when the consumer stops iterating
 * early.
 *
 * @param string $location
 *   The location of the file on the server.
 * @param array $options
 *   Additional options detailing how to parse the file.
 *
 * @return Generator|array
 *   Yields each row of the file (or columns of a row, if applicable). Yields
 *   nothing if the archive cannot be opened or extracted.
 */
function tpps_xlsx_generator($location, array $options = array()) {
  $dir = drupal_realpath(TPPS_TEMP_XLSX);
  $no_header = $options['no_header'] ?? FALSE;
  $columns = $options['columns'] ?? NULL;
  $max_rows = $options['max_rows'] ?? NULL;

  // Key the requested columns by their own name so that tpps_xlsx_get_row()
  // can test membership with isset().
  if (!empty($columns)) {
    $new_columns = array();
    foreach ($columns as $col) {
      $new_columns[$col] = $col;
    }
    $columns = $new_columns;
  }

  $zip = new ZipArchive();
  // ZipArchive::open() returns TRUE on success and an error code otherwise.
  if ($zip->open($location) !== TRUE) {
    return;
  }
  $extracted = $zip->extractTo($dir);
  $zip->close();

  $readers = array();
  try {
    if (!$extracted) {
      return;
    }

    // Find all worksheets.
    $sheet_dir = $dir . '/xl/worksheets';
    $sheets = scandir($sheet_dir);
    if ($sheets === FALSE) {
      return;
    }
    $sheets = array_diff($sheets, array('.', '..'));

    $strings = tpps_xlsx_get_strings($dir . '/xl/sharedStrings.xml');

    // Get dimensions, readers. $dims and $readers are filled together so that
    // they share the same indexes.
    $first_left = NULL;
    $dims = array();
    foreach ($sheets as $sheet) {
      $loc = $sheet_dir . '/' . $sheet;
      if (tpps_get_path_extension($loc) !== 'xml') {
        continue;
      }
      $dimension = tpps_xlsx_get_dimension($loc);
      if (!preg_match('/([A-Z]+)[0-9]+:([A-Z]+)[0-9]+/', (string) $dimension, $matches)) {
        // The dimensions are invalid (probably an empty worksheet).
        continue;
      }

      // Column letters are handled as the hex encoding of their ASCII bytes
      // so that they can be advanced with tpps_increment_hex().
      $left_hex = unpack('H*', $matches[1]);
      $right_hex = unpack('H*', $matches[2]);
      if ($first_left === NULL) {
        $first_left = $left_hex[1];
      }
      $dims[] = array($left_hex[1], $right_hex[1]);

      $reader = new XMLReader();
      $reader->open($loc);
      $readers[] = $reader;
    }

    // If the file has a header row, skip it.
    if (!$no_header) {
      tpps_xlsx_get_rows($readers, $strings);
    }

    // Iterate through file.
    $count = 0;
    while (($rows = tpps_xlsx_get_rows($readers, $strings, TRUE, $columns))) {
      if (!empty($max_rows) and $count >= $max_rows) {
        break;
      }
      $count++;

      $values = array();
      // $key_hex runs continuously across all worksheets, while $hex restarts
      // at the left edge of each worksheet.
      $key_hex = $first_left;
      foreach (array_keys($readers) as $idx) {
        $row = $rows[$idx];
        if (!empty($row)) {
          ksort($row);
        }
        $hex = $dims[$idx][0];
        while (base_convert($hex, 16, 10) <= base_convert($dims[$idx][1], 16, 10)) {
          $key = pack('H*', $key_hex);
          $row_key = pack('H*', $hex);
          if (empty($columns) or array_search($key, $columns) !== FALSE) {
            $values[$key] = isset($row[$row_key]) ? trim($row[$row_key]) : NULL;
          }
          $hex = tpps_increment_hex($hex);
          $key_hex = tpps_increment_hex($key_hex);
        }
      }
      yield $values;
    }
  }
  finally {
    // Close readers and remove the extracted files, even when the consumer
    // abandons the generator before it is exhausted.
    foreach ($readers as $reader) {
      $reader->close();
    }
    tpps_rmdir($dir);
  }
}
tpps_xlsx_get_rows(array &$readers, array &$strings=array(), $plain=TRUE, $columns=NULL)
Definition: file_utils.inc:714
tpps_increment_hex($hex)
Definition: file_utils.inc:216
const TPPS_TEMP_XLSX
Definition: tpps.module:16
tpps_xlsx_get_strings($strings_location)
Definition: file_utils.inc:853
tpps_get_path_extension($path)
Definition: file_utils.inc:661
tpps_xlsx_get_dimension($location)
Definition: file_utils.inc:682
tpps_rmdir($dir)
Definition: file_utils.inc:884

◆ tpps_xlsx_get_cell_value()

tpps_xlsx_get_cell_value ( $reader,
  $cell_type,
array &  $strings = array() 
)

Returns a cell value from an XML file.

This function receives an XMLReader object, a cell type attribute, and an array of cached shared strings, and returns the value of the current cell. If the cell type is a shared string ('s'), then the function returns the cached string with the matching string id. Otherwise, it returns the inner XML from the 'v' element within the cell. If no 'v' element can be found before the end of the cell, it returns NULL.

Parameters
object$readerThe XMLReader object being used to read the XML file.
string$cell_typeThe 't' attribute of the current cell.
array$stringsThe cached strings for the current XML file.
Returns
mixed Either the value in the 'v' element of the cell or the associated string.

Definition at line 827 of file file_utils.inc.

/**
 * Returns a cell value from an XML file.
 *
 * The reader is expected to be positioned on a 'c' (cell) element. The
 * function reads forward until it finds the cell's 'v' element or reaches
 * the end of the cell (or of the enclosing row).
 *
 * - If the cell type is 's', the inner XML of 'v' is used as an index into
 *   $strings; NULL is returned if that index is not cached.
 * - If the cell type is 'inlineStr', the text of the 't' element(s) inside
 *   the cell is returned, since such cells carry no 'v' element.
 * - Otherwise the inner XML of the 'v' element is returned.
 *
 * @param object $reader
 *   The XMLReader object being used to read the XML file.
 * @param string $cell_type
 *   The 't' attribute of the current cell.
 * @param array $strings
 *   The cached strings for the current XML file.
 *
 * @return mixed
 *   Either the value in the 'v' element of the cell or the associated string,
 *   or NULL if the cell has no value.
 */
function tpps_xlsx_get_cell_value(&$reader, $cell_type, array &$strings = array()) {
  // A self-closing cell has no children and therefore no value.
  if ($reader->isEmptyElement) {
    return NULL;
  }

  $inline = NULL;
  while (!($reader->nodeType == XMLReader::END_ELEMENT and ($reader->name == 'c' or $reader->name == 'row')) and $reader->read()) {
    if ($reader->nodeType != XMLReader::ELEMENT) {
      continue;
    }

    if ($reader->name == 'v') {
      $value = $reader->readInnerXml();
      if ($cell_type === 's') {
        // Shared string: 'v' holds the index into the strings cache.
        return $strings[$value] ?? NULL;
      }
      return $value;
    }

    // Inline strings keep their text in <is><t>…</t></is> (possibly split
    // over several runs) instead of a 'v' element.
    if ($cell_type === 'inlineStr' and $reader->name == 't') {
      $inline .= $reader->readInnerXml();
    }
  }
  return $inline;
}

◆ tpps_xlsx_get_dimension()

tpps_xlsx_get_dimension (   $location)

Returns the dimension string of an xlsx file.

If the dimension string cannot be found, returns NULL.

Parameters
string$locationThe location of the file.
Returns
mixed The dimension string of the file, or NULL if it cannot be found.

Definition at line 682 of file file_utils.inc.

/**
 * Returns the dimension string of an xlsx worksheet XML file.
 *
 * Reads the file until the first 'dimension' element is found and returns
 * its 'ref' attribute. The reader is closed on every path.
 *
 * @param string $location
 *   The location of the file.
 *
 * @return mixed
 *   The dimension string of the file, or NULL if the file cannot be opened
 *   or contains no 'dimension' element.
 */
function tpps_xlsx_get_dimension($location) {
  $reader = new XMLReader();
  if (!$reader->open($location)) {
    return NULL;
  }

  $dim = NULL;
  while ($reader->read()) {
    if ($reader->nodeType == XMLReader::ELEMENT and $reader->name == 'dimension') {
      $dim = $reader->getAttribute('ref');
      break;
    }
  }
  $reader->close();
  return $dim;
}

◆ tpps_xlsx_get_row()

tpps_xlsx_get_row ( $reader,
array &  $strings = array(),
  $plain = TRUE,
  $columns = NULL 
)

Retrieves a single "row" from an XML file.

This function receives an XMLReader object and an array of cached shared strings, advances the reader to the next row element, and returns an array of that row's cell values keyed by column letter. If the $plain parameter is set to FALSE, the function instead returns TRUE as soon as the XMLReader cursor is on the next row element, without reading its cells. If no row element can be found, the function returns FALSE.

Parameters
object$readerThe XMLReader object being used to read the XML file.
array$stringsThe cached strings for the current XML file.
bool$plainWhether or not to attempt to return an array of cell values, or just TRUE.
mixed$columnsAn array of columns to return. If NULL, return all columns.
Returns
mixed If $plain is set to TRUE, an array of cell values. Otherwise TRUE.

Definition at line 755 of file file_utils.inc.

/**
 * Retrieves a single "row" from an XML file.
 *
 * Advances the reader to the next 'row' element. With $plain set to FALSE
 * the function returns TRUE at that point. Otherwise it reads the row's 'c'
 * elements and returns their values keyed by the column letters taken from
 * each cell's 'r' attribute. Cells whose 'r' attribute is missing or has no
 * column letters are skipped, because they cannot be assigned to a column.
 *
 * NOTE(review): a self-closing row element produces no end-of-row node, so
 * the cell loop keeps reading into the following row; confirm whether this
 * skipping of empty rows is relied upon before changing it.
 *
 * @param object $reader
 *   The XMLReader object being used to read the XML file.
 * @param array $strings
 *   The cached strings for the current XML file.
 * @param bool $plain
 *   Whether or not to attempt to return an array of cell values, or just TRUE.
 * @param mixed $columns
 *   An array of columns to return, keyed by column letter. If NULL, return
 *   all columns.
 *
 * @return mixed
 *   If $plain is set to TRUE, an array of cell values. Otherwise TRUE. FALSE
 *   if there are no more rows.
 */
function tpps_xlsx_get_row(&$reader, array &$strings = array(), $plain = TRUE, $columns = NULL) {
  while ($reader->read()) {
    if (!($reader->nodeType == XMLReader::ELEMENT and $reader->name == 'row')) {
      continue;
    }
    if (!$plain) {
      return TRUE;
    }

    $cells = array();
    while (!($reader->nodeType == XMLReader::END_ELEMENT and $reader->name == 'row') and $reader->read()) {
      if (!($reader->nodeType == XMLReader::ELEMENT and $reader->name == 'c')) {
        continue;
      }
      $cell_type = $reader->getAttribute('t');
      // The 'r' attribute looks like "AB12"; only the letters are needed.
      if (!preg_match('/([A-Z]+)([0-9]+)/', (string) $reader->getAttribute('r'), $matches)) {
        continue;
      }
      $cell_location = $matches[1];
      if (!empty($columns) and !isset($columns[$cell_location])) {
        continue;
      }
      $cells[$cell_location] = tpps_xlsx_get_cell_value($reader, $cell_type, $strings);
    }
    return $cells;
  }
  return FALSE;
}
tpps_xlsx_get_cell_value(&$reader, $cell_type, array &$strings=array())
Definition: file_utils.inc:827

◆ tpps_xlsx_get_rows()

tpps_xlsx_get_rows ( array &  $readers,
array &  $strings = array(),
  $plain = TRUE,
  $columns = NULL 
)

Retrieves rows from array of readers.

This function receives an array of XMLReader objects and an array of cached strings, and returns an array of rows produced by tpps_xlsx_get_row() from each reader.

Parameters
array$readersThe array of XMLReader objects.
array$stringsThe cached strings for the current XML file.
bool$plainWhether or not to attempt to return an array of cell values, or just TRUE.
mixed$columnsAn array of columns to return. If NULL, return all columns.
Returns
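mixed An array with one entry per reader, keyed like $readers: the result of tpps_xlsx_get_row() for that reader (an array of cell values if $plain is TRUE, otherwise TRUE), or NULL if that reader produced no row. FALSE if no reader produced a row.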

Definition at line 714 of file file_utils.inc.

/**
 * Retrieves the next row from each reader in an array of readers.
 *
 * Calls tpps_xlsx_get_row() once per reader and collects the results under
 * the reader's key. Readers that produce no row (or an empty one) get NULL.
 *
 * @param array $readers
 *   The array of XMLReader objects.
 * @param array $strings
 *   The cached strings for the current XML file.
 * @param bool $plain
 *   Whether or not to attempt to return an array of cell values, or just TRUE.
 * @param mixed $columns
 *   An array of columns to return. If NULL, return all columns.
 *
 * @return mixed
 *   The collected rows keyed by reader, or FALSE if no reader produced a row.
 */
function tpps_xlsx_get_rows(array &$readers, array &$strings = array(), $plain = TRUE, $columns = NULL) {
  $collected = array();
  $found_any = FALSE;

  foreach (array_keys($readers) as $idx) {
    $current = tpps_xlsx_get_row($readers[$idx], $strings, $plain, $columns);
    if (!$current) {
      $collected[$idx] = NULL;
      continue;
    }
    $collected[$idx] = $current;
    $found_any = TRUE;
  }

  return $found_any ? $collected : FALSE;
}
tpps_xlsx_get_row(&$reader, array &$strings=array(), $plain=TRUE, $columns=NULL)
Definition: file_utils.inc:755

◆ tpps_xlsx_get_strings()

tpps_xlsx_get_strings (   $strings_location)

Retrieves strings from an excel strings XML file.

Parameters
string$strings_locationThe location of the strings XML file.
Returns
array The strings from the strings XML file.

Definition at line 853 of file file_utils.inc.

/**
 * Retrieves strings from an excel strings XML file.
 *
 * Each 'si' element becomes one entry of the returned array, in document
 * order starting at index 0; the text of all 't' elements inside an 'si'
 * element is concatenated. A self-closing 'si' element yields an empty
 * string so that the indexes of the following entries are not shifted.
 *
 * @param string $strings_location
 *   The location of the strings XML file.
 *
 * @return array
 *   The strings from the strings XML file. Empty if the file cannot be
 *   opened.
 */
function tpps_xlsx_get_strings($strings_location) {
  $strings = array();
  $reader = new XMLReader();
  if (!$reader->open($strings_location)) {
    return $strings;
  }

  while ($reader->read()) {
    if (!($reader->nodeType == XMLReader::ELEMENT and $reader->name == 'si')) {
      continue;
    }

    $string = "";
    // A self-closing element has no matching end node; scanning for one
    // would consume the next 'si' element instead.
    if (!$reader->isEmptyElement) {
      while (!($reader->nodeType == XMLReader::END_ELEMENT and $reader->name == 'si') and $reader->read()) {
        if ($reader->nodeType == XMLReader::ELEMENT and $reader->name == 't') {
          $string .= $reader->readInnerXml();
        }
      }
    }
    $strings[] = $string;
  }
  $reader->close();
  return $strings;
}

◆ tpps_xlsx_translate_date()

tpps_xlsx_translate_date (   $date)

Translates the date from an .xlsx file.

This function may seem strange at first, but skips $date = 60 on purpose. Day 60 in Excel's encoding represents 2/29/1900, which is not a real date.

Parameters
mixed$dateThe date to be translated.
Returns
mixed The original date provided, a reformatted date, or NULL on failure.

Definition at line 791 of file file_utils.inc.

/**
 * Translates the date from an .xlsx file.
 *
 * Values that strtotime() already understands are returned unchanged. Any
 * other numeric value is treated as an Excel day serial and converted to
 * "m/d/Y" by counting days from 12/31/1899; a fractional part is dropped.
 *
 * This function may seem strange at first, but skips $date = 60 on purpose.
 * Day 60 in Excel's encoding represents 2/29/1900, which is not a real date,
 * so every serial above 60 is shifted back by one day.
 *
 * @param mixed $date
 *   The date to be translated.
 *
 * @return mixed
 *   The original date provided, a reformatted date, or NULL on failure
 *   (non-numeric input that is not a date, serial 60, or an offset that
 *   cannot be converted).
 */
function tpps_xlsx_translate_date($date) {
  if (strtotime((string) $date) !== FALSE) {
    return $date;
  }

  // Anything that is neither a parsable date nor a number cannot be a serial.
  if (!is_numeric($date)) {
    return NULL;
  }

  // Only whole days are meaningful for the "+N days" offset below.
  $serial = (int) floor((float) $date);
  if ($serial == 60) {
    return NULL;
  }

  $days = $serial > 60 ? $serial - 1 : $serial;
  $timestamp = strtotime("12/31/1899 +$days days");
  if ($timestamp === FALSE) {
    return NULL;
  }
  return date("m/d/Y", $timestamp);
}

◆ tpps_xlsx_width()

tpps_xlsx_width (   $location)

This function gets the width of an xlsx file, i.e. the number of columns spanned by the dimension of its first worksheet (xl/worksheets/sheet1.xml).

Parameters
string$locationThe location of the file.
Returns
int The length of the longest line.

Definition at line 145 of file file_utils.inc.

/**
 * This function gets the width (number of columns) of an xlsx file.
 *
 * The archive is extracted into the TPPS_TEMP_XLSX directory and the
 * dimension of xl/worksheets/sheet1.xml is read. The width is the number of
 * columns between the left and right column letters of that dimension,
 * inclusive. The extracted files are removed again before returning so that
 * they cannot be mistaken for worksheets of a file processed later.
 *
 * @param string $location
 *   The location of the file.
 *
 * @return int
 *   The number of columns, or 0 if the archive cannot be opened or extracted
 *   or the first worksheet has no usable dimension.
 */
function tpps_xlsx_width($location) {
  $dir = drupal_realpath(TPPS_TEMP_XLSX);

  $zip = new ZipArchive();
  // ZipArchive::open() returns TRUE on success and an error code otherwise.
  if ($zip->open($location) !== TRUE) {
    return 0;
  }
  $extracted = $zip->extractTo($dir);
  $zip->close();

  $len = 0;
  if ($extracted) {
    $data_location = $dir . '/xl/worksheets/sheet1.xml';
    $dimension = tpps_xlsx_get_dimension($data_location);

    if (preg_match('/([A-Z]+)[0-9]+:([A-Z]+)[0-9]+/', (string) $dimension, $matches)) {
      // Column letters are handled as the hex encoding of their ASCII bytes
      // so that they can be advanced with tpps_increment_hex().
      $left_hex = unpack('H*', $matches[1]);
      $right_hex = unpack('H*', $matches[2]);
      $hex = $left_hex[1];

      while (base_convert($hex, 16, 10) <= base_convert($right_hex[1], 16, 10)) {
        $len++;
        $hex = tpps_increment_hex($hex);
      }
    }
  }

  tpps_rmdir($dir);
  return $len;
}
tpps_increment_hex($hex)
Definition: file_utils.inc:216
const TPPS_TEMP_XLSX
Definition: tpps.module:16
tpps_xlsx_get_dimension($location)
Definition: file_utils.inc:682