25 function tpps_parse_file($fid, $max_rows = 0, $no_header = FALSE, $columns = NULL) {
28 'no_header' => $no_header,
29 'columns' => $columns,
30 'max_rows' => $max_rows,
31 'content' => &$content,
53 $options[
'content'][] = $row;
66 $file = file_load($fid);
73 if ($extension ==
'vcf') {
74 $options[
'skip_prefix'] =
'#';
93 if (empty($options[
'skip_prefix'])) {
97 if (substr(current($row), 0, strlen($options[
'skip_prefix'])) != $options[
'skip_prefix']) {
116 $file = file_load($fid);
119 switch ($extension) {
132 throw new Exception(
'Unrecognized file type');
148 $zip =
new ZipArchive();
149 $zip->open($location);
150 $zip->extractTo($dir);
152 $data_location = $dir .
'/xl/worksheets/sheet1.xml';
155 preg_match(
'/([A-Z]+)[0-9]+:([A-Z]+)[0-9]+/', $dimension, $matches);
156 $left_hex = unpack(
'H*', $matches[1]);
158 $right_hex = unpack(
'H*', $matches[2]);
161 while (base_convert($hex, 16, 10) <= base_convert($right_hex[1], 16, 10)) {
180 $original = ini_get(
'auto_detect_line_endings');
182 ini_set(
'auto_detect_line_endings', TRUE);
186 $handle = gzopen($location,
'r');
188 while (($line = gzgets($handle))) {
189 $line = str_getcsv($line, $delim);
190 $longest = max($longest, count($line));
194 ini_set(
'auto_detect_line_endings', $original);
219 for ($i = strlen(pack(
'H*', $hex)) - 1; $i >= 0; $i--) {
220 $num = base_convert(substr($hex, (2 * $i), 2), 16, 10) + $add;
227 $new = base_convert($num, 10, 16) . $new;
228 if ($i == 0 and $add) {
253 $base_16 = unpack(
'H*',
'A')[1];
254 $base = base_convert($base_16, 16, 10) - 1;
255 for ($i = 0; $i < strlen($column); $i++) {
257 $num_16 = unpack(
'H*', $column[$i])[1];
258 $total += base_convert($num_16, 16, 10) - $base;
280 if (!array_key_exists(
'preserve', $options)) {
281 $options[
'preserve'] = FALSE;
284 if (!empty($fid) and ($file = file_load($fid))) {
286 if (!preg_match(
'/^(.*\/)(.*)$/', $new_name, $matches)) {
287 preg_match(
'/^(.*\/).*$/', $file->uri, $matches);
288 $new_filename = $matches[1] . $new_filename;
291 if ($options[
'preserve']) {
292 $file->status = FILE_STATUS_PERMANENT;
295 $result = db_query(
'SELECT f.fid 296 FROM {file_managed} f WHERE f.uri = :uri', array(
':uri' => $new_filename));
297 $record = $result->fetchObject();
299 return file_load($record->fid);
301 return file_copy($file, $new_filename, FILE_EXISTS_RENAME);
304 $result = db_query(
'SELECT f.fid 305 FROM {file_managed} f WHERE f.uri = :uri', array(
':uri' => $new_filename));
306 $record = $result->fetchObject();
308 if(isset($record->fid)) {
310 return file_load($record->fid);
312 $file_object = file_move($file, $new_filename, FILE_EXISTS_RENAME);
318 throw new Exception(
"Error: could not open file");
337 'no_header' => $no_header,
338 'columns' => array($column),
339 'content' => &$content,
358 $options[
'content'][] = $row[current($options[
'columns'])];
379 function tpps_compare_files($fid_1, $fid_2, $file_1_id_name, $file_2_id_name, $file_1_no_header = FALSE, $file_2_no_header = FALSE) {
383 asort($file_1_content);
384 asort($file_2_content);
385 reset($file_1_content);
386 reset($file_2_content);
388 while (current($file_1_content) !== FALSE and current($file_2_content) !== FALSE) {
389 if (current($file_1_content) < current($file_2_content)) {
390 $missing[] = current($file_1_content);
391 next($file_1_content);
394 elseif (current($file_1_content) > current($file_2_content)) {
395 next($file_2_content);
398 next($file_1_content);
399 next($file_2_content);
403 while (current($file_1_content) !== FALSE) {
404 $missing[] = current($file_1_content);
405 next($file_1_content);
443 $cols = $file_element[
'#value'][
'columns'];
445 $parents = $file_element[
'#parents'];
446 $new_end_columns = end($parents) .
"-columns";
447 $new_end_no_header = end($parents) .
"-no-header";
448 $new_end_empty = end($parents) .
"-empty";
449 $group_path = array_pop($parents) .
"-groups";
450 $values = drupal_array_get_nested_value($form_state[
'values'], $parents);
452 $values[$new_end_columns] = array();
454 $state_column_values = &$values[$new_end_columns];
455 $values[$new_end_no_header] = isset($file_element[
'#value'][
'no-header']) ? $file_element[
'#value'][
'no-header'] : NULL;
456 $values[$new_end_empty] = isset($file_element[
'#value'][
'empty']) ? $file_element[
'#value'][
'empty'] : NULL;
458 $title_parts = explode(
':', $file_element[
'#title']);
459 $error_prompt = $title_parts[0];
462 $required_groups_flat = array();
463 foreach ($required_groups as $group => $combinations) {
464 $groups[$group] = array();
465 $required_groups_flat[$group] = array();
466 foreach ($combinations as $name => $combination) {
467 $required_groups_flat[$group] = array_merge($required_groups_flat[$group], $combination);
471 foreach ($cols as $name => $type) {
472 $state_column_values[$name] = $type;
473 foreach ($required_groups_flat as $group => $types) {
474 if (in_array($type, $types)) {
475 if (!isset($groups[$group][$type])) {
476 $groups[$group][$type] = array($name);
479 $groups[$group][$type][] = $name;
486 foreach ($required_groups as $group => $combinations) {
487 $group_valid = FALSE;
488 $groups[$group][
'#type'] = array();
489 foreach ($combinations as $name => $combination) {
490 $combination_valid = TRUE;
491 foreach ($combination as $type) {
492 if (!isset($groups[$group][$type])) {
493 $combination_valid = FALSE;
497 if ($combination_valid) {
498 $groups[$group][
'#type'][] = $name;
504 form_set_error($file_element[
'#name'] .
"[columns][$group",
"$error_prompt: Please specify a column or columns that hold $group.");
508 foreach ($groups as $key => $group) {
509 foreach ($group as $opt_num => $col_names) {
510 if (count($col_names) == 1) {
511 $groups[$key][$opt_num] = $col_names[0];
516 $values[$group_path] = $groups;
517 drupal_array_set_nested_value($form_state[
'values'], $parents, $values);
536 for ($i = 1; $i <= $form_state[
'stats'][
'species_count']; $i++) {
537 if (($file = file_load($form_state[
'values'][
'tree-accession'][
"species-$i"][
'file'])) and $form_state[
'values'][
'tree-accession'][
"species-$i"][
'file'] != 0) {
538 file_usage_add($file,
'tpps',
'tpps_project', substr($form_state[
'accession'], 4));
541 $form_state[
'values'][
'tree-accession'][
"species-$i"][
'file-columns'] = array();
543 if (isset($form[
'tree-accession'][
"species-$i"][
'file'][
'#value'][
'columns'])) {
544 foreach ($form[
'tree-accession'][
"species-$i"][
'file'][
'#value'][
'columns'] as $col => $val) {
545 if ($col[0] !=
'#') {
546 $form_state[
'values'][
'tree-accession'][
"species-$i"][
'file-columns'][$col] = $form[
'tree-accession'][
"species-$i"][
'file'][
'#value'][
'columns'][$col];
551 if (empty($form_state[
'values'][
'tree-accession'][
'check'])) {
557 if (isset($form[
'organism-1'][
'genotype'])) {
558 for ($i = 1; $i <= $form_state[
'saved_values'][
TPPS_PAGE_1][
'organism'][
'number']; $i++) {
560 if (!empty($form_state[
'values'][
"organism-$i"][
'genotype'][
'files'][
'snps-assay']) and ($file = file_load($form_state[
'values'][
"organism-$i"][
'genotype'][
'files'][
'snps-assay']))) {
561 file_usage_add($file,
'tpps',
'tpps_project', substr($form_state[
'accession'], 4));
563 $form_state[
'values'][
"organism-$i"][
'genotype'][
'files'][
'snps-assay-columns'] = array();
565 if (isset($form[
"organism-$i"][
'genotype'][
'files'][
'snps-assay'][
'#value'][
'columns'])) {
566 foreach ($form[
"organism-$i"][
'genotype'][
'files'][
'snps-assay'][
'#value'][
'columns'] as $col => $val) {
567 if ($col[0] !=
'#') {
568 $form_state[
'values'][
"organism-$i"][
'genotype'][
'files'][
'snps-assay-columns'][$col] = $form[
"organism-$i"][
'genotype'][
'files'][
'snps-assay'][
'#value'][
'columns'][$col];
574 if (!empty($form_state[
'values'][
"organism-$i"][
'genotype'][
'files'][
'other']) and ($file = file_load($form_state[
'values'][
"organism-$i"][
'genotype'][
'files'][
'other']))) {
575 file_usage_add($file,
'tpps',
'tpps_project', substr($form_state[
'accession'], 4));
577 $form_state[
'values'][
"organism-$i"][
'genotype'][
'files'][
'other-columns'] = array();
579 if (isset($form[
"organism-$i"][
'genotype'][
'files'][
'other'][
'#value'][
'columns'])) {
580 foreach ($form[
"organism-$i"][
'genotype'][
'files'][
'other'][
'#value'][
'columns'] as $col => $val) {
581 if ($col[0] !=
'#') {
582 $form_state[
'values'][
"organism-$i"][
'genotype'][
'files'][
'other-columns'][$col] = $form[
"organism-$i"][
'genotype'][
'files'][
'other'][
'#value'][
'columns'][$col];
590 if (isset($form[
'organism-1'][
'phenotype'])) {
591 for ($i = 1; $i <= $form_state[
'saved_values'][
TPPS_PAGE_1][
'organism'][
'number']; $i++) {
592 if (!empty($form_state[
'values'][
"organism-$i"][
'phenotype'][
'normal-check'])) {
593 if (($file = file_load($form_state[
'values'][
"organism-$i"][
'phenotype'][
'file'])) and $form_state[
'values'][
"organism-$i"][
'phenotype'][
'file'] != 0) {
594 file_usage_add($file,
'tpps',
'tpps_project', substr($form_state[
'accession'], 4));
597 $form_state[
'values'][
"organism-$i"][
'phenotype'][
'file-columns'] = array();
599 if (isset($form[
"organism-$i"][
'phenotype'][
'file'][
'#value'][
'columns'])) {
600 foreach ($form[
"organism-$i"][
'phenotype'][
'file'][
'#value'][
'columns'] as $col => $val) {
601 if ($col[0] !=
'#') {
602 $form_state[
'values'][
"organism-$i"][
'phenotype'][
'file-columns'][$col] = $form[
"organism-$i"][
'phenotype'][
'file'][
'#value'][
'columns'][$col];
607 if ($form_state[
'values'][
"organism-$i"][
'phenotype'][
'check'] !=
'0') {
608 if (($file = file_load($form_state[
'values'][
"organism-$i"][
'phenotype'][
'metadata'])) and $form_state[
'values'][
"organism-$i"][
'phenotype'][
'metadata'] != 0) {
609 file_usage_add($file,
'tpps',
'tpps_project', substr($form_state[
'accession'], 4));
612 $form_state[
'values'][
"organism-$i"][
'phenotype'][
'metadata-columns'] = array();
614 if (isset($form[
"organism-$i"][
'phenotype'][
'metadata'][
'#value'][
'columns'])) {
615 foreach ($form[
"organism-$i"][
'phenotype'][
'metadata'][
'#value'][
'columns'] as $col => $val) {
616 if ($col[0] !=
'#') {
617 $form_state[
'values'][
"organism-$i"][
'phenotype'][
'metadata-columns'][$col] = $form[
"organism-$i"][
'phenotype'][
'metadata'][
'#value'][
'columns'][$col];
641 $location = drupal_realpath($location);
643 if ($extension ==
'zip' or $extension ==
'tar') {
645 if ($files and (count($files) == 3 or (count($files) == 4 and substr($files[3], -9) ===
'/__MACOSX')) and !is_dir($files[2])) {
646 $location = $files[2];
662 preg_match(
'/\.([a-zA-Z0-9]*)$/', $path, $matches);
664 if(count($matches) == 0) {
667 return strtolower($matches[1]);
683 $reader =
new XMLReader();
684 $reader->open($location);
685 while ($reader->read()) {
686 if ($reader->nodeType == XMLReader::ELEMENT and $reader->name ==
'dimension') {
687 $dim = $reader->getAttribute(
'ref');
714 function tpps_xlsx_get_rows(array &$readers, array &$strings = array(), $plain = TRUE, $columns = NULL) {
717 foreach ($readers as $key => $r) {
718 $reader = &$readers[$key];
755 function tpps_xlsx_get_row(&$reader, array &$strings = array(), $plain = TRUE, $columns = NULL) {
756 while ($reader->read()) {
757 if ($reader->nodeType == XMLReader::ELEMENT and $reader->name ==
'row') {
760 while (!($reader->nodeType == XMLReader::END_ELEMENT and $reader->name ==
'row') and $reader->read()) {
761 if ($reader->nodeType == XMLReader::ELEMENT and $reader->name ==
'c') {
762 $cell_type = $reader->getAttribute(
't');
763 preg_match(
'/([A-Z]+)([0-9]+)/', $reader->getAttribute(
'r'), $matches);
764 $cell_location = $matches[1];
765 if (!empty($columns) and !isset($columns[$cell_location])) {
792 if (strtotime($date) !== FALSE) {
798 return date(
"m/d/Y", strtotime(
"12/31/1899 +$date days"));
801 return date(
"m/d/Y", strtotime(
"12/31/1899 +$date days"));
828 if ($reader->isEmptyElement) {
832 while (!($reader->nodeType == XMLReader::END_ELEMENT and ($reader->name ==
'c' or $reader->name ==
'row')) and $reader->read()) {
833 if ($reader->nodeType == XMLReader::ELEMENT and $reader->name ==
'v') {
834 $value = $reader->readInnerXml();
835 if (!empty($cell_type) and $cell_type ==
's') {
836 $value = $strings[$value];
855 $reader =
new XMLReader();
856 $reader->open($strings_location);
858 while ($reader->read()) {
859 if ($reader->nodeType == XMLReader::ELEMENT and $reader->name ==
'si') {
862 while (!($reader->nodeType == XMLReader::END_ELEMENT and $reader->name ==
'si') and $reader->read()) {
863 if ($reader->nodeType == XMLReader::ELEMENT and $reader->name ==
't') {
864 $string .= $reader->readInnerXml();
867 $strings[$count++] = $string;
886 $children = scandir($dir);
887 foreach ($children as $child) {
888 if ($child !=
'.' and $child !=
'..') {
889 if (is_dir($dir .
'/' . $child) and !is_link($dir .
'/' . $child)) {
893 unlink($dir .
'/' . $child);
913 if (is_object($archive) and !empty($archive->uri)) {
914 $archive = $archive->uri;
916 $loc = drupal_realpath($archive);
919 $unzip_dir = drupal_realpath(
'temporary://tpps_tmp');
920 if (is_dir($unzip_dir)) {
924 if (is_dir($unzip_dir) or mkdir($unzip_dir)) {
927 $zip = new \ZipArchive();
929 $zip->extractTo($unzip_dir);
933 $zip = new \Archive_Tar($loc,
'gz');
934 $zip->extract($unzip_dir);
938 $zip = new \Archive_Tar($loc);
939 $zip->extract($unzip_dir);
947 $files = scandir($dir);
948 if ($files and count($files) == 3 and is_dir($dir .
'/' . $files[2])) {
949 $dir .=
'/' . $files[2];
950 $files = scandir($dir);
954 foreach ($files as $key => $name) {
955 $files[$key] = $dir .
'/' . $name;
975 $hex = unpack(
'H*',
'A')[1];
977 for ($i = 0; $i < $width; $i++) {
978 $key = pack(
'H*', $hex);
989 'content' => &$content,
992 return current($content);
1012 if (!function_exists($function)) {
1016 if (!empty($options[
'job'])) {
1018 $options[
'job']->setItemsHandled(0);
1021 $file = file_load($fid);
1026 $item_is_not_empty =
true;
1027 if(is_array($item)) {
1029 $values_count = count($item);
1030 $values_empty_count = 0;
1031 foreach($item as $key => $value) {
1033 $values_empty_count = $values_empty_count + 1;
1037 if ($values_count == $values_empty_count) {
1038 $item_is_not_empty =
false;
1043 if ($item_is_not_empty) {
1044 $function($item, $options);
1045 if (!empty($options[
'job'])) {
1046 $options[
'job']->addItemsHandled(1);
1051 echo json_encode($item) .
"\n";
1052 echo
"[TPPS FILE ITERATOR] Found a line/item that was empty, ignoring this item\n";
1053 echo
"[TPPS FILE ITERATOR] File location: " . $file_location .
"\n";
1075 $file = file_load($fid);
1078 switch ($extension) {
1089 if (empty($extension)) {
1090 $extension =
"NULL";
1092 $msg =
"Unrecognized file type: $extension";
1093 throw new Exception($msg);
1114 $no_header = $options[
'no_header'] ?? FALSE;
1115 $columns = $options[
'columns'] ?? NULL;
1116 $max_rows = $options[
'max_rows'] ?? NULL;
1118 if (!empty($columns)) {
1119 $new_columns = array();
1120 foreach ($columns as $col) {
1121 $new_columns[$col] = $col;
1123 $columns = $new_columns;
1126 $zip =
new ZipArchive();
1127 $zip->open($location);
1128 $zip->extractTo($dir);
1130 $strings_location = $dir .
'/xl/sharedStrings.xml';
1133 $sheets = scandir($dir .
'/xl/worksheets');
1134 unset($sheets[array_search(
'.', $sheets)]);
1135 unset($sheets[array_search(
'..', $sheets)]);
1144 foreach ($sheets as $sheet) {
1149 $loc = $dir .
'/xl/worksheets/' . $sheet;
1153 preg_match(
'/([A-Z]+)[0-9]+:([A-Z]+)[0-9]+/', $dimension, $matches);
1155 if(count($matches) < 3) {
1160 $left_hex = unpack(
'H*', $matches[1]);
1161 $hex = $left_hex[1];
1162 $right_hex = unpack(
'H*', $matches[2]);
1163 if ($first_left == NULL) {
1164 $first_left = $left_hex[1];
1165 $last_right = $left_hex[1] - 1;
1167 while (base_convert($hex, 16, 10) <= base_convert($right_hex[1], 16, 10)) {
1171 $dims[] = array($left_hex[1], $right_hex[1]);
1172 $reader =
new XMLReader();
1173 $reader->open($loc);
1174 $readers[] = $reader;
1186 if (!empty($max_rows) and $count >= $max_rows) {
1192 $key_hex = $first_left;
1193 foreach ($readers as $idx => $read) {
1198 $hex = $dims[$idx][0];
1199 while (base_convert($hex, 16, 10) <= base_convert($dims[$idx][1], 16, 10)) {
1200 $key = pack(
'H*', $key_hex);
1201 $row_key = pack(
'H*', $hex);
1202 if (empty($columns) or array_search($key, $columns) !== FALSE) {
1203 $values[$key] = isset($row[$row_key]) ? trim($row[$row_key]) : NULL;
1213 foreach ($readers as $reader) {
1235 $original = ini_get(
'auto_detect_line_endings');
1237 ini_set(
'auto_detect_line_endings', TRUE);
1240 $no_header = $options[
'no_header'] ?? FALSE;
1241 $columns = $options[
'columns'] ?? NULL;
1242 $max_rows = $options[
'max_rows'] ?? NULL;
1246 $handle = gzopen($location,
'r');
1253 while (($line = gzgets($handle))) {
1254 $vals = str_getcsv($line, $delim);
1255 if (!empty($max_rows) and $count >= $max_rows) {
1262 if (empty($columns)) {
1263 $hex = unpack(
'H*',
'A')[1];
1264 for ($i = 0; $i < $longest; $i++) {
1265 $key = pack(
'H*', $hex);
1266 $values[$key] = isset($vals[$i]) ? trim($vals[$i]) : NULL;
1267 if (isset($values[$key]) and !check_plain($values[$key])) {
1268 $values[$key] = trim(mb_convert_encoding($values[$key],
"UTF-8",
"Windows-1252"));
1276 foreach ($columns as $column) {
1284 ini_set(
'auto_detect_line_endings', $original);
tpps_flat_generator($location, array $options=array())
tpps_xlsx_get_rows(array &$readers, array &$strings=array(), $plain=TRUE, $columns=NULL)
tpps_get_archive_files($archive)
tpps_xlsx_translate_date($date)
tpps_flat_width($location, $delim=',')
tpps_parse_file_helper($row, array &$options)
tpps_xlsx_get_strings($strings_location)
tpps_xlsx_get_cell_value(&$reader, $cell_type, array &$strings=array())
tpps_get_path_extension($path)
tpps_xlsx_get_row(&$reader, array &$strings=array(), $plain=TRUE, $columns=NULL)
tpps_file_headers($fid, $no_header=FALSE)
tpps_save_file_columns(array &$form, array &$form_state)
tpps_parse_file_column_helper($row, array &$options)
tpps_xlsx_generator($location, array $options=array())
tpps_parse_file_column($fid, $column, $no_header=FALSE)
tpps_xlsx_width($location)
tpps_file_len_helper($row, array &$options=array())
tpps_xlsx_get_dimension($location)
tpps_get_location($location)
tpps_convert_colname($column)
tpps_rename_file($fid, $new_name, array $options=array())
tpps_file_generator($fid, array $options=array())
tpps_file_validate_columns(array &$form_state, array $required_groups, array $file_element)
tpps_file_iterator($fid, $function, array &$options=array())
tpps_compare_files($fid_1, $fid_2, $file_1_id_name, $file_2_id_name, $file_1_no_header=FALSE, $file_2_no_header=FALSE)
tpps_parse_file($fid, $max_rows=0, $no_header=FALSE, $columns=NULL)