csvxml/csv_check.php

381 lines
18 KiB
PHP
Raw Normal View History

2019-08-25 21:45:52 +02:00
<?PHP
declare(strict_types = 1);
2019-08-25 21:45:52 +02:00
// Show every PHP diagnostic directly on the page.
error_reporting(E_ALL);
ini_set('display_errors', "1");

require_once __DIR__ . "/functions/functions.php";

// Make sure a session exists before $_SESSION is read or written.
if (session_status() !== PHP_SESSION_ACTIVE) session_start();

// All languages the interface can be shown in.
// Some languages are in translation. They will only be available for logged in users.
$allowed_langs = ['ar', 'de', 'en', 'hu', 'id', 'it', 'pl','pt'];

if (isset($_GET['navlang'])) {
    // An explicit request wins; anything outside the list falls back to German.
    $_SESSION['lang'] = in_array($_GET['navlang'], $allowed_langs) ? $_GET['navlang'] : 'de';
} elseif (!isset($_SESSION['lang'])) {
    // First visit without an explicit choice: derive the language from the browser.
    $_SESSION['lang'] = lang_getfrombrowser($allowed_langs, 'en', "", false);
}

$lang = $_SESSION['lang'];
2019-08-25 21:45:52 +02:00
2019-08-27 00:31:46 +02:00
// Name of the CSV file to check, taken from the query string.
// The value is untrusted: only its final path component is kept so that the
// resulting path cannot leave the csv/ directory (e.g. via "../").
$filename = (isset($_GET['fnam']) && is_string($_GET['fnam'])) ? basename($_GET['fnam']) : '';
if ($filename === '' || $filename === '.' || $filename === '..') {
    echo '<b style="color:#990000;">No CSV file given !</b>';
    exit;
}
$csv_datei = 'csv/' . $filename;
2019-08-25 21:45:52 +02:00
// Load the field definitions and derive the lookup lists used by the checks below.
require __DIR__ . "/values/availableFields.php";

$allowed = [];                   // every known column name
$eventpart = [];                 // event columns (neither *_annotation nor *_sure)
$eventpartsure = [];             // event columns whose name contains "_sure"
$fieldsWithDependency = [];      // column name => non-empty "dependsOn" entry
$fieldsWithAllowedValueSet = []; // column name => non-empty "allowedValues" entry

foreach ($availableFields as $categoryName => $fieldCategory) {
    $allowed = array_merge($allowed, array_keys($fieldCategory));

    // Extended operations for events
    $isEventCategory = strpos($categoryName, $basis['event']) !== false;

    foreach ($fieldCategory as $key => $value) {
        if ($isEventCategory && strpos($key, "_annotation") === false) {
            if (strpos($key, "_sure") === false) {
                $eventpart[] = $key;
            } else {
                $eventpartsure[] = $key;
            }
        }

        if (!empty($value["dependsOn"])) {
            $fieldsWithDependency[$key] = $value['dependsOn'];
        }
        if (!empty($value["allowedValues"])) {
            $fieldsWithAllowedValueSet[$key] = $value['allowedValues'];
        }
    }
}
2019-08-27 00:31:46 +02:00
// Legacy value lists. Each list starts with the column name(s) it is named
// after, followed by literal values.
// NOTE(review): none of the $allowed_* lists is read in the visible part of
// this file; only $crosscheck1 / $crosscheck2 are used (check #6).

// Builds the numbered column names "<prefix>1" .. "<prefix><count>".
$numberedColumns = static function (string $prefix, int $count): array {
    $names = [];
    for ($n = 1; $n <= $count; $n++) {
        $names[] = $prefix . $n;
    }
    return $names;
};

$allowed_object_other_title_kind_of = [
    'object_other_title_kind_of',
    'Wissenschaft', 'Alltagssprache', 'Umgangssprache', 'Dialekt',
    'Tudományos', 'Köznyelvi', 'Nyelvjárás',
];
$allowed_other_object_title_in = [
    'other_object_title_in',
    'Wissenschaft', 'Alltagssprache', 'Umgangssprache', 'Dialekt',
    'Tudományos', 'Köznyelvi', 'Nyelvjárás',
];
$allowed_closer_location_as = [
    'closer_location_as',
    'Aufnahmeort', 'Fundort', 'Früherer Ort',
    'Felvétel késztésének helye', 'Lelőhely', 'Történelmi elnevezés',
];
$allowed_inclusion_kind_of = [
    'inclusion_kind_of',
    'Schenkung', 'Kauf', 'Grabung', 'Notbergung', 'Erbschaft', 'Stiftung', 'Enteignung', 'Ursprungsbestand',
    'Ajándékozás', 'Vétel', 'Feltárás', 'Hivatalos átadás', 'Csere', 'Gyűjtés', 'Saját előállítás', 'Törzsanyag', 'Letét',
    'endowment', 'dispossession', 'old stock', '',
];
$allowed_currency = [
    'bought_for_currency', 'worth_unit', 'worth_insurance_unit',
    'DM', 'Euro', 'Forint', 'Lari', 'Mark', 'Pengő', 'Real', 'RM', 'Rupiah', 'Złoty', 'US-Dollar',
];
// The wall-thickness entry used to contain an embedded line break and could
// therefore never equal the real column name.
$allowed_measurements = [
    'dimensions_separate_length_unit',
    'dimensions_separate_width_unit',
    'dimensions_separate_height_unit',
    'dimensions_separate_diameter_unit',
    'dimensions_separate_wall_thickness_unit',
    'm', 'dm', 'cm', 'mm',
];
$allowed_weight = ['dimensions_separate_weight_unit', 't', 'kg', 'g'];
$allowed_yesno = array_merge(
    [
        'detailed_description_md', 'detailed_description_extern',
        'inscription_md', 'inscription_extern',
        'dimensions_separate_show_md', 'dimensions_separate_show_extern',
    ],
    $numberedColumns('object_group_show', 5),
    $numberedColumns('image_visible', 28),
    $numberedColumns('image_main', 28),
    ['y', 'n']
);
unset($numberedColumns);

// Check #6 pairs: entry N of $crosscheck2 is a qualifier column (unit, kind,
// visibility flag, ...) that requires a value in entry N of $crosscheck1.
$crosscheck1 = [
    'object_other_title',
    'detailed_description',
    'detailed_description',
    'inscription',
    'inscription',
    'dimensions_separate_length_value',
    'dimensions_separate_width_value',
    'dimensions_separate_height_value',
    'dimensions_separate_diameter_value',
    'dimensions_separate_wall_thickness_value',
    'dimensions_separate_weight_value',
    'closer_location',
    'bought_for',
    'worth_value',
    'worth_insurance_value',
];
$crosscheck2 = [
    'object_other_title_kind_of',
    'detailed_description_md',
    'detailed_description_extern',
    'inscription_md',
    'inscription_extern',
    'dimensions_separate_length_unit',
    'dimensions_separate_width_unit',
    'dimensions_separate_height_unit',
    'dimensions_separate_diameter_unit',
    'dimensions_separate_wall_thickness_unit',
    'dimensions_separate_weight_unit',
    'closer_location_as',
    'bought_for_currency',
    'worth_unit',
    'worth_insurance_unit',
];
2019-08-25 21:45:52 +02:00
///// Check #1
// Reads the header row and reports every column name that is not in $allowed.
// Leaves behind: $erstezeile (header row, BOM removed), $inhalt[1] (same row),
// $error (running error counter) and $y.
echo '1: Only allowed tags (column names) used?';
$y = 1;
$error = 0;
$inhalt = [];

$fp = fopen($csv_datei, 'r');
if ($fp === false) {
    // Without a readable file none of the checks can run.
    echo '<br><b style="color:#990000;">The CSV file could not be opened !</b>';
    exit;
}
$zeile = fgetcsv($fp, 100000, ';');
fclose($fp);

// fgetcsv() returns false for an empty file; treat that as "no columns".
if (!is_array($zeile)) {
    $zeile = [];
}

for ($x = 0; $x < count($zeile); $x++) {
    // A blank first line yields a null cell; the UTF-8 byte order mark is not part of a name.
    $zeile[$x] = str_replace("\xEF\xBB\xBF", "", (string) $zeile[$x]);
    $inhalt[$y][$x] = $zeile[$x];
    if (!in_array($zeile[$x], $allowed)) {
        // The name comes from the uploaded file, so it is escaped before being shown.
        echo '<br><i style="font-style:normal;color:#990000;">ERROR in column ' . $x . ' created by value: ' . htmlspecialchars($zeile[$x], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</i>';
        $error = $error + 1;
    }
}
$erstezeile = $zeile;

if ($error != 0) {
    echo '<br><b style="color:#990000;">Not allowed tags found !</b>';
} else {
    echo '<br><i style="font-style:normal;color:#009900;">Only allowed tags used !</i>';
}

//// Check #2
echo '<br><br>2: Not allowed multiple use of tags (column names)?';
// Header row with repeated names removed; compared against the full row below.
$compare = array_unique($zeile);
/**
 * Tells whether two arrays hold equal values, ignoring order and keys.
 *
 * Both arrays are sorted (which also re-indexes them) and then compared with
 * the loose array comparison "==". The caller's arrays are not modified
 * because they are passed by value.
 *
 * @param array $arrayA first list of values
 * @param array $arrayB second list of values
 *
 * @return bool true when both sorted lists are equal
 */
function identical_values(array $arrayA, array $arrayB): bool
{
    sort($arrayA);
    sort($arrayB);

    return $arrayA == $arrayB;
}
2019-08-27 00:31:46 +02:00
// The header is free of repeated names exactly when removing repeats left it unchanged.
$result = identical_values($zeile, $compare);
if ($result) {
    echo '<br><i style="font-style:normal;color:#009900;">No dublicate column names !</i>';
} else {
    echo '<br><b style="color:#990000;">There are dublicate column names !</b>';
    $error++;
}
//// Get values into memory for following checks
// Fills $inhalt[row][column] (rows are 1-based, row 1 is the header) and sets
// $y to the number of rows stored. Single quotes in every cell are prefixed
// with a backslash.
$fp = fopen($csv_datei, 'r');
if ($fp === false) {
    echo '<br><b style="color:#990000;">The CSV file could not be opened !</b>';
    exit;
}
$y = 0;
while (($zeile = fgetcsv($fp, 100000, ';')) !== false) {
    // fgetcsv() reports a blank line as a single null field; such lines carry no data.
    if ($zeile === [null]) {
        continue;
    }
    $y++;
    foreach ($zeile as $x => $cell) {
        $cell = (string) $cell;
        if ($y === 1) {
            // Keep the header row free of the byte order mark, as in check #1.
            $cell = str_replace("\xEF\xBB\xBF", "", $cell);
        }
        $inhalt[$y][$x] = str_replace("'", "\'", $cell);
    }
}
fclose($fp);

///// Check #3
echo '<br><br>3: Mandatory tags available and always filled in?';
// Inventory numbers collected during check #3 and evaluated in check #4.
$inv_array = [];
$inv_error = 0;
/**
 * Returns each value that occurs more than once in the given array.
 *
 * Every repeated value is reported once. The keys of the result are the keys
 * of the first repetition in the input, so callers that need a list should
 * re-index the result.
 *
 * @param array $array values to inspect
 *
 * @return array the repeated values, original keys preserved
 */
function get_duplicates(array $array): array
{
    // Everything that array_unique() dropped is, by definition, a repetition.
    $repetitions = array_diff_assoc($array, array_unique($array));

    return array_unique($repetitions);
}
2019-08-27 00:31:46 +02:00
// Columns that must exist in the header and be filled in on every data row.
$mandatory = ['inventory_number', 'object_type', 'object_title', 'object_description'];

// List of inventory numbers handed on to check #4.
$inv_array = $inv_array ?? [];

foreach ($mandatory as $mandatoryColumn) {
    $spaltenr = array_search($mandatoryColumn, $erstezeile);
    if ($spaltenr === false) {
        echo '<br><i style="font-style:normal;color:#990000;">Mandatory: Column <b>' . $mandatoryColumn . '</b> missing</i>';
        $error = $error + 1;
        $inv_error = $inv_error + 1;
        continue;
    }

    // Row 1 is the header, so data starts at row 2. Row numbers in messages
    // are positions in $inhalt (header = 1).
    for ($row = 2; $row <= $y; $row++) {
        // A row shorter than the header simply has no value in this column.
        $cell = (string) ($inhalt[$row][$spaltenr] ?? '');
        if ($cell === '') {
            echo '<br><i style="font-style:normal;color:#990000;">Missing value for <b>' . $mandatoryColumn . '</b> in row ' . $row . '</i>';
            $error = $error + 1;
            $inv_error = $inv_error + 1;
        }
        if ($mandatoryColumn === 'inventory_number') {
            $inv_array[] = $cell;
        }
    }
}

if ($inv_error == 0) {
    echo '<br><i style="font-style:normal;color:#009900;">All mandatory tags available and with values !</i>';
}
2019-08-25 21:45:52 +02:00
///// Check #4
// Reports every inventory number that was collected more than once.
echo '<br><br>4: Inventory_number unique ?';
if (in_array('inventory_number', $erstezeile)) {
    // $inv_array is filled by check #3; fall back to an empty list if it never ran.
    $doppelte_inv = array_values(get_duplicates($inv_array ?? []));
    if (count($doppelte_inv) > 0) {
        foreach ($doppelte_inv as $doppelte_nummer) {
            // The number comes from the uploaded file, so it is escaped before being shown.
            echo '<br><i style="font-style:normal;color:#990000;">Multiple use of inventory_number <b>' . htmlspecialchars((string) $doppelte_nummer, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</b></i>';
            $error = $error + 1;
        }
    } else {
        echo '<br><i style="font-style:normal;color:#009900;">All inventory_numbers are unique !</i>';
    }
} else {
    echo '<br><b style="font-style:normal;color:#990000;">Aborted, column inventory_number is missing</b>';
    $error = $error + 1;
}
///// Check #5
echo '<br><br>5: Dependent colums observed ?<br>';

// A column with declared dependencies that appears in the header requires all
// of its companion columns to appear in the header too.
foreach ($fieldsWithDependency as $tField => $tDependentFields) {
    if (!in_array($tField, $erstezeile)) {
        continue;
    }
    foreach ($tDependentFields as $tDependentField) {
        if (in_array($tDependentField, $erstezeile)) {
            continue;
        }
        $depencymessage[] = "Dependency issue at column $tField: Corresponding column $tDependentField is missing";
    }
}

if (!empty($depencymessage)) {
    echo '<b style="color:#990000;">Dependent columns were not observed !</b>';
    // Each reported issue counts as one error.
    foreach ($depencymessage as $message) {
        echo '<br>' . $message;
        $error++;
    }
} else {
    echo '<i style="font-style:normal;color:#009900;">Dependent columns were observed !</i>';
}
///// Check #6
// Row-level dependencies: a qualifier cell (unit, kind, "sure" flag, show flag)
// must not be filled in when the value it qualifies is empty.
// Counts its findings in $depcon_error. Data rows start at 2 (row 1 is the header).
echo '<br><br>6: Dependency of content observed?';
$depcon_error = 0;

// [value column, qualifier column] pairs. Both sources pair their entries by
// position; entries without a partner are skipped.
$depconPairs = [];
foreach ($crosscheck1 as $l => $valueColumn) {
    if (isset($crosscheck2[$l])) {
        $depconPairs[] = [$valueColumn, $crosscheck2[$l]];
    }
}
foreach ($eventpart as $l => $valueColumn) {
    if (isset($eventpartsure[$l])) {
        $depconPairs[] = [$valueColumn, $eventpartsure[$l]];
    }
}

foreach ($depconPairs as $pair) {
    $valueColumn = $pair[0];
    $qualifierColumn = $pair[1];

    // array_search() returns false for an absent column; using that as an
    // index would silently read column 0, so both columns must be present.
    $valuePos = array_search($valueColumn, $erstezeile, true);
    $qualifierPos = array_search($qualifierColumn, $erstezeile, true);
    if ($valuePos === false || $qualifierPos === false) {
        continue;
    }

    for ($j = 2; $j <= $y; $j++) {
        $qualifier = (string) ($inhalt[$j][$qualifierPos] ?? '');
        $value = (string) ($inhalt[$j][$valuePos] ?? '');
        if ($qualifier !== '' && $value === '') {
            echo '<br>Tag <b>' . $qualifierColumn . '</b> given but no entry for <b>' . $valueColumn . '</b> (row ' . $j . ')';
            $depcon_error = $depcon_error + 1;
        }
    }
}

// A "show separate dimensions" flag needs at least one separate dimension value.
$dimensionValueColumns = [
    'dimensions_separate_length_value',
    'dimensions_separate_width_value',
    'dimensions_separate_height_value',
    'dimensions_separate_weight_value',
    'dimensions_separate_diameter_value',
    'dimensions_separate_wall_thickness_value',
];
// Positions of the dimension value columns that actually exist in the header.
$dimensionValuePositions = [];
foreach ($dimensionValueColumns as $dimensionColumn) {
    $dimensionPos = array_search($dimensionColumn, $erstezeile, true);
    if ($dimensionPos !== false) {
        $dimensionValuePositions[] = $dimensionPos;
    }
}

foreach (['dimensions_separate_show_md', 'dimensions_separate_show_extern'] as $showColumn) {
    $showPos = array_search($showColumn, $erstezeile, true);
    if ($showPos === false) {
        continue;
    }
    for ($j = 2; $j <= $y; $j++) {
        if ((string) ($inhalt[$j][$showPos] ?? '') === '') {
            continue;
        }
        $hasDimensionValue = false;
        foreach ($dimensionValuePositions as $dimensionPos) {
            if ((string) ($inhalt[$j][$dimensionPos] ?? '') !== '') {
                $hasDimensionValue = true;
                break;
            }
        }
        if (!$hasDimensionValue) {
            echo '<br>Tag <b>' . $showColumn . '</b> given but no separate values available (row ' . $j . ')';
            $depcon_error = $depcon_error + 1;
        }
    }
}

// NOTE(review): a per-image check (image_name vs. image_rights / image_visible)
// used to follow here but was disabled in the original source.

if ($depcon_error == 0) {
    echo '<br><i style="font-style:normal;color:#009900;">Dependency of content was observed !</i>';
}
2019-08-25 21:45:52 +02:00
///// Check #7
// Every cell of a column that has an allowed-value list must hold one of those values.
echo '<br><br>7: Not allowed values in controlled lists?<br>';
$errormessage = [];
for ($i = 2; $i <= $y; $i++) {
    foreach (($inhalt[$i] ?? []) as $key => $value) {
        // Column names are taken from the cleaned header row; cells beyond
        // the header have no column name and cannot be restricted.
        $columnName = $erstezeile[$key] ?? null;

        // If the field is not restricted, then continue
        if ($columnName === null || !isset($fieldsWithAllowedValueSet[$columnName])) {
            continue;
        }

        // For others: check if the value is from the list of allowed values.
        if (!in_array($value, $fieldsWithAllowedValueSet[$columnName])) {
            // Cell content comes from the uploaded file, so it is escaped before being shown.
            $errormessage[] = "Disallowed value in column <code>"
                . htmlspecialchars((string) $columnName, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                . "</code> on row <code>{$i}</code>: <em>"
                . htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                . "</em> (allowed values: <small>"
                . htmlspecialchars(implode(", ", $fieldsWithAllowedValueSet[$columnName]), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                . "</small>)";
        }
    }
}

if (count($errormessage) > 0) {
    echo '<b style="color:#990000;">Columns with controlled values contain invalid values !</b>';
    foreach ($errormessage as $message) {
        echo '<br>' . $message;
        $error = $error + 1;
    }
} else {
    echo '<i style="font-style:normal;color:#009900;">Values in controlled fields are all valid !</i>';
}
///// Check #8
// Every data row that names at least one image must flag exactly one image as
// main image ('y'), and that image must not be marked invisible ('n').
$errormessage = [];
echo '<br><br>8: Main image or main resource given?<br>';

// Locate the image_name / image_visible / image_main columns of the 28 image slots.
// Each entry of $imagemain maps 'name' / 'visible' / 'main' to a column
// position, for the columns that exist in the header.
$hasanyimage = 0;
$imagemain = [];
for ($im = 1; $im < 29; $im++) {
    $slot = [];
    foreach (['name', 'visible', 'main'] as $part) {
        // Compare against false explicitly: position 0 is a valid result.
        $pos = array_search('image_' . $part . $im, $erstezeile, true);
        if ($pos !== false) {
            $slot[$part] = $pos;
        }
    }
    if (isset($slot['name'])) {
        $hasanyimage++;
    }
    if ($slot !== []) {
        $imagemain[] = $slot;
    }
}

if ($hasanyimage > 0) {
    for ($i = 2; $i <= $y; $i++) {
        // Check if in this row any image_name is given.
        $imagesInRow = 0;
        foreach ($imagemain as $slot) {
            if (isset($slot['name']) && (string) ($inhalt[$i][$slot['name']] ?? '') !== '') {
                $imagesInRow++;
            }
        }
        if ($imagesInRow === 0) {
            continue;
        }

        // First check: how many main images?
        $mainImagesInRow = 0;
        $merk = null;
        foreach ($imagemain as $im => $slot) {
            if (isset($slot['main']) && (string) ($inhalt[$i][$slot['main']] ?? '') === 'y') {
                $mainImagesInRow++;
                $merk = $im;
            }
        }

        // If there is exactly one main image, is it visible?
        if ($mainImagesInRow === 1
            && isset($imagemain[$merk]['visible'])
            && (string) ($inhalt[$i][$imagemain[$merk]['visible']] ?? '') === 'n'
        ) {
            $errormessage[] = '<b style="font-weight:normal;color:#990000">Main image in row ' . $i . ' is not visible</b>';
        }
        if ($mainImagesInRow === 0) {
            $errormessage[] = '<b style="font-weight:normal;color:#990000">There is no visible main image given in row ' . $i . '</b>';
        }
        if ($mainImagesInRow > 1) {
            $errormessage[] = '<b style="font-weight:normal;color:#990000">There are ' . $mainImagesInRow . ' main images given in row ' . $i . '</b>';
        }
    }

    if (count($errormessage) > 0) {
        echo '<b style="color:#990000;">There is not one main image for each object !</b>';
        foreach ($errormessage as $message) {
            echo '<br>' . $message;
            $error++;
        }
    } else {
        echo '<i style="font-style:normal;color:#009900;">For each object that has images attached exactly one main image is given !</i>';
    }
} else {
    echo '<i style="font-style:normal;color:#009900;">No images to be imported !</i>';
}
// Summary: either the total number of errors, or the links to the XML export.
echo '<hr>';
$errorTotal = $error + $depcon_error;
if ($errorTotal > 0) {
    echo '
<p>Error(s) found: ' . $errorTotal . '</p>';
} else {
    // The file name originates from the query string; percent-encoding keeps
    // it from breaking out of the href attribute and keeps the link valid
    // for names containing spaces, "&" or quotes.
    $fnamParam = rawurlencode((string) ($filename ?? ''));
    echo '<a href="index5.php?fnam=' . $fnamParam . '"><img src="img/go.gif"> Create XML for md:import (iso 8859-1)</a><br>';
    echo '<a href="index6.php?fnam=' . $fnamParam . '"><img src="img/go.gif"> Create XML for md:import (utf8)</a><br>';
}