<?PHP
/**
 * Parser for XML generated through CSVXML.
 *
 * @author Joshua Ramon Enslin <joshua@museum-digital.de>
 * @author Stefan Rohde-Enslin <s.rohde-enslin@museum-digital.de>
 * @link   https://imports.museum-digital.org/importer/parsers/csvxml.php
 */
declare(strict_types = 1);

/**
 * Parse function.
 *
 * Reads every file in the given XML folder, splits it into single records at
 * the `_____-----_____` delimiter, loads each record with SimpleXML and maps
 * it to an MDObject, which is then written through MDObjectWriter.
 *
 * Records whose inventory number already exists in the `objekt` table are
 * skipped. Every element that has been handled is removed from the record;
 * anything left over at the end is reported through MDParserIncomplete, so
 * that unmapped input never goes unnoticed.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. Not used by this parser.
 * @param integer          $sammlung_id    Collection ID. Optional. Not used by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the folder to import from does not exist.
 * @throws MDParserIncomplete If a record contains elements this parser does not handle.
 * @throws Exception          If a record cannot be loaded as XML, or if an actor
 *                            role or a related collection is unknown.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . $XMLFolder;
    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Set up writers

    // NOTE(review): only $tagWriter and $objectWriter are used below. The other
    // writers are kept because their constructors are not visible from here and
    // may have side effects - confirm and remove if they do not.
    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $linkWriter = new MDLinkWriter($version['mainDB']);
    $seriesWriter = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Switch off the import steps this parser does not provide data for.
    // Left enabled (flags not set): disableUpdateBaseData,
    // disableImportAdditionalData, disableImportCollections,
    // disableImportHyperlinks, disableImportReception.
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportMarkings = true;

    // Returns the part of a string after its last slash, or the whole string
    // if it contains no slash. (A bare `strrpos() + 1` would turn the `false`
    // returned for "no slash" into offset 1 and cut off the first character.)
    $afterLastSlash = static function (string $value): string {
        $slashPos = strrpos($value, '/');
        return $slashPos === false ? $value : substr($value, $slashPos + 1);
    };

    // Record identifiers of related items mapped to the collection ID the
    // object is to be assigned to.
    $collectionIdsByRecordIdentifier = [
        "ead_DE-F25_37_VirtuellerBestand"       => 5,
        "ead_DE-F25_37_VirtuellerBestand_added" => 5,
        "DE-611-BF-9624"                        => 521,
        "DE-611-BF-9626"                        => 534,
        "DE-611-BF-9627"                        => 543,
        "DE-611-BF-9629"                        => 541,
        "DE-611-BF-9630"                        => 540,
        "DE-611-BF-9621"                        => 539,
        "DE-611-BF-9623"                        => 538,
        "DE-611-BF-9625"                        => 537,
        "DE-611-BF-9622"                        => 536,
        "DE-611-BF-37593"                       => 545,
        "DE-611-BF-9636"                        => 544,
    ];

    // Lookup table for resolving the language term of a record; it does not
    // depend on the file, so it is built once.
    $languages_iso639 = array_flip(MDLanguagesSet::LANGUAGES_ISO639_2B);

    // Running record counter across all files. Records numbered below
    // $startAtCounter are skipped; with 0 nothing is skipped.
    $i = 0;
    $startAtCounter = 0;

    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        $fileContents = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");
        $allRecords = explode('_____-----_____', $fileContents);
        // The raw contents are not needed once they are split into records.
        $fileContents = null;

        foreach ($allRecords as $recordStr) {

            ++$i;
            if ($i < $startAtCounter) {
                continue;
            }

            if (!($objectData = simplexml_load_string($recordStr, "SimpleXMLElement", LIBXML_NOCDATA))) {
                throw new Exception("Cannot load raw data into SimpleXML ({$recordStr})");
            }

            // Determine the inventory number: prefer a shelf locator of the
            // form "Hs-" + 4 or 5 digits, then a bare 4 or 5 digit shelf
            // locator (prefixed with "Hs-"), and finally fall back to the last
            // path segment of the identifier.
            $shelfLocator = (string)$objectData->location->shelfLocator;
            $invNo = '';
            if (\preg_match("/^Hs\-[0-9][0-9][0-9][0-9]$/", $shelfLocator)
                || \preg_match("/^Hs\-[0-9][0-9][0-9][0-9][0-9]$/", $shelfLocator)
            ) {
                $invNo = $shelfLocator;
                $outputHandler->toLog("Using inventory number $invNo (Hs- set) - " . $objectData->identifier, 2);
            }
            else if (\preg_match("/^[0-9][0-9][0-9][0-9]$/", $shelfLocator)
                || \preg_match("/^[0-9][0-9][0-9][0-9][0-9]$/", $shelfLocator)
            ) {
                $invNo = 'Hs-' . $shelfLocator;
                $outputHandler->toLog("Using inventory number $invNo", 2);
            }

            if ($invNo === '') {
                $invNo = $afterLastSlash((string)$objectData->identifier);
            }

            $description = $objectData->abstract . PHP_EOL . PHP_EOL . $objectData->physicalDescription;

            $title = (string)$objectData->titleInfo->title;
            if (!empty((string)$objectData->titleInfo->subTitle)) {
                $title .= ': ' . (string)$objectData->titleInfo->subTitle;
                unset($objectData->titleInfo->subTitle);
            }

            // Check if inventory number is known already
            $result = $version['mainDB']->query_by_stmt("SELECT 1
                FROM `objekt`
                WHERE `objekt_inventarnr` = ?", "s", $invNo);

            $alreadyImported = $result->num_rows !== 0;
            $result->close();
            $result = null;

            if ($alreadyImported) {
                continue;
            }

            $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, (string)$objectData->genre, $title, $description, $outputHandler);

            // An extent ending on " Br." is stored as the number of pieces if
            // it is short (less than 8 bytes); any extent not ending on " Br."
            // is appended to the description.
            if (!empty($objectData->physicalDescription->extent)) {
                $extent = (string)$objectData->physicalDescription->extent;
                if (str_ends_with($extent, " Br.")) {
                    if (strlen($extent) < 8) {
                        $object->set_string("stueckzahl", str_replace(" Br.", "", $extent));
                    }
                }
                else {
                    $object->append_objekt_beschreibung($extent);
                }
            }

            unset($objectData->titleInfo->title, $objectData->abstract, $objectData->physicalDescription);
            if (count($objectData->titleInfo->children()) === 0) {
                unset($objectData->titleInfo);
            }

            // The genre doubles as a tag.
            $object->appendTagByName((string)$objectData->genre, "", $tagWriter);
            unset($objectData->genre);

            // Notes: several notes are appended one per line, a single one is
            // set directly.
            if (count($objectData->note) > 1) {
                foreach ($objectData->note as $note) {
                    $object->append_string("notizen_text1", PHP_EOL . (string)$note);
                }
            }
            else if (!empty((string)$objectData->note)) {
                $object->set_string("notizen_text1", (string)$objectData->note);
            }
            unset($objectData->note);

            unset($objectData->location->physicalLocation);

            if (!empty((string)$objectData->location->shelfLocator)) {
                $object->append_string("standort_eigentlich", (string)$objectData->location->shelfLocator);
            }
            unset($objectData->location->shelfLocator);
            unset($objectData->location);

            // Each named actor becomes an event of the type its role maps to.
            foreach ($objectData->name as $actor) {

                // Prefer the second role term, fall back to the first one.
                $linkTypeName = '';
                if (!empty((string)$actor->role->roleTerm[1])) {
                    $linkTypeName = (string)$actor->role->roleTerm[1];
                }
                else if (!empty((string)$actor->role->roleTerm)) {
                    $linkTypeName = (string)$actor->role->roleTerm;
                }

                if (!isset(MDConcActor::ACTOR_ROLES_TO_EVENT_TYPE[$linkTypeName])) {
                    throw new Exception("Unknown actor type: " . $linkTypeName . ' in ' . (string)$objectData->identifier . ' for ' . (string)$actor->namePart . ' ///// ' . PHP_EOL . PHP_EOL . var_export($actor->role->roleTerm, true));
                }

                $linkType = MDConcActor::ACTOR_ROLES_TO_EVENT_TYPE[$linkTypeName];

                // The GND ID is the last path segment of the actor's value URI.
                $gndId = $afterLastSlash((string)$actor->attributes()->valueURI);

                $md_event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $linkType, $outputHandler);

                $md_event->set_persinst_id((string)$actor->namePart, "", "", ['gnd' => $gndId]);
                if ($md_event->get_persinst_id() !== 0) {

                    // Handle origin information for production events
                    if (!empty($objectData->originInfo)) {
                        $isProduction = in_array($linkType, MDEventsSet::EVENTS_PRODUCTION, true);

                        if ($isProduction and !empty($objectData->originInfo->dateCreated)) {

                            // The second dateCreated element is the one used.
                            $dateCreated = (string)$objectData->originInfo->dateCreated[1];

                            if (str_starts_with($dateCreated, 'o.D. [')) {
                                // "o.D. [<date>]": use what is inside the brackets.
                                $md_event->set_zeiten_id(trim(substr($dateCreated, 5), " []"));
                            }
                            else if (str_contains($dateCreated, "[")
                                and strlen($dateCreated) === 23
                                and substr($dateCreated, 0, 10) === substr($dateCreated, 12, 10)
                            ) {
                                // 23 bytes with the same 10 bytes at offsets 0
                                // and 12: the date is given twice, use it once.
                                $md_event->set_zeiten_id(substr($dateCreated, 0, 10));
                            }
                            else {
                                $md_event->set_zeiten_id($dateCreated);
                            }
                            unset($objectData->originInfo->dateCreated);
                        }

                        if ($isProduction and !empty($objectData->originInfo->place)) {
                            $md_event->set_orte_id((string)$objectData->originInfo->place->placeTerm);
                            unset($objectData->originInfo->place->placeTerm);
                            if (empty($objectData->originInfo->place->children())) unset($objectData->originInfo->place);
                        }
                        if (empty($objectData->originInfo->children())) unset($objectData->originInfo);
                    }

                    $object->appendEvent($md_event);
                }

            }
            unset($objectData->name);

            // If originInfo is still set here, no actor event has consumed it:
            // attach it to a separate event of type 1.
            if (!empty($objectData->originInfo)) {
                $md_event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);

                if (!empty($objectData->originInfo->dateCreated)) {
                    $md_event->set_zeiten_id((string)$objectData->originInfo->dateCreated[1]);
                    unset($objectData->originInfo->dateCreated);
                }

                if (!empty($objectData->originInfo->place)) {
                    $md_event->set_orte_id((string)$objectData->originInfo->place->placeTerm);
                    unset($objectData->originInfo->place->placeTerm);
                    if (empty($objectData->originInfo->place->children())) unset($objectData->originInfo->place);
                }
                if (empty($objectData->originInfo->children())) unset($objectData->originInfo);

                // Only keep the event if a time or a place could be linked.
                if ($md_event->get_zeiten_id() !== 0 || $md_event->get_orte_id() !== 0) {
                    $object->appendEvent($md_event);
                }
            }

            if (!empty($objectData->language)) {
                $object->set_string("content_language", $languages_iso639[(string)$objectData->language->languageTerm[0]]);
                $object->set_bool("content_language_show_md", true);
                $object->set_bool("content_language_show_extern", true);
                unset($objectData->language);
            }

            // Related items of type "constituent" determine the collections
            // the object belongs to.
            if (!empty($objectData->relatedItem)) {
                foreach ($objectData->relatedItem as $relatedItem) {

                    if ((string)$relatedItem->attributes()->type !== 'constituent') {
                        continue;
                    }

                    $recordIdentifier = (string)$relatedItem->recordInfo->recordIdentifier;
                    if (isset($collectionIdsByRecordIdentifier[$recordIdentifier])) {
                        $object->appendCollectionByID($collectionIdsByRecordIdentifier[$recordIdentifier]);
                        continue;
                    }

                    // Related items titled as (partial) estates are ignored.
                    $relatedTitle = (string)$relatedItem->titleInfo->title;
                    if (str_starts_with($relatedTitle, 'Nachlaß')
                        || str_starts_with($relatedTitle, 'Teilnachlaß')
                    ) {
                        continue;
                    }

                    throw new Exception("Unknown collection: " . var_export($relatedItem, true));

                }
            }

            // Everything of interest has been taken from relatedItem above.
            unset($objectData->relatedItem);
            unset($objectData->recordInfo->recordContentSource);

            // The identifier is stored as a link back to the source record.
            $object->appendLinkByName((string)$objectData->identifier, "Das Objekt bei Kalliope");
            unset($objectData->identifier);

            unset($objectData->recordInfo->recordIdentifier);

            // Import edit history at kalliope
            $editHistory = [];
            $editHistory[] = "Ersterfassung bei Kalliope: " . $objectData->recordInfo->recordCreationDate;
            foreach ($objectData->recordInfo->recordChangeDate as $changeDate) {
                $editHistory[] = "Bearbeitung bei Kalliope: " . (string)$changeDate;
            }
            $editHistStr = implode(PHP_EOL, $editHistory);

            $object->append_string("notizen_text1", PHP_EOL . PHP_EOL . $editHistStr);

            unset($objectData->recordInfo->recordCreationDate, $objectData->recordInfo->recordChangeDate);
            if (count($objectData->recordInfo->children()) === 0) {
                unset($objectData->recordInfo);
            }

            // A record without remaining children has been handled completely.
            if (count($objectData->children()) === 0) {
                unset($objectData);
            }

            $object->set_objekt_publik($visibility);
            $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

            // NOTE(review): the object is written before this check, so a
            // record with unhandled elements is stored and the import aborts
            // afterwards. Confirm whether the check should precede the write.
            if (!empty($objectData)) {
                throw new MDParserIncomplete("Incomplete parser: " . var_export($objectData, true));
            }

            $outputHandler->toLog("Done with object $i", 2);

            // Throttle the import: pause for IMPORTER_DELAY_PER_OBJECT
            // microseconds after each object.
            usleep(IMPORTER_DELAY_PER_OBJECT);

        }

    }

}