Better modularize NodaWikidataFetcher's loading of translations
This commit is contained in:
parent
511304b6f2
commit
b318b5b471
|
@ -17,8 +17,8 @@ final class NodaWikidataFetcher {
|
||||||
'Accept: application/sparql-results+json',
|
'Accept: application/sparql-results+json',
|
||||||
];
|
];
|
||||||
|
|
||||||
const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'zh'];
|
// NOTE(review): 'jp' is not an ISO 639-1 language code; Japanese is 'ja', which is
// what LANGUAGES_TO_CHECK uses. Confirm whether 'ja' is intended here.
const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'uk', 'zh'];
|
||||||
const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
|
// Language codes passed to listTranslationsFromWikidataWikipedia() when the
// translations of persons/institutions, places and tags are fetched.
const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
|
||||||
|
|
||||||
const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", "tl", "tr"];
|
// Language codes for which getWikidataTranslationsForTag() upper-cases the first
// character of a translated label and description (via ucfirst()).
const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", "tl", "tr"];
|
||||||
|
|
||||||
|
@ -354,6 +354,76 @@ final class NodaWikidataFetcher {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Loads translations from Wikipedia pages through wikidata and then merges
 * them with Wikidata's own translations into a usable array.
 *
 * For every requested language one of two sources is used:
 * - If a Wikipedia page is available for the language (a fetch target, a
 *   sitelink and a link are all present), the description is taken from the
 *   fetched Wikipedia contents, wrapped in quotes and suffixed with the
 *   source wiki and the current date. The label is taken from Wikidata.
 * - Otherwise, if Wikidata itself provides both a label and a description
 *   for the language, those are used and the link is left empty.
 * Languages for which neither applies are omitted from the result.
 *
 * @param array<string> $checkagainstLanguage The languages to check against.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @throws MDExpectedException If the requests to Wikipedia could not be initialized.
 *
 * @return array<string, array{label: string, description: string, link: string}>
 */
public static function listTranslationsFromWikidataWikipedia(array $checkagainstLanguage, array $data):array {

    // $languagesToFetch is keyed by language code and handed to the multi-curl
    // helper (presumably a list of URLs - verify against
    // getWikidataWikipediaTranslationSources()); $wikilinks holds the link
    // stored alongside each translation.
    [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
    if (empty($languagesToFetch)) {
        return [];
    }

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    $output = [];

    foreach ($checkagainstLanguage as $lang) {

        if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki']) && !empty($wikilinks[$lang])) {

            $tDescription = "";

            if (!empty($contents[$lang])) {

                // The response may be invalid JSON or lack the expected keys;
                // the null coalescing operator keeps both cases from raising
                // "array offset on null" / "undefined index" warnings.
                $decoded = json_decode($contents[$lang], true);
                $descFromWiki = $decoded['parse']['text']['*'] ?? null;

                # Process data retrieved from wikipedia
                $wikiText = is_scalar($descFromWiki) ? (string)$descFromWiki : "";

                // A sitelink can exist without a Wikidata label in the same
                // language; fall back to the language code itself then.
                $wikiLanguage = (string)($data['labels'][$lang]['language'] ?? $lang);

                $tDescription = '"' . $wikiText . '" - (' . $wikiLanguage . '.wikipedia.org ' . date('d.m.Y') . ')';

            }

            $output[$lang] = [
                'label'       => self::_cleanWikidataInput((string)($data['labels'][$lang]['value'] ?? '')),
                'description' => self::_cleanWikidataInput($tDescription),
                'link'        => $wikilinks[$lang],
            ];

        }
        // No usable Wikipedia page: fall back to Wikidata's own label and description.
        else if (!empty($data['labels'][$lang]['value']) && !empty($data['descriptions'][$lang]['value'])) {

            $output[$lang] = [
                'label'       => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
                'description' => self::_cleanWikidataInput((string)$data['descriptions'][$lang]['value']),
                'link'        => "",
            ];

        }

    }

    return $output;

}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleans contents parsed from Wikipedia.
|
* Cleans contents parsed from Wikipedia.
|
||||||
*
|
*
|
||||||
|
@ -875,68 +945,23 @@ final class NodaWikidataFetcher {
|
||||||
*/
|
*/
|
||||||
public function getWikidataTranslationsForPersinst(array $data, int $persinst_id):void {
|
public function getWikidataTranslationsForPersinst(array $data, int $persinst_id):void {
|
||||||
|
|
||||||
$checkagainstLanguage = self::LANGUAGES_TO_CHECK;
|
if (empty($translations = self::listTranslationsFromWikidataWikipedia(self::LANGUAGES_TO_CHECK, $data))) {
|
||||||
|
|
||||||
list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
|
|
||||||
if (empty($languagesToFetch)) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
$contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
|
|
||||||
}
|
|
||||||
catch (TypeError $e) {
|
|
||||||
throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
|
|
||||||
}
|
|
||||||
|
|
||||||
$insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertPersinstTranslation(?, ?, ?, ?, ?)");
|
$insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertPersinstTranslation(?, ?, ?, ?, ?)");
|
||||||
|
|
||||||
$this->_mysqli_noda->autocommit(false);
|
$this->_mysqli_noda->autocommit(false);
|
||||||
|
|
||||||
foreach ($checkagainstLanguage as $lang) {
|
foreach ($translations as $lang => $values) {
|
||||||
|
|
||||||
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {
|
try {
|
||||||
|
$insertStmt->bind_param("issss", $persinst_id, $lang,
|
||||||
$wikilink = $wikilinks[$lang];
|
$values['label'], $values['description'], $values['link']);
|
||||||
if (!empty($contents[$lang])) {
|
|
||||||
|
|
||||||
$descFromWiki = $contents[$lang];
|
|
||||||
$descFromWiki = json_decode($descFromWiki, true)['parse']['text']['*'];
|
|
||||||
|
|
||||||
# Process data retrieved from wikipedia
|
|
||||||
|
|
||||||
if ($descFromWiki !== null) $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
|
|
||||||
else $tDescription = "";
|
|
||||||
|
|
||||||
if (substr($tDescription, -1) === chr(10)) $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
|
|
||||||
|
|
||||||
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
|
|
||||||
// Inhalt erster Absatz jeweilige Wikipedia: ' . $tDescription
|
|
||||||
// dies enthält den ersten Absatz der jeweiligen Wikipedia
|
|
||||||
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$tDescription = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
$tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
|
|
||||||
$tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);
|
|
||||||
|
|
||||||
try {
|
|
||||||
$insertStmt->bind_param("issss", $persinst_id, $tLang, $tLabel, $tDescription, $wikilink);
|
|
||||||
$insertStmt->execute();
|
|
||||||
}
|
|
||||||
catch (MDMysqliInvalidEncodingError $e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
// echo '<br><b style="color: cc0000;">Wikipedia Links fehlen</b>';
|
|
||||||
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
|
|
||||||
|
|
||||||
$wikilink = "";
|
|
||||||
$insertStmt->bind_param("issss", $persinst_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
|
|
||||||
$insertStmt->execute();
|
$insertStmt->execute();
|
||||||
}
|
}
|
||||||
|
catch (MDMysqliInvalidEncodingError $e) {
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -944,7 +969,6 @@ final class NodaWikidataFetcher {
|
||||||
$this->_mysqli_noda->autocommit(true);
|
$this->_mysqli_noda->autocommit(true);
|
||||||
|
|
||||||
$insertStmt->close();
|
$insertStmt->close();
|
||||||
unset($insertStmt);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1248,75 +1272,23 @@ final class NodaWikidataFetcher {
|
||||||
*/
|
*/
|
||||||
public function getWikidataTranslationsForPlace(array $data, int $ort_id) {
|
public function getWikidataTranslationsForPlace(array $data, int $ort_id) {
|
||||||
|
|
||||||
$checkagainstLanguage = self::LANGUAGES_TO_CHECK;
|
if (empty($translations = self::listTranslationsFromWikidataWikipedia(self::LANGUAGES_TO_CHECK, $data))) {
|
||||||
|
|
||||||
list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
|
|
||||||
if (empty($languagesToFetch)) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
$contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
|
|
||||||
}
|
|
||||||
catch (TypeError $e) {
|
|
||||||
throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
|
|
||||||
}
|
|
||||||
|
|
||||||
$insertStmt = $this->_mysqli_noda->do_prepare("CALL `nodaInsertOrtTranslation`(?, ?, ?, ?, ?)");
|
$insertStmt = $this->_mysqli_noda->do_prepare("CALL `nodaInsertOrtTranslation`(?, ?, ?, ?, ?)");
|
||||||
|
|
||||||
$this->_mysqli_noda->autocommit(false);
|
$this->_mysqli_noda->autocommit(false);
|
||||||
|
|
||||||
foreach ($checkagainstLanguage as $lang) {
|
foreach ($translations as $lang => $values) {
|
||||||
|
|
||||||
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {
|
try {
|
||||||
|
$insertStmt->bind_param("issss", $ort_id, $lang,
|
||||||
$wikilink = $wikilinks[$lang];
|
$values['label'], $values['description'], $values['link']);
|
||||||
if (!empty($contents[$lang])) {
|
|
||||||
|
|
||||||
$descFromWiki = $contents[$lang];
|
|
||||||
|
|
||||||
if (!($wikiDataDecoded = json_decode($descFromWiki, true))) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$tLabel = $wikiDataDecoded['parse']['title'];
|
|
||||||
$descFromWiki = $wikiDataDecoded['parse']['text']['*'];
|
|
||||||
|
|
||||||
# Process data retrieved from wikipedia
|
|
||||||
if (empty($descFromWiki)) $tDescription = "";
|
|
||||||
else {
|
|
||||||
|
|
||||||
$tDescription = self::_cleanWikidataInput((string)$descFromWiki);
|
|
||||||
|
|
||||||
if (substr($tDescription, -1) === chr(10)) $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
|
|
||||||
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
|
|
||||||
$tDescription = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $tDescription));
|
|
||||||
// echo '<br>Inhalt erster Absatz jeweilige Wikipedia: ' . $tDescription; // dies enthält den ersten Absatz der jeweiligen Wikipedia
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$tDescription = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
$tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
|
|
||||||
if (empty($tLabel)) $tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);
|
|
||||||
|
|
||||||
try {
|
|
||||||
$insertStmt->bind_param("issss", $ort_id, $tLang, $tLabel, $tDescription, $wikilink);
|
|
||||||
$insertStmt->execute();
|
|
||||||
}
|
|
||||||
catch (MDMysqliInvalidEncodingError $e) {
|
|
||||||
$_SESSION["editHistory"] = ["changesStored", "Error adding translation for language $tLang"];
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
|
|
||||||
|
|
||||||
$wikilink = "";
|
|
||||||
$insertStmt->bind_param("issss", $ort_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
|
|
||||||
$insertStmt->execute();
|
$insertStmt->execute();
|
||||||
}
|
}
|
||||||
|
catch (MDMysqliInvalidEncodingError $e) {
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1573,84 +1545,32 @@ final class NodaWikidataFetcher {
|
||||||
*/
|
*/
|
||||||
public function getWikidataTranslationsForTag(array $data, int $tag_id) {
|
public function getWikidataTranslationsForTag(array $data, int $tag_id) {
|
||||||
|
|
||||||
$checkagainstLanguage = self::LANGUAGES_TO_CHECK;
|
if (empty($translations = self::listTranslationsFromWikidataWikipedia(self::LANGUAGES_TO_CHECK, $data))) {
|
||||||
|
|
||||||
list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
|
|
||||||
if (empty($languagesToFetch)) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
$contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
|
|
||||||
}
|
|
||||||
catch (TypeError $e) {
|
|
||||||
throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
|
|
||||||
}
|
|
||||||
|
|
||||||
$insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertTagTranslation(?, ?, ?, ?, ?)");
|
$insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertTagTranslation(?, ?, ?, ?, ?)");
|
||||||
|
|
||||||
$this->_mysqli_noda->autocommit(false);
|
$this->_mysqli_noda->autocommit(false);
|
||||||
|
|
||||||
foreach ($checkagainstLanguage as $lang) {
|
foreach ($translations as $lang => $values) {
|
||||||
|
|
||||||
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {
|
|
||||||
|
|
||||||
$wikilink = $wikilinks[$lang];
|
|
||||||
if (!empty($contents[$lang])) {
|
|
||||||
|
|
||||||
$descFromWiki = $contents[$lang];
|
|
||||||
$descFromWiki = json_decode($descFromWiki, true)['parse']['text']['*'];
|
|
||||||
|
|
||||||
if (!empty($descFromWiki)) {
|
|
||||||
|
|
||||||
# Process data retrieved from wikipedia
|
|
||||||
$tDescription = self::_cleanWikidataInput((string)$descFromWiki);
|
|
||||||
|
|
||||||
if (substr($tDescription, -1) === chr(10)) {
|
|
||||||
$tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
|
|
||||||
$tDescription = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $tDescription));
|
|
||||||
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$tDescription = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$tDescription = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
$tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
|
|
||||||
$tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);
|
|
||||||
|
|
||||||
if (in_array($tLang, self::LANGUAGES_TO_CAPITALIZE, true)) {
|
|
||||||
$tLabel = ucfirst(trim($tLabel));
|
|
||||||
$tDescription = ucfirst(trim($tDescription));
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
$insertStmt->bind_param("issss", $tag_id, $tLang, $tLabel, $tDescription, $wikilink);
|
|
||||||
$insertStmt->execute();
|
|
||||||
}
|
|
||||||
catch (MDMysqliInvalidEncodingError $e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
|
if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
|
||||||
|
$label = ucfirst($values['label']);
|
||||||
|
$description = ucfirst($values['description']);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$label = $values['label'];
|
||||||
|
$description = $values['description'];
|
||||||
}
|
}
|
||||||
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
|
|
||||||
|
|
||||||
$wikilink = "";
|
try {
|
||||||
|
$insertStmt->bind_param("issss", $tag_id, $lang,
|
||||||
if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
|
$label, $description, $values['link']);
|
||||||
$data['labels'][$lang]['value'] = ucfirst(trim($data['labels'][$lang]['value']));
|
|
||||||
$data['descriptions'][$lang]['value'] = ucfirst(trim($data['descriptions'][$lang]['value']));
|
|
||||||
}
|
|
||||||
|
|
||||||
$insertStmt->bind_param("issss", $tag_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
|
|
||||||
$insertStmt->execute();
|
$insertStmt->execute();
|
||||||
}
|
}
|
||||||
|
catch (MDMysqliInvalidEncodingError $e) {
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user