Better modularize NodaWikidataFetcher's loading of translations
This commit is contained in:
parent
511304b6f2
commit
b318b5b471
@ -17,8 +17,8 @@ final class NodaWikidataFetcher {
|
||||
'Accept: application/sparql-results+json',
|
||||
];
|
||||
|
||||
const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'zh'];
|
||||
const LANGUAGES_TO_CHECK = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
|
||||
const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'jp', 'nl', 'pt', 'ru', 'sv', 'uk', 'zh'];
|
||||
const LANGUAGES_TO_CHECK = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
|
||||
|
||||
const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", "tl", "tr"];
|
||||
|
||||
@ -354,6 +354,76 @@ final class NodaWikidataFetcher {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads translations from Wikipedia pages through wikidata and then merges
|
||||
* them with Wikidata's own translations into a usable array.
|
||||
*
|
||||
* @param array<string> $checkagainstLanguage The languages (language codes) to check against.
|
||||
* @param array<mixed> $data Data fetched from Wikidata.
|
||||
*
|
||||
* @return array<string, array{label: string, description: string, link: string}>
|
||||
*/
|
||||
public static function listTranslationsFromWikidataWikipedia(array $checkagainstLanguage, array $data):array {

    // Both returned lists are indexed by language code below.
    // NOTE(review): $languagesToFetch is presumably the set of request URLs
    // handed to runCurlMulti - confirm against the helper's definition.
    [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
    if (empty($languagesToFetch)) {
        return [];
    }

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    // Loop-invariant: retrieval date appended to every Wikipedia excerpt.
    $retrievedOn = date('d.m.Y');

    $output = [];

    foreach ($checkagainstLanguage as $lang) {

        if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki']) && !empty($wikilinks[$lang])) {

            // A Wikipedia page exists for this language: use its text as
            // description and keep the link to it.
            $tDescription = "";

            if (!empty($contents[$lang])) {

                // The response may be invalid JSON or lack parse.text.* (e.g.
                // an API error payload), so never dereference it unguarded.
                $decoded = json_decode($contents[$lang], true);
                $descFromWiki = is_array($decoded) ? ($decoded['parse']['text']['*'] ?? null) : null;

                // Only attach the quotation marks and the source note if there
                // actually is text to quote; otherwise the description would
                // consist of nothing but '"" - (xx.wikipedia.org <date>)'.
                if (is_scalar($descFromWiki) && (string)$descFromWiki !== '') {
                    // A sitelink can exist without a label in that language;
                    // fall back to the language code being processed.
                    $sourceLang = $data['labels'][$lang]['language'] ?? $lang;
                    $tDescription = '"' . $descFromWiki . '" - (' . $sourceLang . '.wikipedia.org ' . $retrievedOn . ')';
                }

            }

            $output[$lang] = [
                'label'       => self::_cleanWikidataInput((string)($data['labels'][$lang]['value'] ?? '')),
                'description' => self::_cleanWikidataInput($tDescription),
                'link'        => $wikilinks[$lang],
            ];

        }
        // No usable Wikipedia link: fall back to Wikidata's own label and
        // description, stored without a link.
        else if (!empty($data['labels'][$lang]['value']) && !empty($data['descriptions'][$lang])) {

            $output[$lang] = [
                'label'       => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
                'description' => self::_cleanWikidataInput((string)($data['descriptions'][$lang]['value'] ?? '')),
                'link'        => "",
            ];

        }

    }

    return $output;

}
|
||||
|
||||
/**
|
||||
* Cleans contents parsed from Wikipedia.
|
||||
*
|
||||
@ -875,68 +945,23 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
public function getWikidataTranslationsForPersinst(array $data, int $persinst_id):void {
|
||||
|
||||
$checkagainstLanguage = self::LANGUAGES_TO_CHECK;
|
||||
|
||||
list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
|
||||
if (empty($languagesToFetch)) {
|
||||
if (empty($translations = self::listTranslationsFromWikidataWikipedia(self::LANGUAGES_TO_CHECK, $data))) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
|
||||
}
|
||||
catch (TypeError $e) {
|
||||
throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
|
||||
}
|
||||
|
||||
$insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertPersinstTranslation(?, ?, ?, ?, ?)");
|
||||
|
||||
$this->_mysqli_noda->autocommit(false);
|
||||
|
||||
foreach ($checkagainstLanguage as $lang) {
|
||||
foreach ($translations as $lang => $values) {
|
||||
|
||||
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {
|
||||
|
||||
$wikilink = $wikilinks[$lang];
|
||||
if (!empty($contents[$lang])) {
|
||||
|
||||
$descFromWiki = $contents[$lang];
|
||||
$descFromWiki = json_decode($descFromWiki, true)['parse']['text']['*'];
|
||||
|
||||
# Process data retrieved from wikipedia
|
||||
|
||||
if ($descFromWiki !== null) $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
|
||||
else $tDescription = "";
|
||||
|
||||
if (substr($tDescription, -1) === chr(10)) $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
|
||||
|
||||
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
|
||||
// Inhalt erster Absatz jeweilige Wikipedia: ' . $tDescription
|
||||
// dies enthält den ersten Absatz der jeweiligen Wikipedia
|
||||
|
||||
}
|
||||
else {
|
||||
$tDescription = "";
|
||||
}
|
||||
|
||||
$tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
|
||||
$tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);
|
||||
|
||||
try {
|
||||
$insertStmt->bind_param("issss", $persinst_id, $tLang, $tLabel, $tDescription, $wikilink);
|
||||
$insertStmt->execute();
|
||||
}
|
||||
catch (MDMysqliInvalidEncodingError $e) {
|
||||
}
|
||||
|
||||
}
|
||||
// echo '<br><b style="color: cc0000;">Wikipedia Links fehlen</b>';
|
||||
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
|
||||
|
||||
$wikilink = "";
|
||||
$insertStmt->bind_param("issss", $persinst_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
|
||||
try {
|
||||
$insertStmt->bind_param("issss", $persinst_id, $lang,
|
||||
$values['label'], $values['description'], $values['link']);
|
||||
$insertStmt->execute();
|
||||
}
|
||||
catch (MDMysqliInvalidEncodingError $e) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -944,7 +969,6 @@ final class NodaWikidataFetcher {
|
||||
$this->_mysqli_noda->autocommit(true);
|
||||
|
||||
$insertStmt->close();
|
||||
unset($insertStmt);
|
||||
|
||||
}
|
||||
|
||||
@ -1248,75 +1272,23 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
public function getWikidataTranslationsForPlace(array $data, int $ort_id) {
|
||||
|
||||
$checkagainstLanguage = self::LANGUAGES_TO_CHECK;
|
||||
|
||||
list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
|
||||
if (empty($languagesToFetch)) {
|
||||
if (empty($translations = self::listTranslationsFromWikidataWikipedia(self::LANGUAGES_TO_CHECK, $data))) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
|
||||
}
|
||||
catch (TypeError $e) {
|
||||
throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
|
||||
}
|
||||
|
||||
$insertStmt = $this->_mysqli_noda->do_prepare("CALL `nodaInsertOrtTranslation`(?, ?, ?, ?, ?)");
|
||||
|
||||
$this->_mysqli_noda->autocommit(false);
|
||||
|
||||
foreach ($checkagainstLanguage as $lang) {
|
||||
foreach ($translations as $lang => $values) {
|
||||
|
||||
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {
|
||||
|
||||
$wikilink = $wikilinks[$lang];
|
||||
if (!empty($contents[$lang])) {
|
||||
|
||||
$descFromWiki = $contents[$lang];
|
||||
|
||||
if (!($wikiDataDecoded = json_decode($descFromWiki, true))) {
|
||||
continue;
|
||||
}
|
||||
$tLabel = $wikiDataDecoded['parse']['title'];
|
||||
$descFromWiki = $wikiDataDecoded['parse']['text']['*'];
|
||||
|
||||
# Process data retrieved from wikipedia
|
||||
if (empty($descFromWiki)) $tDescription = "";
|
||||
else {
|
||||
|
||||
$tDescription = self::_cleanWikidataInput((string)$descFromWiki);
|
||||
|
||||
if (substr($tDescription, -1) === chr(10)) $tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
|
||||
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
|
||||
$tDescription = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $tDescription));
|
||||
// echo '<br>Inhalt erster Absatz jeweilige Wikipedia: ' . $tDescription; // dies enthält den ersten Absatz der jeweiligen Wikipedia
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
$tDescription = "";
|
||||
}
|
||||
|
||||
$tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
|
||||
if (empty($tLabel)) $tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);
|
||||
|
||||
try {
|
||||
$insertStmt->bind_param("issss", $ort_id, $tLang, $tLabel, $tDescription, $wikilink);
|
||||
$insertStmt->execute();
|
||||
}
|
||||
catch (MDMysqliInvalidEncodingError $e) {
|
||||
$_SESSION["editHistory"] = ["changesStored", "Error adding translation for language $tLang"];
|
||||
}
|
||||
|
||||
}
|
||||
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
|
||||
|
||||
$wikilink = "";
|
||||
$insertStmt->bind_param("issss", $ort_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
|
||||
try {
|
||||
$insertStmt->bind_param("issss", $ort_id, $lang,
|
||||
$values['label'], $values['description'], $values['link']);
|
||||
$insertStmt->execute();
|
||||
}
|
||||
catch (MDMysqliInvalidEncodingError $e) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1573,84 +1545,32 @@ final class NodaWikidataFetcher {
|
||||
*/
|
||||
public function getWikidataTranslationsForTag(array $data, int $tag_id) {
|
||||
|
||||
$checkagainstLanguage = self::LANGUAGES_TO_CHECK;
|
||||
|
||||
list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
|
||||
if (empty($languagesToFetch)) {
|
||||
if (empty($translations = self::listTranslationsFromWikidataWikipedia(self::LANGUAGES_TO_CHECK, $data))) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
|
||||
}
|
||||
catch (TypeError $e) {
|
||||
throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
|
||||
}
|
||||
|
||||
$insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertTagTranslation(?, ?, ?, ?, ?)");
|
||||
|
||||
$this->_mysqli_noda->autocommit(false);
|
||||
|
||||
foreach ($checkagainstLanguage as $lang) {
|
||||
|
||||
if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {
|
||||
|
||||
$wikilink = $wikilinks[$lang];
|
||||
if (!empty($contents[$lang])) {
|
||||
|
||||
$descFromWiki = $contents[$lang];
|
||||
$descFromWiki = json_decode($descFromWiki, true)['parse']['text']['*'];
|
||||
|
||||
if (!empty($descFromWiki)) {
|
||||
|
||||
# Process data retrieved from wikipedia
|
||||
$tDescription = self::_cleanWikidataInput((string)$descFromWiki);
|
||||
|
||||
if (substr($tDescription, -1) === chr(10)) {
|
||||
$tDescription = substr($tDescription, 0, strlen($tDescription) - 1);
|
||||
}
|
||||
|
||||
$tDescription = '"' . $tDescription . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
|
||||
$tDescription = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $tDescription));
|
||||
|
||||
}
|
||||
else {
|
||||
$tDescription = "";
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
$tDescription = "";
|
||||
}
|
||||
|
||||
$tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
|
||||
$tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);
|
||||
|
||||
if (in_array($tLang, self::LANGUAGES_TO_CAPITALIZE, true)) {
|
||||
$tLabel = ucfirst(trim($tLabel));
|
||||
$tDescription = ucfirst(trim($tDescription));
|
||||
}
|
||||
|
||||
try {
|
||||
$insertStmt->bind_param("issss", $tag_id, $tLang, $tLabel, $tDescription, $wikilink);
|
||||
$insertStmt->execute();
|
||||
}
|
||||
catch (MDMysqliInvalidEncodingError $e) {
|
||||
}
|
||||
foreach ($translations as $lang => $values) {
|
||||
|
||||
if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
|
||||
$label = ucfirst($values['label']);
|
||||
$description = ucfirst($values['description']);
|
||||
}
|
||||
else {
|
||||
$label = $values['label'];
|
||||
$description = $values['description'];
|
||||
}
|
||||
else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {
|
||||
|
||||
$wikilink = "";
|
||||
|
||||
if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
|
||||
$data['labels'][$lang]['value'] = ucfirst(trim($data['labels'][$lang]['value']));
|
||||
$data['descriptions'][$lang]['value'] = ucfirst(trim($data['descriptions'][$lang]['value']));
|
||||
}
|
||||
|
||||
$insertStmt->bind_param("issss", $tag_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
|
||||
try {
|
||||
$insertStmt->bind_param("issss", $tag_id, $lang,
|
||||
$label, $description, $values['link']);
|
||||
$insertStmt->execute();
|
||||
}
|
||||
catch (MDMysqliInvalidEncodingError $e) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user