Add blacklist for unwanted rewrites in consolidating place names

This commit is contained in:
2023-11-26 23:55:22 +01:00
parent e610723107
commit b36a504277
4 changed files with 135 additions and 5 deletions

View File

@ -53,9 +53,11 @@ function getNames(array $data):array {
// Q6256 => country
// Map of entity ID (QID) => output name. Every entry is currently disabled by
// the block comment below, so $targets evaluates to an empty array.
// NOTE(review): the reason for disabling these entries is not visible in this
// part of the file — confirm before re-enabling them.
$targets = [
/*
'Q6256' => 'countries',
'Q3024240' => 'historical_countries',
'Q10864048' => 'first_lvl_administrative_units',
*/
];
// Codes iterated by the fetch loops (one pass per entry); presumably language
// codes, given the loop variable is named $lang — confirm against query().
$langs = ['ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'ha', 'he', 'hi', 'hu', 'id', 'it', 'ja', 'ka', 'ko', 'nl', 'pl', 'pt', 'ro', 'ru', 'sv', 'sw', 'ta', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'zh'];
@ -69,3 +71,31 @@ foreach ($langs as $lang) {
}
}
// Targets whose terms are meant to be independent of language: the names
// fetched for every entry of $langs are merged into ONE list per output file
// (QID => output file name, without the ".json" extension).
$targetsForMerge = [
    'Q23718' => 'cardinal_directions',
];

// Collect the per-language results first and merge them once per file below,
// instead of re-copying a growing accumulator with array_merge() on every
// iteration.
$fetchedBatches = [];
foreach ($langs as $lang) {
    foreach ($targetsForMerge as $qid => $filename) {
        $fetchedBatches[$filename][] = getNames(query($lang, $qid));
        echo "Fetched $lang : $filename ($qid)" . PHP_EOL;
    }
}

$mergedValues = [];
foreach ($fetchedBatches as $filename => $batches) {
    // The leading [] keeps the call valid even if $batches were empty.
    $mergedValues[$filename] = array_merge([], ...$batches);
}

// Manually added terms. NOTE(review): presumably these bare German forms are
// not returned by the query itself — confirm.
foreach (['Nord', 'Ost', 'West', 'Süd'] as $manualTerm) {
    $mergedValues['cardinal_directions'][] = $manualTerm;
}

// Write each merged list as a de-duplicated, re-indexed JSON array.
foreach ($mergedValues as $filename => $values) {
    $path = __DIR__ . '/../static/' . $filename . '.json';
    $json = json_encode(array_values(array_unique($values)));

    if ($json === false) {
        // Skip the write: passing false on would truncate the existing file
        // to an empty one without any indication of what went wrong.
        trigger_error(
            "Could not encode $filename as JSON: " . json_last_error_msg(),
            E_USER_WARNING
        );
        continue;
    }

    if (file_put_contents($path, $json) === false) {
        trigger_error("Could not write $path", E_USER_WARNING);
    }
}