From 2aff8ce2daf0fccff63a51af313471ca290085bf Mon Sep 17 00:00:00 2001 From: Dennis Diefenbach Date: Fri, 22 Feb 2019 10:30:59 +0100 Subject: [PATCH 1/5] Entity is also imported if there exists already an entity with the same label in one language --- src/EntityImporter.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/EntityImporter.php b/src/EntityImporter.php index ad84fd3..bfd38b4 100644 --- a/src/EntityImporter.php +++ b/src/EntityImporter.php @@ -10,6 +10,7 @@ use Wikibase\DataModel\Entity\Item; use Wikibase\DataModel\Snak\PropertyValueSnak; use Wikibase\DataModel\Statement\StatementList; +use Wikibase\EntityContent; use Wikibase\Import\Store\ImportedEntityMappingStore; use Wikibase\Lib\Store\EntityStore; @@ -140,7 +141,7 @@ private function createEntity( EntityDocument $entity ) { $entity, 'Import entity', $this->importUser, - EDIT_NEW + EDIT_NEW | EntityContent::EDIT_IGNORE_CONSTRAINTS ); } From 8cde74a363836b58e3b56d622154f53263ba0f00 Mon Sep 17 00:00:00 2001 From: Dennis Diefenbach Date: Tue, 11 Jun 2019 17:22:28 +0200 Subject: [PATCH 2/5] Solving issue 16 --- src/EntityImporter.php | 97 +++++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 34 deletions(-) diff --git a/src/EntityImporter.php b/src/EntityImporter.php index ad84fd3..0131552 100644 --- a/src/EntityImporter.php +++ b/src/EntityImporter.php @@ -74,22 +74,42 @@ public function importEntities( array $ids, $importStatements = true ) { $stashedEntities = array_merge( $stashedEntities, $this->importBatch( $batch ) ); } - if ( $importStatements === true ) { - foreach( $stashedEntities as $entity ) { - $referencedEntities = $this->getReferencedEntities( $entity ); - $this->importEntities( $referencedEntities, false ); - - $localId = $this->entityMappingStore->getLocalId( $entity->getId() ); - - if ( $localId && !$this->statementsCountLookup->hasStatements( $localId ) ) { - $this->statementsImporter->importStatements( $entity ); - } else { - $this->logger->info( - 'Statements already imported for ' . $entity->getId()->getSerialization() - ); - } - } - } + if ( $importStatements === true ) { + foreach( $stashedEntities as $entity ) { + $referencedEntities = $this->getReferencedEntities( $entity ); + $this->importEntities( $referencedEntities, false ); + + $entity_new = $entity; + $statements_new = $entity->getStatements(); + foreach($statements_new as $key1 => $statement_new) { + $snak_new = $statement_new->getMainSnak(); + if ($snak_new instanceof PropertyValueSnak) { + $data_value_new = $snak_new->getDataValue(); + if ($data_value_new instanceof UnboundedQuantityValue) { + $unit = $data_value_new->getUnit(); + if (strpos($unit, 'http://www.wikidata.org/entity/') !== false) { + $id = str_replace("http://www.wikidata.org/entity/", "", $unit); + $newid = $this->entityMappingStore->getLocalId(new ItemId($id)); + $data_value_new = new UnboundedQuantityValue($data_value_new->getAmount(), 'http://YOUR_HOST/entity/' . $newid); + $snak_new = new PropertyValueSnak($snak_new->getPropertyId(), $data_value_new); + $statement_new->setMainSnak($snak_new); + $statements_new->addStatement($statement_new, $key1); + + } + } + } + } + $localId = $this->entityMappingStore->getLocalId($entity->getId()); + + if ($localId && !$this->statementsCountLookup->hasStatements($localId)) { + $this->statementsImporter->importStatements($entity_new); + } else { + $this->logger->info( + 'Statements already imported for ' . $entity->getId()->getSerialization() + ); + } + } + } } private function importBatch( array $batch ) { @@ -162,24 +182,33 @@ private function getBadgeItems( array $entities ) { return $badgeItems; } - private function getReferencedEntities( EntityDocument $entity ) { - $snaks = $entity->getStatements()->getAllSnaks(); - $entities = array(); - - foreach( $snaks as $snak ) { - $entities[] = $snak->getPropertyId()->getSerialization(); - - if ( $snak instanceof PropertyValueSnak ) { - $value = $snak->getDataValue(); - - if ( $value instanceof EntityIdValue ) { - $entities[] = $value->getEntityId()->getSerialization(); - } - } - } - - return array_unique( $entities ); - } + private function getReferencedEntities( EntityDocument $entity ) { + $statements = $entity->getStatements(); + $snaks = $statements->getAllSnaks(); + $entities = array(); + + foreach( $snaks as $key => $snak ) { + $entities[] = $snak->getPropertyId()->getSerialization(); + + if ( $snak instanceof PropertyValueSnak ) { + $value = $snak->getDataValue(); + if ( $value instanceof EntityIdValue ) { + $entities[] = $value->getEntityId()->getSerialization(); + } + if ($value instanceof UnboundedQuantityValue){ + $unit = $value->getUnit(); + if (strpos($unit, 'http://www.wikidata.org/entity/') !== false){ + $value2 = array_pop(array_reverse($this->apiEntityLookup->getEntities([str_replace("http://www.wikidata.org/entity/","",$value->getUnit())]))); + $number = $value2->getId()->getSerialization(); + $unit = $value->getUnit(); + $unit = $number; + $entities[] = $number; + } + } + } + } + return array_unique( $entities ); + } private function importBadgeItems( array $entities ) { $badgeItems = $this->getBadgeItems( $entities ); From 69a31d061999bbcd03035c7640d717bebc94c5f4 Mon Sep 17 00:00:00 2001 From: D063520 Date: Wed, 12 Jun 2019 12:52:15 +0000 Subject: [PATCH 3/5] Import of units now working --- src/EntityImporter.php | 47 +++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/src/EntityImporter.php b/src/EntityImporter.php index 5c17ab9..84dd0ca 100644 --- a/src/EntityImporter.php +++ b/src/EntityImporter.php @@ -9,10 +9,14 @@ use Wikibase\DataModel\Entity\EntityIdValue; use Wikibase\DataModel\Entity\Item; use Wikibase\DataModel\Snak\PropertyValueSnak; +use Wikibase\DataModel\Snak\SnakList; use Wikibase\DataModel\Statement\StatementList; use Wikibase\EntityContent; use Wikibase\Import\Store\ImportedEntityMappingStore; use Wikibase\Lib\Store\EntityStore; +use Wikibase\Repo\WikibaseRepo; +use DataValues\UnboundedQuantityValue; +use Wikibase\DataModel\Entity\ItemId; class EntityImporter { @@ -81,7 +85,8 @@ public function importEntities( array $ids, $importStatements = true ) { $this->importEntities( $referencedEntities, false ); $entity_new = $entity; - $statements_new = $entity->getStatements(); + $statements_new = $entity->getStatements(); + $uri = WikibaseRepo::getDefaultInstance()->getSettings()->getSetting( 'conceptBaseUri' ); foreach($statements_new as $key1 => $statement_new) { $snak_new = $statement_new->getMainSnak(); if ($snak_new instanceof PropertyValueSnak) { @@ -89,17 +94,31 @@ public function importEntities( array $ids, $importStatements = true ) { if ($data_value_new instanceof UnboundedQuantityValue) { $unit = $data_value_new->getUnit(); if (strpos($unit, 'http://www.wikidata.org/entity/') !== false) { - $id = str_replace("http://www.wikidata.org/entity/", "", $unit); - $newid = $this->entityMappingStore->getLocalId(new ItemId($id)); - $data_value_new = new UnboundedQuantityValue($data_value_new->getAmount(), 'http://YOUR_HOST/entity/' . $newid); + $id = str_replace("http://www.wikidata.org/entity/", "", $unit); + $newid = $this->entityMappingStore->getLocalId(new ItemId($id)); + $data_value_new = new UnboundedQuantityValue($data_value_new->getAmount(), $uri . $newid); $snak_new = new PropertyValueSnak($snak_new->getPropertyId(), $data_value_new); $statement_new->setMainSnak($snak_new); - $statements_new->addStatement($statement_new, $key1); - } } - } - } + } + $snakList_new = $statement_new->getQualifiers(); + foreach($snakList_new as $key2 => $snak_new){ + if ($snak_new instanceof PropertyValueSnak) { + $data_value_new = $snak_new->getDataValue(); + if ($data_value_new instanceof UnboundedQuantityValue) { + $unit = $data_value_new->getUnit(); + if (strpos($unit, 'http://www.wikidata.org/entity/') !== false) { + $id = str_replace("http://www.wikidata.org/entity/", "", $unit); + $newid = $this->entityMappingStore->getLocalId(new ItemId($id)); + $data_value_new = new UnboundedQuantityValue($data_value_new->getAmount(), $uri . $newid); + $snak_new = new PropertyValueSnak($snak_new->getPropertyId(), $data_value_new); + } + } + } + $snakList_new[$key2] = $snak_new; + } + } $localId = $this->entityMappingStore->getLocalId($entity->getId()); if ($localId && !$this->statementsCountLookup->hasStatements($localId)) { @@ -193,11 +212,11 @@ private function getReferencedEntities( EntityDocument $entity ) { if ( $snak instanceof PropertyValueSnak ) { $value = $snak->getDataValue(); - if ( $value instanceof EntityIdValue ) { + if ( $value instanceof EntityIdValue ) { $entities[] = $value->getEntityId()->getSerialization(); - } - if ($value instanceof UnboundedQuantityValue){ - $unit = $value->getUnit(); + } + if ($value instanceof UnboundedQuantityValue ){ + $unit = $value->getUnit(); if (strpos($unit, 'http://www.wikidata.org/entity/') !== false){ $value2 = array_pop(array_reverse($this->apiEntityLookup->getEntities([str_replace("http://www.wikidata.org/entity/","",$value->getUnit())]))); $number = $value2->getId()->getSerialization(); @@ -205,8 +224,8 @@ private function getReferencedEntities( EntityDocument $entity ) { $unit = $number; $entities[] = $number; } - } - } + } + } } return array_unique( $entities ); } From f33915e9ee23d12c12bf241ae9b6c0dd42b7d289 Mon Sep 17 00:00:00 2001 From: Dennis Diefenbach Date: Wed, 12 Jun 2019 14:54:34 +0200 Subject: [PATCH 4/5] Better formatting --- src/EntityImporter.php | 278 +++++++++++++++++++++-------------------- 1 file changed, 143 insertions(+), 135 deletions(-) diff --git a/src/EntityImporter.php b/src/EntityImporter.php index 84dd0ca..b5fd19d 100644 --- a/src/EntityImporter.php +++ b/src/EntityImporter.php @@ -18,107 +18,110 @@ use DataValues\UnboundedQuantityValue; use Wikibase\DataModel\Entity\ItemId; -class EntityImporter { +class EntityImporter +{ - private $statementsImporter; + private $statementsImporter; - private $badgeItemUpdater; + private $badgeItemUpdater; - private $apiEntityLookup; + private $apiEntityLookup; - private $entityStore; + private $entityStore; - private $entityMappingStore; + private $entityMappingStore; - private $logger; + private $logger; - private $statementsCountLookup; + private $statementsCountLookup; - private $idParser; + private $idParser; - private $importUser; + private $importUser; - private $batchSize; + private $batchSize; - public function __construct( - StatementsImporter $statementsImporter, - BadgeItemUpdater $badgeItemUpdater, - ApiEntityLookup $apiEntityLookup, - EntityStore $entityStore, - ImportedEntityMappingStore $entityMappingStore, - StatementsCountLookup $statementsCountLookup, - LoggerInterface $logger - ) { - $this->statementsImporter = $statementsImporter; - $this->badgeItemUpdater = $badgeItemUpdater; - $this->apiEntityLookup = $apiEntityLookup; - $this->entityStore = $entityStore; - $this->entityMappingStore = $entityMappingStore; - $this->statementsCountLookup = $statementsCountLookup; - $this->logger = $logger; + public function __construct( + StatementsImporter $statementsImporter, + BadgeItemUpdater $badgeItemUpdater, + ApiEntityLookup $apiEntityLookup, + EntityStore $entityStore, + ImportedEntityMappingStore $entityMappingStore, + StatementsCountLookup $statementsCountLookup, + LoggerInterface $logger + ) + { + $this->statementsImporter = $statementsImporter; + $this->badgeItemUpdater = $badgeItemUpdater; + $this->apiEntityLookup = $apiEntityLookup; + $this->entityStore = $entityStore; + $this->entityMappingStore = $entityMappingStore; + $this->statementsCountLookup = $statementsCountLookup; + $this->logger = $logger; - $this->idParser = new BasicEntityIdParser(); - $this->importUser = User::newFromId( 0 ); - $this->batchSize = 10; - } + $this->idParser = new BasicEntityIdParser(); + $this->importUser = User::newFromId(0); + $this->batchSize = 10; + } - public function importEntities( array $ids, $importStatements = true ) { - $batches = array_chunk( $ids, $this->batchSize ); + public function importEntities(array $ids, $importStatements = true) + { + $batches = array_chunk($ids, $this->batchSize); - $stashedEntities = array(); + $stashedEntities = array(); - foreach( $batches as $batch ) { - $entities = $this->apiEntityLookup->getEntities( $batch ); + foreach ($batches as $batch) { + $entities = $this->apiEntityLookup->getEntities($batch); - if ( $entities ) { - $this->importBadgeItems( $entities ); - } else { - $this->logger->error( 'Failed to retrieve items for batch' ); - } + if ($entities) { + $this->importBadgeItems($entities); + } else { + $this->logger->error('Failed to retrieve items for batch'); + } - $stashedEntities = array_merge( $stashedEntities, $this->importBatch( $batch ) ); - } + $stashedEntities = array_merge($stashedEntities, $this->importBatch($batch)); + } - if ( $importStatements === true ) { - foreach( $stashedEntities as $entity ) { - $referencedEntities = $this->getReferencedEntities( $entity ); - $this->importEntities( $referencedEntities, false ); + if ($importStatements === true) { + foreach ($stashedEntities as $entity) { + $referencedEntities = $this->getReferencedEntities($entity); + $this->importEntities($referencedEntities, false); $entity_new = $entity; - $statements_new = $entity->getStatements(); - $uri = WikibaseRepo::getDefaultInstance()->getSettings()->getSetting( 'conceptBaseUri' ); - foreach($statements_new as $key1 => $statement_new) { + $statements_new = $entity->getStatements(); + $uri = WikibaseRepo::getDefaultInstance()->getSettings()->getSetting('conceptBaseUri'); + foreach ($statements_new as $key1 => $statement_new) { $snak_new = $statement_new->getMainSnak(); if ($snak_new instanceof PropertyValueSnak) { $data_value_new = $snak_new->getDataValue(); if ($data_value_new instanceof UnboundedQuantityValue) { $unit = $data_value_new->getUnit(); if (strpos($unit, 'http://www.wikidata.org/entity/') !== false) { - $id = str_replace("http://www.wikidata.org/entity/", "", $unit); - $newid = $this->entityMappingStore->getLocalId(new ItemId($id)); + $id = str_replace("http://www.wikidata.org/entity/", "", $unit); + $newid = $this->entityMappingStore->getLocalId(new ItemId($id)); $data_value_new = new UnboundedQuantityValue($data_value_new->getAmount(), $uri . $newid); $snak_new = new PropertyValueSnak($snak_new->getPropertyId(), $data_value_new); $statement_new->setMainSnak($snak_new); } } - } - $snakList_new = $statement_new->getQualifiers(); - foreach($snakList_new as $key2 => $snak_new){ - if ($snak_new instanceof PropertyValueSnak) { + } + $snakList_new = $statement_new->getQualifiers(); + foreach ($snakList_new as $key2 => $snak_new) { + if ($snak_new instanceof PropertyValueSnak) { $data_value_new = $snak_new->getDataValue(); if ($data_value_new instanceof UnboundedQuantityValue) { - $unit = $data_value_new->getUnit(); + $unit = $data_value_new->getUnit(); if (strpos($unit, 'http://www.wikidata.org/entity/') !== false) { - $id = str_replace("http://www.wikidata.org/entity/", "", $unit); - $newid = $this->entityMappingStore->getLocalId(new ItemId($id)); + $id = str_replace("http://www.wikidata.org/entity/", "", $unit); + $newid = $this->entityMappingStore->getLocalId(new ItemId($id)); $data_value_new = new UnboundedQuantityValue($data_value_new->getAmount(), $uri . $newid); $snak_new = new PropertyValueSnak($snak_new->getPropertyId(), $data_value_new); - } - } - } - $snakList_new[$key2] = $snak_new; - } - } + } + } + } + $snakList_new[$key2] = $snak_new; + } + } $localId = $this->entityMappingStore->getLocalId($entity->getId()); if ($localId && !$this->statementsCountLookup->hasStatements($localId)) { @@ -130,109 +133,114 @@ public function importEntities( array $ids, $importStatements = true ) { } } } - } + } - private function importBatch( array $batch ) { - $entities = $this->apiEntityLookup->getEntities( $batch ); + private function importBatch(array $batch) + { + $entities = $this->apiEntityLookup->getEntities($batch); - if ( !is_array( $entities ) ) { - $this->logger->error( 'Failed to import batch' ); + if (!is_array($entities)) { + $this->logger->error('Failed to import batch'); - return array(); - } + return array(); + } - $stashedEntities = array(); + $stashedEntities = array(); - foreach( $entities as $originalId => $entity ) { - $stashedEntities[] = $entity->copy(); - $originalEntityId = $this->idParser->parse( $originalId ); + foreach ($entities as $originalId => $entity) { + $stashedEntities[] = $entity->copy(); + $originalEntityId = $this->idParser->parse($originalId); - if ( !$this->entityMappingStore->getLocalId( $originalEntityId ) ) { - try { - $this->logger->info( "Creating $originalId" ); + if (!$this->entityMappingStore->getLocalId($originalEntityId)) { + try { + $this->logger->info("Creating $originalId"); - $entityRevision = $this->createEntity( $entity ); - $localId = $entityRevision->getEntity()->getId(); - $this->entityMappingStore->add( $originalEntityId, $localId ); - } catch( \Exception $ex ) { - $this->logger->error( "Failed to add $originalId" ); - $this->logger->error( $ex->getMessage() ); - } - } else { - $this->logger->info( "$originalId already imported" ); - } - } + $entityRevision = $this->createEntity($entity); + $localId = $entityRevision->getEntity()->getId(); + $this->entityMappingStore->add($originalEntityId, $localId); + } catch (\Exception $ex) { + $this->logger->error("Failed to add $originalId"); + $this->logger->error($ex->getMessage()); + } + } else { + $this->logger->info("$originalId already imported"); + } + } - return $stashedEntities; - } + return $stashedEntities; + } - private function createEntity( EntityDocument $entity ) { - $entity->setId( null ); + private function createEntity(EntityDocument $entity) + { + $entity->setId(null); - $entity->setStatements( new StatementList() ); + $entity->setStatements(new StatementList()); - if ( $entity instanceof Item ) { - $siteLinkList = $this->badgeItemUpdater->replaceBadges( $entity->getSiteLinkList() ); - $entity->setSiteLinkList( $siteLinkList ); - } + if ($entity instanceof Item) { + $siteLinkList = $this->badgeItemUpdater->replaceBadges($entity->getSiteLinkList()); + $entity->setSiteLinkList($siteLinkList); + } - return $this->entityStore->saveEntity( - $entity, - 'Import entity', - $this->importUser, + return $this->entityStore->saveEntity( + $entity, + 'Import entity', + $this->importUser, EDIT_NEW | EntityContent::EDIT_IGNORE_CONSTRAINTS - ); - } + ); + } - private function getBadgeItems( array $entities ) { - $badgeItems = array(); + private function getBadgeItems(array $entities) + { + $badgeItems = array(); - foreach( $entities as $entity ) { - if ( !$entity instanceof Item ) { - continue; - } + foreach ($entities as $entity) { + if (!$entity instanceof Item) { + continue; + } - foreach( $entity->getSiteLinks() as $siteLink ) { - foreach( $siteLink->getBadges() as $badge ) { - $badgeItems[] = $badge->getSerialization(); - } - } - } + foreach ($entity->getSiteLinks() as $siteLink) { + foreach ($siteLink->getBadges() as $badge) { + $badgeItems[] = $badge->getSerialization(); + } + } + } - return $badgeItems; - } + return $badgeItems; + } - private function getReferencedEntities( EntityDocument $entity ) { + private function getReferencedEntities(EntityDocument $entity) + { $statements = $entity->getStatements(); $snaks = $statements->getAllSnaks(); $entities = array(); - foreach( $snaks as $key => $snak ) { + foreach ($snaks as $key => $snak) { $entities[] = $snak->getPropertyId()->getSerialization(); - if ( $snak instanceof PropertyValueSnak ) { + if ($snak instanceof PropertyValueSnak) { $value = $snak->getDataValue(); - if ( $value instanceof EntityIdValue ) { + if ($value instanceof EntityIdValue) { $entities[] = $value->getEntityId()->getSerialization(); - } - if ($value instanceof UnboundedQuantityValue ){ - $unit = $value->getUnit(); - if (strpos($unit, 'http://www.wikidata.org/entity/') !== false){ - $value2 = array_pop(array_reverse($this->apiEntityLookup->getEntities([str_replace("http://www.wikidata.org/entity/","",$value->getUnit())]))); + } + if ($value instanceof UnboundedQuantityValue) { + $unit = $value->getUnit(); + if (strpos($unit, 'http://www.wikidata.org/entity/') !== false) { + $value2 = array_pop(array_reverse($this->apiEntityLookup->getEntities([str_replace("http://www.wikidata.org/entity/", "", $value->getUnit())]))); $number = $value2->getId()->getSerialization(); $unit = $value->getUnit(); $unit = $number; $entities[] = $number; } - } - } + } + } } - return array_unique( $entities ); + return array_unique($entities); } - private function importBadgeItems( array $entities ) { - $badgeItems = $this->getBadgeItems( $entities ); - $this->importEntities( $badgeItems, false ); - } + private function importBadgeItems(array $entities) + { + $badgeItems = $this->getBadgeItems($entities); + $this->importEntities($badgeItems, false); + } } From 920f40d4ef638fdf7692de3b41cd759da93daaa5 Mon Sep 17 00:00:00 2001 From: Dennis Diefenbach Date: Wed, 12 Jun 2019 15:17:00 +0200 Subject: [PATCH 5/5] Removed other issue --- src/EntityImporter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/EntityImporter.php b/src/EntityImporter.php index b5fd19d..ec770eb 100644 --- a/src/EntityImporter.php +++ b/src/EntityImporter.php @@ -185,7 +185,7 @@ private function createEntity(EntityDocument $entity) $entity, 'Import entity', $this->importUser, - EDIT_NEW | EntityContent::EDIT_IGNORE_CONSTRAINTS + EDIT_NEW ); }