Created
August 15, 2016 19:31
-
-
Save adamzimmermann/79c43f662e008e930faf8df57808bbbd to your computer and use it in GitHub Desktop.
Drupal Migrate class for using images as a migration source.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Source Migration class for finding image tags in markup.
 *
 * The HTML is loaded once from the query built by contentQuery(), scanned for
 * images, and every unique image found is then handed out as one source row
 * by getNextRow().
 */
class ExampleMigrateSourceImage extends MigrateSource {

  /**
   * The string of HTML content.
   */
  private $content = '';

  /**
   * Per-record chunks of content, for parsing that cannot run on one string.
   *
   * Each entry is an array with the keys 'title', 'content' and 'id'.
   */
  private $contentArray = array();

  /**
   * The array of matching image tags found, keyed by the image identifier.
   */
  private $matches = array();

  /**
   * The array index of the current match being imported.
   */
  private $matchesCurrent = 0;

  /**
   * Indicates if the content has been queried and parsed.
   */
  private $contentImported = FALSE;

  /**
   * {@inheritdoc}
   */
  public function __construct($options = array()) {
    parent::__construct($options);
  }

  /**
   * {@inheritdoc}
   */
  public function __toString() {
    return t('Create and download images referenced from a string of markup.');
  }

  /**
   * Return the number of available source records.
   *
   * @return int
   *   The number of unique images found in the source content.
   */
  public function computeCount() {
    $this->importContent();
    return count($this->matches);
  }

  /**
   * Returns a list of fields available to be mapped from the source.
   *
   * @return array
   *   Field descriptions keyed by source field name.
   */
  public function fields() {
    return array(
      'alt' => t('Alt text'),
      'title' => t('Title text'),
      'url' => t('URL'),
      'credit' => t('Credit'),
      'filename' => t('Filename'),
      'node_title' => t('Title'),
    );
  }

  /**
   * Do whatever needs to be done to start a fresh traversal of the source data.
   *
   * This is always called at the start of an import, so tasks such as opening
   * file handles, running queries, and so on should be performed here. The
   * parsed matches are kept between traversals; only the cursor is reset.
   */
  public function performRewind() {
    $this->matchesCurrent = 0;
  }

  /**
   * Fetch the next row of data, returning it as an object.
   *
   * @return object|false
   *   A stdClass object carrying the values collected for one image, or FALSE
   *   when there is no more data available.
   */
  public function getNextRow() {
    $this->importContent();
    if ($this->matchesCurrent >= count($this->matches)) {
      return FALSE;
    }

    // The matches are keyed by image identifier rather than by position, so
    // slice by offset. The slice is stored in a variable first because
    // reset(), like array_shift(), takes its argument by reference.
    $slice = array_slice($this->matches, $this->matchesCurrent, 1);
    $match = reset($slice);

    // Add all of the values collected by findImages().
    $row = new stdClass();
    foreach ($match as $key => $value) {
      $row->{$key} = $value;
    }

    // Increment the current match counter.
    $this->matchesCurrent++;
    return $row;
  }

  /**
   * Find and parse the source data if it hasn't already been done.
   */
  private function importContent() {
    if ($this->contentImported) {
      return;
    }
    // Build the content string to parse for images.
    $this->buildContent();
    // Find the images in the string and populate the matches array.
    $this->findImages();
    // Note that the import has been completed and does not need to be
    // performed again.
    $this->contentImported = TRUE;
  }

  /**
   * Get all of the HTML that needs to be filtered for image tags and tokens.
   */
  private function buildContent() {
    $query = $this->contentQuery();
    $content = $query->execute()->fetchAll();
    if (empty($content)) {
      return;
    }
    foreach ($content as $item) {
      // This builds one long string for parsing that can be done on long
      // strings without using too much memory. Here, we add fields 'foo' and
      // 'bar' from the query.
      $this->content .= $item->foo;
      $this->content .= $item->bar;
      // This builds an array of content for parsing operations that need to be
      // performed on smaller chunks of the source data to avoid memory issues.
      // This is only required if you run into parsing issues, otherwise it
      // can be removed. The columns read here must be selected in
      // contentQuery(); any column that is not selected is stored as NULL.
      $this->contentArray[] = array(
        'title' => isset($item->post_title) ? $item->post_title : NULL,
        'content' => isset($item->post_content) ? $item->post_content : NULL,
        'id' => isset($item->id) ? $item->id : NULL,
      );
    }
  }

  /**
   * Creates the query that will be used to build the content string.
   *
   * @return object
   *   A SelectQuery object.
   */
  protected function contentQuery() {
    $query = Database::getConnection('default', 'example')->select('table', 't');
    $query->fields('t', array(
      'foo',
      'bar',
    ));
    $query->condition('t.foo', '', '!=');
    $query->condition('t.bar', '', '!=');
    return $query;
  }

  /**
   * Finds the desired elements in the markup.
   *
   * @return bool
   *   FALSE when there was no content to search, TRUE otherwise.
   */
  private function findImages() {
    // Verify that content was found.
    if (empty($this->content)) {
      $message = 'No HTML content with image tags to download could be found.';
      watchdog('example_migrate', $message);
      return FALSE;
    }
    // Find images where the entire source content string can be parsed at once.
    $this->findImageMethodOne();
    // Find images where the source content must be parsed in chunks.
    foreach ($this->contentArray as $post) {
      $this->findImageMethodTwo($post);
    }
    return TRUE;
  }

  /**
   * This is an example of an image finding method.
   *
   * Scans the whole content string for <img> tags with a regular expression
   * and records one entry in $this->matches per distinct image source.
   */
  private function findImageMethodOne() {
    // Create a regex to look through the content.
    $matches = array();
    $regex = '/<img\b[^>]*>/i';
    if (!preg_match_all($regex, $this->content, $matches, PREG_SET_ORDER)) {
      // Either nothing matched or the pattern failed; nothing to record.
      return;
    }

    foreach ($matches as $match) {
      $attributes = $this->parseImageAttributes($match[0]);
      $src = isset($attributes['src']) ? trim($attributes['src']) : '';
      if ($src === '') {
        // An image tag without a source cannot be downloaded.
        continue;
      }

      // Set a unique row identifier - here the image source as written in the
      // markup. You might need to perform cleanup on this value to
      // standardize it, as the path to /foo/bar/image.jpg,
      // example.com/foo/bar/image.jpg, and
      // http://example.com/foo/bar/image.jpg should not create three unique
      // source records. Standardizing the URL is key for not just avoiding
      // creating duplicate source records, but the URL is also the ID value
      // you will use in your destination class mapping callback that looks up
      // the resulting image entity ID from the data it finds in the body
      // field.
      $id = $src;

      // Be sure to set every value here that you will need when constructing
      // your entity later. 'credit' and 'custom_thing' are placeholders for
      // site-specific logic and are left NULL by this example.
      $this->matches[$id] = array(
        'url' => $src,
        'alt' => isset($attributes['alt']) ? $attributes['alt'] : '',
        'title' => isset($attributes['title']) ? $attributes['title'] : '',
        'credit' => NULL,
        'id' => $id,
        'filename' => basename((string) parse_url($src, PHP_URL_PATH)),
        'custom_thing' => NULL,
      );
    }
  }

  /**
   * Extracts the src, alt and title attributes from a single image tag.
   *
   * This is a lightweight regex parse; a DOM parser is more robust for
   * unusual markup.
   *
   * @param string $tag
   *   The full markup of one <img> tag.
   *
   * @return array
   *   The attribute values found, keyed by lower-case attribute name. Only
   *   the first occurrence of each attribute is kept.
   */
  private function parseImageAttributes($tag) {
    $attributes = array();
    $found = array();
    // The lookbehind keeps attributes such as data-src from matching as src.
    $regex = '/(?<![\w\-])(src|alt|title)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s>]+))/i';
    if (!preg_match_all($regex, $tag, $found, PREG_SET_ORDER)) {
      return $attributes;
    }
    foreach ($found as $attribute) {
      $name = strtolower($attribute[1]);
      if (isset($attributes[$name])) {
        continue;
      }
      // Trailing unmatched groups are omitted from the result, so the highest
      // group present is the alternative that actually matched: unquoted (4),
      // single-quoted (3) or double-quoted (2).
      if (isset($attribute[4])) {
        $value = $attribute[4];
      }
      elseif (isset($attribute[3])) {
        $value = $attribute[3];
      }
      else {
        $value = $attribute[2];
      }
      $attributes[$name] = html_entity_decode($value, ENT_QUOTES, 'UTF-8');
    }
    return $attributes;
  }

  /**
   * This is another example of an image finding method.
   *
   * @param array $post
   *   One chunk of content as built by buildContent(), with the keys 'title',
   *   'content' and 'id'.
   */
  private function findImageMethodTwo(array $post = array()) {
    // Some DOM library parsing code could live here. Then the images that were
    // found would be added to $this->matches just like they are added in the
    // findImageMethodOne() method.
  }

}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment