Last active
March 12, 2019 19:50
-
-
Save ursbraem/098c7b66df4b7784b6662d65aeac112e to your computer and use it in GitHub Desktop.
Mask indexer for ke_search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace STUBR\StubrIndexer; | |
/* | |
* This file is part of the TYPO3 CMS project. | |
* | |
* It is free software; you can redistribute it and/or modify it under | |
* the terms of the GNU General Public License, either version 2 | |
* of the License, or any later version. | |
* | |
* For the full copyright and license information, please read the | |
* LICENSE.txt file that was distributed with this source code. | |
* | |
* The TYPO3 project - inspiring people to share! | |
*/ | |
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; | |
use TYPO3\CMS\Core\Utility\GeneralUtility; | |
/** | |
* This class implements a hook into the TYPO3 ke_search extension. | |
* | |
* @author Urs Bräm <[email protected]> | |
*/ | |
class RegisterMaskcontentIndexer {

    // TODO: extend from a shared abstract base class

    /**
     * Extension key of this extension; used to resolve the icon path.
     *
     * @var string
     */
    private $extension_name = 'stubr_indexer';

    /**
     * Icon file, appended to the extension path, shown for this indexer
     * in the backend.
     *
     * @var string
     */
    private $icon_file = 'Icons/mask.gif';

    /**
     * Description for the indexer that will appear in the backend.
     *
     * @var string
     */
    private $indexer_description = 'Content (mask)';

    /**
     * Type key under which this indexer is registered and by which
     * customIndexer() recognises its own configurations.
     *
     * @var string
     */
    private $indexer_type = 'mask_content';

    /**
     * Hook method used by ke_search to register indexer configurations.
     *
     * Appends one entry (description, type key, icon path) to
     * $params['items'].
     *
     * @param array $params Hook parameters; modified by reference
     * @param mixed $pObj   Parent object passed by the hook caller (unused)
     * @return void
     */
    public function registerIndexerConfiguration(&$params, $pObj) {
        // NOTE(review): extRelPath() may be unavailable in newer TYPO3
        // versions - confirm against the targeted core version.
        $path = ExtensionManagementUtility::extRelPath($this->extension_name);

        $params['items'][] = array(
            $this->indexer_description,
            $this->indexer_type,
            $path . $this->icon_file
        );
    }

    /**
     * Hook method used by ke_search to run a custom indexer.
     *
     * Only reacts to configurations whose 'type' equals this indexer's
     * type key; for any other configuration (or one without a type) an
     * empty string is returned and nothing is instantiated.
     *
     * @param array $indexerConfig Configuration from the TYPO3 backend
     * @param mixed $indexerObject Indexer object handed to the type indexer
     * @return string Status of the indexer run, '' if not responsible
     */
    public function customIndexer(&$indexerConfig, &$indexerObject) {
        // Guard clause: a missing or foreign type is none of our business.
        if (!isset($indexerConfig['type'])
            || $indexerConfig['type'] !== $this->indexer_type
        ) {
            return '';
        }

        // Kept in a local variable: the class declares no objectManager
        // property, and creating one on the fly is deprecated in PHP 8.2.
        $objectManager = GeneralUtility::makeInstance(
            'TYPO3\CMS\Extbase\Object\ObjectManager'
        );

        // Fresh type indexer instance, constructed with the parent indexer.
        $indexer = $objectManager->get(
            'STUBR\StubrIndexer\TypesMaskcontent',
            $indexerObject
        );

        return $indexer->startIndexing();
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace STUBR\StubrIndexer; | |
/* | |
* This file is part of the TYPO3 CMS project. | |
* | |
* It is free software; you can redistribute it and/or modify it under | |
* the terms of the GNU General Public License, either version 2 | |
* of the License, or any later version. | |
* | |
* For the full copyright and license information, please read the | |
* LICENSE.txt file that was distributed with this source code. | |
* | |
* The TYPO3 project - inspiring people to share! | |
*/ | |
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; | |
use TYPO3\CMS\Core\Utility\GeneralUtility; | |
/** | |
* This class implements a hook into the TYPO3 ke_search extension. | |
* | |
* @author Urs Bräm <[email protected]> | |
*/ | |
class RegisterMaskpageIndexer {

    // TODO: extend from a shared abstract base class

    /**
     * Extension key of this extension; used to resolve the icon path.
     *
     * @var string
     */
    private $extension_name = 'stubr_indexer';

    /**
     * Icon file, appended to the extension path, shown for this indexer
     * in the backend.
     *
     * @var string
     */
    private $icon_file = 'Icons/mask.gif';

    /**
     * Description for the indexer that will appear in the backend.
     *
     * @var string
     */
    private $indexer_description = 'Pages (mask)';

    /**
     * Type key under which this indexer is registered and by which
     * customIndexer() recognises its own configurations.
     *
     * @var string
     */
    private $indexer_type = 'mask_page';

    /**
     * Hook method used by ke_search to register indexer configurations.
     *
     * Appends one entry (description, type key, icon path) to
     * $params['items'].
     *
     * @param array $params Hook parameters; modified by reference
     * @param mixed $pObj   Parent object passed by the hook caller (unused)
     * @return void
     */
    public function registerIndexerConfiguration(&$params, $pObj) {
        // NOTE(review): extRelPath() may be unavailable in newer TYPO3
        // versions - confirm against the targeted core version.
        $path = ExtensionManagementUtility::extRelPath($this->extension_name);

        $params['items'][] = array(
            $this->indexer_description,
            $this->indexer_type,
            $path . $this->icon_file
        );
    }

    /**
     * Hook method used by ke_search to run a custom indexer.
     *
     * Only reacts to configurations whose 'type' equals this indexer's
     * type key; for any other configuration (or one without a type) an
     * empty string is returned and nothing is instantiated.
     *
     * @param array $indexerConfig Configuration from the TYPO3 backend
     * @param mixed $indexerObject Indexer object handed to the type indexer
     * @return string Status of the indexer run, '' if not responsible
     */
    public function customIndexer(&$indexerConfig, &$indexerObject) {
        // Guard clause: a missing or foreign type is none of our business.
        if (!isset($indexerConfig['type'])
            || $indexerConfig['type'] !== $this->indexer_type
        ) {
            return '';
        }

        // Kept in a local variable: the class declares no objectManager
        // property, and creating one on the fly is deprecated in PHP 8.2.
        $objectManager = GeneralUtility::makeInstance(
            'TYPO3\CMS\Extbase\Object\ObjectManager'
        );

        // Fresh type indexer instance, constructed with the parent indexer.
        $indexer = $objectManager->get(
            'STUBR\StubrIndexer\TypesMaskpage',
            $indexerObject
        );

        return $indexer->startIndexing();
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace STUBR\StubrIndexer; | |
/* | |
* This file is part of the TYPO3 CMS project. | |
* | |
* It is free software; you can redistribute it and/or modify it under | |
* the terms of the GNU General Public License, either version 2 | |
* of the License, or any later version. | |
* | |
* For the full copyright and license information, please read the | |
* LICENSE.txt file that was distributed with this source code. | |
* | |
* The TYPO3 project - inspiring people to share! | |
*/ | |
use STUBR\StubrIndexer\RegisterMaskcontentIndexer; | |
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; | |
/* Oldschool require for non namespaced classes */ | |
require_once( | |
ExtensionManagementUtility::extPath('ke_search') | |
. 'Classes/indexer/types/class.tx_kesearch_indexer_types_tt_content.php' | |
); | |
/** | |
* | |
* @author Urs Bräm <[email protected]> | |
*/ | |
class TypesMaskcontent extends \tx_kesearch_indexer_types_tt_content {

    /**
     * CType value the tt_content query of this indexer is restricted to.
     *
     * @var string
     */
    private $indexer_type = 'mask_content';

    /**
     * Restricts the CType where clause to this indexer's type and then
     * delegates to the parent implementation.
     *
     * @param int $uid uid of the page
     * @return void
     */
    public function getPageContent($uid) {
        // Single source of truth: the clause is built from $indexer_type
        // instead of repeating the literal.
        $this->whereClauseForCType = sprintf(
            'CType="%s"',
            $this->indexer_type
        );

        parent::getPageContent($uid);
    }

    /**
     * Returns the plain-text content of one content element, taken from
     * its 'pi_flexform' field.
     *
     * @param array $ttContentRow A row from tt_content
     * @return string Plain text; '' when the row has no flexform data
     */
    public function getContentFromContentElement($ttContentRow) {
        // A row without (or with NULL) flexform data yields empty content
        // rather than an undefined-index notice.
        $flexformData = isset($ttContentRow['pi_flexform'])
            ? $ttContentRow['pi_flexform']
            : '';

        return $this->parseFlexformData($flexformData);
    }

    /**
     * Reduces flexform data to plain text.
     *
     * Steps: strip the tags of the raw data, decode HTML entities, keep
     * words of neighbouring block elements apart, strip the tags that the
     * decoding exposed, and collapse whitespace runs.
     *
     * @param string $flexformData TYPO3 flexform data from tt_content
     * @return string
     */
    protected function parseFlexformData($flexformData) {
        // 1. strip the tags of the raw data, 2. decode the entity-encoded
        //    content that remains.
        $decoded = html_entity_decode(strip_tags((string)$flexformData));

        // Put a space in front of common block-level tags so that
        // "<p>Hello</p><p>World</p>" does not end up as "HelloWorld".
        $decoded = str_replace(
            array('<td', '<br', '<p', '<li'),
            array(' <td', ' <br', ' <p', ' <li'),
            $decoded
        );

        // 3. strip the tags exposed by the decoding step.
        $plainText = strip_tags($decoded);

        // Collapse every run of two or more whitespace characters into a
        // single space (a lone whitespace character is left untouched).
        return preg_replace('/\s\s+/', ' ', $plainText);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace STUBR\StubrIndexer; | |
/* | |
* This file is part of the TYPO3 CMS project. | |
* | |
* It is free software; you can redistribute it and/or modify it under | |
* the terms of the GNU General Public License, either version 2 | |
* of the License, or any later version. | |
* | |
* For the full copyright and license information, please read the | |
* LICENSE.txt file that was distributed with this source code. | |
* | |
* The TYPO3 project - inspiring people to share! | |
*/ | |
use STUBR\StubrIndexer\RegisterMaskcontentIndexer; | |
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; | |
/* Oldschool require for non namespaced classes */ | |
require_once( | |
ExtensionManagementUtility::extPath('ke_search') | |
. '/Classes/lib/class.tx_kesearch_lib_helper.php' | |
); | |
/** | |
* | |
* @author Urs Bräm <[email protected]> | |
*/ | |
class TypesMaskpage extends \tx_kesearch_indexer_types_page {

    /**
     * Collects the content of all matching content elements of one page,
     * grouped by language, and writes one index entry per language.
     *
     * Files attached to or linked from the content elements are handed to
     * indexFiles() along the way. Pages without any matching content
     * element only increase $this->counterWithoutContent.
     *
     * @param int $uid uid of the page that has to be indexed
     * @return void
     */
    public function getPageContent($uid) {
        $table = 'tt_content';

        // Columns fetched for every content element.
        // NOTE(review): the query can only succeed if tt_content really has
        // a tx_mask_plaintext column - confirm the mask field exists.
        $fields = 'uid, pid, header, bodytext, CType, sys_language_uid, header_layout, fe_group, subheader, tx_mask_plaintext';

        // hook to modify the page content fields
        $fieldHooks = isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPageContentFields'])
            ? $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPageContentFields']
            : null;
        if (is_array($fieldHooks)) {
            foreach ($fieldHooks as $classRef) {
                $hookObject = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($classRef);
                $hookObject->modifyPageContentFields(
                    $fields, $this
                );
            }
        }

        $where = 'pid = ' . intval($uid);
        $where .= ' AND (' . $this->whereClauseForCType . ')';

        // do not index gridelement columns with colPos = -2 (= invalid)
        if (\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded('gridelements')) {
            $where .= ' AND colPos <> -2 ';
        }

        $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields($table);
        $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($table);

        // Access restrictions of the page apply to all of its content
        // elements; for the file entries and the index entry only these
        // page-level values are used.
        $pageAccessRestrictions = $this->getInheritedAccessRestrictions($uid);

        // ke_search tags of the current page ...
        $pageUid = intval($uid);
        $tags = isset($this->pageRecords[$pageUid]['tags'])
            ? $this->pageRecords[$pageUid]['tags']
            : '';

        // ... extended by the system categories
        \tx_kesearch_helper::makeSystemCategoryTags($tags, $uid, $table);

        // NOTE(review): looks like a debugging aid (nothing below reads the
        // stored query) - confirm whether it can be dropped.
        $GLOBALS['TYPO3_DB']->store_lastBuiltQuery = 1;
        $ttContentRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows($fields, $table, $where);

        // No rows (or a failed query, which does not yield an array):
        // count the page as "without content" and stop.
        if (!is_array($ttContentRows) || count($ttContentRows) === 0) {
            $this->counterWithoutContent++;
            return;
        }

        // Compile the content per language from the individual elements.
        $pageContent = array();
        foreach ($ttContentRows as $ttContentRow) {
            $content = ' ';

            // add the header only if it is not set to "hidden" (layout 100)
            if ($ttContentRow['header_layout'] != 100) {
                $content .= strip_tags($ttContentRow['header']) . "\n";
            }

            // File CTypes contribute their attached files only; every other
            // CType contributes its text plus the files linked from it.
            if (in_array($ttContentRow['CType'], $this->fileCTypes)) {
                $fileObjects = $this->findAttachedFiles($ttContentRow);
            } else {
                $fileObjects = $this->findLinkedFilesInRte($ttContentRow);
                $content .= $this->getContentFromContentElement($ttContentRow) . "\n";
            }

            // Files are indexed regardless of "index_content_with_restrictions";
            // only a hidden page keeps them out.
            if (!$pageAccessRestrictions['hidden']) {
                $this->indexFiles($fileObjects, $ttContentRow, $pageAccessRestrictions['fe_group'], $tags);
            }

            // Text goes into the page content only if the element is not
            // access protected, or protected elements are indexed by
            // configuration.
            if ($this->indexerConfig['index_content_with_restrictions'] == 'yes'
                || $ttContentRow['fe_group'] == ''
                || $ttContentRow['fe_group'] == '0'
            ) {
                $languageUid = $ttContentRow['sys_language_uid'];
                if (!isset($pageContent[$languageUid])) {
                    $pageContent[$languageUid] = '';
                }
                $pageContent[$languageUid] .= $content;
            }
        }

        // Values below are local copies so that hooks may modify them.
        $indexerConfig = $this->indexerConfig;
        $additionalFields = array();
        $indexEntryDefaultValues = array(
            'type' => 'page',
            'uid' => $uid,
            'params' => '',
            'feGroupsPages' => $pageAccessRestrictions['fe_group'],
            'debugOnly' => FALSE
        );

        // hook for custom modifications of the indexed data, e. g. the tags
        $entryHooks = isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPagesIndexEntry'])
            ? $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyPagesIndexEntry']
            : null;
        if (is_array($entryHooks)) {
            foreach ($entryHooks as $classRef) {
                $hookObject = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($classRef);
                $hookObject->modifyPagesIndexEntry(
                    $uid, $pageContent, $tags, $this->cachedPageRecords, $additionalFields, $indexerConfig, $indexEntryDefaultValues, $this
                );
            }
        }

        // A hook may have replaced the page content; only arrays are stored.
        if (!is_array($pageContent)) {
            return;
        }

        // store one record per language in the index table
        foreach ($pageContent as $language_uid => $content) {
            if ($pageAccessRestrictions['hidden']
                || !$this->checkIfpageShouldBeIndexed($uid, $language_uid)
            ) {
                continue;
            }

            // NOTE(review): the overlay copy is taken before fe_group is
            // refreshed from $indexEntryDefaultValues, so a hook-modified
            // 'feGroupsPages' reaches the overlay only from the second
            // stored language on - confirm this order is intended.
            $accessRestrictionsLanguageOverlay = $pageAccessRestrictions;
            $pageAccessRestrictions['fe_group'] = $indexEntryDefaultValues['feGroupsPages'];

            // non-empty values of the language overlay win
            if ($language_uid > 0) {
                foreach (array('fe_group', 'starttime', 'endtime') as $overlayField) {
                    if (!empty($this->cachedPageRecords[$language_uid][$uid][$overlayField])) {
                        $accessRestrictionsLanguageOverlay[$overlayField] = $this->cachedPageRecords[$language_uid][$uid][$overlayField];
                    }
                }
            }

            $title = $this->cachedPageRecords[$language_uid][$uid]['title'];

            $this->pObj->storeInIndex(
                $indexerConfig['storagepid'],                    // storage PID
                $title,                                          // page title
                $indexEntryDefaultValues['type'],                // content type
                $indexEntryDefaultValues['uid'],                 // target PID
                $content,                                        // indexed content
                $tags,                                           // tags
                $indexEntryDefaultValues['params'],              // typolink params
                $this->buildMaskAbstract($content),              // abstract
                $language_uid,                                   // language uid
                $accessRestrictionsLanguageOverlay['starttime'], // starttime
                $accessRestrictionsLanguageOverlay['endtime'],   // endtime
                $accessRestrictionsLanguageOverlay['fe_group'],  // fe_group
                $indexEntryDefaultValues['debugOnly'],           // debug only?
                $additionalFields                                // fields added by hooks
            );
            $this->counter++;
        }
    }

    /**
     * Builds the abstract of an index entry from the page content.
     *
     * The content is cut at the first space, carriage return or line feed
     * found at or after byte offset $minLength, and ' …' is appended.
     * Content that is not longer than $minLength bytes, or that has no such
     * character after the offset, is returned unchanged.
     *
     * @param string $content   Compiled page content
     * @param int    $minLength Byte offset from which a cut position is searched
     * @return string
     */
    protected function buildMaskAbstract($content, $minLength = 200) {
        $content = (string)$content;

        // Line breaks count as word boundaries too. The replacement is
        // byte-for-byte, so offsets are valid for $content as well.
        $singleLine = preg_replace("/\r|\n/", ' ', $content);

        // strpos() must not be given an offset beyond the end of the string
        // (warning on PHP 7, ValueError on PHP 8).
        if (!is_string($singleLine) || strlen($singleLine) <= $minLength) {
            return $content;
        }

        $cutPosition = strpos($singleLine, ' ', $minLength);
        if ($cutPosition === false) {
            return $content;
        }

        return substr($content, 0, $cutPosition) . ' …';
    }

    /**
     * Extracts the content of a content element and returns it as plain
     * text for writing it directly to the index.
     *
     * Header, subheader, bodytext and tx_mask_plaintext are joined with
     * spaces, tags are stripped, registered hooks may modify the text, and
     * whitespace runs are collapsed at the end.
     *
     * @author Christian Bülter
     * @since 24.09.13
     * @param array $ttContentRow content element
     * @return string
     */
    public function getContentFromContentElement($ttContentRow) {
        $bodytext = implode(' ', array(
            $ttContentRow['header'],
            $ttContentRow['subheader'],
            $ttContentRow['bodytext'],
            $ttContentRow['tx_mask_plaintext']
        ));

        // prevents having words one after the other like: HelloAllTogether
        $bodytext = str_replace(
            array('<td', '<br', '<p', '<li'),
            array(' <td', ' <br', ' <p', ' <li'),
            $bodytext
        );

        // replace table dividers with whitespace
        if ($ttContentRow['CType'] == 'table') {
            $bodytext = str_replace('|', ' ', $bodytext);
        }

        $bodytext = strip_tags($bodytext);

        // hook for modifying a content element's content
        $contentHooks = isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyContentFromContentElement'])
            ? $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyContentFromContentElement']
            : null;
        if (is_array($contentHooks)) {
            foreach ($contentHooks as $classRef) {
                $hookObject = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($classRef);
                $hookObject->modifyContentFromContentElement(
                    $bodytext, $ttContentRow, $this
                );
            }
        }

        // Collapse every run of two or more whitespace characters into a
        // single space (a lone whitespace character is left untouched).
        $bodytext = preg_replace('/\s\s+/', ' ', $bodytext);

        return $bodytext;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment