How does the Enterprise_PageCache_Model_Crawler class work in Magento? (Magento ver. 1.13.0.2)
Object: $this => Enterprise_PageCache_Model_Crawler. The following constants are used in this class:
Crawler Settings:
- const XML_PATH_CRAWLER_ENABLED = 'system/page_crawl/enable';
- const XML_PATH_CRAWLER_THREADS = 'system/page_crawl/threads';
- const XML_PATH_CRAWL_MULTICURRENCY = 'system/page_crawl/multicurrency';
Crawler User Agent Name:
- const USER_AGENT = 'MagentoCrawler';
The following variables are used in this class:
- $_visitedUrls
- $_app
- $_adapterFactory
Some important functions in this class and their purposes:
- crawl() : This function crawls all system URLs.
- _executeRequests() : This function prepares and executes requests for the given request_paths values.
- _getCrawlerThreads() : This function retrieves the number of crawler threads.
- _isCrawlerEnabled() : Checks whether crawler is enabled for given store.
Functions in class Enterprise_PageCache_Model_Crawler
-
/**
 * Initialize application, adapter factory
 *
 * NOTE(review): the body is empty in this listing - presumably elided from
 * the real class; confirm against the Magento source before relying on it.
 *
 * @param array $args
 */
public function __construct(array $args = array()) {}
-
/**
 * Internal constructor: sets the resource model by handing the
 * 'enterprise_pagecache/crawler' alias to _init().
 */
protected function _construct()
{
    $this->_init('enterprise_pagecache/crawler');
}
-
/**
 * Get internal links from page content
 *
 * NOTE(review): the body is empty in this listing - presumably elided from
 * the real class; confirm against the Magento source.
 *
 * @deprecated after 1.11.0.0
 * @param string $pageContent
 * @return array
 */
public function getUrls($pageContent){}
-
/**
 * Get configuration for stores base urls.
 *
 * Documented shape of the returned array:
 *   array(
 *       $index => array(
 *           'store_id' => $storeId,
 *           'base_url' => $url,
 *           'cookie'   => $cookie
 *       )
 *   )
 *
 * NOTE(review): the body is empty in this listing - presumably elided from
 * the real class; confirm against the Magento source.
 *
 * @return array
 */
public function getStoresInfo(){}
-
/**
 * Crawl all system urls
 *
 * NOTE(review): the body is empty in this listing - presumably elided from
 * the real class; confirm against the Magento source.
 *
 * @return Enterprise_PageCache_Model_Crawler
 */
public function crawl(){}
-
/**
 * Prepares and executes requests by given request_paths values
 *
 * NOTE(review): the body is empty in this listing - presumably elided from
 * the real class; confirm against the Magento source.
 *
 * @param array $info
 * @param Varien_Http_Adapter_Curl $adapter
 */
protected function _executeRequests(array $info, Varien_Http_Adapter_Curl $adapter){}
-
/**
 * Returns the configured crawler thread count for a store.
 *
 * Reads the value stored under self::XML_PATH_CRAWLER_THREADS from the
 * store resolved through $this->_app and casts it to an integer.
 *
 * @param int $storeId
 * @return int
 */
protected function _getCrawlerThreads($storeId)
{
    $store = $this->_app->getStore($storeId);
    $configuredThreads = $store->getConfig(self::XML_PATH_CRAWLER_THREADS);

    return (int)$configuredThreads;
}
-
/**
 * Checks whether crawler is enabled for given store
 *
 * Reads the value stored under self::XML_PATH_CRAWLER_ENABLED from the
 * store resolved through $this->_app. The value is cast to string and then
 * to bool, so the method always returns a boolean.
 *
 * @param int $storeId
 * @return bool
 */
protected function _isCrawlerEnabled($storeId)
{
    // The string cast is kept from the original; presumably it normalizes a
    // non-scalar config value before the boolean cast - confirm against
    // what getConfig() actually returns.
    return (bool)(string)$this->_app->getStore($storeId)
        ->getConfig(self::XML_PATH_CRAWLER_ENABLED);
}
Sample data for cron.php
Variables:
URL :https://localhost/oscp-store/mage-ent-test/cron.php
-
$_visitedUrls = array
[0]$_app = Mage_Core_Model_App $_areas = Array [1] global = Mage_Core_Model_App_Area $_store = Mage_Core_Model_Store $_underscoreCache = Array [7] IsDefault = is_default DefaultGroupId = default_group_id JobCode = job_code ScheduledAt = scheduled_at ExecutedAt = executed_at Status = status FinishedAt = finished_at $_cacheTag = true $_eventPrefix = store $_eventObjectstore $_priceFilter = <Uninitialized> $_website = Mage_Core_Model_Website $_group = Mage_Core_Model_Store_Group $_configCache = <Uninitialized> $_configCacheBaseNodes = Array [10] $_dirCache = Array [0] $_urlCache = Array [0] $_baseUrlCache = Array [0] $_session = <Uninitialized> $_isAdminSecure = <Uninitialized> $_isFrontSecure = <Uninitialized> $_frontendName = <Uninitialized> $_isReadOnly = false $_resourceName = core/store $_resource = <Uninitialized> $_resourceCollectionName = core/store_collection $_dataSaveAllowed = true $_isObjectNew = <Uninitialized> $_data = Array [7] $_hasDataChanges = true $_origData = Array [7] $_idFieldName =<Uninitialized> $_isDeleted = false $_oldFieldsMap = Array [0] $_syncFieldsMap = Array [0] $_website = Mage_Core_Model_Website $_underscoreCache = Array [7] IsDefault = is_default DefaultGroupId = default_group_id JobCode = job_code ScheduledAt = scheduled_at ExecutedAt = executed_at Status = status FinishedAt = finished_at $_cacheTag = true $_eventPrefix = website $_eventObject = website $_configCache = Array [0] $_groups = Array [1] $_groupIds = Array [1] $_groupsCount = 1 $_stores = Array [3] $_storeIds = Array [3] $_storeCodes = Array [3] $_storesCount = 3 $_defaultGroup = Mage_Core_Model_Store_Group $_defaultStore = Mage_Core_Model_Store $_isCanDelete = <Uninitialized> $_isReadOnly = false $_resourceName = core/website $_resource = <Uninitialized> $_resourceCollectionName = core/website_collection $_dataSaveAllowed = true $_isObjectNew = <Uninitialized> $_data = Array [10] $_hasDataChanges = true $_origData = Array [10] $_idFieldName = website_id $_isDeleted = 
false $_oldFieldsMap = Array [0] $_syncFieldsMap = Array [0] $_locale = <Uninitialized> $_translator = <Uninitialized> $_design = <Uninitialized> $_layout = <Uninitialized> $_config = Mage_Core_Model_Config $_useCache = false $_cacheSections = Array [6] $_cacheLoadedSections = Array [0] $_options = Mage_Core_Model_Config_Options $_classNameCache = Array [1] $_blockClassNameCache = Array [0] $_secureUrlCache = Array [0] $_distroServerVars = <Uninitialized> $_substServerVars = <Uninitialized> $_resourceModel = Mage_Core_Model_Resource_Config $_eventAreas = Array [2] $_dirExists = Array [0] $_allowCacheForInit = true $_cachePartsForSave = Array [0] $_prototype = Mage_Core_Model_Config_Base $_isLocalConfigLoaded = true $_baseDirCache = Array [0] $_customEtcDir = <Uninitialized> $_canUseLocalModules = true $_moduleNamespaces = <Uninitialized> $_allowedModules = Array [0] $_xml = Mage_Core_Model_Config_Element $_cacheId = config_global $_cacheTags = Array [0] $_cacheLifetime = <Uninitialized> $_cacheChecksum = <Uninitialized> $_cacheSaved = false $_cache = <Uninitialized> $_elementClass = Mage_Core_Model_Config_Element $_xpathExtends = //*[@extends] $_frontController = <Uninitialized> $_cache = Mage_Core_Model_Cache $_idPrefix = d4c_ $_frontend = Varien_Cache_Core $_shmBackends = Array [6] $_defaultBackend = File $_defaultBackendOptions = Array [4] $_requestProcessors = Array [1] $_disallowSave = false $_allowedCacheOptions = Array [9] $_dbConnection = core_write $_useCache = <Uninitialized> $_websites = Array [6] 0 = Mage_Core_Model_Website admin = Mage_Core_Model_Website 1 = Mage_Core_Model_Website base = Mage_Core_Model_Website 2 = Mage_Core_Model_Website private = Mage_Core_Model_Website $_groups = Array [3] 0 = Mage_Core_Model_Store_Group 1 = Mage_Core_Model_Store_Group 2 = Mage_Core_Model_Store_Group $_stores = Array [10] 0 = Mage_Core_Model_Store admin = Mage_Core_Model_Store 1 = Mage_Core_Model_Store default = Mage_Core_Model_Store 3 = Mage_Core_Model_Store 
french = Mage_Core_Model_Store 2 = Mage_Core_Model_Store german = Mage_Core_Model_Store 4 = Mage_Core_Model_Store privatesales = Mage_Core_Model_Store $_isSingleStore = false $_isSingleStoreAllowed = true $_currentStore = admin $_request = Mage_Core_Controller_Request_Http $_response = <Uninitialized> $_events = Array [2] global = Array [10] crontab = Array [10] $_updateMode = false $_useSessionInUrl = false $_useSessionVar = false $_isCacheLocked = false
$_adapterFactory = Enterprise_PageCache_Model_Adapter_Factory
Note: To debug crawler.php, I made the following change in the config.xml file:
Path: app\code\core\Enterprise\PageCache\etc\config.xml
<!-- Cron job enterprise_page_cache_crawler: runs enterprise_pagecache/crawler::crawl
     on the "* * * * *" schedule (every minute in standard cron syntax). -->
<crontab>
    <jobs>
        <enterprise_page_cache_crawler>
            <schedule>
                <cron_expr>* * * * *</cron_expr>
            </schedule>
            <run>
                <model>enterprise_pagecache/crawler::crawl</model>
            </run>
        </enterprise_page_cache_crawler>
    </jobs>
</crontab>